package org.wikipedia.miner.db;

import com.sleepycat.je.CheckpointConfig;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.EnvironmentLockedException;
import gnu.trove.TIntHashSet;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import javax.xml.stream.XMLStreamException;
import org.apache.log4j.Logger;
import org.wikipedia.miner.db.WDatabase;
import org.wikipedia.miner.db.struct.DbIntList;
import org.wikipedia.miner.db.struct.DbLabelForPageList;
import org.wikipedia.miner.db.struct.DbLinkLocationList;
import org.wikipedia.miner.db.struct.DbPage;
import org.wikipedia.miner.db.struct.DbPageLinkCounts;
import org.wikipedia.miner.db.struct.DbTranslations;
import org.wikipedia.miner.util.ProgressTracker;
import org.wikipedia.miner.util.WikipediaConfiguration;
import org.wikipedia.miner.util.text.TextProcessor;

/* loaded from: input_file:org/wikipedia/miner/db/WEnvironment.class */
public class WEnvironment {
    // Configuration supplied to whichever constructor created this instance.
    private WikipediaConfiguration conf;
    // Handle on the Berkeley DB JE environment opened on conf.getDatabaseDirectory().
    private Environment env;
    // Worker that caches the configured databases. Only the public constructor assigns it,
    // so it stays null for instances created through the private constructor.
    private PreparationThread prepThread;

    // Database handles below are all created and registered by initDatabases().
    private WDatabase<Integer, DbPage> dbPage;
    private LabelDatabase dbLabel;
    // Label databases for specific text processors, keyed by TextProcessor.getName();
    // entries are added on demand by getDbLabel(TextProcessor).
    private HashMap<String, LabelDatabase> processedLabelDbs;
    private WDatabase<Integer, DbLabelForPageList> dbLabelsForPage;
    private WDatabase<String, Integer> dbArticlesByTitle;
    private WDatabase<String, Integer> dbCategoriesByTitle;
    private WDatabase<String, Integer> dbTemplatesByTitle;
    private WDatabase<Integer, Integer> dbRedirectTargetBySource;
    private WDatabase<Integer, DbIntList> dbRedirectSourcesByTarget;
    private WDatabase<Integer, DbLinkLocationList> dbPageLinkIn;
    private WDatabase<Integer, DbIntList> dbPageLinkInNoSentences;
    private WDatabase<Integer, DbLinkLocationList> dbPageLinkOut;
    private WDatabase<Integer, DbIntList> dbPageLinkOutNoSentences;
    private PageLinkCountDatabase dbPageLinkCounts;
    private WDatabase<Integer, DbIntList> dbCategoryParents;
    private WDatabase<Integer, DbIntList> dbArticleParents;
    private WDatabase<Integer, DbIntList> dbChildCategories;
    private WDatabase<Integer, DbIntList> dbChildArticles;
    private MarkupDatabase dbMarkup;
    private WDatabase<Integer, DbIntList> dbSentenceSplits;
    private WDatabase<Integer, DbTranslations> dbTranslations;
    // Keyed by StatisticName.ordinal() (see retrieveStatistic).
    private WDatabase<Integer, Long> dbStatistics;
    // Every database registered by initDatabases(), looked up by its DatabaseType.
    private HashMap<WDatabase.DatabaseType, WDatabase> databasesByType;

    /* loaded from: input_file:org/wikipedia/miner/db/WEnvironment$PreparationThread.class */
    private class PreparationThread extends Thread {
        WikipediaConfiguration conf;
        private ProgressTracker tracker;
        private boolean completed = false;
        private Exception failureCause = null;

        PreparationThread(WikipediaConfiguration wikipediaConfiguration) {
            this.conf = wikipediaConfiguration;
        }

        public boolean isCompleted() {
            return this.completed;
        }

        public boolean failed() {
            return this.failureCause != null;
        }

        public double getProgress() {
            if (this.completed) {
                return 1.0d;
            }
            if (this.tracker == null) {
                return 0.0d;
            }
            return this.tracker.getGlobalProgress();
        }

        public ProgressTracker getTracker() {
            return this.tracker;
        }

        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            doPreparation();
        }

        public void doPreparation() {
            boolean z = this.conf.getMinLinksIn() > 0 && !this.conf.getDatabasesToCache().isEmpty() && this.conf.getArticlesOfInterest() == null;
            int size = this.conf.getDatabasesToCache().size() + 1;
            if (z) {
                size++;
            }
            this.tracker = new ProgressTracker(size, WEnvironment.class);
            try {
                this.tracker.startTask(1L, "Connecting to database");
                WEnvironment.this.dbStatistics.cache(this.conf, null);
                this.tracker.update();
                if (z) {
                    this.conf.setArticlesOfInterest(WEnvironment.this.getValidArticleIds(this.conf.getMinLinksIn(), this.tracker));
                }
                for (WDatabase.DatabaseType databaseType : this.conf.getDatabasesToCache()) {
                    if (databaseType == WDatabase.DatabaseType.label) {
                        WEnvironment.this.getDbLabel(this.conf.getDefaultTextProcessor()).cache(this.conf, this.tracker);
                    } else {
                        WEnvironment.this.getDatabase(databaseType).cache(this.conf, this.tracker);
                    }
                }
                this.conf.setArticlesOfInterest(null);
                System.gc();
            } catch (Exception e) {
                this.failureCause = e;
            }
            this.completed = true;
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/db/WEnvironment$StatisticName.class */
    /**
     * Names of the values held in the statistics database.
     *
     * <p>{@code retrieveStatistic} looks a value up by {@code ordinal()}, so the declaration
     * order of these constants is significant: reordering or inserting constants changes
     * which key each name maps to.
     */
    public enum StatisticName {
        articleCount,
        categoryCount,
        disambiguationCount,
        redirectCount,
        lastEdit,
        maxCategoryDepth,
        rootCategoryId
    }

    /**
     * @return the configuration that was passed to this environment's constructor
     */
    public WikipediaConfiguration getConfiguration() {
        return conf;
    }

    /**
     * @return the database registered under {@code DatabaseType.page}
     */
    public WDatabase<Integer, DbPage> getDbPage() {
        return dbPage;
    }

    /**
     * Returns the label database for the given text processor.
     *
     * <p>With a null processor the plain label database is returned. Otherwise the database
     * is looked up by the processor's name, and created and remembered on first request.
     *
     * @param textProcessor the text processor whose label database is wanted, or null
     * @return the matching label database, never created twice for the same processor name
     */
    public LabelDatabase getDbLabel(TextProcessor textProcessor) {
        if (textProcessor == null) {
            return dbLabel;
        }

        final LabelDatabase known = processedLabelDbs.get(textProcessor.getName());
        if (known != null) {
            return known;
        }

        final LabelDatabase created = new LabelDatabase(this, textProcessor);
        processedLabelDbs.put(textProcessor.getName(), created);
        return created;
    }

    /**
     * @return the database registered under {@code DatabaseType.pageLabel}
     */
    public WDatabase<Integer, DbLabelForPageList> getDbLabelsForPage() {
        return dbLabelsForPage;
    }

    /**
     * @return the title database registered under {@code DatabaseType.articlesByTitle}
     */
    public WDatabase<String, Integer> getDbArticlesByTitle() {
        return dbArticlesByTitle;
    }

    /**
     * @return the title database registered under {@code DatabaseType.categoriesByTitle}
     */
    public WDatabase<String, Integer> getDbCategoriesByTitle() {
        return dbCategoriesByTitle;
    }

    /**
     * @return the title database registered under {@code DatabaseType.templatesByTitle}
     */
    public WDatabase<String, Integer> getDbTemplatesByTitle() {
        return dbTemplatesByTitle;
    }

    /**
     * @return the database registered under {@code DatabaseType.redirectTargetBySource}
     */
    public WDatabase<Integer, Integer> getDbRedirectTargetBySource() {
        return dbRedirectTargetBySource;
    }

    /**
     * @return the database registered under {@code DatabaseType.redirectSourcesByTarget}
     */
    public WDatabase<Integer, DbIntList> getDbRedirectSourcesByTarget() {
        return dbRedirectSourcesByTarget;
    }

    /**
     * @return the page-link database registered under {@code DatabaseType.pageLinksIn}
     */
    public WDatabase<Integer, DbLinkLocationList> getDbPageLinkIn() {
        return dbPageLinkIn;
    }

    /**
     * @return the database registered under {@code DatabaseType.pageLinksInNoSentences}
     */
    public WDatabase<Integer, DbIntList> getDbPageLinkInNoSentences() {
        return dbPageLinkInNoSentences;
    }

    /**
     * @return the page-link database registered under {@code DatabaseType.pageLinksOut}
     */
    public WDatabase<Integer, DbLinkLocationList> getDbPageLinkOut() {
        return dbPageLinkOut;
    }

    /**
     * @return the database registered under {@code DatabaseType.pageLinksOutNoSentences}
     */
    public WDatabase<Integer, DbIntList> getDbPageLinkOutNoSentences() {
        return dbPageLinkOutNoSentences;
    }

    /**
     * @return the database registered under {@code DatabaseType.pageLinkCounts}
     */
    public WDatabase<Integer, DbPageLinkCounts> getDbPageLinkCounts() {
        return dbPageLinkCounts;
    }

    /**
     * @return the database registered under {@code DatabaseType.categoryParents}
     */
    public WDatabase<Integer, DbIntList> getDbCategoryParents() {
        return dbCategoryParents;
    }

    /**
     * @return the database registered under {@code DatabaseType.articleParents}
     */
    public WDatabase<Integer, DbIntList> getDbArticleParents() {
        return dbArticleParents;
    }

    /**
     * @return the database registered under {@code DatabaseType.childCategories}
     */
    public WDatabase<Integer, DbIntList> getDbChildCategories() {
        return dbChildCategories;
    }

    /**
     * @return the database registered under {@code DatabaseType.childArticles}
     */
    public WDatabase<Integer, DbIntList> getDbChildArticles() {
        return dbChildArticles;
    }

    /**
     * @return the markup database registered under {@code DatabaseType.markup}
     */
    public MarkupDatabase getDbMarkup() {
        return dbMarkup;
    }

    /**
     * @return the database registered under {@code DatabaseType.sentenceSplits}
     */
    public WDatabase<Integer, DbIntList> getDbSentenceSplits() {
        return dbSentenceSplits;
    }

    /**
     * @return the database registered under {@code DatabaseType.translations}
     */
    public WDatabase<Integer, DbTranslations> getDbTranslations() {
        return dbTranslations;
    }

    /**
     * Opens an existing environment read-only and prepares (caches) its databases.
     *
     * @param wikipediaConfiguration configuration naming the database directory and the
     *        databases to cache
     * @param z when true the preparation runs on a separate thread via {@code Thread.start()};
     *        when false {@code doPreparation()} runs on the calling thread before this
     *        constructor returns
     * @throws EnvironmentLockedException declared for the opening of the JE environment
     */
    public WEnvironment(WikipediaConfiguration wikipediaConfiguration, boolean z) throws EnvironmentLockedException {
        conf = wikipediaConfiguration;

        // The store must already exist and is never written through this handle.
        final EnvironmentConfig readOnlySettings = new EnvironmentConfig();
        readOnlySettings.setAllowCreate(false);
        readOnlySettings.setReadOnly(true);
        readOnlySettings.setCachePercent(10);
        env = new Environment(wikipediaConfiguration.getDatabaseDirectory(), readOnlySettings);

        initDatabases();

        final PreparationThread worker = new PreparationThread(wikipediaConfiguration);
        prepThread = worker;
        if (!z) {
            worker.doPreparation();
            return;
        }
        worker.start();
    }

    /**
     * Opens the environment for writing, creating it if necessary. Used by the static
     * build and prepare methods; no preparation thread is created.
     *
     * <p>The JE environment is opened before the database wrappers are built, matching the
     * public constructor, so no wrapper is ever constructed against a null environment.
     *
     * @param wikipediaConfiguration configuration naming the database directory
     */
    private WEnvironment(WikipediaConfiguration wikipediaConfiguration) {
        this.conf = wikipediaConfiguration;
        EnvironmentConfig environmentConfig = new EnvironmentConfig();
        environmentConfig.setCachePercent(10);
        environmentConfig.setAllowCreate(true);
        environmentConfig.setReadOnly(false);
        this.env = new Environment(wikipediaConfiguration.getDatabaseDirectory(), environmentConfig);
        initDatabases();
    }

    /**
     * Builds every database wrapper and registers each one in {@code databasesByType}
     * under its {@code DatabaseType}. Also creates the empty map of per-text-processor
     * label databases.
     */
    private void initDatabases() {
        final WDatabaseFactory factory = new WDatabaseFactory(this);
        databasesByType = new HashMap<>();

        // Pages and labels.
        dbPage = factory.buildPageDatabase();
        databasesByType.put(WDatabase.DatabaseType.page, dbPage);
        dbLabel = factory.buildLabelDatabase();
        databasesByType.put(WDatabase.DatabaseType.label, dbLabel);
        processedLabelDbs = new HashMap<>();
        dbLabelsForPage = factory.buildPageLabelDatabase();
        databasesByType.put(WDatabase.DatabaseType.pageLabel, dbLabelsForPage);

        // Title lookups.
        dbArticlesByTitle = factory.buildTitleDatabase(WDatabase.DatabaseType.articlesByTitle);
        databasesByType.put(WDatabase.DatabaseType.articlesByTitle, dbArticlesByTitle);
        dbCategoriesByTitle = factory.buildTitleDatabase(WDatabase.DatabaseType.categoriesByTitle);
        databasesByType.put(WDatabase.DatabaseType.categoriesByTitle, dbCategoriesByTitle);
        dbTemplatesByTitle = factory.buildTitleDatabase(WDatabase.DatabaseType.templatesByTitle);
        databasesByType.put(WDatabase.DatabaseType.templatesByTitle, dbTemplatesByTitle);

        // Page links.
        dbPageLinkIn = factory.buildPageLinkDatabase(WDatabase.DatabaseType.pageLinksIn);
        databasesByType.put(WDatabase.DatabaseType.pageLinksIn, dbPageLinkIn);
        dbPageLinkInNoSentences = factory.buildPageLinkNoSentencesDatabase(WDatabase.DatabaseType.pageLinksInNoSentences);
        databasesByType.put(WDatabase.DatabaseType.pageLinksInNoSentences, dbPageLinkInNoSentences);
        dbPageLinkOut = factory.buildPageLinkDatabase(WDatabase.DatabaseType.pageLinksOut);
        databasesByType.put(WDatabase.DatabaseType.pageLinksOut, dbPageLinkOut);
        dbPageLinkOutNoSentences = factory.buildPageLinkNoSentencesDatabase(WDatabase.DatabaseType.pageLinksOutNoSentences);
        databasesByType.put(WDatabase.DatabaseType.pageLinksOutNoSentences, dbPageLinkOutNoSentences);
        dbPageLinkCounts = factory.buildPageLinkCountDatabase();
        databasesByType.put(WDatabase.DatabaseType.pageLinkCounts, dbPageLinkCounts);

        // Parent/child relations.
        dbCategoryParents = factory.buildIntIntListDatabase(WDatabase.DatabaseType.categoryParents);
        databasesByType.put(WDatabase.DatabaseType.categoryParents, dbCategoryParents);
        dbArticleParents = factory.buildIntIntListDatabase(WDatabase.DatabaseType.articleParents);
        databasesByType.put(WDatabase.DatabaseType.articleParents, dbArticleParents);
        dbChildCategories = factory.buildIntIntListDatabase(WDatabase.DatabaseType.childCategories);
        databasesByType.put(WDatabase.DatabaseType.childCategories, dbChildCategories);
        dbChildArticles = factory.buildIntIntListDatabase(WDatabase.DatabaseType.childArticles);
        databasesByType.put(WDatabase.DatabaseType.childArticles, dbChildArticles);

        // Redirects.
        dbRedirectSourcesByTarget = factory.buildIntIntListDatabase(WDatabase.DatabaseType.redirectSourcesByTarget);
        databasesByType.put(WDatabase.DatabaseType.redirectSourcesByTarget, dbRedirectSourcesByTarget);
        dbRedirectTargetBySource = factory.buildRedirectTargetBySourceDatabase();
        databasesByType.put(WDatabase.DatabaseType.redirectTargetBySource, dbRedirectTargetBySource);

        // Markup, sentence splits, translations and statistics.
        dbMarkup = new MarkupDatabase(this);
        databasesByType.put(WDatabase.DatabaseType.markup, dbMarkup);
        dbSentenceSplits = factory.buildIntIntListDatabase(WDatabase.DatabaseType.sentenceSplits);
        databasesByType.put(WDatabase.DatabaseType.sentenceSplits, dbSentenceSplits);
        dbTranslations = factory.buildTranslationsDatabase();
        databasesByType.put(WDatabase.DatabaseType.translations, dbTranslations);
        dbStatistics = factory.buildStatisticsDatabase();
        databasesByType.put(WDatabase.DatabaseType.statistics, dbStatistics);
    }

    /**
     * Reports whether preparation has finished (successfully or not).
     *
     * <p>Instances created without a preparation thread have no pending preparation, so
     * they report true rather than throwing a NullPointerException. This matches the null
     * guard in {@code getCachingFailureReason()}.
     *
     * @return true if there is no preparation outstanding
     */
    public boolean isReady() {
        PreparationThread worker = this.prepThread;
        return worker == null || worker.isCompleted();
    }

    /**
     * Returns the progress reported by the preparation thread.
     *
     * <p>Instances created without a preparation thread have nothing left to prepare, so
     * they report 1.0 rather than throwing a NullPointerException. This matches the null
     * guard in {@code getCachingFailureReason()}.
     *
     * @return the preparation thread's progress value, or 1.0 when there is no such thread
     */
    public double getProgress() {
        PreparationThread worker = this.prepThread;
        return worker == null ? 1.0d : worker.getProgress();
    }

    /**
     * Returns the tracker used by the preparation thread.
     *
     * @return the preparation thread's tracker, or null when there is no preparation thread
     *         (instead of throwing a NullPointerException) or when the thread has not yet
     *         created its tracker
     */
    public ProgressTracker getPreparationTracker() {
        PreparationThread worker = this.prepThread;
        return worker == null ? null : worker.getTracker();
    }

    /**
     * Looks up one value in the statistics database.
     *
     * @param statisticName the statistic wanted; its {@code ordinal()} is the lookup key
     * @return whatever the statistics database holds for that key
     */
    public Long retrieveStatistic(StatisticName statisticName) {
        final Integer key = Integer.valueOf(statisticName.ordinal());
        return dbStatistics.retrieve(key);
    }

    /**
     * Reports whether the label database for the given text processor exists.
     *
     * @param textProcessor the text processor to check (null selects the plain label database)
     * @return the result of {@code exists()} on the corresponding label database
     */
    public boolean isPreparedFor(TextProcessor textProcessor) {
        final LabelDatabase labels = getDbLabel(textProcessor);
        return labels.exists();
    }

    /**
     * Collects the keys of all entries in the links-in database whose number of link
     * locations is strictly greater than {@code i}.
     *
     * <p>The database iterator is closed even if iteration fails part-way.
     *
     * @param i threshold; an entry qualifies only when it has more than this many link locations
     * @param progressTracker tracker to report to, or null to use a private single-task tracker
     * @return the set of qualifying keys
     */
    public TIntHashSet getValidArticleIds(int i, ProgressTracker progressTracker) {
        TIntHashSet validIds = new TIntHashSet();
        if (progressTracker == null) {
            progressTracker = new ProgressTracker(1, WEnvironment.class);
        }
        progressTracker.startTask(this.dbPageLinkIn.getDatabaseSize(), "gathering valid page ids");
        WIterator<Integer, DbLinkLocationList> entries = this.dbPageLinkIn.getIterator();
        try {
            while (entries.hasNext()) {
                WEntry<Integer, DbLinkLocationList> entry = entries.next();
                if (entry.getValue().getLinkLocations().size() > i) {
                    validIds.add(entry.getKey().intValue());
                }
                progressTracker.update();
            }
        } finally {
            // Release the iterator whether or not the scan completed.
            entries.close();
        }
        return validIds;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Runs the environment's log cleaner until it reports no more work, then forces a
     * checkpoint if at least one cleaning pass did something.
     *
     * @throws DatabaseException if the underlying environment reports a failure
     */
    public void cleanAndCheckpoint() throws DatabaseException {
        final Logger log = Logger.getLogger(WEnvironment.class);

        log.info("Starting cleaning");
        boolean cleanedSomething = false;
        // cleanLog() returns a positive count for as long as it keeps cleaning.
        while (env.cleanLog() > 0) {
            System.out.println("cleaning");
            cleanedSomething = true;
        }
        log.info("Finished cleaning");

        if (!cleanedSomething) {
            return;
        }

        log.info("Starting checkpoint");
        final CheckpointConfig forced = new CheckpointConfig();
        forced.setForce(true);
        env.checkpoint(forced);
        log.info("Finished checkpoint");
    }

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * @param databaseType the type to look up
     * @return the database registered under that type, or null if none was registered
     */
    public WDatabase getDatabase(WDatabase.DatabaseType databaseType) {
        final WDatabase registered = databasesByType.get(databaseType);
        return registered;
    }

    /**
     * @return the exception recorded by the preparation thread, or null when there is no
     *         preparation thread or it has recorded no failure
     */
    public Exception getCachingFailureReason() {
        return (this.prepThread != null) ? this.prepThread.failureCause : null;
    }

    /**
     * Closes every text-processor label database, every registered database and finally
     * the underlying JE environment.
     *
     * <p>The environment handle is cleared afterwards, so calling this method again does
     * not close the environment twice, and {@code finalize()} no longer sees an open handle.
     */
    public void close() {
        for (LabelDatabase labelDb : this.processedLabelDbs.values()) {
            labelDb.close();
        }
        for (WDatabase database : this.databasesByType.values()) {
            database.close();
        }
        // Databases must be closed before the environment that owns them.
        if (this.env != null) {
            this.env.close();
            this.env = null;
        }
    }

    /**
     * Logs a warning if this object is collected while it still holds an environment handle.
     * The warning is logged under this class's own logger category.
     */
    @Override
    public void finalize() {
        if (this.env != null) {
            Logger.getLogger(WEnvironment.class).warn("Unclosed enviroment. You may be causing a memory leak.");
        }
    }

    /**
     * Builds the environment's databases from the CSV files and markup dump found in a
     * data directory, then prepares the default text processor if one is configured.
     *
     * <p>All input files are located and checked for readability before anything is written.
     * The environment opened for loading is closed even if a load fails.
     *
     * @param wikipediaConfiguration configuration naming the database directory, data
     *        directory and default text processor
     * @param file directory containing the CSV files and the markup dump
     * @param z passed unchanged to every loader and to {@code prepareTextProcessor}
     *        (presumably an overwrite flag - confirm against WDatabase.loadFromCsvFile)
     * @throws IOException if an input file is missing or unreadable, or a loader fails
     * @throws XMLStreamException if the markup loader fails
     */
    public static void buildEnvironment(WikipediaConfiguration wikipediaConfiguration, File file, boolean z) throws IOException, XMLStreamException {
        File statsFile = getDataFile(file, "stats.csv");
        File pageFile = getDataFile(file, "page.csv");
        File labelFile = getDataFile(file, "label.csv");
        File pageLabelFile = getDataFile(file, "pageLabel.csv");
        File pageLinkInFile = getDataFile(file, "pageLinkIn.csv");
        File pageLinkOutFile = getDataFile(file, "pageLinkOut.csv");
        File categoryParentsFile = getDataFile(file, "categoryParents.csv");
        File articleParentsFile = getDataFile(file, "articleParents.csv");
        File childCategoriesFile = getDataFile(file, "childCategories.csv");
        File childArticlesFile = getDataFile(file, "childArticles.csv");
        File redirectTargetsFile = getDataFile(file, "redirectTargetsBySource.csv");
        File redirectSourcesFile = getDataFile(file, "redirectSourcesByTarget.csv");
        File sentenceSplitsFile = getDataFile(file, "sentenceSplits.csv");
        File translationsFile = getDataFile(file, "translations.csv");
        File markupDataFile = getMarkupDataFile(file);

        if (!wikipediaConfiguration.getDatabaseDirectory().exists()) {
            wikipediaConfiguration.getDatabaseDirectory().mkdirs();
        }

        WEnvironment wEnvironment = new WEnvironment(wikipediaConfiguration);
        try {
            wEnvironment.dbStatistics.loadFromCsvFile(statsFile, z, null);
            wEnvironment.dbPage.loadFromCsvFile(pageFile, z, null);
            wEnvironment.dbLabel.loadFromCsvFile(labelFile, z, null);
            wEnvironment.dbLabelsForPage.loadFromCsvFile(pageLabelFile, z, null);
            // The three title databases are all loaded from the page file.
            wEnvironment.dbArticlesByTitle.loadFromCsvFile(pageFile, z, null);
            wEnvironment.dbCategoriesByTitle.loadFromCsvFile(pageFile, z, null);
            wEnvironment.dbTemplatesByTitle.loadFromCsvFile(pageFile, z, null);
            wEnvironment.dbRedirectTargetBySource.loadFromCsvFile(redirectTargetsFile, z, null);
            wEnvironment.dbRedirectSourcesByTarget.loadFromCsvFile(redirectSourcesFile, z, null);
            // Each link file feeds both the full and the "no sentences" database.
            wEnvironment.dbPageLinkIn.loadFromCsvFile(pageLinkInFile, z, null);
            wEnvironment.dbPageLinkInNoSentences.loadFromCsvFile(pageLinkInFile, z, null);
            wEnvironment.dbPageLinkOut.loadFromCsvFile(pageLinkOutFile, z, null);
            wEnvironment.dbPageLinkOutNoSentences.loadFromCsvFile(pageLinkOutFile, z, null);
            wEnvironment.dbPageLinkCounts.loadFromCsvFiles(pageLinkInFile, pageLinkOutFile, z, null);
            wEnvironment.dbCategoryParents.loadFromCsvFile(categoryParentsFile, z, null);
            wEnvironment.dbArticleParents.loadFromCsvFile(articleParentsFile, z, null);
            wEnvironment.dbChildCategories.loadFromCsvFile(childCategoriesFile, z, null);
            wEnvironment.dbChildArticles.loadFromCsvFile(childArticlesFile, z, null);
            wEnvironment.dbSentenceSplits.loadFromCsvFile(sentenceSplitsFile, z, null);
            wEnvironment.dbTranslations.loadFromCsvFile(translationsFile, z, null);
            wEnvironment.dbMarkup.loadFromXmlFile(markupDataFile, z, null);
        } finally {
            // Release the environment whether or not every load succeeded.
            wEnvironment.close();
        }

        TextProcessor defaultTextProcessor = wikipediaConfiguration.getDefaultTextProcessor();
        if (defaultTextProcessor != null) {
            File tempDirectory = new File(wikipediaConfiguration.getDataDirectory() + File.separator + "tmp" + defaultTextProcessor.getName());
            tempDirectory.mkdir();
            tempDirectory.deleteOnExit();
            prepareTextProcessor(defaultTextProcessor, wikipediaConfiguration, tempDirectory, z, 5);
        }
    }

    /**
     * Prepares the label database for a text processor, unless it already exists and
     * preparation is not forced.
     *
     * <p>The environment opened here is always closed again, including when the processor
     * was already prepared and when preparation fails.
     *
     * @param textProcessor the text processor to prepare for; null makes this a no-op
     * @param wikipediaConfiguration configuration naming the database directory
     * @param file passed to {@code LabelDatabase.prepare} (the caller in this class passes a
     *        temporary directory)
     * @param z when true, prepare even if the label database already exists
     * @param i passed unchanged to {@code LabelDatabase.prepare}
     * @throws IOException if preparation fails
     */
    public static void prepareTextProcessor(TextProcessor textProcessor, WikipediaConfiguration wikipediaConfiguration, File file, boolean z, int i) throws IOException {
        if (textProcessor == null) {
            return;
        }
        WEnvironment wEnvironment = new WEnvironment(wikipediaConfiguration);
        try {
            if (z || !wEnvironment.isPreparedFor(textProcessor)) {
                wEnvironment.getDbLabel(textProcessor).prepare(file, i);
                wEnvironment.cleanAndCheckpoint();
            }
        } finally {
            // Close on every path; the original only closed after a successful preparation.
            wEnvironment.close();
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return the JE environment handle held by this object
     */
    public Environment getEnvironment() {
        return env;
    }

    /**
     * Resolves a named file inside a directory and verifies that it can be read.
     *
     * @param file the directory expected to contain the file
     * @param str the file name
     * @return the resolved file
     * @throws IOException if the resolved file cannot be read
     */
    private static File getDataFile(File file, String str) throws IOException {
        final File candidate = new File(file + File.separator + str);
        if (!candidate.canRead()) {
            throw new IOException(candidate + " is not readable");
        }
        return candidate;
    }

    /**
     * Locates the single markup dump in a directory: a file whose name ends with
     * {@code -pages-articles.xml} or {@code -pages-articles.xml.bz2}.
     *
     * @param file the directory to search
     * @return the one matching, readable file
     * @throws IOException if the directory cannot be listed (for example it does not exist
     *         or is not a directory), if no file or more than one file matches, or if the
     *         match cannot be read
     */
    private static File getMarkupDataFile(File file) throws IOException {
        File[] candidates = file.listFiles(new FilenameFilter() { // from class: org.wikipedia.miner.db.WEnvironment.1
            @Override // java.io.FilenameFilter
            public boolean accept(File file2, String str) {
                return str.endsWith("-pages-articles.xml") || str.endsWith("-pages-articles.xml.bz2");
            }
        });
        // listFiles returns null, not an empty array, when the path is not a listable directory.
        if (candidates == null) {
            throw new IOException("Could not list files in " + file);
        }
        if (candidates.length == 0) {
            throw new IOException("Could not locate markup file in " + file);
        }
        if (candidates.length > 1) {
            throw new IOException("There are multiple markup files in " + file);
        }
        if (candidates[0].canRead()) {
            return candidates[0];
        }
        throw new IOException(candidates[0] + " is not readable");
    }
}
