package org.wikipedia.miner.db;

import com.sleepycat.bind.tuple.IntegerBinding;
import com.sleepycat.bind.tuple.LongBinding;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.Transaction;
import gnu.trove.TIntHashSet;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.hadoop.record.CsvRecordInput;
import org.apache.log4j.Logger;
import org.wikipedia.miner.annotation.preprocessing.PreprocessedDocument;
import org.wikipedia.miner.db.WDatabase;
import org.wikipedia.miner.db.WEnvironment;
import org.wikipedia.miner.db.struct.DbIntList;
import org.wikipedia.miner.db.struct.DbLabelForPageList;
import org.wikipedia.miner.db.struct.DbLinkLocation;
import org.wikipedia.miner.db.struct.DbLinkLocationList;
import org.wikipedia.miner.db.struct.DbPage;
import org.wikipedia.miner.db.struct.DbTranslations;
import org.wikipedia.miner.extraction.SiteInfo;
import org.wikipedia.miner.model.Page;
import org.wikipedia.miner.util.ProgressTracker;
import org.wikipedia.miner.util.WikipediaConfiguration;
import org.wikipedia.miner.util.text.TextProcessor;

/* loaded from: input_file:org/wikipedia/miner/db/WDatabaseFactory.class */
public class WDatabaseFactory {
    WEnvironment env;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.wikipedia.miner.db.WDatabaseFactory$15, reason: invalid class name */
    /* loaded from: input_file:org/wikipedia/miner/db/WDatabaseFactory$15.class */
    /**
     * Decompiler-reconstructed enum switch map for {@link WDatabase.DatabaseType}.
     *
     * <p>The array is indexed by {@code DatabaseType.ordinal()} and holds a small case index
     * (1-6) for six of the constants; every other slot keeps the array default of 0.
     */
    public static /* synthetic */ class AnonymousClass15 {
        // One slot per DatabaseType constant; slots not assigned below stay 0.
        static final /* synthetic */ int[] $SwitchMap$org$wikipedia$miner$db$WDatabase$DatabaseType = new int[WDatabase.DatabaseType.values().length];

        static {
            // Each assignment is guarded separately: a NoSuchFieldError for one constant is
            // ignored, which leaves that constant's slot at 0 without affecting the others.
            try {
                $SwitchMap$org$wikipedia$miner$db$WDatabase$DatabaseType[WDatabase.DatabaseType.categoryParents.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$db$WDatabase$DatabaseType[WDatabase.DatabaseType.articleParents.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$db$WDatabase$DatabaseType[WDatabase.DatabaseType.childCategories.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$db$WDatabase$DatabaseType[WDatabase.DatabaseType.childArticles.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$db$WDatabase$DatabaseType[WDatabase.DatabaseType.redirectSourcesByTarget.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$db$WDatabase$DatabaseType[WDatabase.DatabaseType.sentenceSplits.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
            }
        }
    }

    /**
     * Creates a factory whose databases are all bound to the given environment.
     *
     * @param wEnvironment the environment stored in {@code env} and passed to every database built
     */
    public WDatabaseFactory(WEnvironment wEnvironment) {
        this.env = wEnvironment;
    }

    /**
     * Builds the database of type {@code DatabaseType.page}, mapping integer keys to
     * {@link DbPage} records.
     *
     * <p>Cache filtering keeps an entry when no articles of interest are configured, when the
     * entry's key is one of the articles of interest, or when the page type is a category or a
     * redirect. All other entries are filtered out ({@code null} is returned for them).
     *
     * @return a new page database bound to this factory's environment
     */
    public WDatabase<Integer, DbPage> buildPageDatabase() {
        RecordBinding<DbPage> pageBinding = new RecordBinding<DbPage>() {
            @Override
            public DbPage createRecordInstance() {
                return new DbPage();
            }
        };

        // The decompiler-emitted synthetic bridge overload of filterCacheEntry is deliberately
        // not declared here: it has the same erasure as the typed override and javac generates
        // the bridge itself.
        return new IntObjectDatabase<DbPage>(this.env, WDatabase.DatabaseType.page, pageBinding) {
            @Override
            public WEntry<Integer, DbPage> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
                // Record layout: the integer key first, followed by the serialized page.
                Integer pageId = Integer.valueOf(csvRecordInput.readInt((String) null));
                DbPage page = new DbPage();
                page.deserialize(csvRecordInput);
                return new WEntry<>(pageId, page);
            }

            @Override
            public DbPage filterCacheEntry(WEntry<Integer, DbPage> wEntry, WikipediaConfiguration wikipediaConfiguration) {
                DbPage page = wEntry.getValue();
                // The stored type is an index into the PageType constants.
                Page.PageType pageType = Page.PageType.values()[page.getType()];
                TIntHashSet articlesOfInterest = wikipediaConfiguration.getArticlesOfInterest();

                if (articlesOfInterest == null || articlesOfInterest.contains(wEntry.getKey().intValue())) {
                    return page;
                }
                // Categories and redirects are always cached, whatever the articles of interest.
                if (pageType == Page.PageType.category || pageType == Page.PageType.redirect) {
                    return page;
                }
                return null;
            }
        };
    }

    /**
     * Builds a {@code TitleDatabase} of the given type, bound to this factory's environment.
     *
     * @param databaseType the database type handed to the {@code TitleDatabase} constructor
     * @return a new database mapping strings to integers
     */
    public WDatabase<String, Integer> buildTitleDatabase(WDatabase.DatabaseType databaseType) {
        return new TitleDatabase(this.env, databaseType);
    }

    /**
     * Builds a {@link LabelDatabase} bound to this factory's environment, without a text processor.
     *
     * @return a new label database
     */
    public LabelDatabase buildLabelDatabase() {
        return new LabelDatabase(this.env);
    }

    /**
     * Builds a {@link LabelDatabase} bound to this factory's environment and the given text
     * processor.
     *
     * @param textProcessor the text processor handed to the label database; must not be null
     * @return a new label database
     * @throws IllegalArgumentException if {@code textProcessor} is null
     */
    public LabelDatabase buildLabelDatabase(TextProcessor textProcessor) {
        if (textProcessor != null) {
            return new LabelDatabase(env, textProcessor);
        }
        throw new IllegalArgumentException("text processor must not be null");
    }

    /**
     * Builds the database of type {@code DatabaseType.pageLabel}, mapping integer keys to
     * {@link DbLabelForPageList} records.
     *
     * <p>Cache filtering keeps an entry only when no articles of interest are configured or the
     * entry's key is one of them; otherwise {@code null} is returned for it.
     *
     * @return a new page-label database bound to this factory's environment
     */
    public WDatabase<Integer, DbLabelForPageList> buildPageLabelDatabase() {
        RecordBinding<DbLabelForPageList> labelListBinding = new RecordBinding<DbLabelForPageList>() {
            @Override
            public DbLabelForPageList createRecordInstance() {
                return new DbLabelForPageList();
            }
        };

        // The decompiler-emitted synthetic bridge overload of filterCacheEntry is deliberately
        // not declared here: it has the same erasure as the typed override and javac generates
        // the bridge itself.
        return new IntObjectDatabase<DbLabelForPageList>(this.env, WDatabase.DatabaseType.pageLabel, labelListBinding) {
            @Override
            public WEntry<Integer, DbLabelForPageList> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
                // Record layout: the integer key first, followed by the serialized label list.
                Integer key = Integer.valueOf(csvRecordInput.readInt((String) null));
                DbLabelForPageList labels = new DbLabelForPageList();
                labels.deserialize(csvRecordInput);
                return new WEntry<>(key, labels);
            }

            @Override
            public DbLabelForPageList filterCacheEntry(WEntry<Integer, DbLabelForPageList> wEntry, WikipediaConfiguration wikipediaConfiguration) {
                TIntHashSet articlesOfInterest = wikipediaConfiguration.getArticlesOfInterest();
                boolean keep = articlesOfInterest == null || articlesOfInterest.contains(wEntry.getKey().intValue());
                return keep ? wEntry.getValue() : null;
            }
        };
    }

    /**
     * Builds a page-link database mapping integer keys to {@link DbLinkLocationList} records.
     *
     * <p>Cache filtering, when articles of interest are configured, drops entries whose key is
     * not of interest and removes link locations whose link id is not of interest. An entry left
     * with no link locations is dropped as well. The surviving locations are written back into
     * the entry's own value object, which is then returned.
     *
     * @param databaseType either {@code DatabaseType.pageLinksIn} or {@code DatabaseType.pageLinksOut}
     * @return a new page-link database bound to this factory's environment
     * @throws IllegalArgumentException if {@code databaseType} is any other type
     */
    public WDatabase<Integer, DbLinkLocationList> buildPageLinkDatabase(WDatabase.DatabaseType databaseType) {
        if (databaseType != WDatabase.DatabaseType.pageLinksIn && databaseType != WDatabase.DatabaseType.pageLinksOut) {
            throw new IllegalArgumentException("type must be either DatabaseType.pageLinksIn or DatabaseType.pageLinksOut");
        }

        RecordBinding<DbLinkLocationList> linkListBinding = new RecordBinding<DbLinkLocationList>() {
            @Override
            public DbLinkLocationList createRecordInstance() {
                return new DbLinkLocationList();
            }
        };

        // The decompiler-emitted synthetic bridge overload of filterCacheEntry is deliberately
        // not declared here: it has the same erasure as the typed override and javac generates
        // the bridge itself.
        return new IntObjectDatabase<DbLinkLocationList>(this.env, databaseType, linkListBinding) {
            @Override
            public WEntry<Integer, DbLinkLocationList> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
                // Record layout: the integer key first, followed by the serialized link list.
                Integer key = Integer.valueOf(csvRecordInput.readInt((String) null));
                DbLinkLocationList links = new DbLinkLocationList();
                links.deserialize(csvRecordInput);
                return new WEntry<>(key, links);
            }

            @Override
            public DbLinkLocationList filterCacheEntry(WEntry<Integer, DbLinkLocationList> wEntry, WikipediaConfiguration wikipediaConfiguration) {
                int key = wEntry.getKey().intValue();
                DbLinkLocationList links = wEntry.getValue();
                TIntHashSet articlesOfInterest = wikipediaConfiguration.getArticlesOfInterest();

                if (articlesOfInterest != null && !articlesOfInterest.contains(key)) {
                    return null;
                }

                ArrayList<DbLinkLocation> retained = new ArrayList<>();
                for (DbLinkLocation location : links.getLinkLocations()) {
                    if (articlesOfInterest == null || articlesOfInterest.contains(location.getLinkId())) {
                        retained.add(location);
                    }
                }
                if (retained.isEmpty()) {
                    return null;
                }

                // The entry's value object is reused and updated in place.
                links.setLinkLocations(retained);
                return links;
            }
        };
    }

    /**
     * Builds a page-link database that stores only link ids ({@link DbIntList}) per integer key.
     *
     * <p>CSV records are parsed as {@link DbLinkLocationList} records and reduced to the list of
     * their link ids. Cache filtering, when articles of interest are configured, drops entries
     * whose key is not of interest and removes ids that are not of interest. An entry left with
     * no ids is dropped as well. The surviving ids are written back into the entry's own value
     * object.
     *
     * @param databaseType either {@code DatabaseType.pageLinksInNoSentences} or
     *     {@code DatabaseType.pageLinksOutNoSentences}
     * @return a new link-id database bound to this factory's environment
     * @throws IllegalArgumentException if {@code databaseType} is any other type
     */
    public WDatabase<Integer, DbIntList> buildPageLinkNoSentencesDatabase(WDatabase.DatabaseType databaseType) {
        if (databaseType != WDatabase.DatabaseType.pageLinksInNoSentences && databaseType != WDatabase.DatabaseType.pageLinksOutNoSentences) {
            throw new IllegalArgumentException("type must be either DatabaseType.pageLinksInNoSentences or DatabaseType.pageLinksOutNoSentences");
        }

        RecordBinding<DbIntList> idListBinding = new RecordBinding<DbIntList>() {
            @Override
            public DbIntList createRecordInstance() {
                return new DbIntList();
            }
        };

        // The decompiler-emitted synthetic bridge overload of filterCacheEntry is deliberately
        // not declared here: it has the same erasure as the typed override and javac generates
        // the bridge itself.
        return new IntObjectDatabase<DbIntList>(this.env, databaseType, idListBinding) {
            @Override
            public WEntry<Integer, DbIntList> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
                Integer key = Integer.valueOf(csvRecordInput.readInt((String) null));
                DbLinkLocationList locations = new DbLinkLocationList();
                locations.deserialize(csvRecordInput);

                // Keep only the link ids; the rest of each link location is discarded.
                ArrayList<Integer> linkIds = new ArrayList<Integer>();
                for (DbLinkLocation location : locations.getLinkLocations()) {
                    linkIds.add(Integer.valueOf(location.getLinkId()));
                }
                return new WEntry<>(key, new DbIntList(linkIds));
            }

            @Override
            public DbIntList filterCacheEntry(WEntry<Integer, DbIntList> wEntry, WikipediaConfiguration wikipediaConfiguration) {
                int key = wEntry.getKey().intValue();
                DbIntList ids = wEntry.getValue();
                TIntHashSet articlesOfInterest = wikipediaConfiguration.getArticlesOfInterest();

                if (articlesOfInterest != null && !articlesOfInterest.contains(key)) {
                    return null;
                }

                ArrayList<Integer> retained = new ArrayList<>();
                for (Integer id : ids.getValues()) {
                    if (articlesOfInterest == null || articlesOfInterest.contains(id.intValue())) {
                        retained.add(id);
                    }
                }
                if (retained.isEmpty()) {
                    return null;
                }

                // The entry's value object is reused and updated in place.
                ids.setValues(retained);
                return ids;
            }

            /**
             * Loads this database from a CSV file, one record per line.
             *
             * <p>Does nothing if the database already exists and {@code z} is false. The file
             * reader is closed even when a record fails to parse or store.
             *
             * @param file the UTF-8 encoded CSV file to read
             * @param z whether to load even though the database already exists
             * @param progressTracker tracker to report to, or null to use a fresh one
             * @throws IOException if the file cannot be read or a record cannot be deserialised
             */
            @Override
            public void loadFromCsvFile(File file, boolean z, ProgressTracker progressTracker) throws IOException {
                if (exists() && !z) {
                    return;
                }

                ProgressTracker tracker = progressTracker != null ? progressTracker : new ProgressTracker(1, WDatabase.class);
                tracker.startTask(file.length(), "Loading " + getName());

                // try-with-resources: the reader (and the file stream beneath it) must not leak
                // when deserialisation or the database write throws part-way through the file.
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
                    Database database = getDatabase(false);
                    long charsRead = 0;
                    String line;
                    while ((line = reader.readLine()) != null) {
                        // Progress is approximated as characters plus one line terminator per line.
                        charsRead = charsRead + line.length() + 1;

                        WEntry<Integer, DbIntList> entry = deserialiseCsvRecord(new CsvRecordInput(new ByteArrayInputStream((line + "\n").getBytes("UTF-8"))));
                        if (entry != null) {
                            DatabaseEntry keyEntry = new DatabaseEntry();
                            this.keyBinding.objectToEntry(entry.getKey(), keyEntry);
                            DatabaseEntry valueEntry = new DatabaseEntry();
                            this.valueBinding.objectToEntry(entry.getValue(), valueEntry);
                            database.put((Transaction) null, keyEntry, valueEntry);
                        }
                        tracker.update(charsRead);
                    }
                }

                this.env.cleanAndCheckpoint();
                getDatabase(true);
            }
        };
    }

    /**
     * Builds a database mapping integer keys to lists of integers ({@link DbIntList}).
     *
     * <p>Cache filtering depends on the database type:
     * <ul>
     *   <li>{@code articleParents}, {@code redirectSourcesByTarget}, {@code sentenceSplits}:
     *       the whole list is kept only if no articles of interest are configured or the key
     *       is one of them;</li>
     *   <li>{@code childArticles}: only the values that are articles of interest are kept
     *       (all values when none are configured);</li>
     *   <li>{@code categoryParents}, {@code childCategories}: the list is kept unchanged.</li>
     * </ul>
     * In every case an entry whose resulting list is empty is dropped.
     *
     * @param databaseType one of {@code categoryParents}, {@code articleParents},
     *     {@code childCategories}, {@code childArticles}, {@code redirectSourcesByTarget} or
     *     {@code sentenceSplits}
     * @return a new int-list database bound to this factory's environment
     * @throws IllegalArgumentException if {@code databaseType} is any other type
     */
    public WDatabase<Integer, DbIntList> buildIntIntListDatabase(final WDatabase.DatabaseType databaseType) {
        switch (databaseType) {
            case categoryParents:
            case articleParents:
            case childCategories:
            case childArticles:
            case redirectSourcesByTarget:
            case sentenceSplits:
                break;
            default:
                throw new IllegalArgumentException(databaseType.name() + " is not a valid DatabaseType for IntIntListDatabase");
        }

        RecordBinding<DbIntList> idListBinding = new RecordBinding<DbIntList>() {
            @Override
            public DbIntList createRecordInstance() {
                return new DbIntList();
            }
        };

        // The decompiler-emitted synthetic bridge overload of filterCacheEntry is deliberately
        // not declared here: it has the same erasure as the typed override and javac generates
        // the bridge itself.
        return new IntObjectDatabase<DbIntList>(this.env, databaseType, idListBinding) {
            @Override
            public WEntry<Integer, DbIntList> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
                // Record layout: the integer key first, followed by the serialized int list.
                Integer key = Integer.valueOf(csvRecordInput.readInt((String) null));
                DbIntList ids = new DbIntList();
                ids.deserialize(csvRecordInput);
                return new WEntry<>(key, ids);
            }

            @Override
            public DbIntList filterCacheEntry(WEntry<Integer, DbIntList> wEntry, WikipediaConfiguration wikipediaConfiguration) {
                int key = wEntry.getKey().intValue();
                ArrayList<Integer> values = wEntry.getValue().getValues();
                TIntHashSet articlesOfInterest = wikipediaConfiguration.getArticlesOfInterest();

                ArrayList<Integer> retained = null;
                switch (databaseType) {
                    case articleParents:
                    case redirectSourcesByTarget:
                    case sentenceSplits:
                        // Keyed by article: keep the whole list only for articles of interest.
                        if (articlesOfInterest == null || articlesOfInterest.contains(key)) {
                            retained = values;
                        }
                        break;
                    case childArticles:
                        // Values are articles: keep just the ones of interest. The filtered list
                        // must become the result; previously it was built and then discarded,
                        // so every childArticles entry was dropped from the cache.
                        retained = new ArrayList<Integer>();
                        for (Integer childId : values) {
                            if (articlesOfInterest == null || articlesOfInterest.contains(childId.intValue())) {
                                retained.add(childId);
                            }
                        }
                        break;
                    default:
                        // categoryParents and childCategories are cached unfiltered.
                        retained = values;
                        break;
                }

                if (retained == null || retained.isEmpty()) {
                    return null;
                }
                return new DbIntList(retained);
            }
        };
    }

    /**
     * Builds the database of type {@code DatabaseType.redirectTargetBySource}, mapping an
     * integer key to an integer value.
     *
     * <p>Cache filtering keeps an entry only when no articles of interest are configured or the
     * entry's <em>value</em> is one of them; otherwise {@code null} is returned for it.
     *
     * @return a new redirect database bound to this factory's environment
     */
    public WDatabase<Integer, Integer> buildRedirectTargetBySourceDatabase() {
        // The decompiler-emitted synthetic bridge overload of filterCacheEntry is deliberately
        // not declared here: it has the same erasure as the typed override and javac generates
        // the bridge itself.
        return new IntObjectDatabase<Integer>(this.env, WDatabase.DatabaseType.redirectTargetBySource, new IntegerBinding()) {
            @Override
            public WEntry<Integer, Integer> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
                // Record layout: two integers, read as key then value.
                Integer key = Integer.valueOf(csvRecordInput.readInt((String) null));
                Integer value = Integer.valueOf(csvRecordInput.readInt((String) null));
                return new WEntry<>(key, value);
            }

            @Override
            public Integer filterCacheEntry(WEntry<Integer, Integer> wEntry, WikipediaConfiguration wikipediaConfiguration) {
                TIntHashSet articlesOfInterest = wikipediaConfiguration.getArticlesOfInterest();
                // The check is on the entry's value, not its key.
                boolean keep = articlesOfInterest == null || articlesOfInterest.contains(wEntry.getValue().intValue());
                return keep ? wEntry.getValue() : null;
            }
        };
    }

    /**
     * Builds the database of type {@code DatabaseType.statistics}, mapping the ordinal of a
     * {@code WEnvironment.StatisticName} to a long value.
     *
     * <p>CSV records whose statistic name is not a {@code StatisticName} constant are skipped
     * with a warning ({@code deserialiseCsvRecord} returns {@code null}). A record whose value
     * cannot be read fails with the {@link IOException} raised by the reader rather than being
     * reported as an unknown statistic. Cache filtering keeps every entry.
     *
     * @return a new statistics database bound to this factory's environment
     */
    public IntObjectDatabase<Long> buildStatisticsDatabase() {
        // The decompiler-emitted synthetic bridge overload of filterCacheEntry is deliberately
        // not declared here: it has the same erasure as the typed override and javac generates
        // the bridge itself.
        return new IntObjectDatabase<Long>(this.env, WDatabase.DatabaseType.statistics, new LongBinding()) {
            @Override
            public WEntry<Integer, Long> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
                String statisticName = csvRecordInput.readString((String) null);

                WEnvironment.StatisticName statistic;
                try {
                    statistic = WEnvironment.StatisticName.valueOf(statisticName);
                } catch (IllegalArgumentException e) {
                    // Only an unrecognised name is tolerated; read errors are not masked here.
                    Logger.getLogger(WDatabaseFactory.class).warn("Ignoring unknown statistic: " + statisticName);
                    return null;
                }

                long value = csvRecordInput.readLong((String) null);
                return new WEntry<>(Integer.valueOf(statistic.ordinal()), Long.valueOf(value));
            }

            @Override
            public Long filterCacheEntry(WEntry<Integer, Long> wEntry, WikipediaConfiguration wikipediaConfiguration) {
                return wEntry.getValue();
            }
        };
    }

    /**
     * Builds the database of type {@code DatabaseType.translations}, mapping integer keys to
     * {@link DbTranslations} records.
     *
     * <p>Cache filtering keeps an entry only when no articles of interest are configured or the
     * entry's key is one of them; otherwise {@code null} is returned for it.
     *
     * @return a new translations database bound to this factory's environment
     */
    public WDatabase<Integer, DbTranslations> buildTranslationsDatabase() {
        RecordBinding<DbTranslations> translationsBinding = new RecordBinding<DbTranslations>() {
            @Override
            public DbTranslations createRecordInstance() {
                return new DbTranslations();
            }
        };

        // The decompiler-emitted synthetic bridge overload of filterCacheEntry is deliberately
        // not declared here: it has the same erasure as the typed override and javac generates
        // the bridge itself.
        return new IntObjectDatabase<DbTranslations>(this.env, WDatabase.DatabaseType.translations, translationsBinding) {
            @Override
            public WEntry<Integer, DbTranslations> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
                // Record layout: the integer key first, followed by the serialized translations.
                int key = csvRecordInput.readInt((String) null);
                DbTranslations translations = new DbTranslations();
                translations.deserialize(csvRecordInput);
                return new WEntry<>(Integer.valueOf(key), translations);
            }

            @Override
            public DbTranslations filterCacheEntry(WEntry<Integer, DbTranslations> wEntry, WikipediaConfiguration wikipediaConfiguration) {
                TIntHashSet articlesOfInterest = wikipediaConfiguration.getArticlesOfInterest();
                boolean keep = articlesOfInterest == null || articlesOfInterest.contains(wEntry.getKey().intValue());
                return keep ? wEntry.getValue() : null;
            }
        };
    }

    /**
     * Builds a {@link PageLinkCountDatabase} bound to this factory's environment.
     *
     * @return a new page link count database
     */
    public PageLinkCountDatabase buildPageLinkCountDatabase() {
        return new PageLinkCountDatabase(this.env);
    }

    /**
     * Builds a {@code MarkupDatabase} bound to this factory's environment.
     *
     * @return a new database mapping integers to strings
     */
    public WDatabase<Integer, String> buildMarkupDatabase() {
        return new MarkupDatabase(this.env);
    }
}
