package org.wikipedia.miner.db;

import com.sleepycat.bind.tuple.StringBinding;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.Transaction;
import gnu.trove.THashMap;
import gnu.trove.TIntHashSet;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.record.CsvRecordInput;
import org.apache.hadoop.record.CsvRecordOutput;
import org.apache.hadoop.record.RecordOutput;
import org.apache.log4j.Logger;
import org.wikipedia.miner.db.WDatabase;
import org.wikipedia.miner.db.struct.DbLabel;
import org.wikipedia.miner.db.struct.DbSenseForLabel;
import org.wikipedia.miner.util.ProgressTracker;
import org.wikipedia.miner.util.WikipediaConfiguration;
import org.wikipedia.miner.util.text.TextProcessor;

/* loaded from: input_file:org/wikipedia/miner/db/LabelDatabase.class */
/**
 * A {@link WDatabase} mapping label text to {@link DbLabel} records.
 *
 * <p>An instance is either bound to no {@link TextProcessor} (keys are used as given) or to
 * one text processor, in which case lookup keys are passed through
 * {@link TextProcessor#processText(String)} before retrieval and the database contents are
 * built by {@link #prepare(File, int)} from the unprocessed label database.
 */
public class LabelDatabase extends WDatabase<String, DbLabel> {

    private static final Logger LOGGER = Logger.getLogger(LabelDatabase.class);

    /**
     * Orders senses by descending link occurrence count, then descending link document count,
     * then ascending id.
     */
    private static final Comparator<DbSenseForLabel> SENSE_ORDER = new Comparator<DbSenseForLabel>() {
        @Override
        public int compare(DbSenseForLabel a, DbSenseForLabel b) {
            int byOccurrences = Long.compare(b.getLinkOccCount(), a.getLinkOccCount());
            if (byOccurrences != 0) {
                return byOccurrences;
            }
            int byDocuments = Long.compare(b.getLinkDocCount(), a.getLinkDocCount());
            if (byDocuments != 0) {
                return byDocuments;
            }
            return Integer.compare(a.getId(), b.getId());
        }
    };

    /** Text processor applied to keys, or {@code null} for the unprocessed label database. */
    private final TextProcessor textProcessor;

    /**
     * Creates the label database that is not bound to any text processor.
     *
     * @param wEnvironment the environment this database belongs to
     */
    public LabelDatabase(WEnvironment wEnvironment) {
        super(wEnvironment, WDatabase.DatabaseType.label, new StringBinding(), newLabelBinding());
        this.textProcessor = null;
    }

    /**
     * Creates a label database bound to the given text processor. The database is named
     * {@code "label"} followed by the processor's name.
     *
     * @param wEnvironment the environment this database belongs to
     * @param textProcessor the processor applied to label text; must not be {@code null}
     */
    public LabelDatabase(WEnvironment wEnvironment, TextProcessor textProcessor) {
        super(wEnvironment, WDatabase.DatabaseType.label, "label" + textProcessor.getName(), new StringBinding(), newLabelBinding());
        this.textProcessor = textProcessor;
    }

    /** Builds the value binding that instantiates empty {@link DbLabel} records. */
    private static RecordBinding<DbLabel> newLabelBinding() {
        return new RecordBinding<DbLabel>() {
            @Override
            public DbLabel createRecordInstance() {
                return new DbLabel();
            }
        };
    }

    /**
     * @return the text processor bound to this database, or {@code null} if there is none
     */
    public TextProcessor getTextProcessor() {
        return this.textProcessor;
    }

    /**
     * @return {@code true} if {@code getDatabase(true)} yields a non-null database
     */
    public boolean isPrepared() {
        return getDatabase(true) != null;
    }

    /**
     * Looks up the label stored for the given text. When a text processor is bound, the text
     * is processed first and the processed form is used as the key.
     *
     * @param str the label text to look up
     * @return whatever the superclass lookup returns for the (possibly processed) key
     */
    @Override
    public DbLabel retrieve(String str) {
        String key = (this.textProcessor == null) ? str : this.textProcessor.processText(str);
        return super.retrieve(key);
    }

    /**
     * Decides whether, and in what form, a label is kept under the given configuration.
     *
     * <p>A label is rejected when its link probability (link document count divided by text
     * document count) is below the configured minimum. Otherwise its senses are narrowed to
     * those in the configured articles of interest (when such a set is configured) that
     * either come from a redirect, come from a title, or reach the minimum sense probability.
     * Note that the entry's own {@link DbLabel} is modified in place.
     *
     * @param wEntry the entry being considered
     * @param wikipediaConfiguration supplies the thresholds and the articles of interest
     * @return the entry's label with its senses narrowed, or {@code null} if the label is
     *     rejected or no sense survives
     */
    @Override
    public DbLabel filterCacheEntry(WEntry<String, DbLabel> wEntry, WikipediaConfiguration wikipediaConfiguration) {
        TIntHashSet articlesOfInterest = wikipediaConfiguration.getArticlesOfInterest();
        DbLabel label = wEntry.getValue();
        float linkProbability = ((float) label.getLinkDocCount()) / ((float) label.getTextDocCount());
        if (linkProbability < wikipediaConfiguration.getMinLinkProbability()) {
            return null;
        }
        ArrayList<DbSenseForLabel> keptSenses = new ArrayList<>();
        for (DbSenseForLabel sense : label.getSenses()) {
            if (articlesOfInterest != null && !articlesOfInterest.contains(sense.getId())) {
                continue;
            }
            if (sense.getFromRedirect()
                    || sense.getFromTitle()
                    || ((float) sense.getLinkDocCount()) / ((float) label.getLinkDocCount()) >= wikipediaConfiguration.getMinSenseProbability()) {
                keptSenses.add(sense);
            }
        }
        if (keptSenses.isEmpty()) {
            return null;
        }
        label.setSenses(keptSenses);
        return label;
    }

    /**
     * Reads one entry from a CSV record: a string key followed by a serialized {@link DbLabel}.
     *
     * @param csvRecordInput the record to read from
     * @return the key/label pair that was read
     * @throws IOException if the record cannot be read
     */
    @Override
    public WEntry<String, DbLabel> deserialiseCsvRecord(CsvRecordInput csvRecordInput) throws IOException {
        String key = csvRecordInput.readString((String) null);
        DbLabel label = new DbLabel();
        label.deserialize(csvRecordInput);
        return new WEntry<>(key, label);
    }

    /**
     * Builds this database from the unprocessed label database by running every label through
     * the bound text processor and merging labels whose processed text coincides.
     *
     * <p>Does nothing when no text processor is bound. Otherwise the source labels are scanned
     * {@code i} times; each pass keeps only the labels whose processed text hashes to that pass,
     * sorts them, and dumps them to a temporary CSV file in {@code file}. The sorted files are
     * then merged so that records are stored in ascending key order.
     *
     * @param file directory for the temporary files; created if it does not exist
     * @param i number of passes used to split the work
     * @throws IOException if a temporary file cannot be written or read
     */
    public void prepare(File file, int i) throws IOException {
        if (this.textProcessor == null) {
            return;
        }
        LabelDatabase source = this.env.getDbLabel(null);
        long sourceSize = source.getDatabaseSize();
        file.mkdirs();
        // Two tasks per pass (gather + dump) plus the final store task.
        ProgressTracker tracker = new ProgressTracker((2 * i) + 1, LabelDatabase.class);
        for (int pass = 0; pass < i; pass++) {
            tracker.startTask(sourceSize, "Gathering and processing labels (pass " + (pass + 1) + " of " + i + ")");
            TreeMap<String, DbLabel> processed = gatherLabelsForPass(source, pass, i, tracker);
            tracker.startTask(processed.size(), "Dumping processed labels (pass " + (pass + 1) + " of " + i + ")");
            dumpLabels(processed, tempFileForPass(file, pass), tracker);
        }
        storeMergedLabels(file, i, tracker);
        this.env.cleanAndCheckpoint();
        // NOTE(review): result is discarded; presumably this re-opens the database for reading — confirm in WDatabase.
        getDatabase(true);
    }

    /** Returns the temporary CSV file used for the given pass inside the given directory. */
    private static File tempFileForPass(File directory, int pass) {
        return new File(directory.getPath() + File.separator + "tmpLabels" + pass + ".csv");
    }

    /** Scans the source database and collects, sorted by processed text, the labels belonging to one pass. */
    private TreeMap<String, DbLabel> gatherLabelsForPass(LabelDatabase source, int pass, int passes, ProgressTracker tracker) throws IOException {
        TreeMap<String, DbLabel> processed = new TreeMap<String, DbLabel>();
        WIterator<String, DbLabel> iterator = source.getIterator();
        try {
            while (iterator.hasNext()) {
                WEntry<String, DbLabel> entry = iterator.next();
                String processedText = this.textProcessor.processText(entry.getKey());
                // Mask the sign bit rather than using Math.abs: Math.abs(Integer.MIN_VALUE) is
                // negative, which would leave such a label unassigned to any pass.
                if ((processedText.hashCode() & Integer.MAX_VALUE) % passes == pass) {
                    DbLabel existing = processed.get(processedText);
                    processed.put(processedText, existing == null ? entry.getValue() : mergeLabels(existing, entry.getValue()));
                }
                tracker.update();
            }
        } finally {
            iterator.close();
        }
        return processed;
    }

    /** Writes the given labels, one CSV record per entry, to the target file as UTF-8. */
    private void dumpLabels(Map<String, DbLabel> labels, File target, ProgressTracker tracker) throws IOException {
        target.deleteOnExit();
        // try-with-resources guarantees the buffered data is flushed before the merge phase reads it.
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target), StandardCharsets.UTF_8))) {
            for (Map.Entry<String, DbLabel> entry : labels.entrySet()) {
                tracker.update();
                ByteArrayOutputStream buffer = new ByteArrayOutputStream();
                RecordOutput recordOutput = new CsvRecordOutput(buffer);
                recordOutput.writeString(entry.getKey(), (String) null);
                entry.getValue().serialize(recordOutput);
                writer.write(buffer.toString("UTF-8"));
            }
        }
    }

    /** Merges the per-pass temporary files and stores their records in ascending key order. */
    private void storeMergedLabels(File directory, int passes, ProgressTracker tracker) throws IOException {
        Database database = getDatabase(false);
        BufferedReader[] readers = new BufferedReader[passes];
        String[] keys = new String[passes];
        DbLabel[] labels = new DbLabel[passes];
        File[] files = new File[passes];
        long totalBytes = 0;
        long consumed = 0;
        try {
            for (int slot = 0; slot < passes; slot++) {
                files[slot] = tempFileForPass(directory, slot);
                totalBytes += files[slot].length();
                readers[slot] = new BufferedReader(new InputStreamReader(new FileInputStream(files[slot]), StandardCharsets.UTF_8));
                consumed = readNextEntry(readers[slot], slot, keys, labels, consumed);
            }
            tracker.startTask(totalBytes, "Storing processed labels");
            while (true) {
                // Pick the reader whose pending key is smallest; a null key marks an exhausted reader.
                int smallest = -1;
                for (int slot = 0; slot < passes; slot++) {
                    if (keys[slot] != null && (smallest < 0 || keys[slot].compareTo(keys[smallest]) < 0)) {
                        smallest = slot;
                    }
                }
                if (smallest < 0) {
                    break;
                }
                DatabaseEntry keyEntry = new DatabaseEntry();
                this.keyBinding.objectToEntry(keys[smallest], keyEntry);
                DatabaseEntry valueEntry = new DatabaseEntry();
                this.valueBinding.objectToEntry(labels[smallest], valueEntry);
                database.put((Transaction) null, keyEntry, valueEntry);
                consumed = readNextEntry(readers[smallest], smallest, keys, labels, consumed);
                tracker.update(consumed);
            }
        } finally {
            for (BufferedReader reader : readers) {
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (IOException e) {
                        LOGGER.warn("Could not close temporary label file reader", e);
                    }
                }
            }
            for (File tempFile : files) {
                if (tempFile != null) {
                    tempFile.delete();
                }
            }
        }
    }

    /**
     * Loads the next parseable record from the reader into {@code keys[slot]} and
     * {@code labels[slot]}, or sets both to {@code null} at end of file. Lines that cannot be
     * parsed are logged and skipped so that one bad line does not discard the rest of the file.
     *
     * @return the updated count of characters consumed so far (line length plus one per line)
     */
    private long readNextEntry(BufferedReader reader, int slot, String[] keys, DbLabel[] labels, long consumed) throws IOException {
        keys[slot] = null;
        labels[slot] = null;
        String line;
        while ((line = reader.readLine()) != null) {
            consumed += line.length() + 1;
            String record = line + "\n";
            try {
                CsvRecordInput recordInput = new CsvRecordInput(new ByteArrayInputStream(record.getBytes(StandardCharsets.UTF_8)));
                String key = recordInput.readString((String) null);
                DbLabel label = new DbLabel();
                label.deserialize(recordInput);
                keys[slot] = key;
                labels[slot] = label;
                return consumed;
            } catch (Exception e) {
                LOGGER.error("Could not parse '" + record + "'", e);
            }
        }
        return consumed;
    }

    /**
     * Combines two labels into a new one: the four count fields are summed, and senses with the
     * same id are combined (counts summed, redirect/title flags OR-ed). The resulting senses are
     * sorted by {@link #SENSE_ORDER}. Neither argument is modified.
     */
    private DbLabel mergeLabels(DbLabel dbLabel, DbLabel dbLabel2) {
        Map<Integer, DbSenseForLabel> sensesById = new THashMap<Integer, DbSenseForLabel>();
        if (dbLabel.getSenses() != null) {
            for (DbSenseForLabel sense : dbLabel.getSenses()) {
                sensesById.put(Integer.valueOf(sense.getId()), sense);
            }
        }
        if (dbLabel2.getSenses() != null) {
            for (DbSenseForLabel sense : dbLabel2.getSenses()) {
                Integer id = Integer.valueOf(sense.getId());
                DbSenseForLabel existing = sensesById.get(id);
                sensesById.put(id, existing == null ? sense : mergeSenses(existing, sense));
            }
        }
        ArrayList<DbSenseForLabel> mergedSenses = new ArrayList<DbSenseForLabel>(sensesById.values());
        Collections.sort(mergedSenses, SENSE_ORDER);

        DbLabel merged = new DbLabel();
        merged.setLinkDocCount(dbLabel.getLinkDocCount() + dbLabel2.getLinkDocCount());
        merged.setLinkOccCount(dbLabel.getLinkOccCount() + dbLabel2.getLinkOccCount());
        merged.setTextDocCount(dbLabel.getTextDocCount() + dbLabel2.getTextDocCount());
        merged.setTextOccCount(dbLabel.getTextOccCount() + dbLabel2.getTextOccCount());
        merged.setSenses(mergedSenses);
        return merged;
    }

    /** Combines two senses that share an id into a new sense with summed counts and OR-ed flags. */
    private static DbSenseForLabel mergeSenses(DbSenseForLabel first, DbSenseForLabel second) {
        DbSenseForLabel combined = new DbSenseForLabel();
        combined.setId(second.getId());
        combined.setLinkDocCount(second.getLinkDocCount() + first.getLinkDocCount());
        combined.setLinkOccCount(second.getLinkOccCount() + first.getLinkOccCount());
        combined.setFromRedirect(second.getFromRedirect() || first.getFromRedirect());
        combined.setFromTitle(second.getFromTitle() || first.getFromTitle());
        return combined;
    }
}
