package org.wikipedia.miner.extraction;

import gnu.trove.TIntIntHashMap;
import gnu.trove.TObjectIntHashMap;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashSet;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.record.CsvRecordInput;
import org.apache.hadoop.record.RecordInput;
import org.apache.log4j.Logger;
import org.wikipedia.miner.db.struct.DbPage;
import org.wikipedia.miner.model.Page;

/* loaded from: input_file:org/wikipedia/miner/extraction/Util.class */
/**
 * Static helpers for loading page-id and redirect lookup tables from text
 * files and for resolving a title to its final (redirect-free) page id.
 */
public class Util {
    /** Shared logger used to report lines that could not be parsed. */
    private static final Logger LOGGER = Logger.getLogger(Util.class);

    /**
     * Normalises a page title: upper-cases the first character and replaces
     * every underscore with a space.
     *
     * @param str the raw title; must not be {@code null}
     * @return the normalised title; an empty input is returned unchanged
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String normaliseTitle(String str) {
        // Guard the empty title: charAt(0) would otherwise throw.
        if (str.length() == 0) {
            return str;
        }
        StringBuilder normalised = new StringBuilder(str.length());
        normalised.append(Character.toUpperCase(str.charAt(0)));
        normalised.append(str.substring(1).replace('_', ' '));
        return normalised.toString();
    }

    /**
     * Reads a page file line by line and records, for every page whose type is
     * in {@code hashSet}, a mapping from its normalised title to its id.
     *
     * <p>Each line is parsed as one CSV record: an int field {@code "id"}
     * followed by a serialised {@link DbPage}. Lines that fail to parse are
     * logged and skipped; they do not abort the load.
     *
     * @param path              location of the page file; opened as a local file via {@code path.toString()}
     * @param hashSet           the page types to keep
     * @param tObjectIntHashMap the map to add title-to-id entries to
     * @param reporter          notified of progress after each successfully processed line
     * @return {@code tObjectIntHashMap}, the same instance that was passed in
     * @throws IOException if the file cannot be opened, read or closed
     */
    public static TObjectIntHashMap<String> gatherPageIdsByTitle(Path path, HashSet<Page.PageType> hashSet, TObjectIntHashMap<String> tObjectIntHashMap, Reporter reporter) throws IOException {
        BufferedReader reader = openUtf8Reader(path);
        try {
            // values() allocates a fresh array on every call, so fetch it once.
            Page.PageType[] pageTypes = Page.PageType.values();
            String line;
            while ((line = reader.readLine()) != null) {
                try {
                    RecordInput record = new CsvRecordInput(new ByteArrayInputStream((line + "\n").getBytes("UTF-8")));
                    int pageId = record.readInt("id");
                    DbPage page = new DbPage();
                    page.deserialize(record);
                    String title = page.getTitle();
                    if (hashSet.contains(pageTypes[page.getType()])) {
                        tObjectIntHashMap.put(normaliseTitle(title), pageId);
                    }
                    reporter.progress();
                } catch (Exception e) {
                    // Best effort: one malformed line must not abort the whole load.
                    LOGGER.error("Caught exception while gathering page from '" + line + "' in '" + path + "'", e);
                }
            }
            return tObjectIntHashMap;
        } finally {
            reader.close();
        }
    }

    /**
     * Reads a redirect file line by line and records each redirect as a
     * mapping from source page id to target page id.
     *
     * <p>Each line is split on commas; the first two fields are parsed as the
     * source and target ids. Lines that fail to parse are logged and skipped.
     *
     * @param path           location of the redirect file; opened as a local file via {@code path.toString()}
     * @param tIntIntHashMap the map to add source-to-target entries to
     * @param reporter       notified of progress after each successfully processed line
     * @return {@code tIntIntHashMap}, the same instance that was passed in
     * @throws IOException if the file cannot be opened, read or closed
     */
    public static TIntIntHashMap gatherRedirectTargetsBySource(Path path, TIntIntHashMap tIntIntHashMap, Reporter reporter) throws IOException {
        BufferedReader reader = openUtf8Reader(path);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                try {
                    String[] fields = line.split(",");
                    tIntIntHashMap.put(Integer.parseInt(fields[0]), Integer.parseInt(fields[1]));
                    reporter.progress();
                } catch (Exception e) {
                    // Best effort: one malformed line must not abort the whole load.
                    LOGGER.error("Caught exception while gathering redirect from '" + line + "' in '" + path + "'", e);
                }
            }
            return tIntIntHashMap;
        } finally {
            reader.close();
        }
    }

    /**
     * Resolves a title to a page id, following redirects until a page that is
     * not itself a redirect source is reached.
     *
     * @param str               the title to look up; it is normalised before the lookup
     * @param tObjectIntHashMap page ids keyed by normalised title
     * @param tIntIntHashMap    redirect targets keyed by source id, or {@code null} to skip redirect resolution
     * @return the resolved page id, or {@code null} if the title is unknown or
     *         the redirect chain loops back on itself
     */
    public static Integer getTargetId(String str, TObjectIntHashMap<String> tObjectIntHashMap, TIntIntHashMap tIntIntHashMap) {
        String title = normaliseTitle(str);
        if (!tObjectIntHashMap.containsKey(title)) {
            return null;
        }
        int currentId = tObjectIntHashMap.get(title);
        if (tIntIntHashMap == null) {
            return Integer.valueOf(currentId);
        }
        // Remember every id already followed so a redirect cycle terminates.
        HashSet<Integer> visited = new HashSet<Integer>();
        while (tIntIntHashMap.containsKey(currentId)) {
            if (!visited.add(Integer.valueOf(currentId))) {
                return null;
            }
            currentId = tIntIntHashMap.get(currentId);
        }
        return Integer.valueOf(currentId);
    }

    /**
     * Opens the file at {@code path} for buffered reading, decoding it as
     * UTF-8 rather than the platform default charset.
     */
    private static BufferedReader openUtf8Reader(Path path) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(path.toString()), "UTF-8"));
    }
}
