package org.wikipedia.miner.util;

import gnu.trove.TIntHashSet;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.wikipedia.miner.annotation.preprocessing.PreprocessedDocument;
import org.wikipedia.miner.comparison.ArticleComparer;
import org.wikipedia.miner.db.WDatabase;
import org.wikipedia.miner.extraction.SiteInfo;
import org.wikipedia.miner.util.text.TextProcessor;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/wikipedia/miner/util/WikipediaConfiguration.class */
public class WikipediaConfiguration {
    // Language code; set by the (String, File) constructor or the langCode parameter.
    private String langCode;
    // Database directory; set by the (String, File) constructor or the databaseDirectory parameter.
    private File dbDirectory;
    // Data directory; set by the dataDirectory parameter or setDataDirectory.
    private File dataDirectory;
    // Article comparison dependencies; stays null unless configured in XML or set explicitly.
    private EnumSet<ArticleComparer.DataDependency> articleComparisonDependencies;
    // Model file locations; each stays null unless configured in XML or set explicitly.
    private File articleComparisonModel;
    private File labelDisambiguationModel;
    private File labelComparisonModel;
    private File comparisonSnippetModel;
    private File topicDisambiguationModel;
    private File linkDetectionModel;
    // Null until set; getTokenizer() then falls back to SimpleTokenizer.INSTANCE.
    private Tokenizer tokenizer;
    // Null until set via one of the setSentenceDetector overloads or the sentenceModel parameter.
    private SentenceDetector sentenceDetector;
    // Integer ids read from the articlesOfInterest file, or supplied via setArticlesOfInterest.
    private TIntHashSet articlesOfInterest;
    // Null unless configured via the defaultTextProcessor parameter or its setter.
    private TextProcessor defaultTextProcessor = null;
    // Database types to cache, mapped to the cache priority requested for each.
    private HashMap<WDatabase.DatabaseType, WDatabase.CachePriority> databasesToCache = new HashMap<>();
    // Stopwords consulted by isStopword; empty by default.
    private HashSet<String> stopwords = new HashSet<>();
    // Thresholds; all default to zero.
    private int minLinksIn = 0;
    private float minLinkProbability = 0.0f;
    private float minSenseProbability = 0.0f;

    /* JADX INFO: Access modifiers changed from: package-private */
    /* renamed from: org.wikipedia.miner.util.WikipediaConfiguration$1, reason: invalid class name */
    /* loaded from: input_file:org/wikipedia/miner/util/WikipediaConfiguration$1.class */
    /**
     * Decompiler rendering of the compiler-generated lookup table for a
     * {@code switch} over {@link ParamName}.
     *
     * <p>The array is indexed by {@code ParamName.ordinal()} and holds a dense
     * case number from 1 to 19. Constants that receive no assignment below
     * (here: {@code unknown}) keep the array default of 0.
     */
    public static /* synthetic */ class AnonymousClass1 {
        static final /* synthetic */ int[] $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName = new int[ParamName.values().length];

        // Each assignment is wrapped separately so that a constant missing at
        // run time (NoSuchFieldError) leaves only its own slot at 0.
        static {
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.langCode.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.databaseDirectory.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.dataDirectory.ordinal()] = 3;
            } catch (NoSuchFieldError e3) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.defaultTextProcessor.ordinal()] = 4;
            } catch (NoSuchFieldError e4) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.minLinksIn.ordinal()] = 5;
            } catch (NoSuchFieldError e5) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.minSenseProbability.ordinal()] = 6;
            } catch (NoSuchFieldError e6) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.minLinkProbability.ordinal()] = 7;
            } catch (NoSuchFieldError e7) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.articlesOfInterest.ordinal()] = 8;
            } catch (NoSuchFieldError e8) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.databaseToCache.ordinal()] = 9;
            } catch (NoSuchFieldError e9) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.stopwordFile.ordinal()] = 10;
            } catch (NoSuchFieldError e10) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.articleComparisonDependency.ordinal()] = 11;
            } catch (NoSuchFieldError e11) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.articleComparisonModel.ordinal()] = 12;
            } catch (NoSuchFieldError e12) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.labelDisambiguationModel.ordinal()] = 13;
            } catch (NoSuchFieldError e13) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.labelComparisonModel.ordinal()] = 14;
            } catch (NoSuchFieldError e14) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.comparisonSnippetModel.ordinal()] = 15;
            } catch (NoSuchFieldError e15) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.topicDisambiguationModel.ordinal()] = 16;
            } catch (NoSuchFieldError e16) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.linkDetectionModel.ordinal()] = 17;
            } catch (NoSuchFieldError e17) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.tokenModel.ordinal()] = 18;
            } catch (NoSuchFieldError e18) {
            }
            try {
                $SwitchMap$org$wikipedia$miner$util$WikipediaConfiguration$ParamName[ParamName.sentenceModel.ordinal()] = 19;
            } catch (NoSuchFieldError e19) {
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/wikipedia/miner/util/WikipediaConfiguration$ParamName.class */
    /**
     * Names of the XML child elements recognised when a configuration is
     * loaded from XML. Element names are matched against these constants
     * exactly (after trimming); anything else resolves to {@link #unknown}.
     */
    public enum ParamName {
        // Language code string.
        langCode,
        // Path of the database directory.
        databaseDirectory,
        // Path of the data directory.
        dataDirectory,
        // Class name of the default TextProcessor, instantiated reflectively.
        defaultTextProcessor,
        // Integer threshold.
        minLinksIn,
        // Float threshold.
        minSenseProbability,
        // Float threshold.
        minLinkProbability,
        // Path of a file whose first tab-separated column holds integer ids.
        articlesOfInterest,
        // WDatabase.DatabaseType name; may carry an optional "priority" attribute.
        databaseToCache,
        // Path of a file with one stopword per line.
        stopwordFile,
        // ArticleComparer.DataDependency name; may occur several times.
        articleComparisonDependency,
        // Paths of model files.
        articleComparisonModel,
        labelDisambiguationModel,
        labelComparisonModel,
        comparisonSnippetModel,
        topicDisambiguationModel,
        linkDetectionModel,
        // Path of an OpenNLP tokenizer model file.
        tokenModel,
        // Path of an OpenNLP sentence model file.
        sentenceModel,
        // Fallback for element names that match no other constant.
        unknown
    }

    /**
     * Creates a configuration from an already-parsed XML element whose child
     * elements are the configuration parameters.
     *
     * @param element element holding one child element per parameter
     * @throws IOException if a file referenced by a parameter cannot be read
     * @throws ClassNotFoundException if the configured text processor class cannot be found
     * @throws InstantiationException if the configured text processor cannot be instantiated
     * @throws IllegalAccessException if the configured text processor's constructor is inaccessible
     */
    public WikipediaConfiguration(Element element) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        this.initFromXml(element);
    }

    public WikipediaConfiguration(File file) throws ParserConfigurationException, SAXException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        Document parse = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(file);
        parse.getDocumentElement().normalize();
        initFromXml(parse.getDocumentElement());
    }

    public WikipediaConfiguration(InputStream inputStream) throws ParserConfigurationException, SAXException, IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        Document parse = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(inputStream);
        parse.getDocumentElement().normalize();
        initFromXml(parse.getDocumentElement());
    }

    /**
     * Creates a minimal configuration with only a language code and a
     * database directory; every other setting keeps its default.
     *
     * @param str language code
     * @param file database directory
     */
    public WikipediaConfiguration(String str, File file) {
        langCode = str;
        dbDirectory = file;
    }

    /**
     * @return the configured language code, or {@code null} if none was set
     */
    public String getLangCode() {
        return langCode;
    }

    /**
     * @return the configured database directory, or {@code null} if none was set
     */
    public File getDatabaseDirectory() {
        return dbDirectory;
    }

    /**
     * @return the configured data directory, or {@code null} if none was set
     */
    public File getDataDirectory() {
        return dataDirectory;
    }

    /**
     * Replaces the data directory.
     *
     * @param file new data directory
     */
    public void setDataDirectory(File file) {
        dataDirectory = file;
    }

    /**
     * Replaces the default text processor.
     *
     * @param textProcessor new default text processor
     */
    public void setDefaultTextProcessor(TextProcessor textProcessor) {
        defaultTextProcessor = textProcessor;
    }

    /**
     * @return the default text processor, or {@code null} if none was configured
     */
    public TextProcessor getDefaultTextProcessor() {
        return defaultTextProcessor;
    }

    /**
     * Marks a database for caching with the {@code space} cache priority,
     * replacing any priority previously registered for it.
     *
     * @param databaseType database to cache
     */
    public void addDatabaseToCache(WDatabase.DatabaseType databaseType) {
        databasesToCache.put(databaseType, WDatabase.CachePriority.space);
    }

    /**
     * Marks a database for caching with the given priority, replacing any
     * priority previously registered for it.
     *
     * <p>The decision is reported through the class logger rather than
     * standard output, so that hosting applications control where it goes.
     *
     * @param databaseType database to cache
     * @param cachePriority priority to cache it with
     */
    public void addDatabaseToCache(WDatabase.DatabaseType databaseType, WDatabase.CachePriority cachePriority) {
        Logger.getLogger(WikipediaConfiguration.class).info("Will cache " + databaseType + " for " + cachePriority);
        this.databasesToCache.put(databaseType, cachePriority);
    }

    /**
     * Removes every database previously marked for caching.
     */
    public void clearDatabasesToCache() {
        databasesToCache.clear();
    }

    /**
     * Returns the databases marked for caching. The result is the key-set
     * view of the internal map, not a copy, so it reflects later changes.
     *
     * @return the database types currently marked for caching
     */
    public Set<WDatabase.DatabaseType> getDatabasesToCache() {
        return databasesToCache.keySet();
    }

    /**
     * Looks up the cache priority registered for a database.
     *
     * @param databaseType database to look up
     * @return its cache priority, or {@code null} if it is not marked for caching
     */
    public WDatabase.CachePriority getCachePriority(WDatabase.DatabaseType databaseType) {
        return databasesToCache.get(databaseType);
    }

    /**
     * @return the minLinksIn threshold (0 unless configured)
     */
    public int getMinLinksIn() {
        return minLinksIn;
    }

    /**
     * Replaces the minLinksIn threshold.
     *
     * @param i new threshold
     */
    public void setMinLinksIn(int i) {
        minLinksIn = i;
    }

    /**
     * @return the minLinkProbability threshold (0 unless configured)
     */
    public float getMinLinkProbability() {
        return minLinkProbability;
    }

    /**
     * Replaces the minLinkProbability threshold.
     *
     * @param f new threshold
     */
    public void setMinLinkProbability(float f) {
        minLinkProbability = f;
    }

    /**
     * @return the minSenseProbability threshold (0 unless configured)
     */
    public float getMinSenseProbability() {
        return minSenseProbability;
    }

    /**
     * Replaces the minSenseProbability threshold.
     *
     * @param f new threshold
     */
    public void setMinSenseProbability(float f) {
        minSenseProbability = f;
    }

    /**
     * @return the set of article ids of interest, or {@code null} if none was configured
     */
    public TIntHashSet getArticlesOfInterest() {
        return articlesOfInterest;
    }

    /**
     * Replaces the set of article ids of interest.
     *
     * @param tIntHashSet new set of ids; stored as-is, not copied
     */
    public void setArticlesOfInterest(TIntHashSet tIntHashSet) {
        articlesOfInterest = tIntHashSet;
    }

    /**
     * Tests whether a term is a configured stopword. The term is trimmed
     * before the lookup; the comparison is exact and case-sensitive.
     *
     * @param str term to test; {@code null} is never a stopword
     * @return {@code true} if the trimmed term is in the stopword set
     */
    public boolean isStopword(String str) {
        // Guard both inputs: the set itself may be null after setStopwords(null).
        if (str == null || this.stopwords == null) {
            return false;
        }
        return this.stopwords.contains(str.trim());
    }

    /**
     * Replaces the stopword set.
     *
     * @param hashSet new stopwords; stored as-is, not copied
     */
    public void setStopwords(HashSet<String> hashSet) {
        stopwords = hashSet;
    }

    /**
     * Replaces the stopword set with the contents of a file that lists one
     * stopword per line. Each line is trimmed before it is stored.
     *
     * <p>The file is read with the platform default charset. The reader is
     * always closed, and the current stopword set is only replaced once the
     * whole file has been read successfully.
     *
     * @param file stopword file
     * @throws IOException if the file cannot be opened or read
     */
    public void setStopwords(File file) throws IOException {
        HashSet<String> loaded = new HashSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                loaded.add(line.trim());
            }
        }
        this.stopwords = loaded;
    }

    /**
     * @return the configured article comparison dependencies, or {@code null} if none were configured
     */
    public EnumSet<ArticleComparer.DataDependency> getArticleComparisonDependancies() {
        return articleComparisonDependencies;
    }

    /**
     * Replaces the article comparison dependencies.
     *
     * @param enumSet new dependencies; stored as-is, not copied
     */
    public void setArticleComparisonDependancies(EnumSet<ArticleComparer.DataDependency> enumSet) {
        articleComparisonDependencies = enumSet;
    }

    /**
     * @return the article comparison model file, or {@code null} if none was configured
     */
    public File getArticleComparisonModel() {
        return articleComparisonModel;
    }

    /**
     * Replaces the article comparison model file.
     *
     * @param file new model file
     */
    public void setArticleComparisonModel(File file) {
        articleComparisonModel = file;
    }

    /**
     * @return the label disambiguation model file, or {@code null} if none was configured
     */
    public File getLabelDisambiguationModel() {
        return labelDisambiguationModel;
    }

    /**
     * Replaces the label disambiguation model file.
     *
     * @param file new model file
     */
    public void setLabelDisambiguationModel(File file) {
        labelDisambiguationModel = file;
    }

    /**
     * @return the label comparison model file, or {@code null} if none was configured
     */
    public File getLabelComparisonModel() {
        return labelComparisonModel;
    }

    /**
     * Replaces the label comparison model file.
     *
     * @param file new model file
     */
    public void setLabelComparisonModel(File file) {
        labelComparisonModel = file;
    }

    /**
     * @return the comparison snippet model file, or {@code null} if none was configured
     */
    public File getComparisonSnippetModel() {
        return comparisonSnippetModel;
    }

    /**
     * Replaces the comparison snippet model file.
     *
     * @param file new model file
     */
    public void setComparisonSnippetModel(File file) {
        comparisonSnippetModel = file;
    }

    /**
     * @return the link detection model file, or {@code null} if none was configured
     */
    public File getLinkDetectionModel() {
        return linkDetectionModel;
    }

    /**
     * Replaces the link detection model file.
     *
     * @param file new model file
     */
    public void setLinkDetectionModel(File file) {
        linkDetectionModel = file;
    }

    /**
     * @return the topic disambiguation model file, or {@code null} if none was configured
     */
    public File getTopicDisambiguationModel() {
        return topicDisambiguationModel;
    }

    /**
     * Replaces the topic disambiguation model file.
     *
     * @param file new model file
     */
    public void setTopicDisambiguationModel(File file) {
        topicDisambiguationModel = file;
    }

    /**
     * Returns the tokenizer. If none has been set, {@code SimpleTokenizer.INSTANCE}
     * is installed as the tokenizer on first use and returned.
     *
     * @return the configured tokenizer, never {@code null}
     */
    public Tokenizer getTokenizer() {
        Tokenizer current = tokenizer;
        if (current == null) {
            current = SimpleTokenizer.INSTANCE;
            tokenizer = current;
        }
        return current;
    }

    /**
     * Replaces the tokenizer. Passing {@code null} makes the next call to
     * {@code getTokenizer()} fall back to {@code SimpleTokenizer.INSTANCE}.
     *
     * @param tokenizer new tokenizer
     */
    public void setTokenizer(Tokenizer tokenizer) {
        this.tokenizer = tokenizer;
    }

    /**
     * Loads an OpenNLP tokenizer model from a file and installs a
     * {@code TokenizerME} built from it as the tokenizer.
     *
     * <p>The model stream is closed once the model has been read, whether or
     * not loading succeeds.
     *
     * @param file tokenizer model file
     * @throws IOException if the file cannot be opened or the model cannot be read
     */
    public void setTokenizer(File file) throws IOException {
        try (InputStream modelStream = new FileInputStream(file)) {
            this.tokenizer = new TokenizerME(new TokenizerModel(modelStream));
        }
    }

    /**
     * @return the sentence detector, or {@code null} if none was configured
     */
    public SentenceDetector getSentenceDetector() {
        return sentenceDetector;
    }

    /**
     * Replaces the sentence detector.
     *
     * @param sentenceDetector new sentence detector
     */
    public void setSentenceDetector(SentenceDetector sentenceDetector) {
        this.sentenceDetector = sentenceDetector;
    }

    /**
     * Loads an OpenNLP sentence model from a file and installs a
     * {@code SentenceDetectorME} built from it as the sentence detector.
     *
     * <p>The model stream is closed once the model has been read, whether or
     * not loading succeeds.
     *
     * @param file sentence model file
     * @throws IOException if the file cannot be opened or the model cannot be read
     */
    public void setSentenceDetector(File file) throws IOException {
        try (InputStream modelStream = new FileInputStream(file)) {
            this.sentenceDetector = new SentenceDetectorME(new SentenceModel(modelStream));
        }
    }

    /**
     * Derives a set of article comparison dependencies from the databases
     * that are marked for caching.
     *
     * <p>{@code pageLinksIn} and {@code pageLinksOut} are included when the
     * database of the same name is cached, and {@code linkCounts} when
     * {@code pageLinkCounts} is cached. If neither link database is cached,
     * {@code pageLinksIn} is included anyway, so the result is never empty.
     *
     * @return a new, non-empty set of dependencies
     */
    public EnumSet<ArticleComparer.DataDependency> getReccommendedRelatednessDependancies() {
        // A typed EnumSet replaces the raw list and unchecked copy.
        EnumSet<ArticleComparer.DataDependency> recommended = EnumSet.noneOf(ArticleComparer.DataDependency.class);
        boolean hasLinkDatabase = false;
        if (this.databasesToCache.containsKey(WDatabase.DatabaseType.pageLinksIn)) {
            recommended.add(ArticleComparer.DataDependency.pageLinksIn);
            hasLinkDatabase = true;
        }
        if (this.databasesToCache.containsKey(WDatabase.DatabaseType.pageLinksOut)) {
            recommended.add(ArticleComparer.DataDependency.pageLinksOut);
            hasLinkDatabase = true;
        }
        if (this.databasesToCache.containsKey(WDatabase.DatabaseType.pageLinkCounts)) {
            recommended.add(ArticleComparer.DataDependency.linkCounts);
        }
        if (!hasLinkDatabase) {
            // Fall back to in-links when no link database is cached.
            recommended.add(ArticleComparer.DataDependency.pageLinksIn);
        }
        return recommended;
    }

    /**
     * Populates this configuration from the child elements of an XML element.
     *
     * <p>Each child element is treated as one parameter: its tag name selects
     * the setting (see {@link ParamName}) and its leading text is the value.
     * Children without a usable value are skipped silently. Children with an
     * unrecognised name are reported with a warning and otherwise ignored.
     * All {@code articleComparisonDependency} values are collected and, if
     * there is at least one, replace the article comparison dependencies.
     *
     * @param element element whose children hold the parameters
     * @throws IOException if a stopword, article, tokenizer or sentence model file cannot be read
     * @throws ClassNotFoundException if the default text processor class cannot be found
     * @throws InstantiationException if the default text processor cannot be instantiated
     * @throws IllegalAccessException if the default text processor's constructor is inaccessible
     */
    private void initFromXml(Element element) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
        EnumSet<ArticleComparer.DataDependency> dependencies = EnumSet.noneOf(ArticleComparer.DataDependency.class);
        NodeList children = element.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element param = (Element) child;
            String paramName = param.getNodeName();
            String paramValue = getParamValue(param);
            if (paramValue == null) {
                continue;
            }
            // Switch on the enum itself rather than on ordinal-derived integers.
            switch (resolveParamName(paramName)) {
                case langCode:
                    this.langCode = paramValue;
                    break;
                case databaseDirectory:
                    this.dbDirectory = new File(paramValue);
                    break;
                case dataDirectory:
                    this.dataDirectory = new File(paramValue);
                    break;
                case defaultTextProcessor:
                    // Class.newInstance() is kept because the declared exceptions depend on it.
                    this.defaultTextProcessor = (TextProcessor) Class.forName(paramValue).newInstance();
                    break;
                case minLinksIn:
                    this.minLinksIn = Integer.parseInt(paramValue);
                    break;
                case minSenseProbability:
                    this.minSenseProbability = Float.parseFloat(paramValue);
                    break;
                case minLinkProbability:
                    this.minLinkProbability = Float.parseFloat(paramValue);
                    break;
                case articlesOfInterest:
                    this.articlesOfInterest = gatherArticles(new File(paramValue));
                    break;
                case databaseToCache:
                    if (param.hasAttribute("priority")) {
                        addDatabaseToCache(WDatabase.DatabaseType.valueOf(paramValue), WDatabase.CachePriority.valueOf(param.getAttribute("priority")));
                    } else {
                        addDatabaseToCache(WDatabase.DatabaseType.valueOf(paramValue));
                    }
                    break;
                case stopwordFile:
                    setStopwords(new File(paramValue));
                    break;
                case articleComparisonDependency:
                    dependencies.add(ArticleComparer.DataDependency.valueOf(paramValue));
                    break;
                case articleComparisonModel:
                    this.articleComparisonModel = new File(paramValue);
                    break;
                case labelDisambiguationModel:
                    this.labelDisambiguationModel = new File(paramValue);
                    break;
                case labelComparisonModel:
                    this.labelComparisonModel = new File(paramValue);
                    break;
                case comparisonSnippetModel:
                    this.comparisonSnippetModel = new File(paramValue);
                    break;
                case topicDisambiguationModel:
                    this.topicDisambiguationModel = new File(paramValue);
                    break;
                case linkDetectionModel:
                    this.linkDetectionModel = new File(paramValue);
                    break;
                case tokenModel:
                    setTokenizer(new File(paramValue));
                    break;
                case sentenceModel:
                    setSentenceDetector(new File(paramValue));
                    break;
                default:
                    Logger.getLogger(WikipediaConfiguration.class).warn("Ignoring unknown parameter: '" + paramName + "'");
                    break;
            }
        }
        // Publish the collected dependencies once, after every child has been read.
        if (!dependencies.isEmpty()) {
            this.articleComparisonDependencies = dependencies;
        }
    }

    /**
     * Extracts the value of a parameter element. Only the element's first
     * child node is considered, and only if it is a text node.
     *
     * @param element parameter element
     * @return the trimmed text of the first child, or {@code null} if the
     *         element has no children, its first child is not a text node,
     *         or the trimmed text is empty
     */
    private String getParamValue(Element element) {
        Node firstChild = element.getChildNodes().item(0);
        if (firstChild != null && firstChild.getNodeType() == Node.TEXT_NODE) {
            String value = firstChild.getTextContent().trim();
            return value.isEmpty() ? null : value;
        }
        return null;
    }

    /**
     * Maps an element name to its {@link ParamName} constant.
     *
     * @param str element name; surrounding whitespace is ignored
     * @return the constant with exactly that name, or {@code ParamName.unknown}
     *         if the lookup fails for any reason
     */
    private ParamName resolveParamName(String str) {
        ParamName resolved;
        try {
            resolved = ParamName.valueOf(str.trim());
        } catch (Exception ignored) {
            // Any failed lookup is treated as an unknown parameter.
            resolved = ParamName.unknown;
        }
        return resolved;
    }

    /**
     * Reads article ids from a tab-separated file. The first column of every
     * line is trimmed and parsed as a decimal integer; other columns are
     * ignored.
     *
     * <p>The reader is closed on every path, including when a line fails to
     * parse.
     *
     * @param file file with one article per line
     * @return the set of ids found in the file
     * @throws NumberFormatException if the first column of any line is not an integer
     * @throws IOException if the file cannot be opened or read
     */
    private TIntHashSet gatherArticles(File file) throws NumberFormatException, IOException {
        TIntHashSet ids = new TIntHashSet();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                ids.add(Integer.parseInt(line.split("\t")[0].trim()));
            }
        }
        return ids;
    }
}
