package org.wikipedia.miner.annotation;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.Vector;
import org.wikipedia.miner.annotation.preprocessing.PreprocessedDocument;
import org.wikipedia.miner.model.Article;
import org.wikipedia.miner.model.Label;
import org.wikipedia.miner.model.Page;
import org.wikipedia.miner.model.Wikipedia;
import org.wikipedia.miner.util.NGrammer;
import org.wikipedia.miner.util.Position;
import org.wikipedia.miner.util.RelatednessCache;

/* loaded from: input_file:org/wikipedia/miner/annotation/TopicDetector.class */
public class TopicDetector {
    // Source of labels (getLabel) and pages (getPageById); also handed to every Topic that is created.
    private Wikipedia wikipedia;
    // Supplies the thresholds used here (min link/sense probability, max context size, max label length),
    // the article comparer for new relatedness caches, and the per-sense probability estimate.
    private Disambiguator disambiguator;
    // STRICT keeps only the best-ranked sense per reference; any other value keeps every retained sense.
    private DisambiguationPolicy disambigPolicy = DisambiguationPolicy.STRICT;
    // When false, senses whose page type is "disambiguation" are skipped during sense ranking.
    private boolean allowDisambiguations = false;
    // Bound on how many weighted articles calculateRelatedness compares each topic against.
    private int maxTopicsForRelatedness = 25;
    // N-gram extractor built in the constructor; its max n is the disambiguator's max label length.
    private NGrammer nGrammer;

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/wikipedia/miner/annotation/TopicDetector$CachedSense.class */
    /**
     * A candidate sense of a label together with the scores computed for it during disambiguation.
     * Instances sort by {@link #disambigConfidence} in descending order, so after sorting a list the
     * most confident sense is at index 0.
     */
    public class CachedSense implements Comparable<CachedSense> {
        /** Identifier of the sense (used as the topic id by the enclosing detector). */
        int id;
        /** Prior probability of the sense, as passed to the constructor. */
        double commonness;
        /** Relatedness of the sense to the surrounding context. */
        double relatedness;
        /** Confidence that this sense is the intended one; the sort key. */
        double disambigConfidence;

        /**
         * @param i  sense identifier
         * @param d  commonness (prior probability) of the sense
         * @param d2 relatedness of the sense to the context
         * @param d3 disambiguation confidence
         */
        public CachedSense(int i, double d, double d2, double d3) {
            this.id = i;
            this.commonness = d;
            this.relatedness = d2;
            this.disambigConfidence = d3;
        }

        /**
         * Orders senses by descending disambiguation confidence.
         *
         * @param cachedSense the sense to compare against
         * @return a negative value if this sense is more confident than {@code cachedSense},
         *         zero if equally confident, a positive value otherwise
         */
        @Override
        public int compareTo(CachedSense cachedSense) {
            // Arguments are swapped to get descending order without boxing or negating the result.
            return Double.compare(cachedSense.disambigConfidence, this.disambigConfidence);
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/annotation/TopicDetector$DisambiguationPolicy.class */
    /**
     * Controls how many candidate senses of a reference are turned into topics.
     */
    public enum DisambiguationPolicy {
        /** Only the highest-confidence retained sense of each reference becomes a topic. */
        STRICT,
        /** Every retained sense of each reference becomes a topic. */
        LOOSE
    }

    public TopicDetector(Wikipedia wikipedia, Disambiguator disambiguator) throws IOException {
        this.wikipedia = wikipedia;
        this.disambiguator = disambiguator;
        this.nGrammer = new NGrammer(wikipedia.getConfig().getSentenceDetector(), wikipedia.getConfig().getTokenizer());
        this.nGrammer.setMaxN(disambiguator.getMaxLabelLength());
    }

    /**
     * @return the policy currently used to decide how many senses per reference become topics
     */
    public DisambiguationPolicy getDisambiguationPolicy() {
        return disambigPolicy;
    }

    /**
     * Replaces the policy used to decide how many senses per reference become topics.
     *
     * @param disambiguationPolicy the policy to use from now on
     */
    public void setDisambiguationPolicy(DisambiguationPolicy disambiguationPolicy) {
        disambigPolicy = disambiguationPolicy;
    }

    /**
     * @return {@code true} if senses whose page type is "disambiguation" may be considered as candidates
     */
    public boolean areDisambiguationsAllowed() {
        return allowDisambiguations;
    }

    /**
     * Sets whether senses whose page type is "disambiguation" may be considered as candidates.
     *
     * @param z {@code true} to allow such senses, {@code false} to skip them
     */
    public void allowDisambiguations(boolean z) {
        allowDisambiguations = z;
    }

    /**
     * Detects the topics mentioned in a preprocessed document.
     * References are extracted from the preprocessed text, disambiguated using the document's context
     * text, and scored for relatedness to each other. Topics the document reports as banned are left
     * out of the result; note that they are removed only after relatedness has been calculated.
     *
     * @param preprocessedDocument the document to analyse
     * @param relatednessCache     cache of relatedness measures; a fresh one is created when {@code null}
     * @return the detected topics that are not banned for this document
     * @throws Exception propagated from reference disambiguation or relatedness calculation
     */
    public Vector<Topic> getTopics(PreprocessedDocument preprocessedDocument, RelatednessCache relatednessCache) throws Exception {
        RelatednessCache cache = relatednessCache;
        if (cache == null) {
            cache = new RelatednessCache(this.disambiguator.getArticleComparer());
        }

        Vector<TopicReference> references = getReferences(preprocessedDocument.getPreprocessedText());
        String contextText = preprocessedDocument.getContextText();
        int documentLength = preprocessedDocument.getOriginalText().length();

        Collection<Topic> detected = getTopics(references, contextText, documentLength, cache).values();
        calculateRelatedness(detected, cache);

        Vector<Topic> permitted = new Vector<>();
        Iterator<Topic> cursor = detected.iterator();
        while (cursor.hasNext()) {
            Topic candidate = cursor.next();
            if (preprocessedDocument.isTopicBanned(candidate.getId())) {
                continue;
            }
            permitted.add(candidate);
        }
        return permitted;
    }

    /**
     * Detects the topics mentioned in a plain string.
     * No additional context text is used, and the string's own length is passed on as the document length.
     *
     * @param str              the text to analyse
     * @param relatednessCache cache of relatedness measures; a fresh one is created when {@code null}
     * @return the detected topics, each scored for relatedness to the other topics
     * @throws Exception propagated from reference disambiguation or relatedness calculation
     */
    public Collection<Topic> getTopics(String str, RelatednessCache relatednessCache) throws Exception {
        RelatednessCache cache = (relatednessCache != null)
                ? relatednessCache
                : new RelatednessCache(this.disambiguator.getArticleComparer());

        Vector<TopicReference> references = getReferences(str);
        HashMap<Integer, Topic> topicsById = getTopics(references, "", str.length(), cache);

        Collection<Topic> detected = topicsById.values();
        calculateRelatedness(detected, cache);
        return detected;
    }

    /**
     * Gives every topic a weighted-average relatedness to the leading article topics of the collection.
     * <p>
     * Each topic of page type "article" is loaded as an {@link Article} and weighted by
     * {@code averageLinkProbability * occurrences}. Every topic is then compared with at most
     * {@code maxTopicsForRelatedness} of those articles, in the iteration order of a {@link TreeSet}
     * (NOTE(review): presumably Article's natural ordering ranks by weight — confirm), and the
     * weight-normalised mean relatedness is stored on the topic.
     *
     * @param collection       the topics to score; each one is updated in place
     * @param relatednessCache cache used to obtain pairwise relatedness
     * @throws Exception propagated from page lookup or relatedness computation
     */
    private void calculateRelatedness(Collection<Topic> collection, RelatednessCache relatednessCache) throws Exception {
        TreeSet<Article> weightedArticles = new TreeSet<>();
        for (Topic topic : collection) {
            if (topic.getType() == Page.PageType.article) {
                Article article = (Article) this.wikipedia.getPageById(topic.getId());
                article.setWeight(Double.valueOf(topic.getAverageLinkProbability() * topic.getOccurances()));
                weightedArticles.add(article);
            }
        }

        for (Topic topic : collection) {
            double totalWeight = 0.0d;
            double totalWeightedRelatedness = 0.0d;
            int compared = 0;
            for (Article article : weightedArticles) {
                // Stop once exactly maxTopicsForRelatedness articles have been used; a post-increment
                // "count++ > max" guard would let one extra article through.
                if (compared >= this.maxTopicsForRelatedness) {
                    break;
                }
                compared++;
                double weight = article.getWeight().doubleValue();
                totalWeight += weight;
                totalWeightedRelatedness += weight * relatednessCache.getRelatedness(topic, article);
            }
            // With no article topics (or all-zero weights) the quotient would be 0/0 = NaN;
            // report zero relatedness instead of propagating NaN to consumers of the topic.
            float relatedness = (totalWeight == 0.0d)
                    ? 0.0f
                    : (float) (totalWeightedRelatedness / totalWeight);
            topic.setRelatednessToOtherTopics(relatedness);
        }
    }

    /**
     * Finds every n-gram of the text that corresponds to a known label whose link probability reaches
     * the disambiguator's minimum, and wraps each as a reference positioned at the n-gram's span.
     *
     * @param str the text to scan
     * @return references in the order the n-grams were produced; empty if none qualify
     */
    private Vector<TopicReference> getReferences(String str) {
        Vector<TopicReference> references = new Vector<>();
        for (NGrammer.NGramSpan span : this.nGrammer.ngramPosDetect(str)) {
            Label label = this.wikipedia.getLabel(span, str);
            if (!label.exists()) {
                continue;
            }
            boolean linkedOftenEnough = label.getLinkProbability() >= this.disambiguator.getMinLinkProbability();
            if (linkedOftenEnough) {
                Position position = new Position(span.getStart(), span.getEnd());
                references.add(new TopicReference(label, position));
            }
        }
        return references;
    }

    /** A sense is kept as a candidate only if its disambiguation confidence exceeds this value. */
    private static final double MIN_SENSE_CONFIDENCE = 0.1d;

    /**
     * Disambiguates the given references and groups them into topics keyed by sense id.
     * <p>
     * A disambiguation context is built from the distinct labels of the references followed by the
     * distinct labels found in the context text. Candidate senses are ranked once per label text and
     * reused for later references with the same text. Under {@link DisambiguationPolicy#STRICT} each
     * reference is attached to its best sense only; otherwise it is attached to every retained sense.
     *
     * @param vector           references to disambiguate
     * @param str              additional context text; its labels contribute to the context only
     * @param i                document length, passed on to each {@link Topic} that is created
     * @param relatednessCache cache of relatedness measures; may be {@code null}
     * @return topics keyed by sense id
     * @throws Exception propagated from context construction or sense scoring
     */
    private HashMap<Integer, Topic> getTopics(Vector<TopicReference> vector, String str, int i, RelatednessCache relatednessCache) throws Exception {
        HashSet<String> seenLabelTexts = new HashSet<>();
        Vector<Label> contextLabels = new Vector<>();
        collectDistinctLabels(vector, seenLabelTexts, contextLabels);
        collectDistinctLabels(getReferences(str), seenLabelTexts, contextLabels);

        Context context;
        if (relatednessCache == null) {
            // NOTE(review): this branch scales the minimum sense probability by 5 while the non-null
            // branch does not. Both public getTopics overloads substitute a non-null cache before
            // calling, so this path is not reached from this class; confirm which value is intended.
            context = new Context(contextLabels, new RelatednessCache(this.disambiguator.getArticleComparer()), this.disambiguator.getMaxContextSize(), this.disambiguator.getMinSenseProbability() * 5.0d);
        } else {
            context = new Context(contextLabels, relatednessCache, this.disambiguator.getMaxContextSize(), this.disambiguator.getMinSenseProbability());
        }

        HashMap<Integer, Topic> topicsById = new HashMap<>();
        // The same label text is always disambiguated the same way within one call, so rank it once.
        HashMap<String, ArrayList<CachedSense>> sensesByLabelText = new HashMap<>();
        for (TopicReference reference : vector) {
            Label label = reference.getLabel();
            String labelText = label.getText();
            ArrayList<CachedSense> senses = sensesByLabelText.get(labelText);
            if (senses == null) {
                senses = rankSenses(label, context);
                sensesByLabelText.put(labelText, senses);
            }

            if (this.disambigPolicy != DisambiguationPolicy.STRICT) {
                for (CachedSense sense : senses) {
                    recordReference(topicsById, sense, reference, i);
                }
            } else if (!senses.isEmpty()) {
                // The list is sorted by descending confidence, so index 0 is the best sense.
                recordReference(topicsById, senses.get(0), reference, i);
            }
        }
        return topicsById;
    }

    /** Appends to {@code labels} the label of each reference whose text is not yet in {@code seenTexts}. */
    private void collectDistinctLabels(Vector<TopicReference> references, HashSet<String> seenTexts, Vector<Label> labels) {
        for (TopicReference reference : references) {
            Label label = reference.getLabel();
            if (seenTexts.add(label.getText())) {
                labels.add(label);
            }
        }
    }

    /** Scores the label's senses against the context and returns the retained ones, best first. */
    private ArrayList<CachedSense> rankSenses(Label label, Context context) throws Exception {
        ArrayList<CachedSense> retained = new ArrayList<>();
        double minSenseProbability = this.disambiguator.getMinSenseProbability();
        for (Label.Sense sense : label.getSenses()) {
            double commonness = sense.getPriorProbability();
            if (commonness < minSenseProbability) {
                // Stops at the first sense below the threshold; assumes getSenses() is ordered by
                // descending prior probability — TODO confirm.
                break;
            }
            if (!this.allowDisambiguations && sense.getType() == Page.PageType.disambiguation) {
                continue;
            }
            double relatedness = context.getRelatednessTo(sense);
            double confidence = this.disambiguator.getProbabilityOfSense(commonness, relatedness, context);
            if (confidence > MIN_SENSE_CONFIDENCE) {
                retained.add(new CachedSense(sense.getId(), commonness, relatedness, confidence));
            }
        }
        Collections.sort(retained);
        return retained;
    }

    /** Attaches the reference to the topic for the given sense, creating the topic on first use. */
    private void recordReference(HashMap<Integer, Topic> topicsById, CachedSense sense, TopicReference reference, int docLength) throws Exception {
        Topic topic = topicsById.get(Integer.valueOf(sense.id));
        if (topic == null) {
            topic = new Topic(this.wikipedia, sense.id, sense.relatedness, docLength);
            topicsById.put(Integer.valueOf(sense.id), topic);
        }
        topic.addReference(reference, sense.disambigConfidence);
    }
}
