package org.wikipedia.miner.comparison;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.Iterator;
import jsc.correlation.SpearmanCorrelation;
import jsc.datastructures.PairedData;
import org.apache.log4j.Logger;
import org.wikipedia.miner.comparison.ComparisonDataSet;
import org.wikipedia.miner.db.WEnvironment;
import org.wikipedia.miner.db.struct.DbIntList;
import org.wikipedia.miner.model.Article;
import org.wikipedia.miner.model.Wikipedia;
import org.wikipedia.miner.util.ProgressTracker;
import org.wikipedia.miner.util.WikipediaConfiguration;
import weka.classifiers.Classifier;
import weka.classifiers.functions.GaussianProcesses;
import weka.core.Instance;
import weka.wrapper.AttributeMissingException;
import weka.wrapper.ClassMissingException;
import weka.wrapper.Dataset;
import weka.wrapper.Decider;
import weka.wrapper.DeciderBuilder;
import weka.wrapper.InstanceBuilder;

/* loaded from: input_file:org/wikipedia/miner/comparison/ArticleComparer.class */
/**
 * Measures how related two {@link Article}s are, based on the overlap of their
 * incoming and/or outgoing page links.
 *
 * <p>For every enabled link direction the comparer derives a link-overlap measure
 * (called the "Google measure" in this code), the proportion of shared links and,
 * when {@link DataDependency#linkCounts} is enabled, an angle-based vector measure.
 * These features are either fed to a trained {@link Decider} or, when no model is
 * ready, simply averaged.
 */
public class ArticleComparer {
    /** Angle used for the vector measure when the cosine is undefined (a vector has zero length). */
    private static final double ORTHOGONAL_ANGLE = Math.PI / 2;

    private static final Logger LOG = Logger.getLogger(ArticleComparer.class);

    Wikipedia wikipedia;
    EnumSet<DataDependency> dependancies;
    int wikipediaArticleCount;
    /** Natural log of {@link #wikipediaArticleCount}; used as the upper bound in the Google measure. */
    Double m;
    // NOTE(review): never read or updated anywhere in this class.
    private long articlesCompared = 0;
    Decider<Attributes, Double> relatednessMeasurer;
    Dataset<Attributes, Double> trainingDataset;

    /** Features handed to the relatedness model, three per link direction. */
    public enum Attributes {
        inLinkGoogleMeasure,
        inLinkIntersection,
        inLinkVectorMeasure,
        outLinkGoogleMeasure,
        outLinkIntersection,
        outLinkVectorMeasure
    }

    /** Kinds of link data the comparer may rely on. */
    public enum DataDependency {
        pageLinksIn,
        pageLinksOut,
        linkCounts
    }

    /** Direction of the page links being compared. */
    public enum LinkDirection {
        In,
        Out
    }

    /**
     * Creates a comparer that uses the data dependencies named in the configuration
     * of the given wikipedia.
     *
     * @param wikipedia the wikipedia instance to compare articles of
     * @throws Exception if the configuration names no dependencies, or the
     *                   dependencies are rejected by {@link #init}
     */
    public ArticleComparer(Wikipedia wikipedia) throws Exception {
        WikipediaConfiguration config = wikipedia.getConfig();
        if (config.getArticleComparisonDependancies() == null) {
            throw new Exception("The given wikipedia configuration does not specify default article comparison data dependancies");
        }
        init(wikipedia, config.getArticleComparisonDependancies());
    }

    /**
     * Creates a comparer that uses the given data dependencies.
     *
     * @param wikipedia the wikipedia instance to compare articles of
     * @param enumSet   the dependencies to use; must contain pageLinksIn or pageLinksOut
     * @throws Exception if the dependencies contain neither link direction
     */
    public ArticleComparer(Wikipedia wikipedia, EnumSet<DataDependency> enumSet) throws Exception {
        init(wikipedia, enumSet);
    }

    /**
     * Shared constructor logic: validates the dependencies, caches the article
     * count and its logarithm, builds the decider and loads the configured model
     * if the configuration names one.
     */
    private void init(Wikipedia wikipedia, EnumSet<DataDependency> enumSet) throws Exception {
        // A null set is rejected with the same message instead of a bare NullPointerException.
        if (enumSet == null
                || (!enumSet.contains(DataDependency.pageLinksIn) && !enumSet.contains(DataDependency.pageLinksOut))) {
            throw new Exception("Dependancies must include at least pageLinksIn or pageLinksOut");
        }
        this.wikipedia = wikipedia;
        this.dependancies = enumSet;
        // Narrowing cast yields the same value as the deprecated new Long(...).intValue().
        this.wikipediaArticleCount =
                (int) wikipedia.getEnvironment().retrieveStatistic(WEnvironment.StatisticName.articleCount).longValue();
        this.m = Double.valueOf(Math.log(this.wikipediaArticleCount));
        this.relatednessMeasurer = new DeciderBuilder("articleComparer", Attributes.class)
                .setDefaultAttributeTypeNumeric()
                .setClassAttributeTypeNumeric("relatedness")
                .build();
        if (wikipedia.getConfig().getArticleComparisonModel() != null) {
            loadClassifier(wikipedia.getConfig().getArticleComparisonModel());
        }
    }

    /**
     * Returns the relatedness of two articles.
     *
     * <p>Identical ids give 1. If no link features could be computed, or neither
     * direction shares any link, the result is 0. Otherwise the trained decider is
     * used when it is ready; failing that, the available Google and vector measures
     * of the enabled directions are averaged. Measures that are missing (because
     * one direction had no links to compare) are left out of the average.
     *
     * @param article  the first article
     * @param article2 the second article
     * @return the relatedness value
     * @throws Exception if the decider fails to produce a decision
     */
    public Double getRelatedness(Article article, Article article2) throws Exception {
        if (article.getId() == article2.getId()) {
            return Double.valueOf(1.0d);
        }
        ArticleComparison comparison = getComparison(article, article2);
        if (comparison == null) {
            return Double.valueOf(0.0d);
        }
        if (isZeroOrMissing(comparison.getInLinkIntersectionProportion())
                && isZeroOrMissing(comparison.getOutLinkIntersectionProportion())) {
            return Double.valueOf(0.0d);
        }
        if (this.relatednessMeasurer.isReady()) {
            return (Double) this.relatednessMeasurer.getDecision(getInstance(comparison, null));
        }

        // No trained model: average whichever measures are enabled and present.
        boolean useLinkCounts = this.dependancies.contains(DataDependency.linkCounts);
        Number[] measures = new Number[4];
        if (this.dependancies.contains(DataDependency.pageLinksIn)) {
            measures[0] = comparison.getInLinkGoogleMeasure();
            if (useLinkCounts) {
                measures[1] = comparison.getInLinkVectorMeasure();
            }
        }
        if (this.dependancies.contains(DataDependency.pageLinksOut)) {
            measures[2] = comparison.getOutLinkGoogleMeasure();
            if (useLinkCounts) {
                measures[3] = comparison.getOutLinkVectorMeasure();
            }
        }
        int count = 0;
        double total = 0.0d;
        for (Number measure : measures) {
            // A direction whose features were never set yields null here; skip it
            // rather than fail with a NullPointerException.
            if (measure != null) {
                count++;
                total += measure.doubleValue();
            }
        }
        return Double.valueOf(count == 0 ? 0.0d : total / count);
    }

    /**
     * Rebuilds the training dataset from the given comparison data. Items with a
     * negative id, or whose articles cannot be constructed, are skipped.
     *
     * @param comparisonDataSet the manually rated article pairs
     * @throws Exception if a training instance cannot be built
     */
    public void train(ComparisonDataSet comparisonDataSet) throws Exception {
        this.trainingDataset = this.relatednessMeasurer.createNewDataset();
        ProgressTracker tracker =
                new ProgressTracker(comparisonDataSet.getItems().size(), "training", ArticleComparer.class);
        for (ComparisonDataSet.Item item : comparisonDataSet.getItems()) {
            if (item.getIdA() < 0 || item.getIdB() < 0) {
                continue;
            }
            // Both articles are loaded before checking so each invalid id is reported.
            Article articleA = loadArticle(item.getIdA());
            Article articleB = loadArticle(item.getIdB());
            if (articleA != null && articleB != null) {
                train(articleA, articleB, item.getRelatedness());
            }
            tracker.update();
        }
    }

    /**
     * Saves the current training dataset to a file.
     *
     * @throws IllegalStateException if no training data has been built or loaded
     */
    public void saveTrainingData(File file) throws IOException, Exception {
        requireTrainingDataset().save(file);
    }

    /** Replaces the current training dataset with one loaded from a file. */
    public void loadTrainingData(File file) throws IOException, Exception {
        this.trainingDataset = this.relatednessMeasurer.createNewDataset();
        this.trainingDataset.load(file);
    }

    /** Saves the relatedness decider to a file. */
    public void saveClassifier(File file) throws IOException {
        this.relatednessMeasurer.save(file);
    }

    /** Loads the relatedness decider from a file. */
    public void loadClassifier(File file) throws Exception {
        this.relatednessMeasurer.load(file);
    }

    /**
     * Trains the decider with the given classifier on the current training dataset.
     *
     * @throws IllegalStateException if no training data has been built or loaded
     */
    public void buildClassifier(Classifier classifier) throws Exception {
        this.relatednessMeasurer.train(classifier, requireTrainingDataset());
    }

    /**
     * Trains the decider with a {@link GaussianProcesses} classifier on the current
     * training dataset.
     *
     * @throws IllegalStateException if no training data has been built or loaded
     */
    public void buildDefaultClassifier() throws Exception {
        this.relatednessMeasurer.train(new GaussianProcesses(), requireTrainingDataset());
    }

    /**
     * Compares the relatedness computed by this comparer with the ratings in the
     * given data set.
     *
     * @param comparisonDataSet the manually rated article pairs
     * @return the Spearman correlation between manual and computed relatedness
     * @throws Exception if relatedness cannot be computed for a pair
     */
    public SpearmanCorrelation test(ComparisonDataSet comparisonDataSet) throws Exception {
        ArrayList<Double> manualScores = new ArrayList<Double>();
        ArrayList<Double> computedScores = new ArrayList<Double>();
        ProgressTracker tracker =
                new ProgressTracker(comparisonDataSet.getItems().size(), "testing", ArticleComparer.class);
        for (ComparisonDataSet.Item item : comparisonDataSet.getItems()) {
            if (item.getIdA() < 0 || item.getIdB() < 0) {
                LOG.info("- ignoring " + item.getIdA() + ":" + item.getTermA() + " vs. " + item.getIdB() + ":" + item.getTermB());
                continue;
            }
            Article articleA = loadArticle(item.getIdA());
            Article articleB = loadArticle(item.getIdB());
            if (articleA != null && articleB != null) {
                manualScores.add(Double.valueOf(item.getRelatedness()));
                computedScores.add(getRelatedness(articleA, articleB));
            }
            tracker.update();
        }
        double[][] pairs = new double[manualScores.size()][2];
        for (int row = 0; row < pairs.length; row++) {
            pairs[row][0] = manualScores.get(row).doubleValue();
            pairs[row][1] = computedScores.get(row).doubleValue();
        }
        return new SpearmanCorrelation(new PairedData(pairs));
    }

    /** Adds one training instance for the pair, unless no link features could be computed. */
    private void train(Article article, Article article2, double d) throws Exception {
        ArticleComparison comparison = getComparison(article, article2);
        if (comparison == null) {
            return;
        }
        this.trainingDataset.add(getInstance(comparison, Double.valueOf(d)));
    }

    /**
     * Computes the link features for a pair of articles in every enabled direction.
     *
     * @param article  the first article
     * @param article2 the second article
     * @return the comparison, or {@code null} if features were set for neither direction
     */
    public ArticleComparison getComparison(Article article, Article article2) {
        boolean useLinkCounts = this.dependancies.contains(DataDependency.linkCounts);
        ArticleComparison comparison = new ArticleComparison(article, article2);
        if (this.dependancies.contains(DataDependency.pageLinksIn)) {
            comparison = setPageLinkFeatures(comparison, LinkDirection.In, useLinkCounts);
        }
        if (this.dependancies.contains(DataDependency.pageLinksOut)) {
            comparison = setPageLinkFeatures(comparison, LinkDirection.Out, useLinkCounts);
        }
        if (comparison.inLinkFeaturesSet() || comparison.outLinkFeaturesSet()) {
            return comparison;
        }
        return null;
    }

    /**
     * Walks the two link lists of the compared articles in step and stores the
     * resulting features on the comparison for the given direction.
     *
     * <p>The walk assumes both lists are sorted by ascending id (the merge only
     * ever advances past the smaller id). The comparison is returned untouched
     * when both articles are the same or either has no links in this direction.
     *
     * @param articleComparison the comparison to fill in
     * @param linkDirection     which links to compare
     * @param z                 whether to also compute the link-count based vector measure
     * @return the same comparison instance
     */
    private ArticleComparison setPageLinkFeatures(ArticleComparison articleComparison, LinkDirection linkDirection, boolean z) {
        int idA = articleComparison.getArticleA().getId();
        int idB = articleComparison.getArticleB().getId();
        if (idA == idB) {
            return articleComparison;
        }
        ArrayList<Integer> linksA = getLinks(idA, linkDirection);
        ArrayList<Integer> linksB = getLinks(idB, linkDirection);
        if (linksA.isEmpty() || linksB.isEmpty()) {
            return articleComparison;
        }

        boolean outgoing = linkDirection == LinkDirection.Out;
        int totalLinksA = 0;
        int totalLinksB = 0;
        if (z) {
            if (outgoing) {
                totalLinksA = articleComparison.getArticleA().getTotalLinksOutCount();
                totalLinksB = articleComparison.getArticleB().getTotalLinksOutCount();
            } else {
                totalLinksA = articleComparison.getArticleA().getTotalLinksInCount();
                totalLinksB = articleComparison.getArticleB().getTotalLinksInCount();
            }
        }

        int intersection = 0;
        int union = 0;
        int indexA = 0;
        int indexB = 0;
        // Running sums for the cosine between the two weight vectors; accumulated in
        // visiting order, so no per-link list of boxed weights is needed.
        double dotProduct = 0.0d;
        double squaredNormA = 0.0d;
        double squaredNormB = 0.0d;

        while (indexA < linksA.size() || indexB < linksB.size()) {
            Integer linkA = indexA < linksA.size() ? linksA.get(indexA) : null;
            Integer linkB = indexB < linksB.size() ? linksB.get(indexB) : null;
            boolean useA = false;
            boolean useB = false;
            boolean mutual = false;
            int linkId;
            if (linkA != null && linkB != null && linkA.equals(linkB)) {
                // Link shared by both articles.
                useA = true;
                useB = true;
                linkId = linkA.intValue();
                intersection++;
            } else if (linkA == null || (linkB != null && linkA.intValue() >= linkB.intValue())) {
                // Link only in B's list; it still counts as shared if it is article A itself.
                useB = true;
                linkId = linkB.intValue();
                if (linkId == idA) {
                    intersection++;
                    mutual = true;
                }
            } else {
                // Link only in A's list; it still counts as shared if it is article B itself.
                useA = true;
                linkId = linkA.intValue();
                if (linkId == idB) {
                    intersection++;
                    mutual = true;
                }
            }
            // NOTE(review): the article is only needed for link counts, but it is still
            // created on every iteration because the constructor's behaviour for bad ids
            // is not visible here.
            Article linkArticle = new Article(this.wikipedia.getEnvironment(), linkId);
            union++;

            if (z) {
                int linkArticleTotal = outgoing ? linkArticle.getTotalLinksInCount() : linkArticle.getTotalLinksOutCount();
                double weightA;
                double weightB;
                if (mutual) {
                    weightA = 1.0d;
                    weightB = 1.0d;
                } else {
                    weightA = useA ? getLfiaf(1, totalLinksA, linkArticleTotal) : 0.0d;
                    weightB = useB ? getLfiaf(1, totalLinksB, linkArticleTotal) : 0.0d;
                }
                dotProduct += weightA * weightB;
                squaredNormA += weightA * weightA;
                squaredNormB += weightB * weightB;
            }
            if (useA) {
                indexA++;
            }
            if (useB) {
                indexB++;
            }
        }

        double googleMeasure;
        if (intersection == 0) {
            googleMeasure = 1.0d;
        } else {
            double logA = Math.log(linksA.size());
            double logB = Math.log(linksB.size());
            googleMeasure = (Math.max(logA, logB) - Math.log(intersection))
                    / (this.m.doubleValue() - Math.min(logA, logB));
        }
        Double normalizedGoogleMeasure =
                Double.valueOf(ArticleComparison.normalizeGoogleMeasure(Double.valueOf(googleMeasure)));

        Double normalizedVectorMeasure = null;
        if (z) {
            // Both link lists are non-empty here, so at least one weight pair was accumulated.
            double cosine = dotProduct / (Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB));
            // Rounding can push the cosine marginally outside [-1, 1], which would make
            // acos return NaN and wrongly score near-identical vectors as orthogonal.
            // Math.min/max propagate NaN, so the zero-length case is still detected below.
            cosine = Math.max(-1.0d, Math.min(1.0d, cosine));
            Double angle = Double.valueOf(Math.acos(cosine));
            if (angle.isNaN()) {
                angle = Double.valueOf(ORTHOGONAL_ANGLE);
            }
            normalizedVectorMeasure = Double.valueOf(ArticleComparison.normalizeVectorMeasure(angle));
        }

        // Cast before dividing: int / int would truncate the proportion to 0 or 1.
        double intersectionProportion = union == 0 ? 0.0d : (double) intersection / union;
        if (outgoing) {
            articleComparison.setOutLinkFeatures(normalizedGoogleMeasure, normalizedVectorMeasure,
                    Integer.valueOf(union), Double.valueOf(intersectionProportion));
        } else {
            articleComparison.setInLinkFeatures(normalizedGoogleMeasure, normalizedVectorMeasure,
                    Integer.valueOf(union), Double.valueOf(intersectionProportion));
        }
        return articleComparison;
    }

    /**
     * Fetches the ids linked to or from an article.
     *
     * @return the stored link ids, or an empty list when nothing is stored for the article
     */
    private ArrayList<Integer> getLinks(int i, LinkDirection linkDirection) {
        DbIntList stored = linkDirection == LinkDirection.In
                ? this.wikipedia.getEnvironment().getDbPageLinkInNoSentences().retrieve(Integer.valueOf(i))
                : this.wikipedia.getEnvironment().getDbPageLinkOutNoSentences().retrieve(Integer.valueOf(i));
        if (stored == null) {
            return new ArrayList<>();
        }
        ArrayList<Integer> values = stored.getValues();
        return values == null ? new ArrayList<Integer>() : values;
    }

    /**
     * Computes the weight {@code (i / i3) * ln(articleCount / i2)} using
     * floating-point arithmetic.
     *
     * @param i  the link count (callers in this class always pass 1)
     * @param i2 the total link count of the compared article
     * @param i3 the total link count of the linked article, in the opposite direction
     * @return the weight, or 0 when any argument is 0 (the quotient would be undefined)
     */
    private double getLfiaf(int i, int i2, int i3) {
        if (i == 0 || i2 == 0 || i3 == 0) {
            return 0.0d;
        }
        // Cast before dividing: int / int would truncate both quotients.
        double frequency = (double) i / i3;
        double logRatio = Math.log((double) this.wikipediaArticleCount / i2);
        return frequency * logRatio;
    }

    /** Maps {@code null} to Weka's missing-value marker; otherwise returns the number as a double. */
    private double wrapMissingValue(Number number) {
        return number == null ? Instance.missingValue() : number.doubleValue();
    }

    /**
     * Builds a Weka instance from a comparison. Attributes of a disabled direction
     * (or the vector measure when link counts are disabled) are set to 0, and any
     * remaining missing values are replaced with 0 before the instance is built.
     *
     * @param articleComparison the computed link features
     * @param d                 the known relatedness for training, or {@code null} when classifying
     */
    private Instance getInstance(ArticleComparison articleComparison, Double d) throws ClassMissingException, AttributeMissingException {
        InstanceBuilder builder = this.relatednessMeasurer.getInstanceBuilder();
        boolean useLinkCounts = this.dependancies.contains(DataDependency.linkCounts);

        if (this.dependancies.contains(DataDependency.pageLinksIn)) {
            // In-link values go through wrapMissingValue just like the out-link values,
            // so an unset direction becomes a missing value rather than a raw null.
            builder.setAttribute(Attributes.inLinkGoogleMeasure, Double.valueOf(wrapMissingValue(articleComparison.getInLinkGoogleMeasure())));
            builder.setAttribute(Attributes.inLinkIntersection, Double.valueOf(wrapMissingValue(articleComparison.getInLinkIntersectionProportion())));
            if (useLinkCounts) {
                builder.setAttribute(Attributes.inLinkVectorMeasure, Double.valueOf(wrapMissingValue(articleComparison.getInLinkVectorMeasure())));
            } else {
                builder.setAttribute(Attributes.inLinkVectorMeasure, 0);
            }
        } else {
            builder.setAttribute(Attributes.inLinkGoogleMeasure, 0);
            builder.setAttribute(Attributes.inLinkIntersection, 0);
            builder.setAttribute(Attributes.inLinkVectorMeasure, 0);
        }

        if (this.dependancies.contains(DataDependency.pageLinksOut)) {
            builder.setAttribute(Attributes.outLinkGoogleMeasure, Double.valueOf(wrapMissingValue(articleComparison.getOutLinkGoogleMeasure())));
            builder.setAttribute(Attributes.outLinkIntersection, Double.valueOf(wrapMissingValue(articleComparison.getOutLinkIntersectionProportion())));
            if (useLinkCounts) {
                builder.setAttribute(Attributes.outLinkVectorMeasure, Double.valueOf(wrapMissingValue(articleComparison.getOutLinkVectorMeasure())));
            } else {
                builder.setAttribute(Attributes.outLinkVectorMeasure, 0);
            }
        } else {
            builder.setAttribute(Attributes.outLinkGoogleMeasure, 0);
            builder.setAttribute(Attributes.outLinkIntersection, 0);
            builder.setAttribute(Attributes.outLinkVectorMeasure, 0);
        }

        if (d != null) {
            builder.setClassAttribute(d);
        }
        builder.replaceAllMissingValuesWith(Double.valueOf(0.0d));
        return builder.build();
    }

    /** Returns true when the value is absent or exactly zero. */
    private static boolean isZeroOrMissing(Number value) {
        return value == null || value.doubleValue() == 0.0d;
    }

    /** Constructs the article with the given id, logging a warning and returning null on failure. */
    private Article loadArticle(int id) {
        try {
            return new Article(this.wikipedia.getEnvironment(), id);
        } catch (Exception e) {
            LOG.warn(id + " is not a valid article", e);
            return null;
        }
    }

    /** Returns the training dataset, failing with a clear message when none exists yet. */
    private Dataset<Attributes, Double> requireTrainingDataset() {
        if (this.trainingDataset == null) {
            throw new IllegalStateException("No training data available; call train() or loadTrainingData() first");
        }
        return this.trainingDataset;
    }
}
