package org.wikipedia.miner.comparison;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;
import jsc.correlation.SpearmanCorrelation;
import jsc.datastructures.PairedData;
import org.wikipedia.miner.model.Wikipedia;
import org.wikipedia.miner.util.ProgressTracker;
import weka.classifiers.Classifier;
import weka.classifiers.functions.GaussianProcesses;
import weka.core.Instance;
import weka.wrapper.Dataset;
import weka.wrapper.Decider;
import weka.wrapper.DeciderBuilder;
import weka.wrapper.InstanceBuilder;

/* loaded from: input_file:org/wikipedia/miner/comparison/ConnectionSnippetWeighter.class */
/**
 * Assigns a weight to {@link ConnectionSnippet}s.
 *
 * <p>When the underlying {@link Decider} reports that it is ready (a model has been loaded or
 * trained), weights come from that decider. Otherwise the weight falls back to the mean of the
 * relatedness between the snippet's source and each of its two topics, as computed by the
 * supplied {@link ArticleComparer}.
 *
 * <p>Typical training flow, as exposed by this class: {@link #train(Vector)} (or
 * {@link #loadTrainingData(File)}), then {@link #buildClassifier(Classifier)} or
 * {@link #buildDefaultClassifier()}, then optionally {@link #saveClassifier(File)}.
 */
public class ConnectionSnippetWeighter {
    /** Wikipedia instance this weighter was configured from; only read in the constructor. */
    private final Wikipedia wikipedia;

    /** Supplies relatedness between a snippet's source and its topics. */
    private final ArticleComparer cmp;

    /**
     * Decider producing snippet weights. All attributes are numeric unless explicitly marked
     * boolean below; the class attribute ("snippetWeight") is numeric.
     */
    private final Decider<Attributes, Double> snippetWeighter = new DeciderBuilder("connectionSnippetWeighter", Attributes.class)
            .setDefaultAttributeTypeNumeric()
            .setAttributeTypeBoolean(Attributes.isTopic1)
            .setAttributeTypeBoolean(Attributes.isTopic2)
            .setAttributeTypeBoolean(Attributes.isAfterHeading)
            .setAttributeTypeBoolean(Attributes.isListItem)
            .setAttributeTypeBoolean(Attributes.isFromFirstParagraph)
            .setClassAttributeTypeNumeric("snippetWeight")
            .build();

    /** Training instances; {@code null} until {@link #train} or {@link #loadTrainingData} runs. */
    private Dataset<Attributes, Double> trainingDataset;

    /**
     * Features describing a snippet to the decider.
     *
     * <p>Decompiler note: this enum was package-private in the original class file.
     *
     * <p>NOTE(review): {@code isFromFirstParagraph} is registered as a boolean attribute but is
     * never populated when instances are built in this class - confirm whether that is intended.
     */
    public enum Attributes {
        generality,
        inLinkCount,
        outLinkCount,
        isTopic1,
        relatednessToTopic1,
        isTopic2,
        relatednessToTopic2,
        sentenceIndex,
        wordCount,
        isListItem,
        isFromFirstParagraph,
        isAfterHeading
    }

    /**
     * Creates a weighter and, if the Wikipedia configuration names a comparison snippet model,
     * loads it into the decider.
     *
     * @param wikipedia the Wikipedia instance whose configuration is consulted for a model
     * @param articleComparer comparer used to measure relatedness between source and topics
     * @throws Exception if loading the configured model fails
     */
    public ConnectionSnippetWeighter(Wikipedia wikipedia, ArticleComparer articleComparer) throws Exception {
        this.wikipedia = wikipedia;
        this.cmp = articleComparer;

        File configuredModel = wikipedia.getConfig().getComparisonSnippetModel();
        if (configuredModel != null) {
            loadClassifier(configuredModel);
        }
    }

    /**
     * Computes the weight of a snippet.
     *
     * @param connectionSnippet the snippet to weigh
     * @return the decider's decision when the decider is ready; otherwise the average of the
     *     relatedness of the snippet's source to topic 1 and to topic 2
     * @throws Exception if relatedness computation or the decider fails
     */
    public double getWeight(ConnectionSnippet connectionSnippet) throws Exception {
        if (this.snippetWeighter.isReady()) {
            return ((Double) this.snippetWeighter.getDecision(getInstance(connectionSnippet))).doubleValue();
        }

        // No model available: fall back to the mean relatedness of the source to both topics.
        double toTopic1 = this.cmp.getRelatedness(connectionSnippet.getSource(), connectionSnippet.getTopic1()).doubleValue();
        double toTopic2 = this.cmp.getRelatedness(connectionSnippet.getSource(), connectionSnippet.getTopic2()).doubleValue();
        return (toTopic1 + toTopic2) / 2.0d;
    }

    /**
     * Replaces the current training dataset with instances built from the given snippets.
     *
     * @param vector manually weighted snippets; every snippet must have a non-null weight
     * @throws Exception if any snippet is unweighted ("Training snippet is not weighted") or
     *     instance construction fails
     */
    public void train(Vector<ConnectionSnippet> vector) throws Exception {
        this.trainingDataset = this.snippetWeighter.createNewDataset();
        ProgressTracker progress = new ProgressTracker(vector.size(), "training", ConnectionSnippetWeighter.class);

        for (ConnectionSnippet snippet : vector) {
            if (snippet.getWeight() == null) {
                throw new Exception("Training snippet is not weighted");
            }
            this.trainingDataset.add(getInstance(snippet));
            progress.update();
        }
    }

    /**
     * Compares weights produced by {@link #getWeight} against the snippets' existing weights.
     *
     * @param vector weighted snippets to evaluate; every snippet must have a non-null weight
     * @return the Spearman correlation over (existing weight, computed weight) pairs
     * @throws Exception if any snippet is unweighted ("Testing snippet is not weighted") or
     *     weight computation fails
     */
    public SpearmanCorrelation test(Vector<ConnectionSnippet> vector) throws Exception {
        // Column 0: weight carried by the snippet; column 1: weight computed by this class.
        double[][] pairs = new double[vector.size()][2];
        // Progress is reported under this class, matching train().
        ProgressTracker progress = new ProgressTracker(vector.size(), "testing", ConnectionSnippetWeighter.class);

        int row = 0;
        for (ConnectionSnippet snippet : vector) {
            if (snippet.getWeight() == null) {
                throw new Exception("Testing snippet is not weighted");
            }
            pairs[row][0] = snippet.getWeight().doubleValue();
            pairs[row][1] = getWeight(snippet);
            row++;
            progress.update();
        }
        return new SpearmanCorrelation(new PairedData(pairs));
    }

    /**
     * Saves the current training dataset.
     *
     * @param file destination file
     * @throws IllegalStateException if no training data has been gathered or loaded yet
     * @throws IOException if the dataset reports an I/O failure
     * @throws Exception if saving fails for any other reason
     */
    public void saveTrainingData(File file) throws IOException, Exception {
        requireTrainingDataset().save(file);
    }

    /**
     * Loads training data from a file into the training dataset, creating an empty dataset first
     * if none exists yet (previously this threw a NullPointerException on a fresh instance).
     *
     * @param file source file
     * @throws IOException if the dataset reports an I/O failure
     * @throws Exception if loading fails for any other reason
     */
    public void loadTrainingData(File file) throws IOException, Exception {
        if (this.trainingDataset == null) {
            this.trainingDataset = this.snippetWeighter.createNewDataset();
        }
        this.trainingDataset.load(file);
    }

    /**
     * Saves the decider's classifier.
     *
     * @param file destination file
     * @throws IOException if writing fails
     */
    public void saveClassifier(File file) throws IOException {
        this.snippetWeighter.save(file);
    }

    /**
     * Loads a previously saved classifier into the decider.
     *
     * @param file source file
     * @throws Exception if loading fails
     */
    public void loadClassifier(File file) throws Exception {
        this.snippetWeighter.load(file);
    }

    /**
     * Trains the decider with the given classifier on the current training dataset.
     *
     * @param classifier the Weka classifier to train
     * @throws IllegalStateException if no training data has been gathered or loaded yet
     * @throws Exception if training fails
     */
    public void buildClassifier(Classifier classifier) throws Exception {
        this.snippetWeighter.train(classifier, requireTrainingDataset());
    }

    /**
     * Trains the decider with a {@link GaussianProcesses} classifier on the current training
     * dataset.
     *
     * @throws IllegalStateException if no training data has been gathered or loaded yet
     * @throws Exception if training fails
     */
    public void buildDefaultClassifier() throws Exception {
        this.snippetWeighter.train(new GaussianProcesses(), requireTrainingDataset());
    }

    /** Returns the training dataset, failing with a descriptive error when none exists yet. */
    private Dataset<Attributes, Double> requireTrainingDataset() {
        if (this.trainingDataset == null) {
            throw new IllegalStateException("No training data available; call train() or loadTrainingData() first");
        }
        return this.trainingDataset;
    }

    /**
     * Builds the feature instance describing a snippet. The class attribute is set only when the
     * snippet already carries a weight.
     */
    private Instance getInstance(ConnectionSnippet connectionSnippet) throws Exception {
        InstanceBuilder builder = this.snippetWeighter.getInstanceBuilder();

        // Properties of the article the snippet was taken from; link counts are log-scaled,
        // with +1 so that a count of zero maps to 0 rather than -infinity.
        builder.setAttribute(Attributes.generality, connectionSnippet.getSource().getGenerality());
        builder.setAttribute(Attributes.inLinkCount, Double.valueOf(Math.log(connectionSnippet.getSource().getDistinctLinksInCount() + 1)));
        builder.setAttribute(Attributes.outLinkCount, Double.valueOf(Math.log(connectionSnippet.getSource().getDistinctLinksOutCount() + 1)));

        // How the source article relates to each of the two topics.
        builder.setAttribute(Attributes.isTopic1, Boolean.valueOf(connectionSnippet.getSource().getId() == connectionSnippet.getTopic1().getId()));
        builder.setAttribute(Attributes.relatednessToTopic1, this.cmp.getRelatedness(connectionSnippet.getSource(), connectionSnippet.getTopic1()));
        builder.setAttribute(Attributes.isTopic2, Boolean.valueOf(connectionSnippet.getSource().getId() == connectionSnippet.getTopic2().getId()));
        builder.setAttribute(Attributes.relatednessToTopic2, this.cmp.getRelatedness(connectionSnippet.getSource(), connectionSnippet.getTopic2()));

        // Properties of the snippet text itself; word count is the number of
        // whitespace-delimited tokens in the plain text.
        builder.setAttribute(Attributes.sentenceIndex, Integer.valueOf(connectionSnippet.getSentenceIndex()));
        builder.setAttribute(Attributes.wordCount, Integer.valueOf(new StringTokenizer(connectionSnippet.getPlainText()).countTokens()));
        builder.setAttribute(Attributes.isListItem, Boolean.valueOf(connectionSnippet.isListItem()));
        builder.setAttribute(Attributes.isAfterHeading, Boolean.valueOf(connectionSnippet.followsHeading()));
        // NOTE(review): Attributes.isFromFirstParagraph is not set here - confirm intended.

        if (connectionSnippet.getWeight() != null) {
            builder.setClassAttribute(connectionSnippet.getWeight());
        }
        return builder.build();
    }
}
