package org.wikipedia.miner.service;

import com.google.gson.annotations.Expose;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.simpleframework.xml.Attribute;
import org.simpleframework.xml.Element;
import org.simpleframework.xml.ElementList;
import org.wikipedia.miner.annotation.Disambiguator;
import org.wikipedia.miner.annotation.TopicDetector;
import org.wikipedia.miner.annotation.preprocessing.DocumentPreprocessor;
import org.wikipedia.miner.annotation.preprocessing.HtmlPreprocessor;
import org.wikipedia.miner.annotation.preprocessing.PreprocessedDocument;
import org.wikipedia.miner.annotation.preprocessing.WikiPreprocessor;
import org.wikipedia.miner.annotation.tagging.DocumentTagger;
import org.wikipedia.miner.annotation.tagging.HtmlTagger;
import org.wikipedia.miner.annotation.tagging.WikiTagger;
import org.wikipedia.miner.annotation.weighting.LinkDetector;
import org.wikipedia.miner.model.Wikipedia;
import org.wikipedia.miner.util.Position;
import org.wikipedia.miner.util.RelatednessCache;
import org.xjsf.Service;
import org.xjsf.UtilityMessages;
import org.xjsf.param.BooleanParameter;
import org.xjsf.param.EnumParameter;
import org.xjsf.param.FloatParameter;
import org.xjsf.param.StringParameter;

/* loaded from: input_file:org/wikipedia/miner/service/WikifyService.class */
public class WikifyService extends WMService {
    // Request parameters. Each one is created and registered as a global parameter in init().

    /** The "source" parameter: the document to wikify, given either as content or as a URL. */
    private StringParameter prmSource;
    /** The "sourceMode" parameter; defaults to {@link SourceMode#AUTO}. */
    private EnumParameter<SourceMode> prmSourceMode;
    /** The "linkFormat" parameter; defaults to {@link LinkFormat#AUTO}. */
    private EnumParameter<LinkFormat> prmLinkFormat;
    /** The "minProbability" parameter: minimum topic weight required for a link; defaults to 0.5. */
    private FloatParameter prmMinProb;
    /** The "repeatMode" parameter; defaults to FIRST_IN_REGION. */
    private EnumParameter<DocumentTagger.RepeatMode> prmRepeatMode;
    /** The "linkStyle" parameter: CSS applied to generated links; defaults to the empty string. */
    private StringParameter prmLinkStyle;
    /** The "tooltips" parameter: whether tooltip scripts are injected when processing a URL; defaults to false. */
    private BooleanParameter prmTooltips;
    /** The "topics" parameter: whether the wrapped response lists detected topics; defaults to true. */
    private BooleanParameter prmTopics;
    /** The "references" parameter: whether listed topics carry their text positions; defaults to false. */
    private BooleanParameter prmReferences;
    /** The "disambiguationPolicy" parameter; defaults to STRICT. */
    private EnumParameter<TopicDetector.DisambiguationPolicy> prmDisambigPolicy;

    /** Topic detectors keyed by Wikipedia name; filled in init(), one entry per name reported by the hub. */
    private HashMap<String, TopicDetector> topicDetectors;
    /** Link detectors keyed by Wikipedia name; filled in init() alongside {@link #topicDetectors}. */
    private HashMap<String, LinkDetector> linkDetectors;
    /** Value of the class attribute written on generated HTML links; set to "wm_wikifiedLink" by the constructor. */
    private String linkClassName;
    /** Largest token count accepted for a document; longer input raises TooLongException. Set to 10000 by the constructor. */
    private int maxTokenCount;

    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$LinkFormat.class */
    /**
     * Markup used for the links inserted into the wikified document.
     *
     * <p>The declaration order matters: init() passes {@code LinkFormat.values()} to the
     * "linkFormat" parameter together with an array of seven descriptions written in this
     * same order.
     */
    public enum LinkFormat {
        /**
         * Resolved per request: becomes WIKI when the source mode is WIKI, otherwise
         * HTML_ID_WEIGHT.
         */
        AUTO,
        /** MediaWiki link: [[anchor]] when the title equals the anchor ignoring case, else [[title|anchor]]. */
        WIKI,
        /** Modified MediaWiki link: [[id|anchor]]. */
        WIKI_ID,
        /** Modified MediaWiki link: [[id|weight|anchor]]. */
        WIKI_ID_WEIGHT,
        /** HTML anchor with href and class attributes. */
        HTML,
        /** HTML anchor that additionally carries a pageId attribute. */
        HTML_ID,
        /** HTML anchor that additionally carries pageId and linkProb attributes. */
        HTML_ID_WEIGHT
    }

    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$Message.class */
    /**
     * Wrapped response of the wikify service: the tagged document, the source mode that was
     * used, an overall document score and the list of detected topics.
     */
    public static class Message extends Service.Message {

        /** The document text after link tags have been inserted. */
        @Element(data = true)
        @Expose
        private String wikifiedDocument;

        /** The source mode the document was processed with. */
        @Attribute
        @Expose
        private SourceMode sourceMode;

        /** Score supplied by the service when the message is built. */
        @Attribute
        @Expose
        private double documentScore;

        /** Topics reported to the client; populated through {@link #addTopic}. */
        @Expose
        @ElementList(entry = "detectedTopic")
        private ArrayList<Topic> detectedTopics;

        /**
         * @param request          the request this message answers
         * @param wikifiedDocument the tagged document text
         * @param sourceMode       the source mode that was applied
         * @param documentScore    the overall score of the document
         */
        private Message(HttpServletRequest request, String wikifiedDocument, SourceMode sourceMode, double documentScore) {
            super(request);
            this.wikifiedDocument = wikifiedDocument;
            this.sourceMode = sourceMode;
            this.documentScore = documentScore;
            this.detectedTopics = new ArrayList<>();
        }

        /**
         * Appends a detected topic to this message.
         *
         * @param topic             the annotation topic to report
         * @param includeReferences whether the topic's text positions are copied as references
         */
        /* JADX INFO: Access modifiers changed from: private */
        public void addTopic(org.wikipedia.miner.annotation.Topic topic, boolean includeReferences) {
            if (this.detectedTopics == null) {
                this.detectedTopics = new ArrayList<>();
            }
            this.detectedTopics.add(new Topic(topic, includeReferences));
        }

        /** @return the tagged document text */
        public String getWikifiedDocument() {
            return this.wikifiedDocument;
        }

        /** @return the source mode the document was processed with */
        public SourceMode getSourceMode() {
            return this.sourceMode;
        }

        /** @return the overall document score */
        public double getDocumentScore() {
            return this.detectedTopics == null ? this.documentScore : this.documentScore;
        }

        /**
         * @return a read-only view of the detected topics; never {@code null}. An empty list is
         *         returned when the backing list is absent, mirroring the null handling in
         *         {@link #addTopic}.
         */
        public List<Topic> getDetectedTopics() {
            if (this.detectedTopics == null) {
                return Collections.emptyList();
            }
            return Collections.unmodifiableList(this.detectedTopics);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$MyHtmlTagger.class */
    /**
     * Tagger that wraps each detected topic in an HTML anchor pointing at the topic's
     * Wikipedia article.
     */
    public class MyHtmlTagger extends HtmlTagger {
        LinkFormat linkFormat;
        String linkStyle;
        Wikipedia wikipedia;

        /**
         * @param linkFormat decides which optional attributes (pageId, linkProb) are written
         * @param str        CSS for the style attribute; may be {@code null}, is trimmed otherwise
         * @param wikipedia  Wikipedia instance whose language code is used in the link target
         */
        protected MyHtmlTagger(LinkFormat linkFormat, String str, Wikipedia wikipedia) {
            this.linkFormat = linkFormat;
            this.linkStyle = (str == null) ? null : str.trim();
            this.wikipedia = wikipedia;
        }

        /**
         * Builds the anchor element for one mention of a topic.
         *
         * @param str   the anchor text; inserted as-is between the opening and closing tag
         * @param topic the topic the anchor links to
         * @return the complete {@code <a ...>text</a>} markup
         */
        @Override // org.wikipedia.miner.annotation.tagging.HtmlTagger, org.wikipedia.miner.annotation.tagging.DocumentTagger
        public String getTag(String str, org.wikipedia.miner.annotation.Topic topic) {
            boolean withWeight = this.linkFormat == LinkFormat.HTML_ID_WEIGHT;
            boolean withId = withWeight || this.linkFormat == LinkFormat.HTML_ID;

            String href = "http://www." + this.wikipedia.getConfig().getLangCode() + ".wikipedia.org/wiki/" + topic.getTitle();

            StringBuilder tag = new StringBuilder("<a");
            // Titles may contain double quotes; escape them so the attribute is not cut short.
            tag.append(" href=\"").append(escapeQuotes(href)).append("\"");
            tag.append(" class=\"" + WikifyService.this.linkClassName + "\"");
            if (withId) {
                tag.append(" pageId=\"" + topic.getId() + "\"");
            }
            if (withWeight) {
                tag.append(" linkProb=\"" + WikifyService.this.getHub().format(topic.getWeight().doubleValue()) + "\"");
            }
            if (this.linkStyle != null && this.linkStyle.length() > 0) {
                // The style text is caller-supplied; a raw double quote would let it close the
                // attribute and add arbitrary ones.
                tag.append(" style=\"").append(escapeQuotes(this.linkStyle)).append("\"");
            }
            tag.append(">");
            tag.append(str);
            tag.append("</a>");
            return tag.toString();
        }

        /**
         * Makes a value safe to place inside a double-quoted HTML attribute. Only the double
         * quote is replaced, so values that were already well-formed are emitted unchanged.
         */
        private String escapeQuotes(String value) {
            return value.replace("\"", "&quot;");
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$MyWikiTagger.class */
    /**
     * Tagger that wraps each detected topic in MediaWiki-style double-bracket link markup.
     */
    public class MyWikiTagger extends WikiTagger {
        LinkFormat linkFormat;

        /**
         * @param linkFormat selects between title-based, id-based and id-plus-weight markup
         */
        MyWikiTagger(LinkFormat linkFormat) {
            this.linkFormat = linkFormat;
        }

        /**
         * Builds the wiki link for one mention of a topic.
         *
         * @param str   the anchor text
         * @param topic the topic being linked
         * @return [[id|anchor]] or [[id|weight|anchor]] for the id-based formats; otherwise
         *         [[anchor]] when the title equals the anchor ignoring case, else [[title|anchor]]
         */
        @Override // org.wikipedia.miner.annotation.tagging.WikiTagger, org.wikipedia.miner.annotation.tagging.DocumentTagger
        public String getTag(String str, org.wikipedia.miner.annotation.Topic topic) {
            boolean withWeight = this.linkFormat == LinkFormat.WIKI_ID_WEIGHT;
            boolean withId = withWeight || this.linkFormat == LinkFormat.WIKI_ID;

            StringBuilder markup = new StringBuilder("[[");
            if (withId) {
                markup.append(topic.getId());
                if (withWeight) {
                    markup.append("|");
                    markup.append(WikifyService.this.getHub().format(topic.getWeight().doubleValue()));
                }
                markup.append("|");
            } else {
                String title = topic.getTitle();
                // The title is only spelled out when it differs from the anchor text.
                if (title.compareToIgnoreCase(str) != 0) {
                    markup.append(title);
                    markup.append("|");
                }
            }
            // Every variant ends with the anchor text followed by the closing brackets.
            markup.append(str);
            markup.append("]]");
            return markup.toString();
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$Reference.class */
    /**
     * Start and end offsets of one occurrence of a topic, as copied from a
     * {@code Position} when a {@code Topic} is built with references enabled.
     */
    public static class Reference {

        /** Offset at which the occurrence starts. */
        @Attribute
        @Expose
        private int start;

        /** Offset at which the occurrence ends. */
        @Attribute
        @Expose
        private int end;

        /**
         * @param start offset at which the occurrence starts
         * @param end   offset at which the occurrence ends
         */
        private Reference(int start, int end) {
            this.start = start;
            this.end = end;
        }

        /** @return the start offset */
        public int getStart() {
            return start;
        }

        /** @return the end offset */
        public int getEnd() {
            return end;
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$SourceMode.class */
    /**
     * How the "source" parameter is to be interpreted.
     *
     * <p>The declaration order matters: init() passes {@code SourceMode.values()} to the
     * "sourceMode" parameter together with an array of four descriptions written in this
     * same order.
     */
    public enum SourceMode {
        /** Resolved per request by resolveSourceMode() into one of the other three modes. */
        AUTO,
        /** The source is a URL; its content is fetched through the hub's retriever before processing. */
        URL,
        /** The source is markup handled by HtmlPreprocessor. */
        HTML,
        /** The source is markup handled by WikiPreprocessor. */
        WIKI
    }

    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$TooLongException.class */
    /**
     * Raised when a document contains more tokens than the service accepts. Carries both the
     * actual token count and the limit that was exceeded.
     */
    public static class TooLongException extends Exception {
        private final int tokenCount;
        private final int maxTokenCount;

        /**
         * @param tokenCount    number of tokens found in the document
         * @param maxTokenCount the limit the document was checked against
         */
        public TooLongException(int tokenCount, int maxTokenCount) {
            super("Input document is too long");
            this.maxTokenCount = maxTokenCount;
            this.tokenCount = tokenCount;
        }

        /** @return number of tokens found in the document */
        public int getTokenCount() {
            return tokenCount;
        }

        /** @return the limit the document was checked against */
        public int getMaxTokenCount() {
            return maxTokenCount;
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$TooLongMessage.class */
    /**
     * Error response returned in place of a {@code Message} when the document exceeded the
     * token limit; exposes the counts carried by the {@link TooLongException}.
     */
    public static class TooLongMessage extends UtilityMessages.ErrorMessage {

        /** Number of tokens found in the rejected document. */
        @Attribute
        @Expose
        private int tokenCount;

        /** The limit the document was checked against. */
        @Attribute
        @Expose
        private int maxTokenCount;

        /**
         * @param request the request being answered
         * @param cause   the exception describing the violation; both counts are copied from it
         */
        protected TooLongMessage(HttpServletRequest request, TooLongException cause) {
            super(request, cause);
            this.maxTokenCount = cause.getMaxTokenCount();
            this.tokenCount = cause.getTokenCount();
        }

        /** @return number of tokens found in the rejected document */
        public int getTokenCount() {
            return tokenCount;
        }

        /** @return the limit the document was checked against */
        public int getMaxTokenCount() {
            return maxTokenCount;
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/service/WikifyService$Topic.class */
    /**
     * Serialisable summary of one detected topic: its id, title and weight, plus optionally
     * the positions at which it was found.
     */
    public static class Topic {

        @Attribute
        @Expose
        private int id;

        @Attribute
        @Expose
        private String title;

        @Attribute
        @Expose
        private double weight;

        /** Left {@code null} when references were not requested. */
        @Expose
        @ElementList(entry = "reference", required = false)
        private ArrayList<Reference> references;

        /**
         * @param topic the annotation topic to summarise
         * @param z     when {@code true}, each of the topic's positions is copied into a
         *              {@link Reference}; when {@code false}, no reference list is created
         */
        private Topic(org.wikipedia.miner.annotation.Topic topic, boolean z) {
            this.id = topic.getId();
            this.title = topic.getTitle();
            this.weight = topic.getWeight().doubleValue();
            if (z) {
                this.references = new ArrayList<>();
                for (Position position : topic.getPositions()) {
                    this.references.add(new Reference(position.getStart(), position.getEnd()));
                }
            }
        }

        /** @return the id copied from the annotation topic */
        public int getId() {
            return this.id;
        }

        /** @return the title copied from the annotation topic */
        public String getTitle() {
            return this.title;
        }

        /** @return the weight copied from the annotation topic */
        public double getWeight() {
            return this.weight;
        }

        /**
         * @return a read-only view of the references; an empty list (never {@code null}) when
         *         references were not requested
         */
        public List<Reference> getReferences() {
            if (this.references == null) {
                // Typed shared empty list instead of a raw ArrayList allocated on every call.
                return Collections.<Reference>emptyList();
            }
            return Collections.unmodifiableList(this.references);
        }
    }

    /**
     * Creates the service with its group name, short and long descriptions, and sets the
     * defaults used later: empty detector maps, the CSS class for generated links and the
     * token limit.
     */
    public WikifyService() {
        super(
                "core",
                "Augments textual documents with links to the appropriate Wikipedia articles",
                "<p>This service automatically detects the topics mentioned in the given document, "
                        + "and provides links to the appropriate Wikipedia articles. </p>"
                        + "<p> It doesn't just use Wikipedia as a source of information to link to, "
                        + "but also as training data for how best to do it. "
                        + "In other words, it has been trained to make the same decisions as the people who edit Wikipedia. </p>"
                        + "<p> It may not work very well if the document does not fit the model of what it has been trained on. "
                        + "Documents should not be too short, and should be dedicated to a particular topic.</p>",
                true);
        this.maxTokenCount = 10000;
        this.linkClassName = "wm_wikifiedLink";
        this.linkDetectors = new HashMap<>();
        this.topicDetectors = new HashMap<>();
    }

    /**
     * Initialises the service: registers all request parameters, loads a topic detector and a
     * link detector for every Wikipedia known to the hub, and registers the usage examples.
     *
     * @param servletConfig the servlet configuration, forwarded to the superclass
     * @throws ServletException if the superclass fails to initialise, or wrapping any exception
     *                          raised while building the detectors
     */
    @Override // org.wikipedia.miner.service.WMService
    public void init(ServletConfig servletConfig) throws ServletException {
        super.init(servletConfig);

        // ---- request parameters -------------------------------------------------------------
        this.prmSource = new StringParameter("source", "The document to be wikified (either its content or a web-accessible URL)", (String) null);
        addGlobalParameter(this.prmSource);

        String[] sourceModeDescriptions = {
            "detect automatically",
            "web-accessable url",
            "snippet of html markup",
            "snippet of mediawiki markup"
        };
        this.prmSourceMode = new EnumParameter<>("sourceMode", "the type of the source document", SourceMode.AUTO, SourceMode.values(), sourceModeDescriptions);
        addGlobalParameter(this.prmSourceMode);

        String[] linkFormatDescriptions = {
            "WIKI or HTML, depending on source",
            "as mediawiki markup",
            "as modified mediawiki markup [[id|anchor]]",
            "as modified mediawiki markup [[id|weight|anchor]]",
            "as html links to wikipedia (with '" + this.linkClassName + "' as the class attribute)",
            "as modified html links to wikipedia, with pageId as an additional attribute",
            "as modified html links to wikipedia, with pageId and linkProb as additional attributes"
        };
        this.prmLinkFormat = new EnumParameter<>("linkFormat", "the format of links", LinkFormat.AUTO, LinkFormat.values(), linkFormatDescriptions);
        addGlobalParameter(this.prmLinkFormat);

        this.prmMinProb = new FloatParameter("minProbability", "The system calculates a probability for each topic of whether a Wikipedian would consider it interesting enough to link to. This parameter specifies the minimum probability a topic must have before it will be linked.", Float.valueOf(0.5f));
        addGlobalParameter(this.prmMinProb);

        String[] repeatModeDescriptions = {
            "all mentions",
            "the first mention of each topic",
            "the first mention of each topic within each region"
        };
        this.prmRepeatMode = new EnumParameter<>("repeatMode", "whether repeat mentions of topics should be tagged or ignored", DocumentTagger.RepeatMode.FIRST_IN_REGION, DocumentTagger.RepeatMode.values(), repeatModeDescriptions);
        addGlobalParameter(this.prmRepeatMode);

        this.prmLinkStyle = new StringParameter("linkStyle", "the css style of links. This is only valid if processing a URL", "");
        addGlobalParameter(this.prmLinkStyle);

        this.prmTooltips = new BooleanParameter("tooltips", "<b>true</b> if javascript for adding tooltips should be included, otherwise <b>false</b>. This is only valid if processing a URL.", false);
        addGlobalParameter(this.prmTooltips);

        this.prmTopics = new BooleanParameter("topics", "<b>true</b> if to return a list of topics, otherwise <b>false</b> ", true);
        addGlobalParameter(this.prmTopics);

        this.prmReferences = new BooleanParameter("references", "<b>true</b> to return details of where each topic was found within text, otherwise <b>false</b>", false);
        addGlobalParameter(this.prmReferences);

        String[] disambigPolicyDescriptions = {
            "only one interpretation allowed each term",
            "multiple interpretations allowed"
        };
        this.prmDisambigPolicy = new EnumParameter<>("disambiguationPolicy", "wheither each term should be disambiguated to a single interpretation, or to multiple ones", TopicDetector.DisambiguationPolicy.STRICT, TopicDetector.DisambiguationPolicy.values(), disambigPolicyDescriptions);
        addGlobalParameter(this.prmDisambigPolicy);

        // ---- one detector pair per configured Wikipedia ---------------------------------------
        for (String wikipediaName : getWMHub().getWikipediaNames()) {
            Wikipedia wikipedia = getWMHub().getWikipedia(wikipediaName);
            try {
                Disambiguator disambiguator = new Disambiguator(wikipedia);
                disambiguator.loadClassifier(wikipedia.getConfig().getTopicDisambiguationModel());
                TopicDetector topicDetector = new TopicDetector(wikipedia, disambiguator);

                LinkDetector linkDetector = new LinkDetector(wikipedia);
                linkDetector.loadClassifier(wikipedia.getConfig().getLinkDetectionModel());

                // Both detectors are only registered once both have been built successfully.
                this.topicDetectors.put(wikipediaName, topicDetector);
                this.linkDetectors.put(wikipediaName, linkDetector);
            } catch (Exception e) {
                throw new ServletException(e);
            }
        }

        // ---- usage examples -------------------------------------------------------------------
        String exampleSnippet = "At around the size of a domestic chicken, kiwi are by far the smallest living ratites and lay the largest egg in relation to their body size of any species of bird in the world.";

        addExample(new Service.ExampleBuilder(this, "Wikify a small snippet of text, and view details of the detected topics")
                .addParam(this.prmSource, exampleSnippet)
                .build());
        addExample(new Service.ExampleBuilder(this, "Wikify a small snippet of text, and view result as html without additional details")
                .addParam(this.prmSource, exampleSnippet)
                .addParam(this.prmResponseFormat, Service.ResponseFormat.DIRECT)
                .addParam(this.prmSourceMode, SourceMode.HTML)
                .build());
        addExample(new Service.ExampleBuilder(this, "Wikify a web page, and view result as html with added tooltips")
                .addParam(this.prmSource, "http://www.kcc.org.nz/kiwi")
                .addParam(this.prmResponseFormat, Service.ResponseFormat.DIRECT)
                .addParam(this.prmTooltips, true)
                .build());
    }

    /**
     * Wikifies the requested document and wraps the result in a {@link Message}.
     *
     * @param httpServletRequest the request carrying the service parameters
     * @return a {@code ParameterMissingMessage} when the source is absent or blank, a
     *         {@link TooLongMessage} when the document exceeds the token limit, otherwise a
     *         {@link Message} holding the tagged document, the resolved source mode, the summed
     *         relatedness of all gathered topics and (if requested) the detected topics
     * @throws Exception any failure raised while wikifying, other than the token-limit violation
     */
    public Service.Message buildWrappedResponse(HttpServletRequest httpServletRequest) throws Exception {
        Wikipedia wikipedia = getWikipedia(httpServletRequest);

        String source = this.prmSource.getValue(httpServletRequest);
        if (source == null || source.trim().length() == 0) {
            return new UtilityMessages.ParameterMissingMessage(httpServletRequest);
        }

        SourceMode sourceMode = (SourceMode) this.prmSourceMode.getValue(httpServletRequest);
        if (sourceMode == SourceMode.AUTO) {
            sourceMode = resolveSourceMode(source);
        }

        ArrayList<org.wikipedia.miner.annotation.Topic> gatheredTopics = new ArrayList<>();
        String wikifiedDocument;
        try {
            wikifiedDocument = wikifyAndGatherTopics(httpServletRequest, gatheredTopics, wikipedia);
        } catch (TooLongException e) {
            return new TooLongMessage(httpServletRequest, e);
        }

        // The document score is the sum of every gathered topic's relatedness to the others.
        double documentScore = 0.0d;
        for (org.wikipedia.miner.annotation.Topic topic : gatheredTopics) {
            documentScore += topic.getRelatednessToOtherTopics();
        }
        Message message = new Message(httpServletRequest, wikifiedDocument, sourceMode, documentScore);

        float minProbability = this.prmMinProb.getValue(httpServletRequest).floatValue();
        boolean listTopics = this.prmTopics.getValue(httpServletRequest).booleanValue();
        boolean includeReferences = this.prmReferences.getValue(httpServletRequest).booleanValue();
        if (!listTopics) {
            return message;
        }

        // NOTE(review): iteration stops at the first topic below the threshold instead of
        // skipping it, so this presumably relies on the list being in descending weight
        // order - confirm against LinkDetector.getWeightedTopics.
        for (org.wikipedia.miner.annotation.Topic topic : gatheredTopics) {
            if (topic.getWeight().doubleValue() < minProbability) {
                break;
            }
            message.addTopic(topic, includeReferences);
        }
        return message;
    }

    /**
     * Writes the wikified document straight to the response as HTML, without any wrapping
     * message. The topics gathered along the way are discarded.
     *
     * @param httpServletRequest  the request carrying the service parameters
     * @param httpServletResponse the response the tagged document is written to
     * @throws Exception any failure raised while wikifying or writing, including a
     *                   token-limit violation
     */
    public void buildUnwrappedResponse(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws Exception {
        Wikipedia wikipedia = getWikipedia(httpServletRequest);

        httpServletResponse.setContentType("text/html");
        httpServletResponse.setHeader("Cache-Control", "no-cache");
        httpServletResponse.setCharacterEncoding("UTF8");

        // The writer is obtained before the document is processed, as part of the same expression.
        ArrayList<org.wikipedia.miner.annotation.Topic> discardedTopics = new ArrayList<>();
        httpServletResponse.getWriter().append(
                wikifyAndGatherTopics(httpServletRequest, discardedTopics, wikipedia));
    }

    /**
     * Runs the full wikification pipeline for one request: resolves source mode and link
     * format, fetches the document when the source is a URL, preprocesses it, detects and
     * weights topics, and tags those whose weight reaches the "minProbability" threshold.
     *
     * <p>For URL sources a {@code <base>} element is inserted in front of the opening html
     * tag, and, when tooltips are requested, stylesheet and script references are inserted
     * before the closing head tag.
     *
     * @param httpServletRequest the request carrying the service parameters
     * @param arrayList          output list; receives every weighted topic, whether or not it
     *                           reached the threshold
     * @param wikipedia          the Wikipedia instance used for preprocessing and tokenising
     * @return the tagged document, or the empty string when the source is absent or blank
     * @throws TooLongException if the preprocessed text has more tokens than the limit
     * @throws IOException      declared by the original signature
     * @throws Exception        any other failure raised by the pipeline
     */
    private String wikifyAndGatherTopics(HttpServletRequest httpServletRequest, ArrayList<org.wikipedia.miner.annotation.Topic> arrayList, Wikipedia wikipedia) throws TooLongException, IOException, Exception {
        String wikipediaName = getWikipediaName(httpServletRequest);
        TopicDetector topicDetector = this.topicDetectors.get(wikipediaName);
        LinkDetector linkDetector = this.linkDetectors.get(wikipediaName);
        // NOTE(review): the detector comes from a map held by the service instance, so this
        // setter presumably changes state seen by other requests for the same Wikipedia - confirm.
        topicDetector.setDisambiguationPolicy((TopicDetector.DisambiguationPolicy) this.prmDisambigPolicy.getValue(httpServletRequest));

        String source = this.prmSource.getValue(httpServletRequest);
        if (source == null || source.trim().equals("")) {
            return "";
        }

        SourceMode sourceMode = (SourceMode) this.prmSourceMode.getValue(httpServletRequest);
        if (sourceMode == SourceMode.AUTO) {
            sourceMode = resolveSourceMode(source);
        }
        LinkFormat linkFormat = (LinkFormat) this.prmLinkFormat.getValue(httpServletRequest);
        if (linkFormat == LinkFormat.AUTO) {
            linkFormat = sourceMode == SourceMode.WIKI ? LinkFormat.WIKI : LinkFormat.HTML_ID_WEIGHT;
        }
        String linkStyle = this.prmLinkStyle.getValue(httpServletRequest);

        DocumentPreprocessor preprocessor = sourceMode == SourceMode.WIKI ? new WikiPreprocessor(wikipedia) : new HtmlPreprocessor();
        boolean htmlLinks = linkFormat == LinkFormat.HTML || linkFormat == LinkFormat.HTML_ID || linkFormat == LinkFormat.HTML_ID_WEIGHT;
        DocumentTagger tagger = htmlLinks ? new MyHtmlTagger(linkFormat, linkStyle, wikipedia) : new MyWikiTagger(linkFormat);

        String content;
        if (sourceMode == SourceMode.URL) {
            // A bare "www." address is given an explicit scheme so it parses as a URL.
            if (source.matches("(?i)^www\\.(.*)$")) {
                source = "http://" + source;
            }
            content = getWMHub().getRetriever().getWebContent(new URL(source));
        } else {
            content = source;
        }

        PreprocessedDocument preprocessed = preprocessor.preprocess(content);
        String[] tokens = wikipedia.getConfig().getTokenizer().tokenize(preprocessed.getPreprocessedText());
        if (tokens.length > this.maxTokenCount) {
            throw new TooLongException(tokens.length, this.maxTokenCount);
        }

        ArrayList<org.wikipedia.miner.annotation.Topic> weightedTopics = linkDetector.getWeightedTopics(topicDetector.getTopics(preprocessed, (RelatednessCache) null));
        ArrayList<org.wikipedia.miner.annotation.Topic> topicsToLink = new ArrayList<>();
        float minProbability = this.prmMinProb.getValue(httpServletRequest).floatValue();
        for (org.wikipedia.miner.annotation.Topic topic : weightedTopics) {
            if (topic.getWeight().doubleValue() >= minProbability) {
                topicsToLink.add(topic);
            }
            // Every topic is reported to the caller, not only the ones that get linked.
            arrayList.add(topic);
        }

        String tagged = tagger.tag(preprocessed, topicsToLink, (DocumentTagger.RepeatMode) this.prmRepeatMode.getValue(httpServletRequest));
        if (sourceMode != SourceMode.URL) {
            return tagged;
        }

        // The URL is caller-supplied and may contain '$' or '\', which replaceAll would
        // otherwise interpret as group references or escapes in the replacement string.
        // NOTE(review): the URL is written into the href attribute without HTML escaping.
        String baseElement = "<base href=\"" + source + "\" target=\"_top\"/><html";
        tagged = tagged.replaceAll("(?i)<html", Matcher.quoteReplacement(baseElement));

        if (this.prmTooltips.getValue(httpServletRequest).booleanValue()) {
            String basePath = getBasePath(httpServletRequest);
            if (!basePath.endsWith("/")) {
                basePath = basePath + "/";
            }
            String headMarkup = buildTooltipHeadMarkup(basePath, linkStyle);
            tagged = tagged.replaceAll("(?i)\\</head>", Matcher.quoteReplacement(headMarkup) + "</head>");
        }
        return tagged;
    }

    /**
     * Builds the stylesheet and script markup that enables tooltips on generated links.
     *
     * @param basePath  base address of the service's static resources; must end with "/"
     * @param linkStyle CSS for generated links, emitted as a style rule when not blank; may be
     *                  {@code null}
     * @return the markup to insert before the closing head tag
     */
    private String buildTooltipHeadMarkup(String basePath, String linkStyle) {
        // basePath already ends with "/", so resource paths are appended without a leading
        // slash to avoid "//" in the generated addresses.
        StringBuilder head = new StringBuilder();
        head.append("<link type=\"text/css\" rel=\"stylesheet\" href=\"" + basePath + "css/tooltips.css\"/>\n");
        head.append("<link type=\"text/css\" rel=\"stylesheet\" href=\"" + basePath + "css/jquery-ui/jquery-ui-1.8.14.custom.css\"/>\n");
        if (linkStyle != null && linkStyle.trim().length() > 0) {
            head.append("<style type='text/css'> ." + this.linkClassName + "{" + linkStyle + ";}</style>\n");
        }
        head.append("<style type='text/css'> .qtip-content div, .qtip-content div p, .qtip-content div b {color:inherit;} </style>");
        head.append("<script type=\"text/javascript\" src=\"" + basePath + "js/jquery-1.5.1.min.js\"></script>\n");
        head.append("<script type=\"text/javascript\" src=\"" + basePath + "js/jquery.qtip-1.0.0-rc3.min.js\"></script>\n");
        head.append("<script type=\"text/javascript\" src=\"" + basePath + "js/tooltips.js\"></script>\n");
        head.append("<script type=\"text/javascript\"> \n");
        head.append("  var wm_host=\"" + basePath + "\" ; \n");
        head.append("  $(document).ready(function() { \n");
        head.append("    wm_addDefinitionTooltipsToAllLinks(null, \"" + this.linkClassName + "\") ; \n");
        head.append("  });\n");
        head.append("</script>\n");
        return head.toString();
    }

    /**
     * Guesses how a source string should be interpreted.
     *
     * <p>The string is a URL when it parses as one (a leading "www." is first given an
     * "http://" scheme). Otherwise the occurrences of {@code <...>} and {@code [[...]]} are
     * counted: strictly more angle-bracket matches means HTML, anything else means WIKI.
     *
     * @param str the raw value of the "source" parameter; must not be {@code null}
     * @return {@link SourceMode#URL}, {@link SourceMode#HTML} or {@link SourceMode#WIKI}
     */
    private SourceMode resolveSourceMode(String str) {
        String candidate = str.matches("(?i)^www\\.(.*)$") ? "http://" + str : str;
        try {
            new URL(candidate);
            return SourceMode.URL;
        } catch (MalformedURLException notAUrl) {
            // Expected for ordinary text: fall through to the markup heuristic.
        }

        // Each matcher is created once and advanced with find(). Building a new matcher on
        // every iteration restarts the search at the beginning of the text, so the loop would
        // never finish as soon as one match exists.
        int htmlTagCount = 0;
        Matcher htmlMatcher = Pattern.compile("<(.*?)>").matcher(candidate);
        while (htmlMatcher.find()) {
            htmlTagCount++;
        }

        int wikiLinkCount = 0;
        Matcher wikiMatcher = Pattern.compile("\\[\\[(.*?)\\]\\]").matcher(candidate);
        while (wikiMatcher.find()) {
            wikiLinkCount++;
        }

        return htmlTagCount > wikiLinkCount ? SourceMode.HTML : SourceMode.WIKI;
    }
}
