package org.wikipedia.miner.util;

import java.util.Iterator;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: input_file:org/wikipedia/miner/util/MarkupStripper.class */
/**
 * Removes or masks MediaWiki markup (templates, tables, HTML, links, emphasis, sections, ...)
 * from article text.
 *
 * <p>Most methods work in terms of <em>regions</em>: {@code int[2]} arrays holding a start offset
 * (inclusive) and an end offset (exclusive) into the text, as produced by
 * {@link Matcher#start()} and {@link Matcher#end()}. Region lists are ordered so that they can be
 * processed from the last element to the first.
 *
 * <p>Wherever a {@code replacement} character is accepted, {@code null} means "delete the markup",
 * while a non-null value means "overwrite every character of the markup (other than line
 * terminators) with this character", which keeps the text the same length.
 */
public class MarkupStripper {
    /** A blank line. */
    private static final Pattern BLANK_LINE = Pattern.compile("^(\\s*)$");
    /** A line whose first non-blank character is a colon (an indented line). */
    private static final Pattern INDENTED_LINE = Pattern.compile("^(\\s*):.*");
    /** A line consisting solely of an image link. */
    private static final Pattern IMAGE_LINE = Pattern.compile("^(\\s*)\\[\\[Image\\:(.*?)\\]\\](\\s*)");
    /** A line wrapped in a single pair of italic tags, optionally followed by a full stop. */
    private static final Pattern ITALIC_LINE = Pattern.compile("(\\s*)\\<i\\>(.*?)\\<\\/i\\>\\.?(\\s*)");

    /**
     * Matches one complete internal link: group 1 is the optional {@code prefix:} part, group 2
     * the destination and group 3 the optional {@code |anchor} part (including the pipe).
     */
    private final Pattern linkPattern = Pattern.compile("\\[\\[(.*?:)?(.*?)(\\|.*?)?\\]\\]");
    /** Matches text that is blank, or whose last line is blank. */
    private final Pattern isolatedBefore = Pattern.compile("(\\s*|.*\\n(\\s*))", Pattern.DOTALL);
    /** Matches text that is blank, or whose first line is blank. */
    private final Pattern isolatedAfter = Pattern.compile("(\\s*|(\\s*)\\n.*)", Pattern.DOTALL);
    private final EmphasisResolver emphasisResolver = new EmphasisResolver();

    /**
     * Strips comments, math, templates, tables, HTML, external links, magic words and
     * misformatted leading lines, leaving internal links and emphasis markup in place.
     *
     * @param markup      the text to clean
     * @param replacement the masking character, or {@code null} to delete stripped markup
     * @return the cleaned text
     */
    public String stripAllButInternalLinksAndEmphasis(String markup, Character replacement) {
        // Pass 1: comments and <math> elements.
        Vector<int[]> regions = mergeRegionLists(
                gatherSimpleRegions(markup, "\\<\\!--(.*?)--\\>"),
                gatherComplexRegions(markup, "\\<math(\\s*?)([^>\\/]*?)\\>", "\\<\\/math(\\s*?)\\>"));
        String cleared = stripRegions(markup, regions, replacement);

        // Pass 2: templates, located in the output of pass 1.
        cleared = stripRegions(cleared, gatherTemplates(cleared), replacement);

        // Pass 3: tables, HTML, external links and magic words.
        regions = gatherTables(cleared);
        regions = mergeRegionLists(regions, gatherHTML(cleared));
        regions = mergeRegionLists(regions, gatherExternalLinks(cleared));
        regions = mergeRegionLists(regions, gatherMagicWords(cleared));
        cleared = stripRegions(cleared, regions, replacement);

        // Pass 4: leading lines that are blank, indented, italicised or image-only.
        return stripRegions(cleared, gatherMisformattedStarts(cleared), replacement);
    }

    /**
     * Replaces every internal link with its visible text.
     *
     * <p>Links with a {@code prefix:} part are removed (or fully masked). For other links the
     * anchor text is kept when present, otherwise the destination. With a non-null
     * {@code replacement} the brackets, the pipe and a hidden destination are masked, so the
     * result has the same length as the input.
     *
     * @param markup      the text to clean
     * @param replacement the masking character, or {@code null} to delete link syntax
     * @return the text with internal links stripped
     */
    public String stripInternalLinks(String markup, Character replacement) {
        Vector<int[]> regions = gatherComplexRegions(markup, "\\[\\[", "\\]\\]");
        StringBuilder cleared = new StringBuilder();
        int lastPos = markup.length();
        // Walk backwards so that an enclosing link is handled before the links nested in it.
        for (int idx = regions.size() - 1; idx >= 0; idx--) {
            int[] region = regions.elementAt(idx);
            if (region[0] >= lastPos) {
                continue; // lies inside a link that has already been handled
            }
            cleared.insert(0, markup.substring(region[1], lastPos));
            String link = markup.substring(region[0], region[1]);
            String stripped = link; // left untouched if it does not look like a link
            Matcher m = this.linkPattern.matcher(link);
            if (m.matches()) {
                String prefix = m.group(1);
                String dest = m.group(2);
                String anchor = m.group(3);
                if (prefix != null) {
                    stripped = replacement != null ? mask(link, replacement) : "";
                } else if (replacement == null) {
                    stripped = anchor != null ? anchor.substring(1) : dest;
                } else {
                    // Build the masks as Strings: adding two chars together would yield an int.
                    String one = replacement.toString();
                    String two = one + one;
                    if (anchor != null) {
                        // "[[" + dest + "|" are masked, the anchor text is kept, "]]" is masked.
                        stripped = two + mask(dest, replacement) + one + anchor.substring(1) + two;
                    } else {
                        stripped = two + dest + two;
                    }
                }
            }
            cleared.insert(0, stripped);
            lastPos = region[0];
        }
        if (lastPos > 0) {
            cleared.insert(0, markup.substring(0, lastPos));
        }
        return cleared.toString();
    }

    /**
     * Strips emphasis markup.
     *
     * <p>The text is first passed through {@code EmphasisResolver.resolveEmphasis}; every
     * {@code <b>}, {@code </b>}, {@code <i>} and {@code </i>} tag in the result is then removed,
     * or replaced by three (bold) or two (italic) replacement characters.
     * NOTE(review): this presumes the resolver rewrites {@code '''} and {@code ''} as those tags,
     * so that the masks restore the original length - confirm against EmphasisResolver.
     *
     * @param markup      the text to clean
     * @param replacement the masking character, or {@code null} to delete the emphasis markup
     * @return the text with emphasis stripped
     */
    public String stripEmphasis(String markup, Character replacement) {
        String resolved = this.emphasisResolver.resolveEmphasis(markup);
        Vector<int[]> tags = gatherSimpleRegions(resolved, "\\<\\/?[bi]\\>");
        StringBuilder cleared = new StringBuilder();
        // Start from the end of the resolved text; starting from 0 would skip every tag.
        int lastPos = resolved.length();
        for (int idx = tags.size() - 1; idx >= 0; idx--) {
            int[] tag = tags.elementAt(idx);
            if (tag[0] < lastPos) {
                if (tag[1] < lastPos) {
                    cleared.insert(0, resolved.substring(tag[1], lastPos));
                }
                if (replacement != null) {
                    boolean bold = resolved.substring(tag[0], tag[1]).matches("\\<\\/?b\\>");
                    cleared.insert(0, mask(bold ? "'''" : "''", replacement));
                }
                lastPos = tag[0];
            }
        }
        cleared.insert(0, resolved.substring(0, lastPos));
        return cleared.toString();
    }

    /**
     * Removes (or masks) internal links that carry a {@code prefix:} part, leaving all other
     * internal links exactly as they are.
     *
     * @param markup      the text to clean
     * @param replacement the masking character, or {@code null} to delete the links
     * @return the text without prefixed internal links
     */
    public String stripNonArticleInternalLinks(String markup, Character replacement) {
        Vector<int[]> regions = gatherComplexRegions(markup, "\\[\\[", "\\]\\]");
        StringBuilder cleared = new StringBuilder();
        int lastPos = markup.length();
        for (int idx = regions.size() - 1; idx >= 0; idx--) {
            int[] region = regions.elementAt(idx);
            if (region[0] >= lastPos) {
                continue; // lies inside a link that has already been handled
            }
            cleared.insert(0, markup.substring(region[1], lastPos));
            String link = markup.substring(region[0], region[1]);
            Matcher m = this.linkPattern.matcher(link);
            if (m.matches() && m.group(1) != null) {
                link = replacement != null ? mask(link, replacement) : "";
            }
            cleared.insert(0, link);
            lastPos = region[0];
        }
        if (lastPos > 0) {
            cleared.insert(0, markup.substring(0, lastPos));
        }
        return cleared.toString();
    }

    /**
     * Strips the named sections (header and body) from the text.
     *
     * @param markup       the text to clean
     * @param sectionNames the section titles; each is used as a regular expression fragment,
     *                     see {@link #gatherSection(String, String)}
     * @param replacement  the masking character, or {@code null} to delete the sections
     * @return the text without the given sections
     */
    public String stripSections(String markup, String[] sectionNames, Character replacement) {
        Vector<int[]> regions = new Vector<>();
        for (String sectionName : sectionNames) {
            regions = mergeRegionLists(regions, gatherSection(markup, sectionName));
        }
        return stripRegions(markup, regions, replacement);
    }

    /**
     * Strips section header lines ({@code == Title ==}) but keeps the section bodies.
     *
     * @param markup      the text to clean
     * @param replacement the masking character, or {@code null} to delete the headers
     * @return the text without section headers
     */
    public String stripSectionHeaders(String markup, Character replacement) {
        return stripRegions(markup, gatherSectionHeaders(markup), replacement);
    }

    /**
     * Applies {@link #stripAllButInternalLinksAndEmphasis(String, Character)} followed by
     * {@link #stripInternalLinks(String, Character)}.
     *
     * @param markup      the text to clean
     * @param replacement the masking character, or {@code null} to delete stripped markup
     * @return the cleaned text
     */
    public String stripToPlainText(String markup, Character replacement) {
        return stripInternalLinks(stripAllButInternalLinksAndEmphasis(markup, replacement), replacement);
    }

    /**
     * Removes or masks the given regions of the text.
     *
     * <p>Regions are processed from the last list element to the first. A region that starts at
     * or after the start of one already processed is skipped, and a region that extends past
     * that point (or past the end of the text) is truncated to it.
     *
     * @param markup      the text to clean
     * @param regions     {@code [start, end)} offsets into {@code markup}
     * @param replacement the masking character, or {@code null} to delete the regions
     * @return the text with the regions stripped
     */
    public String stripRegions(String markup, Vector<int[]> regions, Character replacement) {
        StringBuilder cleared = new StringBuilder();
        int lastPos = markup.length();
        for (int idx = regions.size() - 1; idx >= 0; idx--) {
            int[] region = regions.elementAt(idx);
            if (region[0] < lastPos) {
                // Clamp so an over-long region can neither duplicate text nor run off the end.
                int end = Math.min(region[1], lastPos);
                cleared.insert(0, markup.substring(end, lastPos));
                if (replacement != null) {
                    cleared.insert(0, mask(markup.substring(region[0], end), replacement));
                }
                lastPos = region[0];
            }
        }
        cleared.insert(0, markup.substring(0, lastPos));
        return cleared.toString();
    }

    /**
     * Collapses every run of three or more newlines to exactly two and trims the result.
     *
     * @param markup the text to tidy
     * @return the tidied text
     */
    public String stripExcessNewlines(String markup) {
        return markup.replaceAll("\n{3,}", "\n\n").trim();
    }

    /**
     * Locates {@code [[...]]} regions, including nested ones.
     *
     * @param markup the text to scan
     * @return the regions found
     */
    public Vector<int[]> gatherInternalLinks(String markup) {
        return gatherComplexRegions(markup, "\\[\\[", "\\]\\]");
    }

    /**
     * Locates <code>{{...}}</code> regions, including nested ones.
     *
     * @param markup the text to scan
     * @return the regions found
     */
    public Vector<int[]> gatherTemplates(String markup) {
        return gatherComplexRegions(markup, "\\{\\{", "\\}\\}");
    }

    /**
     * Keeps only the regions that stand on lines of their own, i.e. with nothing but whitespace
     * between them and the neighbouring line breaks (or the ends of the text).
     *
     * @param regions the candidate regions
     * @param markup  the text the regions refer to
     * @return a new list holding the isolated regions
     */
    public Vector<int[]> getIsolatedRegions(Vector<int[]> regions, String markup) {
        Vector<int[]> isolated = new Vector<>();
        for (int[] region : regions) {
            if (isIsolated(region, markup)) {
                isolated.add(region);
            }
        }
        return isolated;
    }

    /**
     * The complement of {@link #getIsolatedRegions(Vector, String)}.
     *
     * @param regions the candidate regions
     * @param markup  the text the regions refer to
     * @return a new list holding the regions that are not isolated
     */
    public Vector<int[]> excludeIsolatedRegions(Vector<int[]> regions, String markup) {
        Vector<int[]> embedded = new Vector<>();
        for (int[] region : regions) {
            if (!isIsolated(region, markup)) {
                embedded.add(region);
            }
        }
        return embedded;
    }

    /** Tests whether only whitespace separates the region from the surrounding line breaks. */
    private boolean isIsolated(int[] region, String markup) {
        String before = markup.substring(0, region[0]);
        String after = markup.substring(region[1]);
        return this.isolatedBefore.matcher(before).matches() && this.isolatedAfter.matcher(after).matches();
    }

    /**
     * Locates <code>{|...|}</code> regions, including nested ones.
     *
     * @param markup the text to scan
     * @return the regions found
     */
    public Vector<int[]> gatherTables(String markup) {
        return gatherComplexRegions(markup, "\\{\\|", "\\|\\}");
    }

    /**
     * Locates references, {@code <div>} elements (with their content) and all remaining tags.
     *
     * @param markup the text to scan
     * @return the merged regions
     */
    public Vector<int[]> gatherHTML(String markup) {
        Vector<int[]> regions = gatherReferences(markup);
        regions = mergeRegionLists(regions,
                gatherComplexRegions(markup, "\\<div(\\s*?)([^>\\/]*?)\\>", "\\<\\/div(\\s*?)\\>"));
        return mergeRegionLists(regions, gatherSimpleRegions(markup, "\\<(.*?)\\>"));
    }

    /**
     * Locates self-closing {@code <ref ... />} tags and {@code <ref>...</ref>} elements.
     *
     * @param markup the text to scan
     * @return the merged regions
     */
    public Vector<int[]> gatherReferences(String markup) {
        return mergeRegionLists(
                gatherSimpleRegions(markup, "\\<ref(\\s*?)([^>]*?)\\/\\>"),
                gatherComplexRegions(markup, "\\<ref(\\s*?)([^>\\/]*?)\\>", "\\<\\/ref(\\s*?)\\>"));
    }

    /**
     * Locates upper-case words wrapped in double underscores, e.g. {@code __NOTOC__}.
     *
     * @param markup the text to scan
     * @return the regions found
     */
    public Vector<int[]> gatherMagicWords(String markup) {
        return gatherSimpleRegions(markup, "\\_\\_([A-Z]+)\\_\\_");
    }

    /**
     * Locates single-bracketed links starting with {@code http}, {@code www} or {@code ftp}.
     *
     * @param markup the text to scan
     * @return the regions found
     */
    public Vector<int[]> gatherExternalLinks(String markup) {
        return gatherSimpleRegions(markup, "\\[(http|www|ftp).*?\\]");
    }

    /**
     * Locates runs of two or more apostrophes.
     *
     * @param markup the text to scan
     * @return the regions found
     */
    public Vector<int[]> gatherEmphasis(String markup) {
        return gatherSimpleRegions(markup, "'{2,}");
    }

    /**
     * Locates section headers. A header is only recognised when it follows a newline and is
     * followed by at least one further character; the region covers the header text from its
     * first {@code =} to its last.
     *
     * @param markup the text to scan
     * @return the regions found
     */
    public Vector<int[]> gatherSectionHeaders(String markup) {
        Vector<int[]> regions = new Vector<>();
        Matcher m = Pattern.compile("\\n\\s*((={2,})[^=].*?\\2)[^=]").matcher(markup);
        while (m.find()) {
            regions.add(new int[]{m.start(1), m.end(1)});
        }
        return regions;
    }

    /**
     * Locates the first section with the given title. The region runs from the header to the
     * next header of the same or a higher level, or to the end of the text.
     *
     * <p>NOTE(review): {@code sectionName} is spliced into the regular expression unquoted, so
     * regex metacharacters in it are interpreted. Callers needing a literal match should pass
     * {@code Pattern.quote(name)}.
     *
     * @param markup      the text to scan
     * @param sectionName the title to look for (matched case-insensitively)
     * @return a list holding the section's region, or an empty list if it was not found
     */
    public Vector<int[]> gatherSection(String markup, String sectionName) {
        Vector<int[]> regions = new Vector<>();
        Matcher header = Pattern
                .compile("\\n\\s*(={2,})\\s*" + sectionName + "\\s*\\1", Pattern.CASE_INSENSITIVE)
                .matcher(markup);
        if (header.find()) {
            int start = header.start(1);
            int level = header.group(1).length();
            Matcher nextHeader = Pattern.compile("\\n\\s*(={2," + level + "})[^=].*\\1").matcher(markup);
            // Region ends are exclusive, so a trailing section extends to markup.length().
            int end = nextHeader.find(header.end()) ? nextHeader.start() : markup.length();
            regions.add(new int[]{start, end});
        }
        return regions;
    }

    /**
     * Locates the marker characters at the start of lines, along with any spaces before them.
     *
     * <p>NOTE(review): the character class {@code [//*:]} matches '/', '*' and ':' only; it may
     * have been meant to include '#'. The pattern is kept as found - confirm before changing.
     *
     * @param markup the text to scan
     * @return the regions found
     */
    public Vector<int[]> gatherListAndIndentMarkers(String markup) {
        Vector<int[]> regions = gatherSimpleRegions(markup, "\n( *)([//*:]+)");
        for (int[] region : regions) {
            region[0]++; // leave the newline itself outside the region
        }
        // A marker on the very first line has no newline in front of it.
        return mergeRegionLists(regions, gatherSimpleRegions(markup, "^( *)([//*:]+)"));
    }

    /** Tests whether the whole line sits inside one single pair of italic tags. */
    private boolean isEntirelyItalicised(String line) {
        Matcher m = ITALIC_LINE.matcher(this.emphasisResolver.resolveEmphasis(line));
        // A closing tag inside the captured content means there are several italic spans.
        return m.matches() && !m.group(2).contains("</i>");
    }

    /**
     * Locates the run of leading lines that are blank, start with a colon, are entirely
     * italicised, or hold nothing but an image link.
     *
     * @param markup the text to scan
     * @return a list with exactly one region, starting at offset 0 (empty if the first line is
     *         an ordinary one)
     */
    public Vector<int[]> gatherMisformattedStarts(String markup) {
        int end = 0;
        for (String line : markup.split("\n")) {
            boolean skippable = BLANK_LINE.matcher(line).matches()
                    || INDENTED_LINE.matcher(line).matches()
                    || isEntirelyItalicised(line)
                    || IMAGE_LINE.matcher(line).matches();
            if (!skippable) {
                break;
            }
            end += line.length() + 1; // + 1 for the line's newline
        }
        // The final line may have no newline, so the running total can overshoot by one.
        end = Math.min(end, markup.length());
        Vector<int[]> regions = new Vector<>();
        regions.add(new int[]{0, end});
        return regions;
    }

    /**
     * Locates every match of a regular expression. The expression is compiled with
     * {@link Pattern#DOTALL}, so '.' also matches line terminators.
     *
     * @param markup the text to scan
     * @param regex  the expression to look for
     * @return the regions found, in order of appearance
     */
    public Vector<int[]> gatherSimpleRegions(String markup, String regex) {
        Vector<int[]> regions = new Vector<>();
        Matcher m = Pattern.compile(regex, Pattern.DOTALL).matcher(markup);
        while (m.find()) {
            regions.add(new int[]{m.start(), m.end()});
        }
        return regions;
    }

    /**
     * Locates balanced, possibly nested regions delimited by an opening and a closing regular
     * expression. Closers without a pending opener and openers that are never closed are
     * ignored.
     *
     * @param markup     the text to scan
     * @param startRegex the expression matching an opening delimiter
     * @param endRegex   the expression matching a closing delimiter
     * @return the regions found, ordered by the position of their closing delimiter
     */
    public Vector<int[]> gatherComplexRegions(String markup, String startRegex, String endRegex) {
        Vector<int[]> regions = new Vector<>();
        Vector<Integer> openStarts = new Vector<>(); // used as a stack of pending openers
        Matcher m = Pattern.compile("((" + startRegex + ")|(" + endRegex + "))", Pattern.DOTALL).matcher(markup);
        while (m.find()) {
            if (m.group(2) != null) {
                openStarts.add(m.start());
            } else if (!openStarts.isEmpty()) {
                int start = openStarts.remove(openStarts.size() - 1);
                regions.add(new int[]{start, m.end()});
            }
        }
        return regions;
    }

    /**
     * Merges two region lists into one list of non-overlapping regions, working from the end of
     * both lists towards the start. A region that starts at or after the start of the most
     * recently accepted region is discarded; an accepted region that overlaps it is truncated.
     */
    private Vector<int[]> mergeRegionLists(Vector<int[]> regionsA, Vector<int[]> regionsB) {
        int indexA = regionsA.size() - 1;
        int indexB = regionsB.size() - 1;
        Vector<int[]> merged = new Vector<>();
        int lastPos = -1; // start of the most recently accepted region, -1 while there is none
        while (indexA >= 0 && indexB >= 0) {
            int[] a = regionsA.elementAt(indexA);
            int[] b = regionsB.elementAt(indexB);
            if (lastPos >= 0 && a[0] >= lastPos && b[0] >= lastPos) {
                // Both candidates start inside the last accepted region, so drop both.
                indexA--;
                indexB--;
            } else if (b[1] > a[1]) {
                if ((b[0] < a[0] || b[1] > a[1]) && (lastPos < 0 || b[0] < lastPos)) {
                    merged.add(0, new int[]{b[0], min(b[1], lastPos)});
                    lastPos = b[0];
                }
                indexB--;
            } else {
                if ((a[0] < b[0] || a[1] > b[1]) && (lastPos < 0 || a[0] < lastPos)) {
                    merged.add(0, new int[]{a[0], min(a[1], lastPos)});
                    lastPos = a[0];
                }
                indexA--;
            }
        }
        // At most one of the two lists still has regions left.
        while (indexA >= 0) {
            int[] a = regionsA.elementAt(indexA);
            if (lastPos < 0 || a[0] <= lastPos) {
                merged.add(0, new int[]{a[0], min(a[1], lastPos)});
                lastPos = a[0];
            }
            indexA--;
        }
        while (indexB >= 0) {
            int[] b = regionsB.elementAt(indexB);
            if (lastPos < 0 || b[0] <= lastPos) {
                merged.add(0, new int[]{b[0], min(b[1], lastPos)});
                lastPos = b[0];
            }
            indexB--;
        }
        return merged;
    }

    /**
     * Returns the smaller of two values, treating a negative value as "not set": if either
     * value is negative the other one is returned (the second if the first is negative).
     */
    private int min(int first, int second) {
        if (first < 0 || second < 0) {
            return first >= 0 ? first : second;
        }
        return Math.min(first, second);
    }

    /**
     * Overwrites every character of {@code text}, except line terminators, with
     * {@code replacement}. The replacement is quoted so that '$' and '\' are taken literally.
     */
    private static String mask(String text, Character replacement) {
        return text.replaceAll(".", Matcher.quoteReplacement(replacement.toString()));
    }
}
