package org.wikipedia.miner.util;

import java.util.ArrayList;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Span;
import org.wikipedia.miner.annotation.preprocessing.PreprocessedDocument;

/* loaded from: input_file:org/wikipedia/miner/util/NGrammer.class */
/**
 * Splits text into n-grams (runs of consecutive tokens) using an OpenNLP
 * {@link Tokenizer} and, optionally, an OpenNLP {@link SentenceDetector}.
 *
 * <p>Text is first split on {@code '\n'}, each line is split into sentences
 * (or treated as a single sentence when no detector was supplied), and every
 * sentence is tokenized. N-grams never cross a sentence or line boundary.
 */
public class NGrammer {
    private final Tokenizer _tokenizer;
    private final SentenceDetector _sentenceDetector;
    private int _maxN = 10;

    /** Describes how upper and lower case letters are used in a token or sentence. */
    public enum CaseContext {
        lower,
        upper,
        upperFirst,
        mixed
    }

    /**
     * The character span of one n-gram, together with the spans of the tokens
     * it is made of and the case context of the sentence it was found in.
     *
     * <p>The start and end of this span are offsets into the full string that
     * was passed to {@link NGrammer#ngramPosDetect(String)}.
     */
    public class NGramSpan extends Span {
        private final Span[] _tokenSpans;
        private final CaseContext _caseContext;
        private final boolean _isSentenceStart;

        private NGramSpan(int i, int i2, Span[] spanArr, CaseContext caseContext, boolean z) {
            super(i, i2);
            this._tokenSpans = spanArr;
            this._caseContext = caseContext;
            this._isSentenceStart = z;
        }

        /**
         * @return the spans of the tokens in this n-gram, expressed relative to
         *         the start of the n-gram (the first token starts at 0)
         */
        public Span[] getTokenSpans() {
            return this._tokenSpans;
        }

        /** @return the case context of the sentence this n-gram was found in */
        public CaseContext getCaseContext() {
            return this._caseContext;
        }

        /** @return {@code true} if this n-gram starts at the first token of its sentence */
        public boolean isSentenceStart() {
            return this._isSentenceStart;
        }

        /**
         * Extracts the text of this n-gram.
         *
         * @param str the string this span was detected in
         * @return the substring of {@code str} covered by this span
         */
        public String getNgram(String str) {
            return str.substring(getStart(), getEnd());
        }

        /**
         * Returns the text of this n-gram lower-cased, with the first character
         * of every token converted to upper case.
         *
         * @param str the string this span was detected in
         * @return the re-cased n-gram text
         */
        public String getNgramUpperFirst(String str) {
            char[] chars = getNgram(str).toLowerCase().toCharArray();
            for (Span tokenSpan : this._tokenSpans) {
                // Token spans are relative to the n-gram, so they index chars directly.
                int first = tokenSpan.getStart();
                chars[first] = Character.toUpperCase(chars[first]);
            }
            return new String(chars);
        }
    }

    /**
     * Creates an n-grammer.
     *
     * @param sentenceDetector splits lines into sentences; may be {@code null},
     *        in which case every line is treated as one sentence
     * @param tokenizer splits sentences into tokens; must not be {@code null}
     * @throws NullPointerException if {@code tokenizer} is {@code null}
     */
    public NGrammer(SentenceDetector sentenceDetector, Tokenizer tokenizer) {
        if (tokenizer == null) {
            throw new NullPointerException("tokenizer must not be null");
        }
        this._sentenceDetector = sentenceDetector;
        this._tokenizer = tokenizer;
    }

    /**
     * Sets the limit used when extending an n-gram from its first token.
     *
     * @param i how many tokens beyond the first an n-gram may extend to
     */
    public void setMaxN(int i) {
        this._maxN = i;
    }

    /** @return the current n-gram length limit (10 unless changed) */
    public int getMaxN() {
        return this._maxN;
    }

    /**
     * Detects all n-grams in the given text and returns them as strings.
     *
     * @param str the text to process
     * @return the text of every n-gram, in the order produced by
     *         {@link #ngramPosDetect(String)}
     */
    public String[] ngramDetect(String str) {
        NGramSpan[] spans = ngramPosDetect(str);
        String[] ngrams = new String[spans.length];
        for (int i = 0; i < spans.length; i++) {
            ngrams[i] = spans[i].getNgram(str);
        }
        return ngrams;
    }

    /**
     * Detects all n-grams in the given text and returns their positions.
     *
     * <p>For every token that can start an n-gram, spans are emitted from the
     * longest candidate down to the single token. A token that is a single
     * character which is neither a letter nor a digit can neither start nor
     * end an n-gram, but may appear inside one.
     *
     * @param str the text to process
     * @return the detected n-gram spans, with offsets relative to {@code str}
     */
    public NGramSpan[] ngramPosDetect(String str) {
        ArrayList<NGramSpan> ngramSpans = new ArrayList<NGramSpan>();
        int lineOffset = 0;
        for (String line : str.split("\n")) {
            Span[] sentenceSpans = this._sentenceDetector == null
                    ? new Span[]{new Span(0, line.length())}
                    : this._sentenceDetector.sentPosDetect(line);
            for (Span sentenceSpan : sentenceSpans) {
                collectSentenceNgrams(line, sentenceSpan, lineOffset, ngramSpans);
            }
            // The extra 1 accounts for the '\n' that split() removed.
            lineOffset += line.length() + 1;
        }
        return ngramSpans.toArray(new NGramSpan[ngramSpans.size()]);
    }

    /** Tokenizes one sentence of a line and appends all of its n-gram spans to the given list. */
    private void collectSentenceNgrams(String line, Span sentenceSpan, int lineOffset,
            ArrayList<NGramSpan> ngramSpans) {
        String sentence = line.substring(sentenceSpan.getStart(), sentenceSpan.getEnd());
        Span[] tokenSpans = this._tokenizer.tokenizePos(sentence);
        CaseContext caseContext = identifyCaseContext(sentence, tokenSpans);
        int sentenceOffset = lineOffset + sentenceSpan.getStart();
        int lastIndex = tokenSpans.length - 1;

        for (int first = 0; first <= lastIndex; first++) {
            if (isLonePunctuation(sentence, tokenSpans[first])) {
                continue;
            }
            int firstStart = tokenSpans[first].getStart();
            // Same bound as min(first + maxN, lastIndex), written so that a very
            // large maxN cannot overflow and silently suppress every n-gram.
            // NOTE(review): this lets an n-gram cover up to maxN + 1 tokens;
            // confirm whether maxN was meant to be the token count itself.
            int furthest = first + Math.min(this._maxN, lastIndex - first);
            for (int last = furthest; last >= first; last--) {
                if (isLonePunctuation(sentence, tokenSpans[last])) {
                    continue;
                }
                Span[] relativeSpans = new Span[last - first + 1];
                for (int k = 0; k < relativeSpans.length; k++) {
                    Span token = tokenSpans[first + k];
                    relativeSpans[k] = new Span(token.getStart() - firstStart, token.getEnd() - firstStart);
                }
                ngramSpans.add(new NGramSpan(
                        sentenceOffset + firstStart,
                        sentenceOffset + tokenSpans[last].getEnd(),
                        relativeSpans,
                        caseContext,
                        first == 0));
            }
        }
    }

    /** Tells whether a token is a single character that is neither a letter nor a digit. */
    private static boolean isLonePunctuation(String sentence, Span tokenSpan) {
        return tokenSpan.length() == 1
                && !Character.isLetterOrDigit(sentence.charAt(tokenSpan.getStart()));
    }

    /**
     * Determines the case context of a sentence from the case of its tokens.
     *
     * <p>The sentence is {@code upper} if every token is, {@code lower} if
     * every token is, {@code upperFirst} if every token is either
     * {@code upperFirst} or {@code upper}, and {@code mixed} otherwise. A
     * sentence with no tokens is reported as {@code upper}.
     *
     * @param str the sentence text
     * @param spanArr the token spans within {@code str}
     * @return the case context of the sentence
     */
    private CaseContext identifyCaseContext(String str, Span[] spanArr) {
        boolean allUpper = true;
        boolean allLower = true;
        boolean allUpperFirst = true;
        for (Span tokenSpan : spanArr) {
            if (!allUpper && !allLower && !allUpperFirst) {
                // Nothing can restore a flag, so the answer is already known.
                return CaseContext.mixed;
            }
            String token = str.substring(tokenSpan.getStart(), tokenSpan.getEnd());
            switch (identifyCaseContext(token)) {
                case upper:
                    // An all-caps token is still compatible with upperFirst.
                    allLower = false;
                    break;
                case lower:
                    allUpper = false;
                    allUpperFirst = false;
                    break;
                case upperFirst:
                    allLower = false;
                    allUpper = false;
                    break;
                case mixed:
                    allLower = false;
                    allUpper = false;
                    allUpperFirst = false;
                    break;
            }
        }
        return pickCaseContext(allUpper, allLower, allUpperFirst);
    }

    /**
     * Determines the case context of a single token.
     *
     * <p>The token is {@code upper} if it contains no lower case character,
     * {@code lower} if it contains no upper case character, {@code upperFirst}
     * if it has both but its first character is not lower case, and
     * {@code mixed} otherwise.
     *
     * <p>NOTE(review): a token without any cased character (digits,
     * punctuation, the empty string) is therefore reported as {@code upper},
     * which stops the enclosing sentence from being classified as
     * {@code lower}; confirm that this is intended.
     *
     * @param str the token text
     * @return the case context of the token
     */
    private CaseContext identifyCaseContext(String str) {
        boolean allUpper = true;
        boolean allLower = true;
        boolean upperFirst = true;
        for (int i = 0; i < str.length(); i++) {
            if (!allUpper && !allLower) {
                // upperFirst can only change at index 0, so the result is final.
                return upperFirst ? CaseContext.upperFirst : CaseContext.mixed;
            }
            char c = str.charAt(i);
            if (Character.isUpperCase(c)) {
                allLower = false;
            }
            if (Character.isLowerCase(c)) {
                allUpper = false;
                if (i == 0) {
                    upperFirst = false;
                }
            }
        }
        return pickCaseContext(allUpper, allLower, upperFirst);
    }

    /** Maps the three case flags to a context, preferring upper, then lower, then upperFirst. */
    private static CaseContext pickCaseContext(boolean allUpper, boolean allLower, boolean upperFirst) {
        if (allUpper) {
            return CaseContext.upper;
        }
        if (allLower) {
            return CaseContext.lower;
        }
        return upperFirst ? CaseContext.upperFirst : CaseContext.mixed;
    }
}
