package org.wikipedia.miner.util.text;

/* loaded from: input_file:org/wikipedia/miner/util/text/Cleaner.class */
/**
 * Text processor that strips punctuation from a string and lower-cases what remains.
 *
 * <p>The input is scanned for tokens, i.e. runs of letters and digits that may contain
 * a small set of connector characters ({@code . @ _ & / -}) when those sit directly
 * between two letters/digits, and apostrophes that directly follow a letter/digit.
 * Tokens are re-joined with single separators; every other character is discarded.
 */
public class Cleaner extends TextProcessor {
    /** When {@code true}, a period is never kept inside a token. */
    private boolean disallowInternalPeriods = false;

    /**
     * Normalizes the given text: punctuation is cleaned, line breaks and double quotes
     * become spaces, apostrophes are removed, surrounding whitespace is trimmed and the
     * result is lower-cased.
     *
     * <p>Lower-casing uses {@link java.util.Locale#ROOT} so the result does not depend on
     * the default locale of the JVM.
     *
     * @param str the text to normalize; must not be {@code null}
     * @return the normalized text
     */
    @Override
    public String processText(String str) {
        return cleanPunctuation(str)
                .replace('\n', ' ')
                .replace("'", "")
                .replace('"', ' ')
                .trim()
                .toLowerCase(java.util.Locale.ROOT);
    }

    /**
     * Extracts the tokens of {@code str} and joins them again.
     *
     * <p>Two tokens that belong to the same phrase are joined by a space, or by
     * {@code '-'} / {@code '/'} when that character immediately followed the previous
     * token. A phrase is terminated with {@code '\n'} when any other non-whitespace
     * character is met, or when a second line break is met before the next token.
     *
     * @param str the raw text
     * @return the tokens, separated as described above
     */
    private String cleanPunctuation(String str) {
        final int length = str.length();
        StringBuilder cleaned = new StringBuilder(length);

        int pos = 0;
        boolean atPhraseStart = true;
        boolean sawNewline = false;
        boolean sawHyphen = false;
        boolean sawSlash = false;

        while (pos < length) {
            final int tokenStart = pos;
            boolean hasAlphanumeric = false;

            // Advance pos to the first character that cannot be part of the current token.
            while (pos < length) {
                char ch = str.charAt(pos);
                if (Character.isLetterOrDigit(ch)) {
                    hasAlphanumeric = true;
                } else if (isInternalConnector(ch)) {
                    // Connectors are kept only when enclosed by letters/digits on both sides.
                    if (!isLetterOrDigitAt(str, pos - 1) || !isLetterOrDigitAt(str, pos + 1)) {
                        break;
                    }
                } else if (ch != '\'' || !isLetterOrDigitAt(str, pos - 1)) {
                    // Anything but an apostrophe that follows a letter/digit ends the token.
                    break;
                }
                pos++;
            }

            if (hasAlphanumeric) {
                if (!atPhraseStart) {
                    cleaned.append(separator(sawHyphen, sawSlash));
                }
                cleaned.append(str, tokenStart, pos);
            }
            if (pos == length) {
                break;
            }

            // The character that stopped the scan is consumed here and never copied.
            final char delimiter = str.charAt(pos);
            pos++;

            if (hasAlphanumeric) {
                atPhraseStart = false;
                sawNewline = false;
                sawHyphen = false;
                sawSlash = false;
                if (Character.isWhitespace(delimiter)) {
                    sawNewline = delimiter == '\n';
                } else if (delimiter == '-') {
                    sawHyphen = true;
                } else if (delimiter == '/') {
                    sawSlash = true;
                } else {
                    cleaned.append('\n');
                    atPhraseStart = true;
                }
            } else if (delimiter == '\n') {
                // A single line break is ignored; a repeated one ends the phrase.
                if (sawNewline && !atPhraseStart) {
                    cleaned.append('\n');
                    atPhraseStart = true;
                }
                sawNewline = true;
            } else if (!Character.isWhitespace(delimiter) && !atPhraseStart) {
                cleaned.append('\n');
                atPhraseStart = true;
            }
        }
        return cleaned.toString();
    }

    /** Tells whether {@code ch} may be kept inside a token when enclosed by letters/digits. */
    private boolean isInternalConnector(char ch) {
        if (ch == '.') {
            return !this.disallowInternalPeriods;
        }
        return ch == '@' || ch == '_' || ch == '&' || ch == '/' || ch == '-';
    }

    /** Tells whether {@code index} lies inside {@code str} and holds a letter or digit. */
    private static boolean isLetterOrDigitAt(String str, int index) {
        return index >= 0 && index < str.length() && Character.isLetterOrDigit(str.charAt(index));
    }

    /** Picks the character that joins a token to the previous token of the same phrase. */
    private static char separator(boolean sawHyphen, boolean sawSlash) {
        if (sawHyphen) {
            return '-';
        }
        if (sawSlash) {
            return '/';
        }
        return ' ';
    }
}
