package org.wikipedia.miner.annotation.preprocessing;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.wikipedia.miner.annotation.preprocessing.PreprocessedDocument;

/* loaded from: input_file:org/wikipedia/miner/annotation/preprocessing/DocumentPreprocessor.class */
/**
 * Base class for preprocessors that turn raw document content into a
 * {@link PreprocessedDocument}.
 *
 * <p>Subclasses implement {@link #preprocess(String)}. This class supplies helpers
 * for reading a file into a string, blanking out regex matches without shifting
 * character offsets, and locating region boundaries via three optional patterns.
 */
public abstract class DocumentPreprocessor {
    /** Regex flags used by {@link #clearAllMentions(String, String)} (numerically 34). */
    private static final int MENTION_FLAGS = Pattern.CASE_INSENSITIVE | Pattern.DOTALL;

    /** Pattern whose match positions are recorded with tag type 1; may be {@code null}. */
    protected Pattern openPattern;
    /** Pattern whose match positions are recorded with tag type 2; may be {@code null}. */
    protected Pattern closePattern;
    /** Pattern whose match positions are recorded with tag type 3; may be {@code null}. */
    protected Pattern splitPattern;

    /**
     * Creates a preprocessor with the given region patterns. Any of them may be
     * {@code null}, in which case {@link #getRegionTags(String)} skips it.
     *
     * @param openPattern  stored in {@link #openPattern}
     * @param closePattern stored in {@link #closePattern}
     * @param splitPattern stored in {@link #splitPattern}
     */
    public DocumentPreprocessor(Pattern openPattern, Pattern closePattern, Pattern splitPattern) {
        this.openPattern = openPattern;
        this.closePattern = closePattern;
        this.splitPattern = splitPattern;
    }

    /**
     * Preprocesses the given document content.
     *
     * @param str the raw content of the document
     * @return the preprocessed document
     */
    public abstract PreprocessedDocument preprocess(String str);

    /**
     * Reads the whole file via {@link #getContent(File)} and preprocesses its content.
     *
     * @param file the file to read
     * @return the preprocessed document
     * @throws IOException if the file cannot be read
     */
    public PreprocessedDocument preprocess(File file) throws IOException {
        return preprocess(getContent(file));
    }

    /**
     * Reads a text file into a single string, line by line. Every line, including
     * the last one, is terminated with {@code "\n"} regardless of the line
     * separator used in the file.
     *
     * <p>The file is decoded with {@link FileReader}'s default charset.
     *
     * @param file the file to read
     * @return the file content with normalised line endings
     * @throws IOException if the file cannot be opened or read
     */
    public static String getContent(File file) throws IOException {
        StringBuilder content = new StringBuilder();
        // try-with-resources guarantees the reader is closed on both the normal
        // and the exceptional path.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

    /**
     * Replaces every match of a regular expression with a run of spaces of the
     * same length, so that the character offsets of the remaining text are
     * unchanged. Matching is case-insensitive and {@code .} also matches line
     * terminators.
     *
     * @param regex the regular expression to blank out
     * @param text  the text to search
     * @return a string of the same length as {@code text} with all matches blanked
     */
    public String clearAllMentions(String regex, String text) {
        Matcher matcher = Pattern.compile(regex, MENTION_FLAGS).matcher(text);
        StringBuilder cleared = new StringBuilder(text.length());
        int copiedUpTo = 0;
        while (matcher.find()) {
            // Copy the untouched text before the match, then pad over the match itself.
            cleared.append(text, copiedUpTo, matcher.start());
            cleared.append(getSpaceString(matcher.end() - matcher.start()));
            copiedUpTo = matcher.end();
        }
        cleared.append(text, copiedUpTo, text.length());
        return cleared.toString();
    }

    /**
     * Builds a string consisting of {@code length} space characters.
     *
     * @param length the number of spaces; zero or negative yields an empty string
     * @return the string of spaces
     */
    public String getSpaceString(int length) {
        // Math.max guards the capacity argument, which must not be negative.
        StringBuilder spaces = new StringBuilder(Math.max(length, 0));
        for (int i = 0; i < length; i++) {
            spaces.append(' ');
        }
        return spaces.toString();
    }

    /**
     * Finds every match of the open, close and split patterns in the text and
     * returns one tag per match, holding the match start offset and a type code
     * (1 for {@link #openPattern}, 2 for {@link #closePattern}, 3 for
     * {@link #splitPattern}). The result is sorted by the tags' natural ordering.
     *
     * @param text the text to scan
     * @return the sorted list of tags; empty if nothing matched or all patterns are {@code null}
     */
    public ArrayList<PreprocessedDocument.RegionTag> getRegionTags(String text) {
        ArrayList<PreprocessedDocument.RegionTag> tags = new ArrayList<>();
        // NOTE(review): 1/2/3 presumably mirror type constants declared on
        // PreprocessedDocument.RegionTag; confirm and switch to the named constants.
        collectTags(this.openPattern, text, 1, tags);
        collectTags(this.closePattern, text, 2, tags);
        collectTags(this.splitPattern, text, 3, tags);
        Collections.sort(tags);
        return tags;
    }

    /** Appends a tag of the given type for each match of {@code pattern}; a null pattern is skipped. */
    private static void collectTags(Pattern pattern, String text, int type,
            ArrayList<PreprocessedDocument.RegionTag> tags) {
        if (pattern == null) {
            return;
        }
        Matcher matcher = pattern.matcher(text);
        while (matcher.find()) {
            tags.add(new PreprocessedDocument.RegionTag(matcher.start(), type));
        }
    }
}
