package org.wikipedia.miner.extraction;

import java.io.StringReader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.regex.Matcher;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.log4j.Logger;
import org.wikipedia.miner.annotation.preprocessing.PreprocessedDocument;
import org.wikipedia.miner.model.Page;

/**
 * Parses the XML markup of a single dump page into a {@link DumpPage}.
 *
 * <p>The parser reads the {@code id}, {@code title}, {@code text} and {@code timestamp} elements
 * of the markup, resolves the namespace from the title prefix via {@link SiteInfo}, and
 * classifies the page using the patterns supplied by {@link LanguageConfiguration}.
 *
 * <p>Instances hold a {@link SimpleDateFormat}, which is not synchronized; do not share one
 * parser between threads without external synchronization.
 */
public class DumpPageParser {
    /** Namespace key used when the title has no recognised namespace prefix; yields articles. */
    private static final int MAIN_NAMESPACE = 0;
    /** Namespace key whose pages are reported as {@code Page.PageType.template}. */
    private static final int TEMPLATE_NAMESPACE = 10;
    /** Namespace key whose pages are reported as {@code Page.PageType.category}. */
    private static final int CATEGORY_NAMESPACE = 14;

    private final LanguageConfiguration languageConfiguration;
    private final SiteInfo siteInfo;
    private final Logger log = Logger.getLogger(DumpPageParser.class);
    private final XMLInputFactory xmlStreamFactory = newXmlInputFactory();
    // NOTE(review): 'Z' is a quoted literal, so timestamps are interpreted in the JVM default
    // time zone rather than UTC - confirm whether downstream code relies on this.
    private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");

    /** Element names of the page markup that the parser distinguishes. */
    public enum DumpTag {
        page,
        id,
        title,
        text,
        timestamp,
        ignorable
    }

    /**
     * Creates a parser.
     *
     * @param languageConfiguration supplies the disambiguation and redirect patterns
     * @param siteInfo supplies the mapping from namespace prefix to namespace key
     */
    public DumpPageParser(LanguageConfiguration languageConfiguration, SiteInfo siteInfo) {
        this.languageConfiguration = languageConfiguration;
        this.siteInfo = siteInfo;
    }

    /**
     * Parses the markup of one page.
     *
     * @param str the XML markup of the page
     * @return the parsed page, or {@code null} if the page's namespace is neither the main,
     *     template nor category namespace (such pages are logged and ignored)
     * @throws XMLStreamException if the markup is not well-formed, or if it lacks an id, a
     *     title or a text element
     * @throws NumberFormatException if the content of the first id element is not an integer
     */
    public DumpPage parsePage(String str) throws XMLStreamException {
        Integer pageId = null;
        String pageTitle = null;
        String pageText = null;
        Date lastEdited = null;

        // Collects character data since the previous end tag; emptied after every end tag so
        // that each element only sees its own content.
        StringBuilder characters = new StringBuilder();

        XMLStreamReader reader = this.xmlStreamFactory.createXMLStreamReader(new StringReader(str));
        try {
            while (reader.hasNext()) {
                switch (reader.next()) {
                    case XMLStreamReader.END_ELEMENT: {
                        String content = characters.toString().trim();
                        switch (resolveDumpTag(reader.getLocalName())) {
                            case id:
                                // Only the first id is kept; later id elements are ignored.
                                if (pageId == null) {
                                    pageId = Integer.valueOf(Integer.parseInt(content));
                                }
                                break;
                            case title:
                                pageTitle = content;
                                break;
                            case text:
                                pageText = content;
                                break;
                            case timestamp:
                                try {
                                    lastEdited = this.dateFormat.parse(content);
                                } catch (ParseException ignored) {
                                    // Best effort: an unparseable timestamp leaves the date unset.
                                    lastEdited = null;
                                }
                                break;
                            default:
                                break;
                        }
                        characters.setLength(0);
                        break;
                    }
                    case XMLStreamReader.CHARACTERS:
                        // Character data may arrive in several events, so append rather than assign.
                        characters.append(reader.getText());
                        break;
                    default:
                        break;
                }
            }
        } finally {
            reader.close();
        }

        if (pageId == null || pageTitle == null || pageText == null) {
            throw new XMLStreamException("Could not parse xml markup for page");
        }

        // A title of the form "prefix:rest" belongs to the namespace of that prefix, provided
        // the prefix is known to the site; otherwise the whole title stays in the main namespace.
        int namespaceKey = MAIN_NAMESPACE;
        int colon = pageTitle.indexOf(':');
        if (colon > 0) {
            Integer resolved = this.siteInfo.getNamespaceKey(pageTitle.substring(0, colon));
            if (resolved != null) {
                namespaceKey = resolved.intValue();
                pageTitle = pageTitle.substring(colon + 1);
            }
        }

        if (namespaceKey != CATEGORY_NAMESPACE
                && namespaceKey != MAIN_NAMESPACE
                && namespaceKey != TEMPLATE_NAMESPACE) {
            this.log.info("Ignoring page " + pageId + ":" + pageTitle);
            return null;
        }

        Page.PageType pageType;
        String redirectTarget = null;
        if (namespaceKey == CATEGORY_NAMESPACE) {
            pageType = Page.PageType.category;
        } else if (namespaceKey == TEMPLATE_NAMESPACE) {
            pageType = Page.PageType.template;
        } else {
            pageType = Page.PageType.article;
            if (this.languageConfiguration.getDisambiguationPattern().matcher(pageText).find()) {
                pageType = Page.PageType.disambiguation;
            }
            // A redirect match takes precedence over a disambiguation match.
            Matcher redirect = this.languageConfiguration.getRedirectPattern().matcher(pageText);
            if (redirect.find()) {
                pageType = Page.PageType.redirect;
                redirectTarget = redirect.group(2) != null ? redirect.group(2) : redirect.group(3);
            }
        }

        return new DumpPage(
                pageId.intValue(), namespaceKey, pageType, pageTitle, pageText, redirectTarget, lastEdited);
    }

    /**
     * Maps an element name to its {@link DumpTag}.
     *
     * @param str the local name of an element
     * @return the tag with that exact name, or {@link DumpTag#ignorable} if there is none
     */
    private DumpTag resolveDumpTag(String str) {
        try {
            return DumpTag.valueOf(str);
        } catch (IllegalArgumentException e) {
            return DumpTag.ignorable;
        }
    }

    /** Creates the StAX factory with DTD processing and external entities disabled. */
    private static XMLInputFactory newXmlInputFactory() {
        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
        return factory;
    }
}
