package org.wikipedia.miner.extraction;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.record.CsvRecordOutput;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.wikipedia.miner.annotation.preprocessing.PreprocessedDocument;
import org.wikipedia.miner.db.struct.DbIntList;
import org.wikipedia.miner.extraction.DumpExtractor;
import org.wikipedia.miner.extraction.LabelSensesStep;
import org.wikipedia.miner.extraction.struct.ExLinkKey;

/* loaded from: input_file:org/wikipedia/miner/extraction/CategoryLinkSummaryStep.class */
public class CategoryLinkSummaryStep extends Configured implements Tool {
    protected static final String KEY_LINKS_TO_SUMMARIZE = "wm.linksToSummarize";
    private DumpExtractor.ExtractionStep linksToSummarize;

    /* renamed from: org.wikipedia.miner.extraction.CategoryLinkSummaryStep$1, reason: invalid class name */
    /* loaded from: input_file:org/wikipedia/miner/extraction/CategoryLinkSummaryStep$1.class */
    /**
     * Synthetic lookup table backing the {@code switch} statements in this file that
     * dispatch on a {@link DumpExtractor.ExtractionStep}.
     *
     * <p>The array is indexed by {@code ExtractionStep.ordinal()} and holds the case label
     * used by those switches: {@code 1} for {@code categoryParent}, {@code 2} for
     * {@code articleParent}. Every other slot keeps the {@code int} default of {@code 0},
     * which matches no case label.
     *
     * <p>NOTE(review): this has the shape of the helper class a Java compiler emits for an
     * enum switch and was recovered by a decompiler; confirm before editing it by hand.
     */
    static /* synthetic */ class AnonymousClass1 {
        // One slot per ExtractionStep constant, addressed by ordinal().
        static final /* synthetic */ int[] $SwitchMap$org$wikipedia$miner$extraction$DumpExtractor$ExtractionStep = new int[DumpExtractor.ExtractionStep.values().length];

        static {
            try {
                $SwitchMap$org$wikipedia$miner$extraction$DumpExtractor$ExtractionStep[DumpExtractor.ExtractionStep.categoryParent.ordinal()] = 1;
            } catch (NoSuchFieldError e) {
                // Deliberately ignored: if the enum constant is missing at run time the
                // slot simply stays 0 and no case is selected.
            }
            try {
                $SwitchMap$org$wikipedia$miner$extraction$DumpExtractor$ExtractionStep[DumpExtractor.ExtractionStep.articleParent.ordinal()] = 2;
            } catch (NoSuchFieldError e2) {
                // Deliberately ignored, same reasoning as above.
            }
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/CategoryLinkSummaryStep$CategoryLinkSummaryMapper.class */
    /**
     * Map step: expands every input line into one outgoing and one incoming link record.
     *
     * <p>Each input value is expected to be a comma-separated line whose first two fields
     * are integer ids (presumably {@code childId,parentId} -- confirm against the step that
     * produces the input). For a line {@code a,b} the mapper emits
     * <ul>
     *   <li>key {@code (a, true)} with the single-element list {@code [b]}, and</li>
     *   <li>key {@code (b, false)} with the single-element list {@code [a]}.</li>
     * </ul>
     *
     * <p>No hand-written erased overload of {@code map} is declared here; the compiler
     * generates the bridge method for the generic {@code Mapper} interface itself, and
     * declaring it explicitly clashes with that generated method.
     */
    private static class CategoryLinkSummaryMapper extends MapReduceBase implements Mapper<LongWritable, Text, ExLinkKey, DbIntList> {
        private CategoryLinkSummaryMapper() {
        }

        /**
         * Parses one {@code id,id} line and emits the link in both directions.
         *
         * @param longWritable    input key supplied by the input format; not used
         * @param text            the input line
         * @param outputCollector receives the two generated records
         * @param reporter        progress reporter; not used
         * @throws IOException if the collector fails
         * @throws NumberFormatException if either of the first two fields is not an integer
         * @throws ArrayIndexOutOfBoundsException if the line has fewer than two fields
         */
        public void map(LongWritable longWritable, Text text, OutputCollector<ExLinkKey, DbIntList> outputCollector, Reporter reporter) throws IOException {
            String[] fields = text.toString().split(",");
            int firstId = Integer.parseInt(fields[0]);
            int secondId = Integer.parseInt(fields[1]);

            // Outgoing direction: firstId -> secondId.
            outputCollector.collect(new ExLinkKey(firstId, true), singleLink(secondId));
            // Incoming direction: secondId <- firstId.
            outputCollector.collect(new ExLinkKey(secondId, false), singleLink(firstId));
        }

        /** Wraps a single id in a fresh {@link DbIntList}. */
        private static DbIntList singleLink(int id) {
            ArrayList<Integer> ids = new ArrayList<Integer>();
            ids.add(Integer.valueOf(id));
            return new DbIntList(ids);
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/CategoryLinkSummaryStep$CategoryLinkSummaryOutputFormat.class */
    /**
     * Output format that splits the reducer output into two files per task: one for
     * records whose key reports {@code getIsOut() == true} and one for all other records.
     *
     * <p>The file names are derived from the task output name by replacing {@code "part"}
     * with the name of an {@link Output} constant, chosen according to the extraction step
     * stored in the job configuration under {@link CategoryLinkSummaryStep#KEY_LINKS_TO_SUMMARIZE}.
     */
    protected static class CategoryLinkSummaryOutputFormat extends TextOutputFormat<ExLinkKey, DbIntList> {

        /**
         * Writes each record as one line to either the "links out" or the "links in" stream.
         */
        protected static class CategoryLinkSummaryRecordWriter implements RecordWriter<ExLinkKey, DbIntList> {
            /** Destination for records whose key has {@code getIsOut() == true}. */
            protected OutputStream linksOut_outStream;
            /** Destination for records whose key has {@code getIsOut() == false}. */
            protected OutputStream linksIn_outStream;

            /**
             * @param outputStream  stream receiving the outgoing-link records
             * @param outputStream2 stream receiving the incoming-link records
             */
            public CategoryLinkSummaryRecordWriter(OutputStream outputStream, OutputStream outputStream2) {
                this.linksOut_outStream = outputStream;
                this.linksIn_outStream = outputStream2;
            }

            /**
             * Serialises one record through {@link CsvRecordOutput}: the key id, then the
             * sorted link ids as a vector, followed by a newline byte.
             *
             * <p>The list returned by {@code dbIntList.getValues()} is sorted in place.
             *
             * @param exLinkKey identifies the page and selects the target stream
             * @param dbIntList the link ids to write
             * @throws IOException if writing to the selected stream fails
             */
            public synchronized void write(ExLinkKey exLinkKey, DbIntList dbIntList) throws IOException {
                ArrayList<Integer> links = dbIntList.getValues();
                Collections.sort(links);

                OutputStream target = exLinkKey.getIsOut() ? this.linksOut_outStream : this.linksIn_outStream;
                CsvRecordOutput csv = new CsvRecordOutput(target);
                csv.writeInt(exLinkKey.getId(), "id");
                csv.startVector(links, "links");
                for (Integer link : links) {
                    csv.writeInt(link.intValue(), "link");
                }
                csv.endVector(links, "links");
                target.write('\n');
            }

            /**
             * Closes both streams. The second stream is closed even when closing the first
             * one throws, so a failure on one file does not leak the other.
             *
             * @param reporter progress reporter; not used
             * @throws IOException if closing either stream fails
             */
            public synchronized void close(Reporter reporter) throws IOException {
                try {
                    this.linksOut_outStream.close();
                } finally {
                    this.linksIn_outStream.close();
                }
            }
        }

        protected CategoryLinkSummaryOutputFormat() {
        }

        /**
         * Opens the two task output files and returns a writer bound to them.
         *
         * @param fileSystem   not used; the file system is resolved from each output path
         * @param jobConf      job configuration; must carry the extraction step name
         * @param str          task output name in which {@code "part"} is substituted
         * @param progressable progress callback handed to the file creation calls
         * @return a writer that routes records to the two files
         * @throws IOException if either file cannot be created
         * @throws IllegalArgumentException if the configured step is not a known
         *         {@code ExtractionStep} name, or is a step this format cannot summarise
         */
        public RecordWriter<ExLinkKey, DbIntList> getRecordWriter(FileSystem fileSystem, JobConf jobConf, String str, Progressable progressable) throws IOException {
            DumpExtractor.ExtractionStep step = DumpExtractor.ExtractionStep.valueOf(jobConf.get(CategoryLinkSummaryStep.KEY_LINKS_TO_SUMMARIZE));

            Output linksOutKind;
            Output linksInKind;
            switch (step) {
                case categoryParent:
                    linksOutKind = Output.categoryParents;
                    linksInKind = Output.childCategories;
                    break;
                case articleParent:
                    linksOutKind = Output.articleParents;
                    linksInKind = Output.childArticles;
                    break;
                default:
                    // Fail with a clear message instead of passing null file names on.
                    throw new IllegalArgumentException("Cannot summarize links for extraction step " + step);
            }

            String linksOutName = str.replace("part", linksOutKind.name());
            String linksInName = str.replace("part", linksInKind.name());

            OutputStream linksOut = openTaskOutput(jobConf, linksOutName, progressable);
            boolean handedOver = false;
            try {
                OutputStream linksIn = openTaskOutput(jobConf, linksInName, progressable);
                RecordWriter<ExLinkKey, DbIntList> writer = new CategoryLinkSummaryRecordWriter(linksOut, linksIn);
                handedOver = true;
                return writer;
            } finally {
                if (!handedOver) {
                    // The second file could not be opened: release the first one.
                    try {
                        linksOut.close();
                    } catch (IOException ignored) {
                        // Keep the original failure as the one that propagates.
                    }
                }
            }
        }

        /** Creates the task output file with the given name and returns its stream. */
        private static OutputStream openTaskOutput(JobConf jobConf, String name, Progressable progressable) throws IOException {
            Path path = FileOutputFormat.getTaskOutputPath(jobConf, name);
            return path.getFileSystem(jobConf).create(path, progressable);
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/CategoryLinkSummaryStep$CategoryLinkSummaryReducer.class */
    /**
     * Reduce (and combine) step: concatenates all id lists received for one key into a
     * single {@link DbIntList}.
     *
     * <p>Ids are appended in the order the lists arrive; no sorting or de-duplication is
     * done here. Input and output types are identical, which is what allows the same class
     * to be registered as the combiner.
     *
     * <p>No hand-written erased overload of {@code reduce} is declared here; the compiler
     * generates the bridge method for the generic {@code Reducer} interface itself, and
     * declaring it explicitly clashes with that generated method.
     */
    public static class CategoryLinkSummaryReducer extends MapReduceBase implements Reducer<ExLinkKey, DbIntList, ExLinkKey, DbIntList> {
        /**
         * Merges every list for {@code exLinkKey} and emits one combined record.
         *
         * @param exLinkKey       the key shared by all incoming lists
         * @param it              the id lists collected for that key
         * @param outputCollector receives the single merged record
         * @param reporter        progress reporter; not used
         * @throws IOException if the collector fails
         */
        public void reduce(ExLinkKey exLinkKey, Iterator<DbIntList> it, OutputCollector<ExLinkKey, DbIntList> outputCollector, Reporter reporter) throws IOException {
            ArrayList<Integer> merged = new ArrayList<Integer>();
            while (it.hasNext()) {
                // Copy the elements out rather than keeping a reference to the incoming list.
                merged.addAll(it.next().getValues());
            }
            outputCollector.collect(exLinkKey, new DbIntList(merged));
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/CategoryLinkSummaryStep$Output.class */
    /**
     * Names of the files this step produces. The constant's {@code name()} replaces the
     * {@code "part"} portion of the task output name in
     * {@code CategoryLinkSummaryOutputFormat.getRecordWriter}.
     */
    protected enum Output {
        // Written for keys with getIsOut() == true when summarizing the categoryParent step.
        categoryParents,
        // Written for keys with getIsOut() == true when summarizing the articleParent step.
        articleParents,
        // Written for keys with getIsOut() == false when summarizing the categoryParent step.
        childCategories,
        // Written for keys with getIsOut() == false when summarizing the articleParent step.
        childArticles
    }

    /**
     * Creates a summary step for the given kind of links.
     *
     * @param extractionStep the extraction step whose links are summarized; {@code run}
     *                       only selects input files for {@code categoryParent} and
     *                       {@code articleParent}
     */
    public CategoryLinkSummaryStep(DumpExtractor.ExtractionStep extractionStep) {
        this.linksToSummarize = extractionStep;
    }

    /**
     * Configures and runs the link-summary MapReduce job, blocking until it finishes.
     *
     * <p>Input is every file under the {@code labelSense} step's output directory whose
     * name starts with {@code tempCategoryParent} or {@code tempArticleParent}, depending
     * on the step being summarized. Output goes to the directory of the summarized step
     * below the directory configured as {@code wm.outputDir}.
     *
     * @param strArr command-line arguments, forwarded to {@code DumpExtractor.configureJob}
     * @return {@code 0} once the job has been run
     * @throws IllegalArgumentException if this step was created for an extraction step
     *         other than {@code categoryParent} or {@code articleParent}
     * @throws Exception if job configuration or execution fails
     */
    public int run(String[] strArr) throws Exception {
        JobConf jobConf = new JobConf(CategoryLinkSummaryStep.class);
        DumpExtractor.configureJob(jobConf, strArr);

        // Published so that the output format can pick its file names inside the tasks.
        jobConf.set(KEY_LINKS_TO_SUMMARIZE, this.linksToSummarize.name());
        jobConf.setJobName("WM: summarize " + this.linksToSummarize.name());

        jobConf.setOutputKeyClass(ExLinkKey.class);
        jobConf.setOutputValueClass(DbIntList.class);
        jobConf.setMapperClass(CategoryLinkSummaryMapper.class);
        jobConf.setCombinerClass(CategoryLinkSummaryReducer.class);
        jobConf.setReducerClass(CategoryLinkSummaryReducer.class);
        jobConf.setInputFormat(TextInputFormat.class);

        String inputPrefix;
        switch (this.linksToSummarize) {
            case categoryParent:
                inputPrefix = LabelSensesStep.Output.tempCategoryParent.name();
                break;
            case articleParent:
                inputPrefix = LabelSensesStep.Output.tempArticleParent.name();
                break;
            default:
                // Without this the job would be submitted with no input paths at all.
                throw new IllegalArgumentException("Cannot summarize links for extraction step " + this.linksToSummarize);
        }

        String outputDir = jobConf.get("wm.outputDir");
        String labelSenseDir = outputDir + "/" + DumpExtractor.getDirectoryName(DumpExtractor.ExtractionStep.labelSense);
        // The trailing "*" is a glob that picks up every part file of the temporary output.
        FileInputFormat.setInputPaths(jobConf, new Path[]{new Path(labelSenseDir + "/" + inputPrefix + "*")});

        jobConf.setOutputFormat(CategoryLinkSummaryOutputFormat.class);
        FileOutputFormat.setOutputPath(jobConf, new Path(outputDir + "/" + DumpExtractor.getDirectoryName(this.linksToSummarize)));

        JobClient.runJob(jobConf);
        return 0;
    }
}
