package org.wikipedia.miner.extraction;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.record.CsvRecordInput;
import org.apache.hadoop.record.CsvRecordOutput;
import org.apache.hadoop.record.RecordInput;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.wikipedia.miner.db.struct.DbLinkLocation;
import org.wikipedia.miner.db.struct.DbLinkLocationList;
import org.wikipedia.miner.extraction.DumpExtractor;
import org.wikipedia.miner.extraction.LabelSensesStep;
import org.wikipedia.miner.extraction.struct.ExLinkKey;

/* loaded from: input_file:org/wikipedia/miner/extraction/PageLinkSummaryStep.class */
public class PageLinkSummaryStep extends Configured implements Tool {

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/wikipedia/miner/extraction/PageLinkSummaryStep$Output.class */
    /**
     * Names of the two outputs produced by this step. In
     * {@code PageLinkSummaryOutputFormat.getRecordWriter} each constant's name
     * replaces the "part" portion of the task output name, so every task writes
     * one file per constant.
     */
    public enum Output {
        /** Receives the records whose {@code ExLinkKey.getIsOut()} is true. */
        pageLinkOut,
        /** Receives the records whose {@code ExLinkKey.getIsOut()} is false. */
        pageLinkIn
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/PageLinkSummaryStep$PageLinkSummaryMapper.class */
    private static class PageLinkSummaryMapper extends MapReduceBase implements Mapper<LongWritable, Text, ExLinkKey, DbLinkLocationList> {
        private PageLinkSummaryMapper() {
        }

        public void map(LongWritable longWritable, Text text, OutputCollector<ExLinkKey, DbLinkLocationList> outputCollector, Reporter reporter) throws IOException {
            RecordInput csvRecordInput = new CsvRecordInput(new ByteArrayInputStream((text.toString() + "\n").getBytes("UTF-8")));
            int readInt = csvRecordInput.readInt((String) null);
            DbLinkLocation dbLinkLocation = new DbLinkLocation();
            dbLinkLocation.deserialize(csvRecordInput);
            int linkId = dbLinkLocation.getLinkId();
            DbLinkLocation dbLinkLocation2 = new DbLinkLocation(readInt, dbLinkLocation.getSentenceIndexes());
            ArrayList arrayList = new ArrayList();
            arrayList.add(dbLinkLocation);
            outputCollector.collect(new ExLinkKey(readInt, true), new DbLinkLocationList(arrayList));
            ArrayList arrayList2 = new ArrayList();
            arrayList2.add(dbLinkLocation2);
            outputCollector.collect(new ExLinkKey(linkId, false), new DbLinkLocationList(arrayList2));
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((LongWritable) obj, (Text) obj2, (OutputCollector<ExLinkKey, DbLinkLocationList>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/PageLinkSummaryStep$PageLinkSummaryOutputFormat.class */
    protected static class PageLinkSummaryOutputFormat extends TextOutputFormat<ExLinkKey, DbLinkLocationList> {

        /* loaded from: input_file:org/wikipedia/miner/extraction/PageLinkSummaryStep$PageLinkSummaryOutputFormat$LinkSummaryRecordWriter.class */
        protected static class LinkSummaryRecordWriter implements RecordWriter<ExLinkKey, DbLinkLocationList> {
            protected OutputStream linksOut_outStream;
            protected OutputStream linksIn_outStream;

            public LinkSummaryRecordWriter(OutputStream outputStream, OutputStream outputStream2) {
                this.linksOut_outStream = outputStream;
                this.linksIn_outStream = outputStream2;
            }

            public synchronized void write(ExLinkKey exLinkKey, DbLinkLocationList dbLinkLocationList) throws IOException {
                ArrayList<DbLinkLocation> linkLocations = dbLinkLocationList.getLinkLocations();
                Collections.sort(linkLocations);
                DbLinkLocationList dbLinkLocationList2 = new DbLinkLocationList(linkLocations);
                CsvRecordOutput csvRecordOutput = new CsvRecordOutput(exLinkKey.getIsOut() ? this.linksOut_outStream : this.linksIn_outStream);
                csvRecordOutput.writeInt(exLinkKey.getId(), (String) null);
                dbLinkLocationList2.serialize(csvRecordOutput);
            }

            public synchronized void close(Reporter reporter) throws IOException {
                this.linksOut_outStream.close();
                this.linksIn_outStream.close();
            }
        }

        protected PageLinkSummaryOutputFormat() {
        }

        public RecordWriter<ExLinkKey, DbLinkLocationList> getRecordWriter(FileSystem fileSystem, JobConf jobConf, String str, Progressable progressable) throws IOException {
            String replace = str.replace("part", Output.pageLinkOut.name());
            String replace2 = str.replace("part", Output.pageLinkIn.name());
            Path taskOutputPath = FileOutputFormat.getTaskOutputPath(jobConf, replace);
            FSDataOutputStream create = taskOutputPath.getFileSystem(jobConf).create(taskOutputPath, progressable);
            Path taskOutputPath2 = FileOutputFormat.getTaskOutputPath(jobConf, replace2);
            return new LinkSummaryRecordWriter(create, taskOutputPath2.getFileSystem(jobConf).create(taskOutputPath2, progressable));
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/PageLinkSummaryStep$PageLinkSummaryReducer.class */
    public static class PageLinkSummaryReducer extends MapReduceBase implements Reducer<ExLinkKey, DbLinkLocationList, ExLinkKey, DbLinkLocationList> {
        public void reduce(ExLinkKey exLinkKey, Iterator<DbLinkLocationList> it, OutputCollector<ExLinkKey, DbLinkLocationList> outputCollector, Reporter reporter) throws IOException {
            ArrayList arrayList = new ArrayList();
            while (it.hasNext()) {
                arrayList.addAll(it.next().getLinkLocations());
            }
            outputCollector.collect(exLinkKey, new DbLinkLocationList(arrayList));
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((ExLinkKey) obj, (Iterator<DbLinkLocationList>) it, (OutputCollector<ExLinkKey, DbLinkLocationList>) outputCollector, reporter);
        }
    }

    /**
     * Configures and runs the page-link summary job, blocking until it finishes.
     *
     * <p>Input is every file under the label-sense step directory whose name
     * starts with {@code LabelSensesStep.Output.tempPageLink}; output goes to
     * the page-link step directory. Both directories are resolved below the
     * {@code "wm.outputDir"} configuration value.
     *
     * @param strArr command-line arguments, passed on to {@code DumpExtractor.configureJob}
     * @return always 0; a failure surfaces as an exception rather than a return code
     * @throws Exception if job configuration or execution fails
     */
    @Override
    public int run(String[] strArr) throws Exception {
        // Use this step's own class to build the job (the original referenced
        // CategoryLinkSummaryStep, an unrelated step in the same package).
        JobConf jobConf = new JobConf(PageLinkSummaryStep.class);
        DumpExtractor.configureJob(jobConf, strArr);
        jobConf.setJobName("WM: summarize pagelinks");

        jobConf.setOutputKeyClass(ExLinkKey.class);
        jobConf.setOutputValueClass(DbLinkLocationList.class);

        jobConf.setMapperClass(PageLinkSummaryMapper.class);
        jobConf.setCombinerClass(PageLinkSummaryReducer.class);
        jobConf.setReducerClass(PageLinkSummaryReducer.class);

        // Read after configureJob so the value reflects whatever it set up.
        String outputDir = jobConf.get("wm.outputDir");

        jobConf.setInputFormat(TextInputFormat.class);
        String inputGlob = outputDir + "/" + DumpExtractor.getDirectoryName(DumpExtractor.ExtractionStep.labelSense)
                + "/" + LabelSensesStep.Output.tempPageLink.name() + "*";
        FileInputFormat.setInputPaths(jobConf, new Path(inputGlob));

        jobConf.setOutputFormat(PageLinkSummaryOutputFormat.class);
        String stepOutputDir = outputDir + "/" + DumpExtractor.getDirectoryName(DumpExtractor.ExtractionStep.pageLink);
        FileOutputFormat.setOutputPath(jobConf, new Path(stepOutputDir));

        JobClient.runJob(jobConf);
        return 0;
    }
}
