package org.wikipedia.miner.extraction;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.record.CsvRecordInput;
import org.apache.hadoop.record.CsvRecordOutput;
import org.apache.hadoop.record.RecordInput;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Tool;
import org.wikipedia.miner.db.struct.DbLabel;
import org.wikipedia.miner.db.struct.DbLabelForPage;
import org.wikipedia.miner.db.struct.DbLabelForPageList;
import org.wikipedia.miner.db.struct.DbSenseForLabel;
import org.wikipedia.miner.extraction.DumpExtractor;
import org.wikipedia.miner.extraction.LabelSensesStep;
import org.wikipedia.miner.extraction.struct.ExLabel;

/* loaded from: input_file:org/wikipedia/miner/extraction/PageLabelStep.class */
public class PageLabelStep extends Configured implements Tool {

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:org/wikipedia/miner/extraction/PageLabelStep$Output.class */
    /**
     * Names of the output files written by this step.
     *
     * <p>The constant's {@code name()} is substituted for the text {@code "part"} in the
     * task output file name when the record writer is created.
     */
    public enum Output {
        pageLabel
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/PageLabelStep$PageLabelMapper.class */
    private static class PageLabelMapper extends MapReduceBase implements Mapper<LongWritable, Text, IntWritable, DbLabelForPageList> {
        private PageLabelMapper() {
        }

        public void map(LongWritable longWritable, Text text, OutputCollector<IntWritable, DbLabelForPageList> outputCollector, Reporter reporter) throws IOException {
            RecordInput csvRecordInput = new CsvRecordInput(new ByteArrayInputStream((text.toString() + "\n").getBytes("UTF-8")));
            String readString = csvRecordInput.readString((String) null);
            ExLabel exLabel = new ExLabel();
            exLabel.deserialize(csvRecordInput);
            DbLabel convert = DumpExtractor.convert(exLabel);
            if (convert.getSenses() == null || convert.getSenses().isEmpty()) {
                return;
            }
            boolean z = true;
            Iterator<DbSenseForLabel> it = convert.getSenses().iterator();
            while (it.hasNext()) {
                DbSenseForLabel next = it.next();
                DbLabelForPage dbLabelForPage = new DbLabelForPage(readString, next.getLinkOccCount(), next.getLinkDocCount(), next.getFromTitle(), next.getFromRedirect(), z);
                ArrayList arrayList = new ArrayList();
                arrayList.add(dbLabelForPage);
                outputCollector.collect(new IntWritable(next.getId()), new DbLabelForPageList(arrayList));
                z = false;
            }
        }

        public /* bridge */ /* synthetic */ void map(Object obj, Object obj2, OutputCollector outputCollector, Reporter reporter) throws IOException {
            map((LongWritable) obj, (Text) obj2, (OutputCollector<IntWritable, DbLabelForPageList>) outputCollector, reporter);
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/PageLabelStep$PageLabelOutputFormat.class */
    protected static class PageLabelOutputFormat extends TextOutputFormat<IntWritable, DbLabelForPageList> {

        /* loaded from: input_file:org/wikipedia/miner/extraction/PageLabelStep$PageLabelOutputFormat$PageLabelRecordWriter.class */
        protected static class PageLabelRecordWriter implements RecordWriter<IntWritable, DbLabelForPageList> {
            protected OutputStream outStream;

            public PageLabelRecordWriter(OutputStream outputStream) {
                this.outStream = outputStream;
            }

            public synchronized void write(IntWritable intWritable, DbLabelForPageList dbLabelForPageList) throws IOException {
                ArrayList<DbLabelForPage> labels = dbLabelForPageList.getLabels();
                Collections.sort(labels, new Comparator<DbLabelForPage>() { // from class: org.wikipedia.miner.extraction.PageLabelStep.PageLabelOutputFormat.PageLabelRecordWriter.1
                    @Override // java.util.Comparator
                    public int compare(DbLabelForPage dbLabelForPage, DbLabelForPage dbLabelForPage2) {
                        int compareTo = new Long(dbLabelForPage2.getLinkOccCount()).compareTo(Long.valueOf(dbLabelForPage.getLinkOccCount()));
                        if (compareTo != 0) {
                            return compareTo;
                        }
                        int compareTo2 = new Long(dbLabelForPage2.getLinkDocCount()).compareTo(Long.valueOf(dbLabelForPage.getLinkDocCount()));
                        if (compareTo2 != 0) {
                            return compareTo2;
                        }
                        int compareTo3 = new Boolean(dbLabelForPage2.getFromTitle()).compareTo(Boolean.valueOf(dbLabelForPage.getFromTitle()));
                        if (compareTo3 != 0) {
                            return compareTo3;
                        }
                        int compareTo4 = new Boolean(dbLabelForPage2.getFromRedirect()).compareTo(Boolean.valueOf(dbLabelForPage.getFromRedirect()));
                        if (compareTo4 != 0) {
                            return compareTo4;
                        }
                        int compareTo5 = new Boolean(dbLabelForPage2.getIsPrimary()).compareTo(Boolean.valueOf(dbLabelForPage.getIsPrimary()));
                        return compareTo5 != 0 ? compareTo5 : dbLabelForPage.getText().compareTo(dbLabelForPage2.getText());
                    }
                });
                DbLabelForPageList dbLabelForPageList2 = new DbLabelForPageList(labels);
                CsvRecordOutput csvRecordOutput = new CsvRecordOutput(this.outStream);
                csvRecordOutput.writeInt(intWritable.get(), (String) null);
                dbLabelForPageList2.serialize(csvRecordOutput);
            }

            public synchronized void close(Reporter reporter) throws IOException {
                this.outStream.close();
            }
        }

        protected PageLabelOutputFormat() {
        }

        public RecordWriter<IntWritable, DbLabelForPageList> getRecordWriter(FileSystem fileSystem, JobConf jobConf, String str, Progressable progressable) throws IOException {
            Path taskOutputPath = FileOutputFormat.getTaskOutputPath(jobConf, str.replace("part", Output.pageLabel.name()));
            return new PageLabelRecordWriter(taskOutputPath.getFileSystem(jobConf).create(taskOutputPath, progressable));
        }
    }

    /* loaded from: input_file:org/wikipedia/miner/extraction/PageLabelStep$PageLabelReducer.class */
    public static class PageLabelReducer extends MapReduceBase implements Reducer<IntWritable, DbLabelForPageList, IntWritable, DbLabelForPageList> {
        public void reduce(IntWritable intWritable, Iterator<DbLabelForPageList> it, OutputCollector<IntWritable, DbLabelForPageList> outputCollector, Reporter reporter) throws IOException {
            ArrayList arrayList = new ArrayList();
            while (it.hasNext()) {
                arrayList.addAll(it.next().getLabels());
            }
            outputCollector.collect(intWritable, new DbLabelForPageList(arrayList));
        }

        public /* bridge */ /* synthetic */ void reduce(Object obj, Iterator it, OutputCollector outputCollector, Reporter reporter) throws IOException {
            reduce((IntWritable) obj, (Iterator<DbLabelForPageList>) it, (OutputCollector<IntWritable, DbLabelForPageList>) outputCollector, reporter);
        }
    }

    /**
     * Configures and runs the "summarize page labels" job.
     *
     * <p>Input is every file under the label-sense step's directory whose name starts with
     * {@code LabelSensesStep.Output.tempLabel}; output goes to the page-label step's directory.
     * Both directories live under the {@code wm.outputDir} configuration value.
     *
     * @param strArr command-line arguments, forwarded to {@code DumpExtractor.configureJob}
     * @return always {@code 0}; a failed job surfaces as an exception from {@code JobClient.runJob}
     * @throws Exception if job configuration or execution fails
     */
    @Override
    public int run(String[] strArr) throws Exception {
        // Locate the job jar through this step's own class rather than an unrelated step.
        JobConf jobConf = new JobConf(PageLabelStep.class);
        DumpExtractor.configureJob(jobConf, strArr);
        jobConf.setJobName("WM: summarize page labels");

        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(DbLabelForPageList.class);

        jobConf.setMapperClass(PageLabelMapper.class);
        // The reducer only concatenates lists, so it is also used as the combiner.
        jobConf.setCombinerClass(PageLabelReducer.class);
        jobConf.setReducerClass(PageLabelReducer.class);

        // Read after configureJob so that any value it sets is picked up.
        String outputDir = jobConf.get("wm.outputDir");

        jobConf.setInputFormat(TextInputFormat.class);
        Path inputPattern = new Path(outputDir + "/" + DumpExtractor.getDirectoryName(DumpExtractor.ExtractionStep.labelSense) + "/" + LabelSensesStep.Output.tempLabel.name() + "*");
        FileInputFormat.setInputPaths(jobConf, inputPattern);

        jobConf.setOutputFormat(PageLabelOutputFormat.class);
        FileOutputFormat.setOutputPath(jobConf, new Path(outputDir + "/" + DumpExtractor.getDirectoryName(DumpExtractor.ExtractionStep.pageLabel)));

        JobClient.runJob(jobConf);
        return 0;
    }
}
