/*
 * Decompiled with CFR 0.152.
 */
package edu.jhu.thrax.hadoop.jobs;

import edu.jhu.thrax.extraction.Labeling;
import edu.jhu.thrax.hadoop.jobs.ThraxJob;
import edu.jhu.thrax.util.FormatUtils;
import edu.jhu.thrax.util.Vocabulary;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

/**
 * Hadoop job that collects every distinct token (words and, depending on the
 * grammar type, nonterminal labels) found in the Thrax input file and writes
 * them as {@code (IntWritable id, Text token)} pairs to a sequence file under
 * {@code <thrax.work-dir>vocabulary}.
 *
 * <p>Configuration keys read by this job: {@code thrax.input-file},
 * {@code thrax.work-dir}, {@code thrax.max-split-size}, {@code thrax.reducers},
 * {@code thrax.source-is-parsed}, {@code thrax.target-is-parsed},
 * {@code thrax.grammar}, {@code thrax.allow-constituent-label},
 * {@code thrax.allow-ccg-label}, {@code thrax.allow-concat-label} and
 * {@code thrax.allow-double-plus}.
 */
public class VocabularyJob
implements ThraxJob {
    /** Job name, output suffix and output directory name (appended to the work dir). */
    private static final String NAME = "vocabulary";

    /**
     * Builds the configured vocabulary job.
     *
     * @param conf Hadoop configuration carrying the {@code thrax.*} settings
     * @return the job, ready to be submitted
     * @throws IOException if the job cannot be created or its paths cannot be set
     */
    @Override
    public Job getJob(Configuration conf) throws IOException {
        Job job = new Job(conf, NAME);
        job.setJarByClass(VocabularyJob.class);

        job.setMapperClass(Map.class);
        job.setCombinerClass(Combine.class);
        job.setReducerClass(Reduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        job.setSortComparatorClass(Text.Comparator.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(conf.get("thrax.input-file")));
        // A value of 0 (the default) means "leave the split size untouched".
        int maxSplitSize = conf.getInt("thrax.max-split-size", 0);
        if (maxSplitSize != 0) {
            FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
        }

        // The work dir is concatenated verbatim, so it must already end with a separator.
        FileOutputFormat.setOutputPath(job, new Path(conf.get("thrax.work-dir") + NAME));

        job.setNumReduceTasks(conf.getInt("thrax.reducers", 4));
        return job;
    }

    /** @return the suffix of this job's output directory, {@code "vocabulary"} */
    @Override
    public String getOutputSuffix() {
        return NAME;
    }

    /** @return the name of this job, {@code "vocabulary"} */
    @Override
    public String getName() {
        return NAME;
    }

    /** @return a fresh, empty set: this job declares no prerequisite jobs */
    @Override
    public Set<Class<? extends ThraxJob>> getPrerequisites() {
        return new HashSet<Class<? extends ThraxJob>>();
    }

    /**
     * Registers every distinct token it receives with {@link Vocabulary} and,
     * once all keys are consumed, writes the collected entries with ids
     * interleaved by reducer number.
     */
    private static class Reduce
    extends Reducer<Text, NullWritable, IntWritable, Text> {
        private int reducerNumber;
        private int numReducers;

        /** Captures the reducer count and this task's number, and initializes the vocabulary. */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            this.numReducers = context.getNumReduceTasks();
            this.reducerNumber = context.getTaskAttemptID().getTaskID().getId();
            Vocabulary.initialize(context.getConfiguration());
        }

        /**
         * Adds one token to the vocabulary. Nothing is written here; output is
         * produced in {@link #cleanup}.
         *
         * <p>Declared against the parameterized {@code Context} and marked
         * {@code @Override} so the compiler verifies that this method really
         * replaces the framework's default {@code reduce}.
         *
         * @throws RuntimeException if the key is an empty token
         */
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            String token = key.toString();
            if (token == null || token.isEmpty()) {
                throw new RuntimeException("Unexpected empty token.");
            }
            Vocabulary.id(token);
            // Signal liveness: no records are emitted until cleanup.
            context.progress();
        }

        /**
         * Writes every vocabulary entry from {@code Vocabulary.head()} up to
         * {@code Vocabulary.size()}. The emitted id is
         * {@code (i - 1) * numReducers + reducerNumber + 1}, which keeps ids of
         * different reducers apart as long as reducer numbers lie in
         * {@code [0, numReducers)}.
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            for (int i = Vocabulary.head(); i < Vocabulary.size(); ++i) {
                int globalId = (i - 1) * this.numReducers + this.reducerNumber + 1;
                context.write(new IntWritable(globalId), new Text(Vocabulary.word(i)));
            }
        }
    }

    /** Map-side combiner: collapses repeated occurrences of a token into one record. */
    private static class Combine
    extends Reducer<Text, NullWritable, Text, NullWritable> {
        /** Emits the key exactly once, regardless of how many values it has. */
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            context.write(key, NullWritable.get());
        }
    }

    /**
     * Partitions tokens by the non-negative hash of the key.
     *
     * <p>NOTE(review): {@link VocabularyJob#getJob} does not register this
     * partitioner; confirm whether other code refers to it.
     */
    public static class VocabularyPartitioner
    extends Partitioner<Text, Writable> {
        /**
         * @param key token being partitioned
         * @param value ignored
         * @param numPartitions number of partitions
         * @return {@code (hash & Integer.MAX_VALUE) % numPartitions}
         */
        @Override
        public int getPartition(Text key, Writable value, int numPartitions) {
            // Mask the sign bit so the modulo result is never negative.
            return (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
        }
    }

    /**
     * Emits one {@code (token, null)} record per token occurrence found in the
     * first two delimiter-separated fields of each input line, plus nonterminal
     * labels depending on the configured grammar.
     */
    private static class Map
    extends Mapper<LongWritable, Text, Text, NullWritable> {
        private boolean sourceParsed;
        private boolean targetParsed;
        private Labeling labeling;
        private boolean allowConstituent = true;
        private boolean allowCCG = true;
        private boolean allowConcat = true;
        private boolean allowDoubleConcat = true;

        /** Reads the parsing, labeling and grammar options from the job configuration. */
        @Override
        protected void setup(Context context) {
            Configuration conf = context.getConfiguration();
            this.sourceParsed = conf.getBoolean("thrax.source-is-parsed", false);
            this.targetParsed = conf.getBoolean("thrax.target-is-parsed", false);
            this.allowConstituent = conf.getBoolean("thrax.allow-constituent-label", true);
            this.allowCCG = conf.getBoolean("thrax.allow-ccg-label", true);
            this.allowConcat = conf.getBoolean("thrax.allow-concat-label", true);
            this.allowDoubleConcat = conf.getBoolean("thrax.allow-double-plus", true);

            // Anything other than "samt" or "manual" (case-insensitive) falls back to Hiero.
            String grammar = conf.get("thrax.grammar", "hiero");
            if (grammar.equalsIgnoreCase("samt")) {
                this.labeling = Labeling.SYNTAX;
            } else if (grammar.equalsIgnoreCase("manual")) {
                this.labeling = Labeling.MANUAL;
            } else {
                this.labeling = Labeling.HIERO;
            }
        }

        /**
         * Processes one input line. Lines with fewer than three fields are
         * skipped. Field 0 and field 1 are tokenized (as parse trees when the
         * corresponding side is configured as parsed); with manual labeling, a
         * fourth field, if present, contributes one {@code "[" + label} token
         * per space-separated label.
         */
        @Override
        protected void map(LongWritable key, Text input, Context context) throws IOException, InterruptedException {
            String[] parts = FormatUtils.P_DELIM.split(input.toString());
            if (parts.length < 3) {
                return;
            }

            // Nonterminals are only harvested from parse trees for SYNTAX labeling.
            boolean terminalsOnly = this.labeling != Labeling.SYNTAX;

            if (this.sourceParsed) {
                this.extractTokensFromParsed(parts[0], terminalsOnly, context);
            } else {
                this.extractTokens(parts[0], context);
            }

            if (this.targetParsed) {
                this.extractTokensFromParsed(parts[1], terminalsOnly, context);
            } else {
                this.extractTokens(parts[1], context);
            }

            if (this.labeling == Labeling.MANUAL && parts.length > 3) {
                for (String label : FormatUtils.P_SPACE.split(parts[3].trim())) {
                    context.write(new Text("[" + label), NullWritable.get());
                }
            }
        }

        /**
         * Emits every non-empty space-separated token of a plain sentence.
         *
         * @param input sentence text; {@code null} or empty input emits nothing
         * @param context output collector
         */
        protected void extractTokens(String input, Context context) throws IOException, InterruptedException {
            if (input == null || input.isEmpty()) {
                return;
            }
            for (String token : FormatUtils.P_SPACE.split(input)) {
                if (token.isEmpty()) {
                    continue;
                }
                context.write(new Text(token), NullWritable.get());
            }
        }

        /**
         * Scans a bracketed parse tree and emits its tokens.
         *
         * <p>A token is a maximal run of characters other than {@code '('},
         * {@code ')'} and space. A token directly preceded by {@code '('} is a
         * nonterminal; every other token is a terminal. Terminals are always
         * emitted. Nonterminals are ignored when {@code terminals_only} is set;
         * otherwise they are collected (with {@code ","} renamed to
         * {@code "COMMA"}) and emitted, together with their configured
         * combinations, after the scan.
         *
         * <p>A token that runs to the very end of the input without a closing
         * delimiter is not emitted.
         *
         * @param input parse tree; ignored if {@code null}, empty, or not starting with {@code '('}
         * @param terminals_only whether nonterminal labels should be skipped
         * @param context output collector
         */
        protected void extractTokensFromParsed(String input, boolean terminals_only, Context context) throws IOException, InterruptedException {
            if (input == null || input.isEmpty() || input.charAt(0) != '(') {
                return;
            }

            // Only needed when nonterminals are being collected.
            Set<String> nonterminals = terminals_only ? null : new HashSet<String>();
            int length = input.length();
            int from = 0;

            while (from < length) {
                if (Map.isTreeDelimiter(input.charAt(from))) {
                    ++from;
                    continue;
                }

                // from >= 1 here because index 0 is known to be '('.
                boolean nonterminal = input.charAt(from - 1) == '(';

                int to = from + 1;
                while (to < length && !Map.isTreeDelimiter(input.charAt(to))) {
                    ++to;
                }
                if (to >= length) {
                    // Unterminated trailing token: dropped.
                    break;
                }

                String token = input.substring(from, to);
                if (!nonterminal) {
                    context.write(new Text(token), NullWritable.get());
                } else if (!terminals_only) {
                    nonterminals.add("[" + (token.equals(",") ? "COMMA" : token));
                }

                // Skip the delimiter that ended this token.
                from = to + 1;
            }

            if (!terminals_only) {
                this.combineNonterminals(context, nonterminals);
            }
        }

        /** Returns whether {@code c} separates tokens in a bracketed parse tree. */
        private static boolean isTreeDelimiter(char c) {
            return c == '(' || c == ')' || c == ' ';
        }

        /**
         * Emits the collected nonterminals and the combined labels enabled by
         * the configuration: plain constituents, {@code A+B}, {@code A/B} and
         * {@code A\B}, and {@code A+B+C}.
         *
         * @param context output collector
         * @param nonterminals labels of the form {@code "[X"} (no closing bracket)
         */
        private void combineNonterminals(Context context, Set<String> nonterminals) throws IOException, InterruptedException {
            if (this.allowConstituent) {
                Map.writeNonterminals(nonterminals, context);
            }

            Set<String> concatenated = null;
            if (this.allowConcat) {
                concatenated = this.joinNonterminals("+", nonterminals, nonterminals);
                Map.writeNonterminals(concatenated, context);
            }

            if (this.allowCCG) {
                Map.writeNonterminals(this.joinNonterminals("/", nonterminals, nonterminals), context);
                Map.writeNonterminals(this.joinNonterminals("\\", nonterminals, nonterminals), context);
            }

            if (this.allowDoubleConcat) {
                // Reuse the pairwise join when it was already computed above.
                if (concatenated == null) {
                    concatenated = this.joinNonterminals("+", nonterminals, nonterminals);
                }
                Map.writeNonterminals(this.joinNonterminals("+", concatenated, nonterminals), context);
            }
        }

        /**
         * Builds every {@code prefix + glue + nt} combination, dropping the
         * first character of {@code nt} (its opening bracket) so the result
         * keeps a single leading bracket.
         *
         * @param glue separator placed between the two labels
         * @param prefixes left-hand labels, each starting with {@code "["}
         * @param nonterminals right-hand labels, each starting with {@code "["}
         * @return the set of joined labels
         */
        private Set<String> joinNonterminals(String glue, Set<String> prefixes, Set<String> nonterminals) {
            Set<String> joined = new HashSet<String>();
            for (String prefix : prefixes) {
                for (String nt : nonterminals) {
                    joined.add(prefix + glue + nt.substring(1));
                }
            }
            return joined;
        }

        /** Emits each label with the closing {@code "]"} appended. */
        private static void writeNonterminals(Set<String> nts, Context context) throws IOException, InterruptedException {
            for (String nt : nts) {
                context.write(new Text(nt + "]"), NullWritable.get());
            }
        }
    }
}

