/*
 * Decompiled with CFR 0.152.
 */
package edu.jhu.thrax.hadoop.extraction;

import edu.jhu.thrax.datatypes.AlignedSentencePair;
import edu.jhu.thrax.datatypes.Alignment;
import edu.jhu.thrax.datatypes.HierarchicalRule;
import edu.jhu.thrax.datatypes.PhrasePair;
import edu.jhu.thrax.extraction.HierarchicalRuleExtractor;
import edu.jhu.thrax.extraction.HieroLabeler;
import edu.jhu.thrax.extraction.ManualSpanLabeler;
import edu.jhu.thrax.extraction.SAMTLabeler;
import edu.jhu.thrax.extraction.SpanLabeler;
import edu.jhu.thrax.hadoop.datatypes.AlignmentWritable;
import edu.jhu.thrax.hadoop.datatypes.Annotation;
import edu.jhu.thrax.hadoop.datatypes.RuleWritable;
import edu.jhu.thrax.hadoop.extraction.AnnotatedRule;
import edu.jhu.thrax.hadoop.extraction.RuleWritableExtractor;
import edu.jhu.thrax.util.BackwardsCompatibility;
import edu.jhu.thrax.util.FormatUtils;
import edu.jhu.thrax.util.Vocabulary;
import edu.jhu.thrax.util.exceptions.MalformedInputException;
import edu.jhu.thrax.util.io.InputUtilities;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Extracts hierarchical rules from one line of aligned input and converts them
 * into {@link AnnotatedRule}s.
 *
 * <p>All settings are read once from the Hadoop {@link Configuration} attached
 * to the mapper context passed to the constructor. The configured
 * default-label policy is never modified afterwards; {@link #extract(Text)}
 * derives the policy that applies to a given line without touching instance
 * state, so the result for a line does not depend on which lines were
 * processed before it.
 */
public class HierarchicalRuleWritableExtractor
implements RuleWritableExtractor {
    /** Mapper context; used for its configuration and for the error counters. */
    private final Mapper.Context context;
    /** Value of {@code thrax.source-is-parsed} (default {@code false}). */
    private final boolean sourceParsed;
    /** Value of {@code thrax.target-is-parsed} (default {@code false}). */
    private final boolean targetParsed;
    /** Value of {@code thrax.reverse} (default {@code false}). */
    private final boolean reverse;
    /** Negation of {@code thrax.target-is-samt-syntax} (default {@code true}). */
    private final boolean sourceLabels;
    /** Vocabulary id of the marked-up {@code thrax.default-nt} symbol (default "X"). */
    private final int defaultLabel;
    /** Vocabulary id of the marked-up {@code thrax.full-sentence-nt} symbol (default "_S"). */
    private final int fullSentenceLabel;
    /** Value of {@code thrax.initial-phrase-length} (default 10). */
    private final int spanLimit;
    /** Default-label policy as configured; fixed for the lifetime of this object. */
    private final AllowDefaultLabelPolicy allowDefaultLabel;
    /** Underlying rule extractor, built from the configuration. */
    private final HierarchicalRuleExtractor extractor;

    /**
     * Builds an extractor from the configuration of the given mapper context.
     *
     * @param c mapper context supplying the configuration and the counters
     */
    public HierarchicalRuleWritableExtractor(Mapper.Context c) {
        this.context = c;
        Configuration conf = c.getConfiguration();
        this.sourceParsed = conf.getBoolean("thrax.source-is-parsed", false);
        this.targetParsed = conf.getBoolean("thrax.target-is-parsed", false);
        this.reverse = conf.getBoolean("thrax.reverse", false);
        this.sourceLabels = !conf.getBoolean("thrax.target-is-samt-syntax", true);
        this.defaultLabel = Vocabulary.id(FormatUtils.markup(conf.get("thrax.default-nt", "X")));
        this.fullSentenceLabel = Vocabulary.id(FormatUtils.markup(conf.get("thrax.full-sentence-nt", "_S")));
        this.spanLimit = conf.getInt("thrax.initial-phrase-length", 10);
        this.allowDefaultLabel = HierarchicalRuleWritableExtractor.readDefaultLabelPolicy(conf);
        this.extractor = HierarchicalRuleWritableExtractor.getExtractor(conf);
    }

    /**
     * Reads the default-label policy from the configuration.
     *
     * <p>If the legacy key {@code thrax.allow-nonlexical-x} is set, its boolean
     * value is translated through {@link BackwardsCompatibility#defaultLabelPolicy};
     * otherwise {@code thrax.allow-default-nt} is used (default "always").
     *
     * @param conf job configuration
     * @return the configured policy
     */
    private static AllowDefaultLabelPolicy readDefaultLabelPolicy(Configuration conf) {
        String policy;
        if (conf.get("thrax.allow-nonlexical-x") != null) {
            policy = BackwardsCompatibility.defaultLabelPolicy(conf.getBoolean("thrax.allow-nonlexical-x", true));
        } else {
            policy = conf.get("thrax.allow-default-nt", "always");
        }
        return AllowDefaultLabelPolicy.fromString(policy);
    }

    /**
     * Builds the underlying {@link HierarchicalRuleExtractor} from the
     * {@code thrax.*} extraction settings.
     *
     * @param conf job configuration
     * @return a freshly configured rule extractor
     */
    private static HierarchicalRuleExtractor getExtractor(Configuration conf) {
        int arity = conf.getInt("thrax.arity", 2);
        // Source and target initial-phrase limits are both read from the same key.
        int initialPhraseSource = conf.getInt("thrax.initial-phrase-length", 10);
        int initialPhraseTarget = conf.getInt("thrax.initial-phrase-length", 10);
        int initialAlignment = conf.getInt("thrax.initial-lexicality", 1);
        boolean initialAligned = !conf.getBoolean("thrax.loose", false);
        int sourceLimit = conf.getInt("thrax.nonlex-source-length", 5);
        int targetLimit = conf.getInt("thrax.nonlex-target-length", 5);
        // The lexical word limits fall back to the initial phrase length when unset.
        int lexSourceLimit = conf.getInt("thrax.lex-source-words", initialPhraseSource);
        int lexTargetLimit = conf.getInt("thrax.lex-target-words", initialPhraseTarget);
        int ruleAlignment = conf.getInt("thrax.lexicality", 1);
        boolean adjacent = conf.getBoolean("thrax.adjacent-nts", false);
        boolean abs = conf.getBoolean("thrax.allow-abstract-rules", false);
        boolean mixed = conf.getBoolean("thrax.allow-mixed-rules", true);
        boolean fullSentence = conf.getBoolean("thrax.allow-full-sentence-rules", true);
        return new HierarchicalRuleExtractor(arity, initialPhraseSource, initialPhraseTarget, initialAlignment, initialAligned, sourceLimit, targetLimit, ruleAlignment, adjacent, abs, mixed, fullSentence, lexSourceLimit, lexTargetLimit);
    }

    /**
     * Extracts all annotated rules from one input line.
     *
     * <p>A line that cannot be parsed into an aligned sentence pair increments
     * a counter in the "input errors" group (named after the exception
     * message) and yields an empty result. Rules rejected by the default-label
     * policy are skipped.
     *
     * @param line one line of input
     * @return the annotated rules for this line; never {@code null}
     */
    @Override
    public Iterable<AnnotatedRule> extract(Text line) {
        AlignedSentencePair sentencePair;
        try {
            sentencePair = InputUtilities.alignedSentencePair(line.toString(), this.sourceParsed, this.targetParsed, this.reverse);
        }
        catch (MalformedInputException e) {
            // Guard against a null message so the counter always has a usable name.
            String reason = e.getMessage();
            if (reason == null) {
                reason = e.getClass().getSimpleName();
            }
            this.context.getCounter("input errors", reason).increment(1L);
            return Collections.emptyList();
        }
        int[] source = sentencePair.source;
        int[] target = sentencePair.target;
        Alignment alignment = sentencePair.alignment;
        List<HierarchicalRule> rules = this.extractor.extract(source.length, target.length, alignment);
        List<AnnotatedRule> result = new ArrayList<AnnotatedRule>(rules.size());
        SpanLabeler labeler = this.getSpanLabeler(line, this.context.getConfiguration());
        // A Hiero labeler always permits the default label. This is decided
        // per line and kept local: getSpanLabeler also returns a Hiero labeler
        // as a fallback for lines missing a field, and that fallback must not
        // overwrite the configured policy for the lines that follow.
        AllowDefaultLabelPolicy policy = labeler instanceof HieroLabeler ? AllowDefaultLabelPolicy.ALWAYS : this.allowDefaultLabel;
        for (HierarchicalRule r : rules) {
            RuleWritable rule = this.toRuleWritable(r, labeler, source, target, policy);
            if (rule == null) {
                continue;
            }
            AlignmentWritable ruleAlignment = new AlignmentWritable(r.compactSourceAlignment(alignment));
            Annotation annotation = HierarchicalRuleWritableExtractor.getRuleAnnotation(r, labeler, source, target, alignment);
            result.add(new AnnotatedRule(rule, ruleAlignment, annotation));
        }
        return result;
    }

    /**
     * Converts a hierarchical rule into a {@link RuleWritable}.
     *
     * <p>When the left-hand-side phrase pair spans more than {@code spanLimit}
     * positions on either side, the full-sentence label is used as the
     * left-hand side instead of the label supplied by the labeler.
     *
     * @param r rule to convert
     * @param spanLabeler labeler used for the left-hand side and both rule sides
     * @param source source sentence as vocabulary ids
     * @param target target sentence as vocabulary ids
     * @param policy default-label policy in effect for the current line
     * @return the converted rule, or {@code null} if the policy rejects it
     */
    private RuleWritable toRuleWritable(HierarchicalRule r, SpanLabeler spanLabeler, int[] source, int[] target, AllowDefaultLabelPolicy policy) {
        PhrasePair lhsPP = r.getLhs();
        int lhsSourceSpan = lhsPP.sourceEnd - lhsPP.sourceStart;
        int lhsTargetSpan = lhsPP.targetEnd - lhsPP.targetStart;
        boolean exceedsSpanLimit = lhsSourceSpan > this.spanLimit || lhsTargetSpan > this.spanLimit;
        int lhs = exceedsSpanLimit ? this.fullSentenceLabel : r.lhsLabel(spanLabeler, this.sourceLabels);
        int[] src = r.sourceSide(source, spanLabeler, this.sourceLabels);
        int[] tgt = r.targetSide(target, spanLabeler, this.sourceLabels);
        if (!this.isValidUseOfDefaultLabel(policy, lhs, src, tgt)) {
            return null;
        }
        return new RuleWritable(lhs, src, tgt, r.monotonic());
    }

    /**
     * Produces the annotation attached to an extracted rule.
     *
     * <p>Currently every rule receives {@code new Annotation(1)}; the arguments
     * are not consulted.
     */
    private static Annotation getRuleAnnotation(HierarchicalRule r, SpanLabeler spanLabeler, int[] source, int[] target, Alignment alignment) {
        return new Annotation(1);
    }

    /**
     * Chooses the span labeler for a line according to {@code thrax.grammar}
     * (default "hiero", matched case-insensitively).
     *
     * <ul>
     *   <li>"samt": a {@link SAMTLabeler} over field 0 (when labels are taken
     *       from the source side) or field 1 of the delimited line;</li>
     *   <li>"manual": a {@link ManualSpanLabeler} over the labels in field 3;</li>
     *   <li>anything else, or a line with too few fields for the requested
     *       grammar: a {@link HieroLabeler} using the default label.</li>
     * </ul>
     *
     * @param line the input line
     * @param conf job configuration
     * @return the labeler to use for this line; never {@code null}
     */
    private SpanLabeler getSpanLabeler(Text line, Configuration conf) {
        String labelType = conf.get("thrax.grammar", "hiero");
        if (labelType.equalsIgnoreCase("samt")) {
            String[] fields = FormatUtils.P_DELIM.split(line.toString());
            if (fields.length < 2) {
                return new HieroLabeler(this.defaultLabel);
            }
            String parse = fields[this.sourceLabels ? 0 : 1].trim();
            boolean constituent = conf.getBoolean("thrax.allow-constituent-label", true);
            boolean ccg = conf.getBoolean("thrax.allow-ccg-label", true);
            boolean concat = conf.getBoolean("thrax.allow-concat-label", true);
            boolean doubleConcat = conf.getBoolean("thrax.allow-double-plus", true);
            String unary = conf.get("thrax.unary-category-handler", "all");
            return new SAMTLabeler(parse, constituent, ccg, concat, doubleConcat, unary, this.defaultLabel);
        }
        if (labelType.equalsIgnoreCase("manual")) {
            String[] fields = FormatUtils.P_DELIM.split(line.toString());
            if (fields.length < 4) {
                return new HieroLabeler(this.defaultLabel);
            }
            int[] labels = Vocabulary.addAll(fields[3].trim());
            return new ManualSpanLabeler(labels, this.defaultLabel);
        }
        // "hiero" and any unrecognised grammar type.
        return new HieroLabeler(this.defaultLabel);
    }

    /**
     * Decides whether a rule's use of the default label is allowed.
     *
     * <ul>
     *   <li>{@code ALWAYS}: every rule is accepted.</li>
     *   <li>{@code PHRASES}: the default label may not appear as a symbol on
     *       either side, and it may not be the left-hand side of a rule that
     *       has a negative symbol on both sides.</li>
     *   <li>{@code NEVER}: the default label may appear neither as the
     *       left-hand side nor on either side.</li>
     * </ul>
     *
     * @param policy policy in effect for the current line
     * @param lhs left-hand-side label id
     * @param source source side of the rule
     * @param target target side of the rule
     * @return {@code true} if the rule is acceptable under {@code policy}
     */
    private boolean isValidUseOfDefaultLabel(AllowDefaultLabelPolicy policy, int lhs, int[] source, int[] target) {
        if (policy == AllowDefaultLabelPolicy.ALWAYS) {
            return true;
        }
        boolean defaultOnEitherSide = HierarchicalRuleWritableExtractor.hasNonterminal(source, this.defaultLabel)
                || HierarchicalRuleWritableExtractor.hasNonterminal(target, this.defaultLabel);
        if (policy == AllowDefaultLabelPolicy.PHRASES) {
            boolean hierarchical = HierarchicalRuleWritableExtractor.hasNonterminal(source)
                    && HierarchicalRuleWritableExtractor.hasNonterminal(target);
            if (this.defaultLabel == lhs && hierarchical) {
                return false;
            }
            return !defaultOnEitherSide;
        }
        if (policy == AllowDefaultLabelPolicy.NEVER) {
            return lhs != this.defaultLabel && !defaultOnEitherSide;
        }
        return true;
    }

    /**
     * Returns {@code true} if the array contains any negative id; this class
     * treats negative ids as nonterminals.
     */
    private static boolean hasNonterminal(int[] s) {
        for (int w : s) {
            if (w < 0) {
                return true;
            }
        }
        return false;
    }

    /** Returns {@code true} if the array contains the id {@code nt}. */
    private static boolean hasNonterminal(int[] s, int nt) {
        for (int w : s) {
            if (w == nt) {
                return true;
            }
        }
        return false;
    }

    /** Policy controlling where the default nonterminal label may be used. */
    private enum AllowDefaultLabelPolicy {
        ALWAYS,
        PHRASES,
        NEVER;

        /**
         * Parses a policy name case-insensitively.
         *
         * @param s "always", "phrases" or "never"
         * @return the matching policy, or {@code ALWAYS} for any other value
         */
        public static AllowDefaultLabelPolicy fromString(String s) {
            if (s.equalsIgnoreCase("phrases")) {
                return PHRASES;
            }
            if (s.equalsIgnoreCase("never")) {
                return NEVER;
            }
            // "always" and every unrecognised value.
            return ALWAYS;
        }
    }
}

