/*
 * Decompiled with CFR 0.152.
 */
package edu.jhu.thrax.distributional;

import edu.jhu.thrax.distributional.ContextPhrase;
import edu.jhu.thrax.syntax.LatticeArray;
import edu.jhu.thrax.util.FormatUtils;
import edu.jhu.thrax.util.Vocabulary;
import edu.jhu.thrax.util.exceptions.MalformedInputException;
import edu.jhu.thrax.util.exceptions.NotEnoughFieldsException;
import edu.jhu.thrax.util.io.LineReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeMap;
import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

/**
 * Extracts every phrase (token span) of an annotated sentence together with
 * its distributional context features.
 *
 * <p>One input line is split with {@code FormatUtils.P_DELIM} and must contain
 * at least six fields:
 * <ol start="0">
 *   <li>the parse, handed to {@link LatticeArray};</li>
 *   <li>space-separated lemmas (lower-cased here), one per token;</li>
 *   <li>space-separated NER entries (lower-cased here), each split with
 *       {@code FormatUtils.P_SLASH}; the second piece is kept as the tag;</li>
 *   <li>dependencies used by the {@code thrax.use-*-dep} features;</li>
 *   <li>dependencies used by the {@code thrax.use-*-cdep} features;</li>
 *   <li>dependencies used by the {@code thrax.use-*-cpdep} features.</li>
 * </ol>
 *
 * <p>Which feature families are produced is controlled by the Hadoop
 * {@link Configuration} passed to the constructor.
 *
 * <p>{@link #extract(String)} stores the sentence currently being processed
 * in instance fields, so a single instance must not be used by several
 * threads at the same time.
 */
public class ContextPhraseExtractor {
    // Building blocks of the emitted feature names.
    private static final String G = "_";
    private static final String L = "l_";
    private static final String C = "c_";
    private static final String R = "r_";
    private static final String LEX = "lex_";
    private static final String POS = "pos_";
    private static final String LEM = "lem_";
    private static final String SYN = "syn_";
    private static final String NER = "ner_";
    private static final String DEP = "dep_";
    private static final String GOV = "gov_";
    private static final String ROOT = "ROOT";

    // Configuration, fixed at construction time.
    private final int MAX_PHRASE_LENGTH;
    private final boolean USE_SYN;
    private final int MAX_LEX_CONTEXT;
    private final int MAX_POS_CONTEXT;
    private final int MAX_LEM_CONTEXT;
    private final int MAX_NER_CONTEXT;
    private final int MAX_LEX_GRAM;
    private final int MAX_POS_GRAM;
    private final int MAX_LEM_GRAM;
    private final int MAX_NER_GRAM;
    private final boolean USE_LEX;
    private final boolean USE_POS;
    private final boolean USE_LEM;
    private final boolean USE_NER;
    private final boolean READ_DEP;
    private final boolean USE_LEX_DEP;
    private final boolean USE_POS_DEP;
    private final boolean USE_LEM_DEP;
    private final boolean USE_NER_DEP;
    private final boolean READ_CDEP;
    private final boolean USE_LEX_CDEP;
    private final boolean USE_POS_CDEP;
    private final boolean USE_LEM_CDEP;
    private final boolean USE_NER_CDEP;
    private final boolean READ_CPDEP;
    private final boolean USE_LEX_CPDEP;
    private final boolean USE_POS_CPDEP;
    private final boolean USE_LEM_CPDEP;
    private final boolean USE_NER_CPDEP;

    // Per-sentence state, overwritten by every call to extract().
    // *_features[start][k] is the (k+1)-gram beginning at token `start`.
    private String[][] lex_features;
    private String[][] pos_features;
    private String[][] lem_features;
    private String[][] ner_features;
    private int size;
    private LatticeArray parse;
    private String[] lemma;
    private String[] ner;
    // X_govern[i]: dependencies whose governor is token i.
    // X_depend[i]: the dependency whose dependent is token i (or null).
    private List<Dependency>[] govern;
    private Dependency[] depend;
    private List<Dependency>[] c_govern;
    private Dependency[] c_depend;
    private List<Dependency>[] cp_govern;
    private Dependency[] cp_depend;

    /**
     * Reads the extractor settings from {@code conf}.
     *
     * <p>Integer settings: {@code thrax.max-phrase-length} (default 4),
     * {@code thrax.max-{lex,pos,lem,ner}-context} (default 4) and
     * {@code thrax.max-{lex,pos,lem,ner}-gram} (default 2). All boolean
     * {@code thrax.use-*} switches default to {@code false}. A dependency
     * field is only read when at least one of its four switches is on.
     *
     * @param conf configuration to read the {@code thrax.*} keys from
     */
    public ContextPhraseExtractor(Configuration conf) {
        this.MAX_PHRASE_LENGTH = conf.getInt("thrax.max-phrase-length", 4);
        this.USE_SYN = conf.getBoolean("thrax.use-syn", false);

        this.MAX_LEX_CONTEXT = conf.getInt("thrax.max-lex-context", 4);
        this.MAX_POS_CONTEXT = conf.getInt("thrax.max-pos-context", 4);
        this.MAX_LEM_CONTEXT = conf.getInt("thrax.max-lem-context", 4);
        this.MAX_NER_CONTEXT = conf.getInt("thrax.max-ner-context", 4);

        this.MAX_LEX_GRAM = conf.getInt("thrax.max-lex-gram", 2);
        this.MAX_POS_GRAM = conf.getInt("thrax.max-pos-gram", 2);
        this.MAX_LEM_GRAM = conf.getInt("thrax.max-lem-gram", 2);
        this.MAX_NER_GRAM = conf.getInt("thrax.max-ner-gram", 2);

        this.USE_LEX = conf.getBoolean("thrax.use-lex-ngrams", false);
        this.USE_POS = conf.getBoolean("thrax.use-pos-ngrams", false);
        this.USE_LEM = conf.getBoolean("thrax.use-lem-ngrams", false);
        this.USE_NER = conf.getBoolean("thrax.use-ner-ngrams", false);

        this.USE_LEX_DEP = conf.getBoolean("thrax.use-lex-dep", false);
        this.USE_POS_DEP = conf.getBoolean("thrax.use-pos-dep", false);
        this.USE_LEM_DEP = conf.getBoolean("thrax.use-lem-dep", false);
        this.USE_NER_DEP = conf.getBoolean("thrax.use-ner-dep", false);
        this.READ_DEP = this.USE_LEX_DEP || this.USE_POS_DEP || this.USE_LEM_DEP || this.USE_NER_DEP;

        this.USE_LEX_CDEP = conf.getBoolean("thrax.use-lex-cdep", false);
        this.USE_POS_CDEP = conf.getBoolean("thrax.use-pos-cdep", false);
        this.USE_LEM_CDEP = conf.getBoolean("thrax.use-lem-cdep", false);
        this.USE_NER_CDEP = conf.getBoolean("thrax.use-ner-cdep", false);
        this.READ_CDEP = this.USE_LEX_CDEP || this.USE_POS_CDEP || this.USE_LEM_CDEP || this.USE_NER_CDEP;

        this.USE_LEX_CPDEP = conf.getBoolean("thrax.use-lex-cpdep", false);
        this.USE_POS_CPDEP = conf.getBoolean("thrax.use-pos-cpdep", false);
        this.USE_LEM_CPDEP = conf.getBoolean("thrax.use-lem-cpdep", false);
        this.USE_NER_CPDEP = conf.getBoolean("thrax.use-ner-cpdep", false);
        this.READ_CPDEP = this.USE_LEX_CPDEP || this.USE_POS_CPDEP || this.USE_LEM_CPDEP || this.USE_NER_CPDEP;
    }

    /**
     * Parses one annotated sentence and returns a {@link ContextPhrase} for
     * every span {@code [i, j)} with {@code 1 <= j - i <= thrax.max-phrase-length}.
     *
     * @param input one XML-escaped input line (see the class description for the layout)
     * @return the extracted phrases, ordered by start position and then by length
     * @throws MalformedInputException if the line has fewer than six fields, the
     *         annotation layers disagree on the token count, or any other error
     *         occurs while processing the line; the triggering exception is
     *         attached as the cause
     */
    public List<ContextPhrase> extract(String input) throws MalformedInputException {
        List<ContextPhrase> output = new ArrayList<ContextPhrase>();
        try {
            String unescaped = StringEscapeUtils.unescapeXml(input);
            String[] inputs = FormatUtils.P_DELIM.split(unescaped);
            if (inputs.length < 6) {
                throw new NotEnoughFieldsException();
            }

            this.parse = new LatticeArray(inputs[0].trim(), true);
            this.lemma = FormatUtils.P_SPACE.split(inputs[1].trim().toLowerCase());
            this.size = this.lemma.length;
            if (this.size != this.parse.size()) {
                throw new MalformedInputException();
            }

            String[] ner_entries = FormatUtils.P_SPACE.split(inputs[2].trim().toLowerCase());
            this.ner = new String[ner_entries.length];
            if (this.ner.length != this.size) {
                throw new MalformedInputException("NER: " + this.ner.length + " vs. Size: " + this.size);
            }
            for (int t = 0; t < ner_entries.length; ++t) {
                // Keep the piece after the first slash as the tag.
                this.ner[t] = FormatUtils.P_SLASH.split(ner_entries[t])[1];
            }

            this.generateAllGramFeatures();

            if (this.READ_DEP) {
                this.govern = this.newGovernorTable(this.size);
                this.depend = new Dependency[this.size];
                this.initDependencyStructure(inputs[3], this.govern, this.depend,
                        this.USE_LEX_DEP, this.USE_LEM_DEP, this.USE_POS_DEP, this.USE_NER_DEP);
            }
            if (this.READ_CDEP) {
                this.c_govern = this.newGovernorTable(this.size);
                this.c_depend = new Dependency[this.size];
                this.initDependencyStructure(inputs[4], this.c_govern, this.c_depend,
                        this.USE_LEX_CDEP, this.USE_LEM_CDEP, this.USE_POS_CDEP, this.USE_NER_CDEP);
            }
            if (this.READ_CPDEP) {
                this.cp_govern = this.newGovernorTable(this.size);
                this.cp_depend = new Dependency[this.size];
                this.initDependencyStructure(inputs[5], this.cp_govern, this.cp_depend,
                        this.USE_LEX_CPDEP, this.USE_LEM_CPDEP, this.USE_POS_CPDEP, this.USE_NER_CPDEP);
            }

            for (int i = 0; i < this.size; ++i) {
                int last_end = Math.min(i + this.MAX_PHRASE_LENGTH, this.size);
                for (int j = i + 1; j <= last_end; ++j) {
                    output.add(this.buildContextPhrase(i, j));
                }
            }
        } catch (Exception e) {
            // The stack trace is still printed because callers may do no more
            // than count failures. Type and (absent) message of the thrown
            // exception are unchanged; only the cause is new, so the original
            // failure is no longer lost to callers that inspect it.
            e.printStackTrace();
            MalformedInputException malformed = new MalformedInputException();
            malformed.initCause(e);
            throw malformed;
        }
        return output;
    }

    /**
     * Builds the phrase for span {@code [from, to)} and attaches every enabled
     * feature family to it.
     */
    private ContextPhrase buildContextPhrase(int from, int to) {
        ContextPhrase cp = new ContextPhrase(this.parse.getTerminalPhrase(from, to));
        if (this.USE_LEX) {
            this.addGramFeatures(cp, from, to, this.MAX_LEX_CONTEXT, this.MAX_LEX_GRAM, LEX, this.lex_features);
        }
        if (this.USE_POS) {
            this.addGramFeatures(cp, from, to, this.MAX_POS_CONTEXT, this.MAX_POS_GRAM, POS, this.pos_features);
        }
        if (this.USE_LEM) {
            this.addGramFeatures(cp, from, to, this.MAX_LEM_CONTEXT, this.MAX_LEM_GRAM, LEM, this.lem_features);
        }
        if (this.USE_NER) {
            this.addGramFeatures(cp, from, to, this.MAX_NER_CONTEXT, this.MAX_NER_GRAM, NER, this.ner_features);
        }
        if (this.USE_SYN) {
            this.addSyntaxFeatures(from, to, cp);
        }
        if (this.READ_DEP) {
            this.addDependencyFeatures(from, to, cp, this.govern, this.depend,
                    this.USE_LEX_DEP, this.USE_LEM_DEP, this.USE_POS_DEP, this.USE_NER_DEP);
        }
        if (this.READ_CDEP) {
            this.addDependencyFeatures(from, to, cp, this.c_govern, this.c_depend,
                    this.USE_LEX_CDEP, this.USE_LEM_CDEP, this.USE_POS_CDEP, this.USE_NER_CDEP);
        }
        if (this.READ_CPDEP) {
            this.addDependencyFeatures(from, to, cp, this.cp_govern, this.cp_depend,
                    this.USE_LEX_CPDEP, this.USE_LEM_CPDEP, this.USE_POS_CPDEP, this.USE_NER_CPDEP);
        }
        return cp;
    }

    /**
     * Allocates an array of {@code n} (still null) governor lists.
     * Java cannot create a generic array directly, hence the raw creation and
     * the cast; the array is only ever filled with {@code List<Dependency>}.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private List<Dependency>[] newGovernorTable(int n) {
        return (List<Dependency>[]) new List[n];
    }

    /** Pre-computes the n-gram caches for every enabled n-gram feature family. */
    private void generateAllGramFeatures() {
        if (this.USE_LEX) {
            this.lex_features = this.buildGramFeatures(this.parse.getTerminals(), this.MAX_LEX_GRAM);
        }
        if (this.USE_POS) {
            this.pos_features = this.buildGramFeatures(this.parse.getPOS(), this.MAX_POS_GRAM);
        }
        if (this.USE_LEM) {
            this.lem_features = this.buildGramFeatures(this.lemma, this.MAX_LEM_GRAM);
        }
        if (this.USE_NER) {
            this.ner_features = this.buildGramFeatures(this.ner, this.MAX_NER_GRAM);
        }
    }

    /**
     * Builds the n-gram cache for one annotation layer.
     *
     * <p>{@code cache[start][k]} holds the {@code (k+1)}-gram that begins at
     * token {@code start}, every token followed by {@code "_"} (for example
     * {@code "the_cat_"}). Row {@code start} has {@code min(N, size - start)}
     * entries, so rows near the end of the sentence are shorter. Sentences
     * with fewer than {@code N - 1} tokens are handled, and a non-positive
     * {@code N} yields empty rows.
     *
     * @param sentence one label per token; must have at least {@code size} entries
     * @param N maximum n-gram length
     * @return the cache, one row per token
     */
    private String[][] buildGramFeatures(String[] sentence, int N) {
        String[][] cache = new String[this.size][];
        StringBuilder sb = new StringBuilder();
        for (int start = 0; start < this.size; ++start) {
            // Size each row from the tokens that actually remain instead of
            // indexing relative to (size - N), which is negative for short sentences.
            int width = Math.max(0, Math.min(N, this.size - start));
            cache[start] = new String[width];
            sb.setLength(0);
            for (int k = 0; k < width; ++k) {
                sb.append(sentence[start + k]).append(G);
                cache[start][k] = sb.toString();
            }
        }
        return cache;
    }

    /**
     * Adds the n-gram context features of span {@code [from, to)} to {@code cp}.
     *
     * <p>Left context: n-grams that start at most {@code max_window} tokens
     * before {@code from} and end before {@code from}, named
     * {@code "l_" + tag + ngram + distance} where distance is {@code from - start}.
     * Right context: n-grams that start at or after {@code to} and end within
     * {@code max_window} tokens after it, named
     * {@code "r_" + tag + ngram + distance} where distance is {@code start - to + 1}.
     *
     * @param cp phrase receiving the features
     * @param from first token of the span (inclusive)
     * @param to end of the span (exclusive)
     * @param max_window number of context tokens considered on each side
     * @param N maximum n-gram length
     * @param tag layer tag inserted into the feature name, e.g. {@code "lex_"}
     * @param cache n-gram cache produced by {@link #buildGramFeatures(String[], int)}
     */
    private void addGramFeatures(ContextPhrase cp, int from, int to, int max_window, int N, String tag, String[][] cache) {
        String left_prefix = L + tag;
        int left_boundary = Math.max(0, from - max_window);
        for (int start = left_boundary; start < from; ++start) {
            int longest = Math.min(N, from - start);
            for (int k = 0; k < longest; ++k) {
                cp.addFeature(left_prefix + cache[start][k] + (from - start));
            }
        }

        String right_prefix = R + tag;
        int right_boundary = Math.min(this.size, to + max_window);
        for (int start = to; start < right_boundary; ++start) {
            int longest = Math.min(N, right_boundary - start);
            for (int k = 0; k < longest; ++k) {
                cp.addFeature(right_prefix + cache[start][k] + (start - to + 1));
            }
        }
    }

    /**
     * Adds syntactic features for span {@code [from, to)}: one
     * {@code c_syn_span_} feature per constituent label and, per CCG label, a
     * prefix/suffix feature plus a "missing" feature.
     *
     * <p>A CCG label containing {@code "/"} is split with
     * {@code FormatUtils.P_SLASH}; any other label is split with
     * {@code FormatUtils.P_BSLASH}. In both cases two pieces are expected.
     */
    private void addSyntaxFeatures(int from, int to, ContextPhrase cp) {
        Collection<Integer> constituents = this.parse.getConstituentLabels(from, to);
        for (int c : constituents) {
            cp.addFeature(C + SYN + "span_" + Vocabulary.word(c));
        }

        Collection<Integer> ccg = this.parse.getCcgLabels(from, to);
        for (int c : ccg) {
            String label = Vocabulary.word(c);
            if (label.contains("/")) {
                String[] parts = FormatUtils.P_SLASH.split(label);
                cp.addFeature(R + SYN + "pref_" + parts[0]);
                cp.addFeature(R + SYN + "miss_" + parts[1]);
            } else {
                String[] parts = FormatUtils.P_BSLASH.split(label);
                cp.addFeature(L + SYN + "suff_" + parts[0]);
                cp.addFeature(L + SYN + "miss_" + parts[1]);
            }
        }
    }

    /**
     * Parses the space-separated dependency entries in {@code input} and
     * indexes them by governor ({@code gov}) and by dependent ({@code dep}).
     * Dependencies governed by ROOT are only indexed by dependent.
     *
     * @param input raw dependency field of the input line
     * @param gov per-token lists to fill; every slot is replaced by a fresh list
     * @param dep per-token slots to fill; a later entry for the same dependent
     *            overwrites an earlier one
     */
    private void initDependencyStructure(String input, List<Dependency>[] gov, Dependency[] dep, boolean use_lex, boolean use_lem, boolean use_pos, boolean use_ner) {
        for (int i = 0; i < this.size; ++i) {
            gov[i] = new ArrayList<Dependency>();
        }
        String[] entries = FormatUtils.P_SPACE.split(input.trim());
        for (String entry : entries) {
            Dependency d = new Dependency(entry, use_lex, use_lem, use_pos, use_ner);
            if (d.gov >= 0) {
                gov[d.gov].add(d);
            }
            dep[d.dep] = d;
        }
    }

    /**
     * Adds dependency features for span {@code [from, to)} to {@code cp}.
     *
     * <p>For every token in the span whose governor lies outside the span
     * (ROOT counts as outside) the "depending" features are added; for every
     * dependency a span token governs whose dependent lies outside the span the
     * "governing" features are added.
     *
     * <p>In addition a {@code c_head_*} feature is emitted when the span still
     * counts as valid after the scan. The span becomes invalid when a second
     * token with an outside governor is seen, when a span token governs a token
     * outside the span, or when a span token has neither a dependency entry nor
     * any governed tokens.
     */
    private void addDependencyFeatures(int from, int to, ContextPhrase cp, List<Dependency>[] gov, Dependency[] dep, boolean use_lex, boolean use_lem, boolean use_pos, boolean use_ner) {
        int head = from;
        boolean seen_outlink = false;
        boolean valid = true;

        for (int p = from; p < to; ++p) {
            Dependency incoming = dep[p];
            if (incoming != null) {
                boolean governed_from_outside = incoming.gov < from || incoming.gov >= to;
                if (governed_from_outside) {
                    incoming.addDependingFeatures(cp, use_lex, use_lem, use_pos, use_ner);
                    // Only the first outlink may nominate the head; a second one invalidates.
                    valid = valid && !seen_outlink;
                    if (valid) {
                        head = p;
                    }
                    seen_outlink = true;
                } else if (valid && p == head) {
                    // Current head candidate is governed inside the span: follow the link.
                    head = incoming.gov;
                }
            } else if (gov[p].isEmpty()) {
                valid = false;
            }

            for (Dependency d : gov[p]) {
                boolean dependent_inside = d.dep >= from && d.dep < to;
                if (dependent_inside) {
                    continue;
                }
                d.addGoverningFeatures(cp, use_lex, use_lem, use_pos, use_ner);
                valid = false;
            }
        }

        if (!valid) {
            return;
        }
        if (use_lex) {
            cp.addFeature(C + "head_" + LEX + this.parse.getTerminal(head));
        }
        if (use_lem) {
            cp.addFeature(C + "head_" + LEM + this.lemma[head]);
        }
        if (use_pos) {
            cp.addFeature(C + "head_" + POS + this.parse.getPOS(head));
        }
        if (use_ner) {
            cp.addFeature(C + "head_" + NER + this.ner[head]);
        }
    }

    /**
     * Command-line driver: reads the file named by {@code args[0]} line by
     * line, extracts the context phrases with a default {@link Configuration}
     * and prints each phrase with its features sorted by feature name.
     *
     * @param args {@code args[0]} is the path of the input file
     * @throws Exception on any read or extraction failure
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("usage: ContextPhraseExtractor <input-file>");
            System.exit(1);
        }
        // NOTE(review): the reader is never closed; LineReader's API is not
        // visible from this file - confirm it exposes close() and release it.
        LineReader reader = new LineReader(args[0]);
        ContextPhraseExtractor cpe = new ContextPhraseExtractor(new Configuration());
        while (reader.hasNext()) {
            String line = reader.next().trim();
            List<ContextPhrase> cps = cpe.extract(line);
            for (ContextPhrase cp : cps) {
                // TreeMap gives a stable, sorted feature order in the output.
                TreeMap<Text, Integer> feature_map = new TreeMap<Text, Integer>();
                for (Writable fn : cp.getFeatures().keySet()) {
                    feature_map.put((Text)fn, ((IntWritable)cp.getFeatures().get((Object)fn)).get());
                }
                System.out.println(FormatUtils.contextPhraseToText(cp.getPhrase(), feature_map));
            }
        }
    }

    /**
     * One dependency arc of the current sentence with its pre-rendered
     * feature names.
     *
     * <p>An entry is split with {@code FormatUtils.P_DASH} into dependent
     * index, governor index and relation type. Both indices are 1-based in the
     * input and stored 0-based; a governor of {@code -1} (input value 0)
     * stands for ROOT.
     *
     * <p>This is deliberately an inner (non-static) class: the constructor
     * reads the enclosing extractor's current parse, lemmas and NER tags.
     */
    class Dependency {
        final String type;
        final int gov;
        final int dep;
        final String dep_lex;
        final String gov_lex;
        final String dep_lem;
        final String gov_lem;
        final String dep_pos;
        final String gov_pos;
        final String dep_ner;
        final String gov_ner;

        /**
         * Parses {@code entry} and renders the feature names for the enabled
         * layers; names of disabled layers are {@code null}.
         *
         * @param entry one dependency entry: dependent index, governor index, type
         */
        public Dependency(String entry, boolean use_lex, boolean use_lem, boolean use_pos, boolean use_ner) {
            String[] fields = FormatUtils.P_DASH.split(entry);
            this.gov = Integer.parseInt(fields[1]) - 1;
            this.dep = Integer.parseInt(fields[0]) - 1;
            this.type = fields[2];

            // Direction marker: "r_" on the dependent's feature when its
            // governor has the larger index, "l_" otherwise; the governor's
            // feature gets the opposite marker.
            boolean gov_after_dep = this.gov > this.dep;
            String dep_prefix = (gov_after_dep ? R : L) + DEP + this.type + G;
            String gov_prefix = (gov_after_dep ? L : R) + GOV + this.type + G;

            this.dep_lex = use_lex
                    ? dep_prefix + LEX + (this.gov == -1 ? ROOT : ContextPhraseExtractor.this.parse.getTerminal(this.gov))
                    : null;
            this.gov_lex = use_lex
                    ? gov_prefix + LEX + ContextPhraseExtractor.this.parse.getTerminal(this.dep)
                    : null;

            this.dep_pos = use_pos
                    ? dep_prefix + POS + (this.gov == -1 ? ROOT : ContextPhraseExtractor.this.parse.getPOS(this.gov))
                    : null;
            this.gov_pos = use_pos
                    ? gov_prefix + POS + ContextPhraseExtractor.this.parse.getPOS(this.dep)
                    : null;

            this.dep_lem = use_lem
                    ? dep_prefix + LEM + (this.gov == -1 ? ROOT : ContextPhraseExtractor.this.lemma[this.gov])
                    : null;
            this.gov_lem = use_lem
                    ? gov_prefix + LEM + ContextPhraseExtractor.this.lemma[this.dep]
                    : null;

            this.dep_ner = use_ner
                    ? dep_prefix + NER + (this.gov == -1 ? ROOT : ContextPhraseExtractor.this.ner[this.gov])
                    : null;
            this.gov_ner = use_ner
                    ? gov_prefix + NER + ContextPhraseExtractor.this.ner[this.dep]
                    : null;
        }

        /**
         * Adds the features describing this arc from the governor's point of
         * view (named after the dependent token) for the enabled layers.
         */
        final void addGoverningFeatures(ContextPhrase cp, boolean use_lex, boolean use_lem, boolean use_pos, boolean use_ner) {
            if (use_lex) {
                cp.addFeature(this.gov_lex);
            }
            if (use_pos) {
                cp.addFeature(this.gov_pos);
            }
            if (use_lem) {
                cp.addFeature(this.gov_lem);
            }
            if (use_ner) {
                cp.addFeature(this.gov_ner);
            }
        }

        /**
         * Adds the features describing this arc from the dependent's point of
         * view (named after the governor token, or ROOT) for the enabled layers.
         */
        final void addDependingFeatures(ContextPhrase cp, boolean use_lex, boolean use_lem, boolean use_pos, boolean use_ner) {
            if (use_lex) {
                cp.addFeature(this.dep_lex);
            }
            if (use_pos) {
                cp.addFeature(this.dep_pos);
            }
            if (use_lem) {
                cp.addFeature(this.dep_lem);
            }
            if (use_ner) {
                cp.addFeature(this.dep_ner);
            }
        }
    }
}

