/*
 * Decompiled with CFR 0.152.
 */
package edu.jhu.thrax.util;

import edu.jhu.thrax.util.FormatUtils;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

public class TestSetFilter {
    private List<String> testSentences = new ArrayList<String>();
    private Map<String, Set<Integer>> sentencesByWord = new HashMap<String, Set<Integer>>();
    private Set<String> ngrams;
    private String lastSourceSide = null;
    private boolean acceptedLastSourceSide = false;
    private final String NT_REGEX = "\\[[^\\]]+?\\]";
    public int cached = 0;
    public int RULE_LENGTH = 12;
    public boolean verbose = false;
    public boolean parallel = false;
    public boolean fast = false;

    public void setVerbose(boolean value) {
        this.verbose = value;
    }

    public void setParallel(boolean value) {
        this.parallel = value;
    }

    public void setFast(boolean value) {
        this.fast = value;
    }

    public void setRuleLength(int value) {
        this.RULE_LENGTH = value;
    }

    private void getTestSentences(String filename) {
        try {
            Scanner scanner = new Scanner(new File(filename), "UTF-8");
            while (scanner.hasNextLine()) {
                String line = scanner.nextLine();
                TestSetFilter.addSentenceToWordHash(this.sentencesByWord, line, this.testSentences.size());
                this.testSentences.add(line);
            }
        }
        catch (FileNotFoundException e) {
            System.err.printf("Could not open %s\n", e.getMessage());
        }
        if (this.verbose) {
            System.err.println("Added " + this.testSentences.size() + " sentences.\n");
        }
        this.ngrams = this.getTestNGrams(this.testSentences);
    }

    public void setSentence(String sentence) {
        if (this.testSentences == null) {
            this.testSentences = new ArrayList<String>();
        }
        if (this.sentencesByWord == null) {
            this.sentencesByWord = new HashMap<String, Set<Integer>>();
        }
        this.testSentences.clear();
        this.sentencesByWord.clear();
        TestSetFilter.addSentenceToWordHash(this.sentencesByWord, sentence, 0);
        this.testSentences.add(sentence);
        this.ngrams = this.getTestNGrams(this.testSentences);
    }

    public void filterGrammarToFile(String fullGrammarFile, String sentence, String filteredGrammarFile, boolean fast) {
        System.err.println(String.format("filterGrammarToFile(%s,%s,%s,%s)\n", fullGrammarFile, sentence, filteredGrammarFile, fast ? "fast" : "exact"));
        this.fast = fast;
        this.setSentence(sentence);
        try {
            Scanner scanner = new Scanner((InputStream)new GZIPInputStream(new FileInputStream(fullGrammarFile)), "UTF-8");
            int rulesIn = 0;
            int rulesOut = 0;
            boolean verbose = false;
            if (verbose) {
                System.err.println("Processing rules...");
            }
            PrintWriter out = new PrintWriter(new GZIPOutputStream(new FileOutputStream(filteredGrammarFile)));
            while (scanner.hasNextLine()) {
                if (verbose) {
                    if ((rulesIn + 1) % 2000 == 0) {
                        System.err.print(".");
                        System.err.flush();
                    }
                    if ((rulesIn + 1) % 100000 == 0) {
                        System.err.println(" [" + (rulesIn + 1) + "]");
                        System.err.flush();
                    }
                }
                ++rulesIn;
                String rule = scanner.nextLine();
                if (!this.inTestSet(rule)) continue;
                out.println(rule);
                ++rulesOut;
            }
            out.close();
            if (verbose) {
                System.err.println("[INFO] Total rules read: " + rulesIn);
                System.err.println("[INFO] Rules kept: " + rulesOut);
                System.err.println("[INFO] Rules dropped: " + (rulesIn - rulesOut));
            }
        }
        catch (FileNotFoundException e) {
            System.err.printf("* FATAL: could not open %s\n", e.getMessage());
        }
        catch (IOException e) {
            System.err.printf("* FATAL: could not write to %s\n", e.getMessage());
        }
    }

    public Pattern getPattern(String rule) {
        String[] parts = FormatUtils.P_DELIM.split(rule);
        if (parts.length != 4) {
            return null;
        }
        String source = parts[1].trim();
        String pattern = Pattern.quote(source);
        pattern = pattern.replaceAll("\\[[^\\]]+?\\]", "\\\\E.+\\\\Q");
        pattern = pattern.replaceAll("\\\\Q\\\\E", "");
        pattern = "(?:^|\\s)" + pattern + "(?:$|\\s)";
        return Pattern.compile(pattern);
    }

    public boolean inTestSet(String rule) {
        String[] parts = FormatUtils.P_DELIM.split(rule);
        if (parts.length != 4) {
            return false;
        }
        String sourceSide = parts[1].trim();
        if (!sourceSide.equals(this.lastSourceSide)) {
            this.lastSourceSide = sourceSide;
            this.acceptedLastSourceSide = this.fast ? this.inTestSetFast(rule) : this.inTestSetExact(rule);
        } else {
            ++this.cached;
        }
        return this.acceptedLastSourceSide;
    }

    private boolean inTestSetFast(String rule) {
        String[] parts = FormatUtils.P_DELIM.split(rule);
        String source = parts[1];
        for (String chunk : source.split("\\[[^\\]]+?\\]")) {
            if ((chunk = chunk.trim()).equals("") || this.ngrams.contains(chunk)) continue;
            return false;
        }
        return true;
    }

    private boolean inTestSetExact(String rule) {
        if (this.inTestSetFast(rule)) {
            Pattern pattern = this.getPattern(rule);
            for (int i : this.getSentencesForRule(this.sentencesByWord, rule)) {
                if (!pattern.matcher(this.testSentences.get(i)).find()) continue;
                return true;
            }
            return this.hasAbstractSource(rule) > 1;
        }
        return false;
    }

    private static void addSentenceToWordHash(Map<String, Set<Integer>> sentencesByWord, String sentence, int index) {
        String[] tokens;
        for (String t : tokens = sentence.split("\\s+")) {
            if (sentencesByWord.containsKey(t)) {
                sentencesByWord.get(t).add(index);
                continue;
            }
            HashSet<Integer> set = new HashSet<Integer>();
            set.add(index);
            sentencesByWord.put(t, set);
        }
    }

    private Set<Integer> getSentencesForRule(Map<String, Set<Integer>> sentencesByWord, String rule) {
        String[] parts = FormatUtils.P_DELIM.split(rule);
        if (parts.length != 4) {
            return Collections.emptySet();
        }
        String source = parts[1].trim();
        ArrayList list = new ArrayList();
        for (String t : source.split("\\s+")) {
            if (t.matches("\\[[^\\]]+?\\]")) continue;
            if (sentencesByWord.containsKey(t)) {
                list.add(sentencesByWord.get(t));
                continue;
            }
            return Collections.emptySet();
        }
        return TestSetFilter.intersect(list);
    }

    private int hasAbstractSource(String rule) {
        String[] parts = FormatUtils.P_DELIM.split(rule);
        if (parts.length != 4) {
            return 0;
        }
        String source = parts[1].trim();
        int nonterminalCount = 0;
        for (String t : source.split("\\s+")) {
            if (!t.matches("\\[[^\\]]+?\\]")) {
                return 0;
            }
            ++nonterminalCount;
        }
        return nonterminalCount;
    }

    private static <T> Set<T> intersect(List<Set<T>> list) {
        if (list.isEmpty()) {
            return Collections.emptySet();
        }
        HashSet result = new HashSet(list.get(0));
        for (int i = 1; i < list.size(); ++i) {
            result.retainAll((Collection)list.get(i));
            if (!result.isEmpty()) continue;
            return Collections.emptySet();
        }
        if (result.isEmpty()) {
            return Collections.emptySet();
        }
        return result;
    }

    private Set<String> getTestNGrams(List<String> sentences) {
        if (sentences.isEmpty()) {
            return Collections.emptySet();
        }
        HashSet<String> result = new HashSet<String>();
        for (String s : sentences) {
            result.addAll(TestSetFilter.getNGramsUpToLength(this.RULE_LENGTH, s));
        }
        return result;
    }

    private static Set<String> getNGramsUpToLength(int length, String sentence) {
        if (length < 1) {
            return Collections.emptySet();
        }
        String[] tokens = sentence.trim().split("\\s+");
        int maxOrder = length < tokens.length ? length : tokens.length;
        HashSet<String> result = new HashSet<String>();
        for (int order = 1; order <= maxOrder; ++order) {
            for (int start = 0; start < tokens.length - order + 1; ++start) {
                result.add(TestSetFilter.createNGram(tokens, start, order));
            }
        }
        return result;
    }

    private static String createNGram(String[] tokens, int start, int order) {
        if (order < 1 || start + order > tokens.length) {
            return "";
        }
        String result = tokens[start];
        for (int i = 1; i < order; ++i) {
            result = result + " " + tokens[start + i];
        }
        return result;
    }

    public static void main(String[] argv) {
        if (argv.length < 1) {
            System.err.println("usage: TestSetFilter [-v|-p|-f|-n N] <test set1> [test set2 ...]");
            System.err.println("    -v    verbose output");
            System.err.println("    -p    parallel compatibility");
            System.err.println("    -f    fast mode");
            System.err.println("    -n    max n-gram to compare to (default 12)");
            return;
        }
        TestSetFilter filter = new TestSetFilter();
        for (int i = 0; i < argv.length; ++i) {
            if (argv[i].equals("-v")) {
                filter.setVerbose(true);
                continue;
            }
            if (argv[i].equals("-p")) {
                filter.setParallel(true);
                continue;
            }
            if (argv[i].equals("-f")) {
                filter.setFast(true);
                continue;
            }
            if (argv[i].equals("-n")) {
                filter.setRuleLength(Integer.parseInt(argv[i + 1]));
                ++i;
                continue;
            }
            filter.getTestSentences(argv[i]);
        }
        Scanner scanner = new Scanner(System.in, "UTF-8");
        int rulesIn = 0;
        int rulesOut = 0;
        if (filter.verbose) {
            System.err.println("Processing rules...");
            if (filter.fast) {
                System.err.println("Using fast version...");
            }
            System.err.println("Using at max " + filter.RULE_LENGTH + " n-grams...");
        }
        while (scanner.hasNextLine()) {
            if (filter.verbose) {
                if ((rulesIn + 1) % 2000 == 0) {
                    System.err.print(".");
                    System.err.flush();
                }
                if ((rulesIn + 1) % 100000 == 0) {
                    System.err.println(" [" + (rulesIn + 1) + "]");
                    System.err.flush();
                }
            }
            ++rulesIn;
            String rule = scanner.nextLine();
            if (filter.inTestSet(rule)) {
                System.out.println(rule);
                if (filter.parallel) {
                    System.out.flush();
                }
                ++rulesOut;
                continue;
            }
            if (!filter.parallel) continue;
            System.out.println("");
            System.out.flush();
        }
        if (filter.verbose) {
            System.err.println("[INFO] Total rules read: " + rulesIn);
            System.err.println("[INFO] Rules kept: " + rulesOut);
            System.err.println("[INFO] Rules dropped: " + (rulesIn - rulesOut));
            System.err.println("[INFO] cached queries: " + filter.cached);
        }
    }
}

