/*
 * Decompiled with CFR 0.152.
 */
package edu.berkeley.nlp.PCFGLA;

import edu.berkeley.nlp.PCFGLA.ArrayParser;
import edu.berkeley.nlp.PCFGLA.Binarization;
import edu.berkeley.nlp.PCFGLA.Corpus;
import edu.berkeley.nlp.PCFGLA.CorpusStatistics;
import edu.berkeley.nlp.PCFGLA.Grammar;
import edu.berkeley.nlp.PCFGLA.GrammarMerger;
import edu.berkeley.nlp.PCFGLA.Lexicon;
import edu.berkeley.nlp.PCFGLA.Option;
import edu.berkeley.nlp.PCFGLA.OptionParser;
import edu.berkeley.nlp.PCFGLA.ParserData;
import edu.berkeley.nlp.PCFGLA.SimpleLexicon;
import edu.berkeley.nlp.PCFGLA.SophisticatedLexicon;
import edu.berkeley.nlp.PCFGLA.StateSetTreeList;
import edu.berkeley.nlp.PCFGLA.smoothing.NoSmoothing;
import edu.berkeley.nlp.PCFGLA.smoothing.SmoothAcrossParentBits;
import edu.berkeley.nlp.syntax.StateSet;
import edu.berkeley.nlp.syntax.Tree;
import edu.berkeley.nlp.util.Numberer;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Random;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class GrammarTrainer {
    /** Enables extra diagnostic printing; {@code main} overwrites it from the {@code -v} option. */
    public static boolean VERBOSE = false;
    /** Horizontal markovization order handed to tree binarization; {@code main} overwrites it from {@code -hor}. */
    public static int HORIZONTAL_MARKOVIZATION = 1;
    /** Vertical markovization order handed to tree binarization; {@code main} overwrites it from {@code -ver}. */
    public static int VERTICAL_MARKOVIZATION = 2;
    /** Shared random number generator; {@code main} replaces it with one seeded from {@code -randSeed}. */
    public static Random RANDOM = new Random(0L);

    /**
     * Command-line entry point: trains a grammar and lexicon from a treebank and saves them.
     *
     * <p>Outline of what the code below does:
     * <ol>
     *   <li>Parses {@link Options} from {@code args} and echoes the configuration.</li>
     *   <li>Loads the corpus, binarizes/filters the training and validation trees.</li>
     *   <li>Either loads an existing grammar ({@code -in}) or builds an initial one by
     *       tallying the training trees.</li>
     *   <li>Runs {@code numSplitTimes * 3} phases, cycling split (index % 3 == 0),
     *       merge (index % 3 == 1) and smooth (index % 3 == 2). Each executed phase is
     *       followed by an EM loop and an intermediate save.</li>
     *   <li>Saves the best grammar/lexicon (by validation likelihood) to {@code -out}
     *       and calls {@code System.exit(0)}.</li>
     * </ol>
     *
     * <p>The process exits with status -1 when no output file name is available.
     *
     * @param args command-line arguments, interpreted by {@link OptionParser} against {@link Options}
     */
    public static void main(String[] args) {
        ParserData pData;
        OptionParser optParser = new OptionParser(Options.class);
        Options opts = (Options)optParser.parse(args, true);
        System.out.println("Calling with " + optParser.getPassedInOptions());
        String path = opts.path;
        System.out.println("Loading trees from " + path + " and using language " + (Object)((Object)opts.treebank));
        double trainingFractionToKeep = opts.trainingFractionToKeep;
        int maxSentenceLength = opts.maxSentenceLength;
        System.out.println("Will remove sentences with more than " + maxSentenceLength + " words.");
        // The static markovization settings are overwritten from the options here.
        HORIZONTAL_MARKOVIZATION = opts.horizontalMarkovization;
        VERTICAL_MARKOVIZATION = opts.verticalMarkovization;
        System.out.println("Using horizontal=" + HORIZONTAL_MARKOVIZATION + " and vertical=" + VERTICAL_MARKOVIZATION + " markovization.");
        Binarization binarization = opts.binarization;
        System.out.println("Using " + binarization.name() + " binarization.");
        double randomness = opts.randomization;
        System.out.println("Using a randomness value of " + randomness);
        String outFileName = opts.outFileName;
        // Without an output file there is nowhere to save the result, so bail out early.
        if (outFileName == null) {
            System.out.println("Output File name is required.");
            System.exit(-1);
        } else {
            System.out.println("Using grammar output file " + outFileName + ".");
        }
        VERBOSE = opts.verbose;
        RANDOM = new Random(opts.randSeed);
        System.out.println("Random number generator seeded at " + opts.randSeed + ".");
        boolean manualAnnotation = false;
        boolean baseline = opts.baseline;
        boolean noSplit = opts.noSplit;
        int numSplitTimes = opts.numSplits;
        // Baseline training runs no split/merge/smooth phases at all.
        if (baseline) {
            numSplitTimes = 0;
        }
        String splitGrammarFile = opts.inFile;
        int allowedDroppingIters = opts.di;
        int maxIterations = opts.splitMaxIterations;
        int minIterations = opts.splitMinIterations;
        if (minIterations > 0) {
            System.out.println("I will do at least " + minIterations + " iterations.");
        }
        double[] smoothParams = new double[]{opts.smoothingParameter1, opts.smoothingParameter2};
        System.out.println("Using smoothing parameters " + smoothParams[0] + " and " + smoothParams[1]);
        boolean allowMoreSubstatesThanCounts = false;
        boolean findClosedUnaryPaths = opts.findClosedUnaryPaths;
        // Load the treebank and turn both tree sets into binarized, length-filtered trees.
        Corpus corpus = new Corpus(path, opts.treebank, trainingFractionToKeep, false, opts.skipSection, opts.skipBilingual);
        List<Tree<String>> trainTrees = Corpus.binarizeAndFilterTrees(corpus.getTrainTrees(), VERTICAL_MARKOVIZATION, HORIZONTAL_MARKOVIZATION, maxSentenceLength, binarization, manualAnnotation, VERBOSE);
        List<Tree<String>> validationTrees = Corpus.binarizeAndFilterTrees(corpus.getValidationTrees(), VERTICAL_MARKOVIZATION, HORIZONTAL_MARKOVIZATION, maxSentenceLength, binarization, manualAnnotation, VERBOSE);
        Numberer tagNumberer = Numberer.getGlobalNumberer("tags");
        if (opts.trainOnDevSet) {
            System.out.println("Adding devSet to training data.");
            trainTrees.addAll(validationTrees);
        }
        if (opts.lowercase) {
            System.out.println("Lowercasing the treebank.");
            Corpus.lowercaseWords(trainTrees);
            Corpus.lowercaseWords(validationTrees);
        }
        // nTrees is taken after the optional dev-set merge above, so it includes those trees.
        int nTrees = trainTrees.size();
        System.out.println("There are " + nTrees + " trees in the training set.");
        double filter = opts.filter;
        if (filter > 0.0) {
            System.out.println("Will remove rules with prob under " + filter + ".\nEven though only unlikely rules are pruned the training LL is not guaranteed to increase in every round anymore " + "(especially when we are close to converging)." + "\nFurthermore it increases the variance because 'good' rules can be pruned away in early stages.");
        }
        short nSubstates = opts.nSubStates;
        short[] numSubStatesArray = GrammarTrainer.initializeSubStateArray(trainTrees, validationTrees, tagNumberer, nSubstates);
        // Baseline: force exactly one substate per state and disable randomization.
        if (baseline) {
            short one = 1;
            Arrays.fill(numSubStatesArray, one);
            System.out.println("Training just the baseline grammar (1 substate for all states)");
            randomness = 0.0;
        }
        if (VERBOSE) {
            int i = 0;
            while (i < numSubStatesArray.length) {
                System.out.println("Tag " + (String)tagNumberer.object(i) + " " + i);
                ++i;
            }
        }
        System.out.println("There are " + numSubStatesArray.length + " observed categories.");
        // Three generations of model are tracked throughout:
        //   max*      - best model so far according to validation likelihood
        //   previous* - model produced by the preceding EM iteration
        //   grammar / lexicon - model currently being estimated
        Lexicon lexicon = null;
        Lexicon maxLexicon = null;
        Lexicon previousLexicon = null;
        Grammar grammar = null;
        Grammar maxGrammar = null;
        Grammar previousGrammar = null;
        double maxLikelihood = Double.NEGATIVE_INFINITY;
        int iter = 0;
        int droppingIter = 0;
        int startSplit = 0;
        // Optionally resume from a previously saved grammar instead of starting from scratch.
        if (splitGrammarFile != null) {
            System.out.println("Loading old grammar from " + splitGrammarFile);
            // Start at phase index 1 (index % 3 == 1 is the merge phase) unless -noSplit resets it below.
            startSplit = 1;
            ParserData pData2 = ParserData.Load(splitGrammarFile);
            maxGrammar = pData2.gr;
            maxLexicon = pData2.lex;
            numSubStatesArray = maxGrammar.numSubStates;
            previousGrammar = grammar = maxGrammar;
            previousLexicon = lexicon = maxLexicon;
            // Replace the global numberers with the ones stored alongside the loaded grammar.
            Numberer.setNumberers(pData2.getNumbs());
            tagNumberer = Numberer.getGlobalNumberer("tags");
            System.out.println("Loading old grammar complete.");
            if (noSplit) {
                System.out.println("Will NOT split the loaded grammar.");
                startSplit = 0;
            }
        }
        double mergingPercentage = opts.mergingPercentage;
        boolean separateMergingThreshold = opts.separateMergingThreshold;
        if (mergingPercentage > 0.0) {
            System.out.println("Will merge " + (int)(mergingPercentage * 100.0) + "% of the splits in each round.");
            System.out.println("The threshold for merging lexical and phrasal categories will be set separately: " + separateMergingThreshold);
        }
        StateSetTreeList trainStateSetTrees = new StateSetTreeList(trainTrees, numSubStatesArray, false, tagNumberer);
        StateSetTreeList validationStateSetTrees = new StateSetTreeList(validationTrees, numSubStatesArray, false, tagNumberer);
        // Drop the references to the String trees and the corpus before requesting a GC.
        trainTrees = null;
        validationTrees = null;
        corpus = null;
        System.gc();
        if (opts.simpleLexicon) {
            // NOTE(review): the message hard-codes "5 times" but the threshold actually passed is opts.rare.
            System.out.println("Replacing words which have been seen less than 5 times with their signature.");
            Corpus.replaceRareWords(trainStateSetTrees, new SimpleLexicon(numSubStatesArray, -1.0), opts.rare);
        }
        // No grammar was loaded: build the initial grammar and lexicon by tallying the training trees.
        if (splitGrammarFile == null) {
            grammar = new Grammar(numSubStatesArray, findClosedUnaryPaths, new NoSmoothing(), null, filter);
            lexicon = opts.simpleLexicon ? new SimpleLexicon(numSubStatesArray, -1, smoothParams, new NoSmoothing(), filter, trainStateSetTrees) : new SophisticatedLexicon(numSubStatesArray, SophisticatedLexicon.DEFAULT_SMOOTHING_CUTOFF, smoothParams, new NoSmoothing(), filter);
            int n = 0;
            boolean secondHalf = false;
            for (Tree<StateSet> stateSetTree : trainStateSetTrees) {
                // True once the (zero-based) tree index exceeds half the training set size.
                secondHalf = (double)n++ > (double)nTrees / 2.0;
                lexicon.trainTree(stateSetTree, randomness, null, secondHalf, false, opts.rare);
                grammar.tallyUninitializedStateSetTree(stateSetTree);
            }
            lexicon.optimize();
            grammar.optimize(randomness);
            previousGrammar = maxGrammar = grammar;
            previousLexicon = maxLexicon = lexicon;
        }
        // Phase loop. splitIndex % 3 selects the phase: 0 = split, 1 = merge, 2 = smooth.
        // The labeled blocks are decompiler output:
        //   break block28 -> continue at the split check
        //   break block31 -> continue at the merge code
        //   break block30 -> phase setup finished, run the shared EM loop
        //   break block29 -> skip this phase entirely (no EM, no intermediate save)
        int splitIndex = startSplit;
        while (splitIndex < numSplitTimes * 3) {
            block29: {
                String opString;
                block30: {
                    block31: {
                        block28: {
                            opString = "";
                            // --- smoothing phase ---
                            if (splitIndex % 3 != 2) break block28;
                            if (opts.smooth.equals("NoSmoothing")) break block29;
                            System.out.println("Setting smoother for grammar and lexicon.");
                            SmoothAcrossParentBits grSmoother = new SmoothAcrossParentBits(0.01, maxGrammar.splitTrees);
                            SmoothAcrossParentBits lexSmoother = new SmoothAcrossParentBits(0.1, maxGrammar.splitTrees);
                            maxGrammar.setSmoother(grSmoother);
                            maxLexicon.setSmoother(lexSmoother);
                            // The smoothing phase always uses a fixed 10 EM iterations.
                            maxIterations = 10;
                            minIterations = 10;
                            opString = "smoothing";
                            break block30;
                        }
                        // --- splitting phase ---
                        if (splitIndex % 3 != 0) break block31;
                        if (opts.noSplit) break block29;
                        System.out.println("Before splitting, we have a total of " + maxGrammar.totalSubStates() + " substates.");
                        CorpusStatistics corpusStatistics = new CorpusStatistics(tagNumberer, trainStateSetTrees);
                        int[] counts = corpusStatistics.getSymbolCounts();
                        maxGrammar = maxGrammar.splitAllStates(randomness, counts, allowMoreSubstatesThanCounts, 0);
                        maxLexicon = maxLexicon.splitAllStates(counts, allowMoreSubstatesThanCounts, 0);
                        // Smoothing is switched off again for the freshly split model.
                        NoSmoothing grSmoother = new NoSmoothing();
                        NoSmoothing lexSmoother = new NoSmoothing();
                        maxGrammar.setSmoother(grSmoother);
                        maxLexicon.setSmoother(lexSmoother);
                        System.out.println("After splitting, we have a total of " + maxGrammar.totalSubStates() + " substates.");
                        System.out.println("Rule probabilities are NOT normalized in the split, therefore the training LL is not guaranteed to improve between iteration 0 and 1!");
                        opString = "splitting";
                        maxIterations = opts.splitMaxIterations;
                        minIterations = opts.splitMinIterations;
                        break block30;
                    }
                    // --- merging phase (splitIndex % 3 == 1) ---
                    if (mergingPercentage == 0.0) break block29;
                    double[][] mergeWeights = GrammarMerger.computeMergeWeights(maxGrammar, maxLexicon, trainStateSetTrees);
                    double[][][] deltas = GrammarMerger.computeDeltas(maxGrammar, maxLexicon, mergeWeights, trainStateSetTrees);
                    boolean[][][] mergeThesePairs = GrammarMerger.determineMergePairs(deltas, separateMergingThreshold, mergingPercentage, maxGrammar);
                    grammar = GrammarMerger.doTheMerges(maxGrammar, maxLexicon, mergeThesePairs, mergeWeights);
                    short[] newNumSubStatesArray = grammar.numSubStates;
                    // Rebuild the tree lists so they match the merged substate counts.
                    trainStateSetTrees = new StateSetTreeList(trainStateSetTrees, newNumSubStatesArray, false);
                    validationStateSetTrees = new StateSetTreeList(validationStateSetTrees, newNumSubStatesArray, false);
                    lexicon = opts.simpleLexicon ? new SimpleLexicon(newNumSubStatesArray, -1, smoothParams, maxLexicon.getSmoother(), filter, trainStateSetTrees) : new SophisticatedLexicon(newNumSubStatesArray, SophisticatedLexicon.DEFAULT_SMOOTHING_CUTOFF, maxLexicon.getSmoothingParams(), maxLexicon.getSmoother(), maxLexicon.getPruningThreshold());
                    // Only the lexicon is re-estimated here, which is why a null grammar can be passed as the target.
                    boolean updateOnlyLexicon = true;
                    double trainingLikelihood = GrammarTrainer.doOneEStep(grammar, maxLexicon, null, lexicon, trainStateSetTrees, updateOnlyLexicon, opts.rare);
                    lexicon.optimize();
                    GrammarMerger.printMergingStatistics(maxGrammar, grammar);
                    opString = "merging";
                    maxGrammar = grammar;
                    maxLexicon = lexicon;
                    maxIterations = opts.mergeMaxIterations;
                    minIterations = opts.mergeMinIterations;
                }
                // --- shared post-phase work: reset state, then run EM ---
                previousGrammar = grammar = maxGrammar;
                previousLexicon = lexicon = maxLexicon;
                droppingIter = 0;
                numSubStatesArray = grammar.numSubStates;
                trainStateSetTrees = new StateSetTreeList(trainStateSetTrees, numSubStatesArray, false);
                validationStateSetTrees = new StateSetTreeList(validationStateSetTrees, numSubStatesArray, false);
                maxLikelihood = GrammarTrainer.calculateLogLikelihood(maxGrammar, maxLexicon, validationStateSetTrees);
                System.out.println("After " + opString + " in the " + (splitIndex / 3 + 1) + "th round, we get a validation likelihood of " + maxLikelihood);
                iter = 0;
                do {
                    // iter is incremented first; the printed iteration number is therefore zero-based.
                    System.out.println("Beginning iteration " + (++iter - 1) + ":");
                    System.out.print("Calculating validation likelihood...");
                    // The validation score belongs to the model from the PREVIOUS iteration.
                    double validationLikelihood = GrammarTrainer.calculateLogLikelihood(previousGrammar, previousLexicon, validationStateSetTrees);
                    System.out.println("done: " + validationLikelihood);
                    System.out.print("Calculating training likelihood...");
                    // Fresh grammar/lexicon objects receive the counts gathered in this E-step.
                    grammar = new Grammar(grammar.numSubStates, grammar.findClosedPaths, grammar.smoother, grammar, grammar.threshold);
                    lexicon = opts.simpleLexicon ? new SimpleLexicon(grammar.numSubStates, -1, smoothParams, lexicon.getSmoother(), filter, trainStateSetTrees) : new SophisticatedLexicon(grammar.numSubStates, SophisticatedLexicon.DEFAULT_SMOOTHING_CUTOFF, lexicon.getSmoothingParams(), lexicon.getSmoother(), lexicon.getPruningThreshold());
                    boolean updateOnlyLexicon = false;
                    double trainingLikelihood = GrammarTrainer.doOneEStep(previousGrammar, previousLexicon, grammar, lexicon, trainStateSetTrees, updateOnlyLexicon, opts.rare);
                    System.out.println("done: " + trainingLikelihood);
                    lexicon.optimize();
                    grammar.optimize(0.0);
                    // During the first minIterations the previous model is always accepted as the best;
                    // afterwards it is accepted only if validation likelihood did not drop.
                    if (iter < minIterations || validationLikelihood >= maxLikelihood) {
                        maxLikelihood = validationLikelihood;
                        maxGrammar = previousGrammar;
                        maxLexicon = previousLexicon;
                        droppingIter = 0;
                    } else {
                        ++droppingIter;
                    }
                    previousGrammar = grammar;
                    previousLexicon = lexicon;
                    // Stop after too many consecutive drops, after maxIterations, or after one pass in baseline mode.
                } while (droppingIter < allowedDroppingIters && !baseline && iter < maxIterations);
                // Save an intermediate grammar named <out>_<round>_<phase>.gr.
                pData = new ParserData(maxLexicon, maxGrammar, null, Numberer.getNumberers(), numSubStatesArray, VERTICAL_MARKOVIZATION, HORIZONTAL_MARKOVIZATION, binarization);
                String outTmpName = String.valueOf(outFileName) + "_" + (splitIndex / 3 + 1) + "_" + opString + ".gr";
                System.out.println("Saving grammar to " + outTmpName + ".");
                if (pData.Save(outTmpName)) {
                    System.out.println("Saving successful.");
                } else {
                    System.out.println("Saving failed!");
                }
                pData = null;
            }
            ++splitIndex;
        }
        // The model from the last EM iteration has not been scored on validation data yet; do that now.
        System.out.print("Calculating last validation likelihood...");
        double validationLikelihood = GrammarTrainer.calculateLogLikelihood(grammar, lexicon, validationStateSetTrees);
        System.out.println("done.\n  Iteration " + iter + " (final) gives validation likelihood " + validationLikelihood);
        if (validationLikelihood > maxLikelihood) {
            maxLikelihood = validationLikelihood;
            maxGrammar = previousGrammar;
            maxLexicon = previousLexicon;
        }
        // Save the best model under the requested output name.
        pData = new ParserData(maxLexicon, maxGrammar, null, Numberer.getNumberers(), numSubStatesArray, VERTICAL_MARKOVIZATION, HORIZONTAL_MARKOVIZATION, binarization);
        System.out.println("Saving grammar to " + outFileName + ".");
        System.out.println("It gives a validation data log likelihood of: " + maxLikelihood);
        if (pData.Save(outFileName)) {
            System.out.println("Saving successful.");
        } else {
            System.out.println("Saving failed!");
        }
        System.exit(0);
    }

    /**
     * Runs one pass over the training trees, scoring each tree with the previous model and
     * accumulating counts into the new lexicon (and, unless disabled, the new grammar).
     *
     * <p>Trees whose log likelihood is infinite or NaN contribute nothing: they are neither
     * tallied nor added to the returned sum. When {@link #VERBOSE} is set, such trees are
     * reported on standard output.
     *
     * @param previousGrammar grammar used for scoring and passed to {@code tallyStateSetTree}
     * @param previousLexicon lexicon used for scoring and passed to {@code trainTree}
     * @param grammar grammar receiving the tallied counts; not touched when
     *        {@code updateOnlyLexicon} is true
     * @param lexicon lexicon receiving the counts
     * @param trainStateSetTrees training trees to process
     * @param updateOnlyLexicon when true, the grammar tally step is skipped
     * @param unkThreshold threshold forwarded to {@code trainTree} and {@code tieRareWordStats}
     * @return sum of the log likelihoods of all trees with a finite log likelihood
     */
    public static double doOneEStep(Grammar previousGrammar, Lexicon previousLexicon, Grammar grammar, Lexicon lexicon, StateSetTreeList trainStateSetTrees, boolean updateOnlyLexicon, int unkThreshold) {
        final ArrayParser scorer = new ArrayParser(previousGrammar, previousLexicon);
        final int treeCount = trainStateSetTrees.size();
        final double halfway = (double)treeCount / 2.0;
        final boolean withoutSmoothing = true;
        final boolean printDebug = false;

        double logLikelihoodSum = 0.0;
        int treesSeen = 0;
        for (Tree<StateSet> tree : trainStateSetTrees) {
            // Compare the zero-based index against the midpoint, then advance the counter.
            boolean inSecondHalf = (double)treesSeen > halfway;
            ++treesSeen;

            scorer.doInsideOutsideScores(tree, withoutSmoothing, printDebug);

            // Log of the root inside score plus 100 times the root's scale value.
            double treeLogLikelihood = Math.log(tree.getLabel().getIScore(0)) + (double)(100 * tree.getLabel().getIScale());
            boolean usable = !Double.isInfinite(treeLogLikelihood) && !Double.isNaN(treeLogLikelihood);

            if (usable) {
                lexicon.trainTree(tree, -1.0, previousLexicon, inSecondHalf, withoutSmoothing, unkThreshold);
                if (!updateOnlyLexicon) {
                    grammar.tallyStateSetTree(tree, previousGrammar);
                }
                logLikelihoodSum += treeLogLikelihood;
            } else if (VERBOSE) {
                // treesSeen has already been advanced, so the reported number is one-based.
                System.out.println("Training sentence " + treesSeen + " is given " + treeLogLikelihood + " log likelihood!");
                System.out.println("Root iScore " + tree.getLabel().getIScore(0) + " scale " + tree.getLabel().getIScale());
            }
        }
        lexicon.tieRareWordStats(unkThreshold);
        return logLikelihoodSum;
    }

    /**
     * Computes the total log likelihood of a set of trees under the given grammar and lexicon.
     *
     * <p>Each tree is scored with {@code ArrayParser.doInsideScores}; its log likelihood is the
     * log of the root inside score plus 100 times the root's scale value. Trees whose log
     * likelihood is infinite or NaN are skipped and do not contribute to the sum.
     *
     * @param maxGrammar grammar used for scoring
     * @param maxLexicon lexicon used for scoring
     * @param validationStateSetTrees trees to score
     * @return sum of the log likelihoods of all trees with a finite log likelihood
     */
    public static double calculateLogLikelihood(Grammar maxGrammar, Lexicon maxLexicon, StateSetTreeList validationStateSetTrees) {
        ArrayParser parser = new ArrayParser(maxGrammar, maxLexicon);
        double totalLogLikelihood = 0.0;
        for (Tree<StateSet> stateSetTree : validationStateSetTrees) {
            parser.doInsideScores(stateSetTree, false, false, null);
            double ll = stateSetTree.getLabel().getIScore(0);
            ll = Math.log(ll) + (double)(100 * stateSetTree.getLabel().getIScale());
            // Math.log yields -Infinity for 0 and NaN for negative/NaN input; such trees are left out.
            if (Double.isInfinite(ll) || Double.isNaN(ll)) {
                continue;
            }
            totalLogLikelihood += ll;
        }
        return totalLogLikelihood;
    }

    /**
     * Prints a diagnosis of why a tree received a bad likelihood.
     *
     * <p>The tree is printed first. Then every preterminal is inspected: if all of its substate
     * inside scores are infinite or NaN, that position is reported as a lexicon problem together
     * with the lexicon's counters for the word and tag. Finally a one-line verdict states whether
     * the lexicon (at least one such position) or the grammar (none) is to blame.
     *
     * @param stateSetTree tree to diagnose; its inside scores are read as they currently are
     * @param lexicon lexicon whose counters are printed for problematic positions
     */
    public static void printBadLLReason(Tree<StateSet> stateSetTree, SophisticatedLexicon lexicon) {
        System.out.println(stateSetTree.toString());

        boolean lexiconAtFault = false;
        Iterator<StateSet> terminals = stateSetTree.getYield().iterator();
        for (StateSet preTerminal : stateSetTree.getPreTerminalYield()) {
            // Terminals are consumed in step with the preterminals.
            String word = terminals.next().getWord();

            // A position is problematic only if no substate has a finite, non-NaN score.
            boolean everyScoreBad = true;
            for (int sub = 0; sub < preTerminal.numSubStates(); ++sub) {
                double score = preTerminal.getIScore(sub);
                if (Double.isInfinite(score) || Double.isNaN(score)) {
                    continue;
                }
                everyScoreBad = false;
            }

            if (!everyScoreBad) {
                continue;
            }
            lexiconAtFault = true;
            System.out.println("LEXICON PROBLEM ON STATE " + preTerminal.getState() + " word " + word);
            System.out.println("  word " + lexicon.wordCounter.getCount(preTerminal.getWord()));
            for (int sub = 0; sub < preTerminal.numSubStates(); ++sub) {
                System.out.println("  tag " + lexicon.tagCounter[preTerminal.getState()][sub]);
                System.out.println("  word/state/sub " + lexicon.wordToTagCounters[preTerminal.getState()].get(preTerminal.getWord())[sub]);
            }
        }

        String culprit = lexiconAtFault ? "lexicon" : "grammar";
        System.out.println("  the likelihood is bad because of the " + culprit);
    }

    /**
     * Sums the root inside score (substate 0) of every tree in the list.
     *
     * <p>Scores that are infinite or NaN are reported on standard output and left out of the
     * sum. The scores are read as they currently are on the trees; nothing is recomputed here.
     *
     * <p>NOTE(review): unlike {@code calculateLogLikelihood}, this method adds the raw inside
     * scores without taking a logarithm or applying the scale term — confirm with callers
     * whether that is intended.
     *
     * @param trees trees whose root inside scores are summed
     * @param verbose when true, every tree's score is printed
     * @return sum of all finite, non-NaN root inside scores
     */
    public static double logLikelihood(List<Tree<StateSet>> trees, boolean verbose) {
        double total = 0.0;
        for (Tree<StateSet> tree : trees) {
            double rootScore = tree.getLabel().getIScore(0);
            if (verbose) {
                System.out.println("LL is " + rootScore + ".");
            }
            boolean finite = !(Double.isInfinite(rootScore) || Double.isNaN(rootScore));
            if (finite) {
                total += rootScore;
            } else {
                System.out.println("LL is not finite.");
            }
        }
        return total;
    }

    /**
     * Runs {@code parser.doInsideOutsideScores} on every tree in the list, in list order.
     *
     * @param trees trees to score
     * @param parser parser used to score each tree
     */
    public static void updateStateSetTrees(List<Tree<StateSet>> trees, ArrayParser parser) {
        Iterator<Tree<StateSet>> remaining = trees.iterator();
        while (remaining.hasNext()) {
            Tree<StateSet> current = remaining.next();
            parser.doInsideOutsideScores(current, false, false);
        }
    }

    /**
     * Builds the initial per-state substate count array.
     *
     * <p>After the tag numberer has been initialized from both tree sets, an array with one
     * entry per numbered tag is created. Every entry is set to {@code nSubStates}, except
     * entry 0, which is set to 1.
     *
     * @param trainTrees training trees
     * @param validationTrees validation trees
     * @param tagNumberer numberer that is initialized from the trees and determines the array length
     * @param nSubStates substate count assigned to every state other than state 0
     * @return a new array of length {@code tagNumberer.total()} (narrowed to {@code short})
     */
    public static short[] initializeSubStateArray(List<Tree<String>> trainTrees, List<Tree<String>> validationTrees, Numberer tagNumberer, short nSubStates) {
        short[] provisionalCounts = new short[2];
        provisionalCounts[0] = 1;
        provisionalCounts[1] = nSubStates;

        // The constructed lists are never read afterwards; presumably they are built for their
        // side effect on tagNumberer — verify against StateSetTreeList before removing.
        new StateSetTreeList(trainTrees, provisionalCounts, true, tagNumberer);
        new StateSetTreeList(validationTrees, provisionalCounts, true, tagNumberer);

        StateSetTreeList.initializeTagNumberer(trainTrees, tagNumberer);
        StateSetTreeList.initializeTagNumberer(validationTrees, tagNumberer);

        short stateCount = (short)tagNumberer.total();
        short[] subStateCounts = new short[stateCount];
        Arrays.fill(subStateCounts, nSubStates);
        subStateCounts[0] = 1;
        return subStateCounts;
    }

    /**
     * Command-line options for {@link GrammarTrainer#main(String[])}.
     *
     * <p>{@code main} hands this class to {@code OptionParser}, which populates the fields from
     * the arguments. Each field's initializer is its default value; the usage strings are kept
     * in agreement with those initializers.
     */
    public static class Options {
        @Option(name="-out", required=true, usage="Output File for Grammar (Required)")
        public String outFileName;
        @Option(name="-path", usage="Path to Corpus (Default: null)")
        public String path = null;
        @Option(name="-SMcycles", usage="The number of split&merge iterations (Default: 6)")
        public int numSplits = 6;
        @Option(name="-mergingPercentage", usage="Merging percentage (Default: 0.5)")
        public double mergingPercentage = 0.5;
        @Option(name="-baseline", usage="Just read of the MLE baseline grammar")
        public boolean baseline = false;
        // Usage text corrected: the default is WSJ (see initializer); "CHNINESE" was a typo.
        @Option(name="-treebank", usage="Language:  WSJ, CHINESE, GERMAN, CONLL, SINGLEFILE (Default: WSJ)")
        public Corpus.TreeBankType treebank = Corpus.TreeBankType.WSJ;
        @Option(name="-splitMaxIt", usage="Maximum number of EM iterations after splitting (Default: 50)")
        public int splitMaxIterations = 50;
        @Option(name="-splitMinIt", usage="Minimum number of EM iterations after splitting (Default: 50)")
        public int splitMinIterations = 50;
        @Option(name="-mergeMaxIt", usage="Maximum number of EM iterations after merging (Default: 20)")
        public int mergeMaxIterations = 20;
        @Option(name="-mergeMinIt", usage="Minimum number of EM iterations after merging (Default: 20)")
        public int mergeMinIterations = 20;
        @Option(name="-di", usage="The number of allowed iterations in which the validation likelihood drops. (Default: 6)")
        public int di = 6;
        @Option(name="-trfr", usage="The fraction of the training corpus to keep (Default: 1.0)\n")
        public double trainingFractionToKeep = 1.0;
        @Option(name="-filter", usage="Filter rules with prob below this threshold (Default: 1.0e-30)")
        public double filter = 1.0E-30;
        // main skips the smoothing phase when this equals "NoSmoothing".
        @Option(name="-smooth", usage="Type of grammar smoothing used.")
        public String smooth = "SmoothAcrossParentBits";
        @Option(name="-maxL", usage="Maximum sentence length (Default <=10000)")
        public int maxSentenceLength = 10000;
        @Option(name="-b", usage="LEFT/RIGHT Binarization (Default: RIGHT)")
        public Binarization binarization = Binarization.RIGHT;
        @Option(name="-noSplit", usage="Don't split - just load and continue training an existing grammar (true/false) (Default:false)")
        public boolean noSplit = false;
        @Option(name="-in", usage="Input File for Grammar")
        public String inFile = null;
        @Option(name="-randSeed", usage="Seed for random number generator (Two works well for English)")
        public int randSeed = 2;
        @Option(name="-sep", usage="Set merging threshold for grammar and lexicon separately (Default: false)")
        public boolean separateMergingThreshold = false;
        @Option(name="-trainOnDevSet", usage="Include the development set into the training set (Default: false)")
        public boolean trainOnDevSet = false;
        @Option(name="-hor", usage="Horizontal Markovization (Default: 0)")
        public int horizontalMarkovization = 0;
        // Usage text corrected: the default is 1 (see initializer), not 2.
        @Option(name="-sub", usage="Number of substates to split (Default: 1)")
        public short nSubStates = 1;
        @Option(name="-ver", usage="Vertical Markovization (Default: 1)")
        public int verticalMarkovization = 1;
        @Option(name="-v", usage="Verbose/Quiet (Default: Quiet)\n")
        public boolean verbose = false;
        @Option(name="-lowercase", usage="Lowercase all words in the treebank")
        public boolean lowercase = false;
        @Option(name="-r", usage="Level of Randomness at init (Default: 1)\n")
        public double randomization = 1.0;
        @Option(name="-sm1", usage="Lexicon smoothing parameter 1")
        public double smoothingParameter1 = 0.5;
        @Option(name="-sm2", usage="Lexicon smoothing parameter 2")
        public double smoothingParameter2 = 0.1;
        @Option(name="-rare", usage="Rare word threshold (Default 4)")
        public int rare = 4;
        @Option(name="-spath", usage="Whether or not to store the best path info (true/false) (Default: true)")
        public boolean findClosedUnaryPaths = true;
        @Option(name="-simpleLexicon", usage="Use the simple generative lexicon")
        public boolean simpleLexicon = false;
        @Option(name="-skipSection", usage="Skips a particular section of the WSJ training corpus (Needed for training Mark Johnsons reranker")
        public int skipSection = -1;
        @Option(name="-skipBilingual", usage="Skips the bilingual portion of the Chinese treebank (Needed for training the bilingual reranker")
        public boolean skipBilingual = false;
    }
}

