opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/opinion_processor/DefaultSentimentProcessor.java - opennlp-sandbox - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package opennlp.tools.parse_thicket.opinion_processor;

 import java.io.IOException;
 import java.util.List;

 import edu.stanford.nlp.util.logging.Redwood;

 import java.util.Iterator;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.FileOutputStream;
 import java.io.PrintStream;
 import java.text.DecimalFormat;
 import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Properties;
 import java.util.logging.Logger;

 import org.ejml.simple.SimpleMatrix;

 import edu.stanford.nlp.io.IOUtils;
 import edu.stanford.nlp.ling.CoreAnnotations;
 import edu.stanford.nlp.ling.CoreLabel;
 import edu.stanford.nlp.ling.Label;
 import edu.stanford.nlp.ling.LabeledWord;
 import edu.stanford.nlp.ling.TaggedWord;
 import edu.stanford.nlp.ling.WordLemmaTag;
 import edu.stanford.nlp.ling.WordTag;
 import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
 import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
 import edu.stanford.nlp.sentiment.SentimentCoreAnnotations.SentimentAnnotatedTree;
 import edu.stanford.nlp.sentiment.SentimentUtils;
 import edu.stanford.nlp.trees.MemoryTreebank;
 import edu.stanford.nlp.trees.Tree;
 import edu.stanford.nlp.trees.TreeCoreAnnotations;
 import edu.stanford.nlp.util.CoreMap;
 import edu.stanford.nlp.util.Generics;
 import edu.stanford.nlp.ling.CoreAnnotations;
 import edu.stanford.nlp.pipeline.Annotation;
 import edu.stanford.nlp.pipeline.StanfordCoreNLP;
 import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
 import edu.stanford.nlp.trees.Tree;
 import edu.stanford.nlp.util.CoreMap;

 public class DefaultSentimentProcessor {
 	/** A logger for this class */
 	private static final Logger log = Logger
 			.getLogger("opennlp.tools.parse_thicket.opinion_processor.DefaultSentimentProcessor");

 	/** Renders each printed vector or score entry with exactly four decimal places. */
 	private static final NumberFormat NF = new DecimalFormat("0.0000");

 	/**
 	 * Output styles understood by {@code outputTree}: PENNTREES prints the tree
 	 * relabeled with predicted classes, VECTORS prints the index-labeled tree
 	 * followed by each node's vector, ROOT prints the sentence-level sentiment
 	 * class, and PROBABILITIES prints the index-labeled tree followed by each
 	 * node's prediction scores.
 	 */
 	enum Output {
 		PENNTREES, VECTORS, ROOT, PROBABILITIES
 	}

 	/**
 	 * Input kinds understood by {@code getAnnotations}: TEXT is a raw text file
 	 * that is run through the tokenizer pipeline, TREES is a file of trees.
 	 */
 	enum Input {
 		TEXT, TREES
 	}

 	/**
 	 * Relabels every non-leaf node of the tree with its predicted sentiment
 	 * class, written as a decimal string, so that {@code Tree.toString()}
 	 * shows the predictions.  Children are relabeled before their parent;
 	 * leaves are left untouched.
 	 *
 	 * @param tree the tree to relabel in place
 	 * @throws IllegalArgumentException if a non-leaf node does not carry a CoreLabel
 	 */
 	static void setSentimentLabels(Tree tree) {
 		if (!tree.isLeaf()) {
 			for (Tree subtree : tree.children()) {
 				setSentimentLabels(subtree);
 			}

 			Label nodeLabel = tree.label();
 			if (nodeLabel instanceof CoreLabel) {
 				int predictedClass = RNNCoreAnnotations.getPredictedClass(tree);
 				((CoreLabel) nodeLabel).setValue(Integer.toString(predictedClass));
 			} else {
 				throw new IllegalArgumentException("Required a tree with CoreLabels");
 			}
 		}
 	}

 	/**
 	 * Replaces the label value of every non-leaf node with its node index.
 	 * Numbering is pre-order: a node receives its index before any of its
 	 * children do, and leaves do not consume an index.
 	 *
 	 * @param tree  the (sub)tree to relabel in place
 	 * @param index the index to assign to {@code tree} itself
 	 * @return the next unused index after this subtree has been numbered
 	 */
 	static int setIndexLabels(Tree tree, int index) {
 		if (!tree.isLeaf()) {
 			tree.label().setValue(Integer.toString(index));
 			int nextIndex = index + 1;
 			for (Tree subtree : tree.children()) {
 				nextIndex = setIndexLabels(subtree, nextIndex);
 			}
 			return nextIndex;
 		}
 		return index;
 	}

 	/**
 	 * Prints one line per non-leaf node: the node index followed by every
 	 * element of the node vector, formatted with {@code NF}.  Nodes are
 	 * numbered in the same order as {@code setIndexLabels}.
 	 *
 	 * @param out   the stream to print to
 	 * @param tree  the (sub)tree whose vectors are printed
 	 * @param index the index of {@code tree} itself
 	 * @return the next unused index after this subtree
 	 */
 	static int outputTreeVectors(PrintStream out, Tree tree, int index) {
 		if (!tree.isLeaf()) {
 			out.print("  " + index + ":");
 			SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
 			int position = 0;
 			while (position < nodeVector.getNumElements()) {
 				out.print("  " + NF.format(nodeVector.get(position)));
 				position++;
 			}
 			out.println();

 			int nextIndex = index + 1;
 			for (Tree subtree : tree.children()) {
 				nextIndex = outputTreeVectors(out, subtree, nextIndex);
 			}
 			return nextIndex;
 		}
 		return index;
 	}

 	/**
 	 * Prints one line per non-leaf node: the node index followed by every
 	 * element of the node's prediction matrix, formatted with {@code NF}.
 	 * Nodes are numbered in the same order as {@code setIndexLabels}.
 	 *
 	 * @param out   the stream to print to
 	 * @param tree  the (sub)tree whose predictions are printed
 	 * @param index the index of {@code tree} itself
 	 * @return the next unused index after this subtree
 	 */
 	static int outputTreeScores(PrintStream out, Tree tree, int index) {
 		if (!tree.isLeaf()) {
 			out.print("  " + index + ":");
 			SimpleMatrix scores = RNNCoreAnnotations.getPredictions(tree);
 			int position = 0;
 			while (position < scores.getNumElements()) {
 				out.print("  " + NF.format(scores.get(position)));
 				position++;
 			}
 			out.println();

 			int nextIndex = index + 1;
 			for (Tree subtree : tree.children()) {
 				nextIndex = outputTreeScores(out, subtree, nextIndex);
 			}
 			return nextIndex;
 		}
 		return index;
 	}

 	/**
 	 * Renders a single word-like object as a String without supplying a
 	 * word/tag separator.
 	 *
 	 * @param o         the object to render
 	 * @param justValue passed through to the three-argument overload
 	 * @return the rendered word
 	 */
 	public static <T> String wordToString(T o, final boolean justValue) {
 		final String noSeparator = null;
 		return wordToString(o, justValue, noSeparator);
 	}

 	/**
 	 * Renders a single word-like object as a String.
 	 * <ul>
 	 * <li>A CoreLabel is rendered as its {@code value()}, or its {@code word()} if
 	 *     the value is null.  Unless {@code justValue} is set, a non-null tag is
 	 *     appended after the separator ({@code CoreLabel.TAG_SEPARATOR} when
 	 *     {@code separator} is null).</li>
 	 * <li>Any other Label is rendered as its {@code value()} when
 	 *     {@code justValue} is set.</li>
 	 * <li>Otherwise, with a non-null separator, TaggedWord, WordLemmaTag and
 	 *     WordTag are rendered through their {@code toString(separator)}.</li>
 	 * <li>Everything else falls back to {@code toString()}.</li>
 	 * </ul>
 	 *
 	 * @param o         the object to render
 	 * @param justValue if true, Labels are rendered as their bare value
 	 * @param separator string placed between word and tag; may be null
 	 * @return the rendered word
 	 */
 	public static <T> String wordToString(T o, final boolean justValue,
 			final String separator) {
 		if (o instanceof CoreLabel) {
 			CoreLabel coreLabel = (CoreLabel) o;
 			String text = coreLabel.value();
 			if (text == null) {
 				text = coreLabel.word();
 			}
 			if (justValue || coreLabel.tag() == null) {
 				return text;
 			}
 			if (separator == null) {
 				return text + CoreLabel.TAG_SEPARATOR + coreLabel.tag();
 			}
 			return text + separator + coreLabel.tag();
 		}

 		if (justValue && o instanceof Label) {
 			return ((Label) o).value();
 		}

 		if (separator != null) {
 			// These legacy word types have no shared interface, so each is
 			// dispatched on individually.
 			if (o instanceof TaggedWord) {
 				return ((TaggedWord) o).toString(separator);
 			}
 			if (o instanceof LabeledWord) {
 				// NOTE(review): the separator is not passed on for LabeledWord -- confirm this is intended.
 				return ((LabeledWord) o).toString();
 			}
 			if (o instanceof WordLemmaTag) {
 				return ((WordLemmaTag) o).toString(separator);
 			}
 			if (o instanceof WordTag) {
 				return ((WordTag) o).toString(separator);
 			}
 		}
 		return o.toString();
 	}


 	/**
 	 * Joins the items of a tokenized sentence with single spaces, rendering
 	 * each Label item as its {@code value()}.  Equivalent to
 	 * {@code listToString(list, true)}.
 	 *
 	 * @param list The tokenized sentence to print out
 	 * @return The tokenized sentence as a String
 	 */
 	public static <T> String listToString(List<T> list) {
 		return listToString(list, true, null);
 	}
 	/**
 	 * Joins the items of a tokenized sentence with single spaces, without
 	 * supplying a word/tag separator.  Items need not be Labels.
 	 *
 	 * @param list The tokenized sentence to print out
 	 * @param justValue If {@code true}, Label items are rendered as just their
 	 *                  {@code value()}; see {@code wordToString} for how
 	 *                  other items are rendered.
 	 * @return The sentence in String form
 	 */
 	public static <T> String listToString(List<T> list, final boolean justValue) {
 		final String noSeparator = null;
 		return listToString(list, justValue, noSeparator);
 	}

 	/**
 	 * Joins the items of a tokenized sentence with single spaces, rendering
 	 * each item through {@code wordToString(item, justValue, separator)}.
 	 *
 	 * @param list The tokenized sentence to print out
 	 * @param justValue passed through to {@code wordToString}
 	 * @param separator The string used to separate Word and Tag
 	 *                  in TaggedWord, etc; may be null
 	 * @return The sentence in String form; empty for an empty list
 	 */
 	public static <T> String listToString(List<T> list, final boolean justValue,
 			final String separator) {
 		StringBuilder joined = new StringBuilder();
 		boolean first = true;
 		for (T word : list) {
 			if (!first) {
 				joined.append(' ');
 			}
 			joined.append(wordToString(word, justValue, separator));
 			first = false;
 		}
 		return joined.toString();
 	}

 	/**
 	 * Prints the sentiment analysis of one sentence in each requested style,
 	 * in the order the styles are listed.
 	 *
 	 * @param out           the stream to print to
 	 * @param sentence      a sentence carrying a sentiment-annotated tree (and,
 	 *                      for ROOT, a sentiment class annotation)
 	 * @param outputFormats the styles to print
 	 * @throws IllegalArgumentException if a style is not handled
 	 */
 	static void outputTree(PrintStream out, CoreMap sentence, List<Output> outputFormats) {
 		final Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
 		for (Output format : outputFormats) {
 			switch (format) {
 			case ROOT:
 				out.println("  " + sentence.get(SentimentCoreAnnotations.SentimentClass.class));
 				break;
 			case PENNTREES: {
 				// Work on a copy so the annotated tree keeps its original labels.
 				Tree labeled = sentimentTree.deepCopy();
 				setSentimentLabels(labeled);
 				out.println(labeled);
 				break;
 			}
 			case VECTORS:
 			case PROBABILITIES: {
 				// Both styles print the index-labeled copy first, then the
 				// per-node numbers taken from the original tree.
 				Tree indexed = sentimentTree.deepCopy();
 				setIndexLabels(indexed, 0);
 				out.println(indexed);
 				if (format == Output.VECTORS) {
 					outputTreeVectors(out, sentimentTree, 0);
 				} else {
 					outputTreeScores(out, sentimentTree, 0);
 				}
 				break;
 			}
 			default:
 				throw new IllegalArgumentException("Unknown output format " + format);
 			}
 		}
 	}

 	/**
 	 * Reads the given file and returns one Annotation per sentence (TEXT) or
 	 * per tree (TREES).
 	 *
 	 * @param tokenizer     pipeline used to split TEXT input into sentences;
 	 *                      not used for TREES input
 	 * @param inputFormat   how to interpret the file
 	 * @param filename      the file to read
 	 * @param filterUnknown for TREES input, read trees with gold labels and
 	 *                      filter them with {@code SentimentUtils.filterUnknownRoots}
 	 * @return the annotations, each holding exactly one sentence
 	 * @throws IllegalArgumentException if the input format is not handled
 	 */
 	public static List<Annotation> getAnnotations(StanfordCoreNLP tokenizer, Input inputFormat, String filename, boolean filterUnknown) {
 		switch (inputFormat) {
 		case TEXT:
 			return annotationsFromTextFile(tokenizer, filename);
 		case TREES:
 			return annotationsFromTreeFile(filename, filterUnknown);
 		default:
 			throw new IllegalArgumentException("Unknown format " + inputFormat);
 		}
 	}

 	/** Tokenizes the whole file and wraps each resulting sentence in its own Annotation. */
 	private static List<Annotation> annotationsFromTextFile(StanfordCoreNLP tokenizer, String filename) {
 		Annotation document = new Annotation(IOUtils.slurpFileNoExceptions(filename));
 		tokenizer.annotate(document);

 		List<Annotation> perSentence = Generics.newArrayList();
 		for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
 			Annotation single = new Annotation(sentence.get(CoreAnnotations.TextAnnotation.class));
 			single.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
 			perSentence.add(single);
 		}
 		return perSentence;
 	}

 	/** Loads the trees in the file and wraps each one as a single-sentence Annotation. */
 	private static List<Annotation> annotationsFromTreeFile(String filename, boolean filterUnknown) {
 		List<Tree> loaded;
 		if (!filterUnknown) {
 			loaded = Generics.newArrayList();
 			MemoryTreebank treebank = new MemoryTreebank("utf-8");
 			treebank.loadPath(filename, null);
 			for (Tree tree : treebank) {
 				loaded.add(tree);
 			}
 		} else {
 			loaded = SentimentUtils.filterUnknownRoots(SentimentUtils.readTreesWithGoldLabels(filename));
 		}

 		List<Annotation> perTree = Generics.newArrayList();
 		for (Tree tree : loaded) {
 			// The sentence text is rebuilt from the tree's leaves.
 			CoreMap sentence = new Annotation(listToString(tree.yield()));
 			sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
 			Annotation wrapper = new Annotation("");
 			List<CoreMap> sentences = Collections.singletonList(sentence);
 			wrapper.set(CoreAnnotations.SentencesAnnotation.class, sentences);
 			perTree.add(wrapper);
 		}
 		return perTree;
 	}

 	/**
 	 * Runs the tree-based sentiment model on input selected by command-line
 	 * style arguments.
 	 * <p>
 	 * Recognized flags (case-insensitive): {@code -sentimentModel <path>},
 	 * {@code -parserModel <path>}, {@code -file <path>},
 	 * {@code -fileList <comma-separated paths>}, {@code -stdin},
 	 * {@code -input <TEXT|TREES>}, {@code -output <comma-separated Output names>},
 	 * {@code -filterUnknown}, {@code -tlppClass <class name>} and {@code -help}
 	 * (which terminates the JVM through {@code System.exit(0)}).
 	 * Exactly one of {@code -file}, {@code -fileList} or {@code -stdin} must be
 	 * given.  Results for {@code -file} and {@code -stdin} are printed to
 	 * {@code System.out}; for {@code -fileList} the results of each file are
 	 * written to {@code <file>.out}.
 	 *
 	 * @param args the flags described above
 	 * @throws IOException if an output file cannot be written or stdin cannot be read
 	 * @throws IllegalArgumentException if a flag is unknown or lacks its value,
 	 *         if the input source is missing or ambiguous, or if {@code -stdin}
 	 *         is combined with {@code -input TREES}
 	 */
 	public void processTextWithArgs(String[] args) throws IOException {
 		String parserModel = null;
 		String sentimentModel = null;

 		String filename = null;
 		String fileList = null;
 		boolean stdin = false;

 		boolean filterUnknown = false;

 		List<Output> outputFormats = Collections.singletonList(Output.ROOT);
 		Input inputFormat = Input.TEXT;

 		// The binarizer loads this class by name, so the default must be a real
 		// class name rather than a placeholder string.
 		String tlppClass = "edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams";

 		for (int argIndex = 0; argIndex < args.length; ) {
 			String flag = args[argIndex];
 			if (flag.equalsIgnoreCase("-sentimentModel")) {
 				sentimentModel = requireFlagValue(args, argIndex);
 				argIndex += 2;
 			} else if (flag.equalsIgnoreCase("-parserModel")) {
 				parserModel = requireFlagValue(args, argIndex);
 				argIndex += 2;
 			} else if (flag.equalsIgnoreCase("-file")) {
 				filename = requireFlagValue(args, argIndex);
 				argIndex += 2;
 			} else if (flag.equalsIgnoreCase("-fileList")) {
 				fileList = requireFlagValue(args, argIndex);
 				argIndex += 2;
 			} else if (flag.equalsIgnoreCase("-stdin")) {
 				stdin = true;
 				argIndex++;
 			} else if (flag.equalsIgnoreCase("-input")) {
 				// Locale.ROOT keeps the enum lookup independent of the default locale.
 				inputFormat = Input.valueOf(requireFlagValue(args, argIndex).toUpperCase(java.util.Locale.ROOT));
 				argIndex += 2;
 			} else if (flag.equalsIgnoreCase("-output")) {
 				String[] formats = requireFlagValue(args, argIndex).split(",");
 				outputFormats = new ArrayList<>();
 				for (String format : formats) {
 					outputFormats.add(Output.valueOf(format.toUpperCase(java.util.Locale.ROOT)));
 				}
 				argIndex += 2;
 			} else if (flag.equalsIgnoreCase("-filterUnknown")) {
 				filterUnknown = true;
 				argIndex++;
 			} else if (flag.equalsIgnoreCase("-tlppClass")) {
 				tlppClass = requireFlagValue(args, argIndex);
 				argIndex += 2;
 			} else if (flag.equalsIgnoreCase("-help")) {
 				System.exit(0);
 			} else {
 				// Report the offending argument itself, not the one after it.
 				log.info("Unknown argument " + flag);
 				throw new IllegalArgumentException("Unknown argument " + flag);
 			}
 		}

 		// We construct two pipelines.  One handles tokenization, if
 		// necessary.  The other takes tokenized sentences and converts
 		// them to sentiment trees.
 		Properties pipelineProps = new Properties();
 		Properties tokenizerProps = null;
 		if (sentimentModel != null) {
 			pipelineProps.setProperty("sentiment.model", sentimentModel);
 		}
 		if (parserModel != null) {
 			pipelineProps.setProperty("parse.model", parserModel);
 		}
 		if (inputFormat == Input.TREES) {
 			pipelineProps.setProperty("annotators", "binarizer, sentiment");
 			pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
 			pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
 			pipelineProps.setProperty("enforceRequirements", "false");
 		} else {
 			pipelineProps.setProperty("annotators", "parse, sentiment");
 			pipelineProps.setProperty("enforceRequirements", "false");
 			tokenizerProps = new Properties();
 			tokenizerProps.setProperty("annotators", "tokenize, ssplit");
 		}

 		if (stdin && tokenizerProps != null) {
 			tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
 		}

 		int count = 0;
 		if (filename != null) count++;
 		if (fileList != null) count++;
 		if (stdin) count++;
 		if (count > 1) {
 			throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
 		}
 		if (count == 0) {
 			throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
 		}
 		if (stdin && tokenizerProps == null) {
 			// Stdin lines are tokenized before analysis, and no tokenizer is
 			// built for TREES input.
 			throw new IllegalArgumentException("-stdin is only supported with -input TEXT");
 		}

 		StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
 		StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);

 		if (filename != null) {
 			// Process a file.  The pipeline will do tokenization, which
 			// means it will split it into sentences as best as possible
 			// with the tokenizer.
 			List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
 			for (Annotation annotation : annotations) {
 				pipeline.annotate(annotation);

 				for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
 					System.out.println(sentence);
 					outputTree(System.out, sentence, outputFormats);
 				}
 			}
 		} else if (fileList != null) {
 			// Process multiple files.  Output will go to filename.out
 			// for each file.
 			for (String file : fileList.split(",")) {
 				List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
 				// try-with-resources closes the output file even when annotation fails.
 				try (PrintStream pout = new PrintStream(new FileOutputStream(file + ".out"))) {
 					for (Annotation annotation : annotations) {
 						pipeline.annotate(annotation);

 						for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
 							pout.println(sentence);
 							outputTree(pout, sentence, outputFormats);
 						}
 					}
 					// PrintStream swallows write failures; surface them to the caller.
 					if (pout.checkError()) {
 						throw new IOException("Failed writing sentiment output to " + file + ".out");
 					}
 				}
 			}
 		} else {
 			// Process stdin.  Each line will be treated as a single sentence.
 			log.info("Reading in text from stdin.");
 			log.info("Please enter one sentence per line.");
 			log.info("Processing will end when EOF is reached.");
 			BufferedReader reader = IOUtils.readerFromStdin("utf-8");

 			for (String line; (line = reader.readLine()) != null; ) {
 				line = line.trim();
 				if ( ! line.isEmpty()) {
 					Annotation annotation = tokenizer.process(line);
 					pipeline.annotate(annotation);
 					for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
 						outputTree(System.out, sentence, outputFormats);
 					}
 				} else {
 					// Output blank lines for blank lines so the tool can be
 					// used for line-by-line text processing
 					System.out.println();
 				}
 			}

 		}
 	}

 	/** Returns the value that follows the flag at {@code flagIndex}, failing clearly if the flag is the last argument. */
 	private static String requireFlagValue(String[] args, int flagIndex) {
 		if (flagIndex + 1 >= args.length) {
 			throw new IllegalArgumentException("Missing value for argument " + args[flagIndex]);
 		}
 		return args[flagIndex + 1];
 	}

 	/**
 	 * Returns the predicted sentiment class of the longest sentence in the
 	 * given text, where length is measured on the sentence's
 	 * {@code toString()}.  When several sentences share the greatest length,
 	 * the earliest one wins.
 	 * <p>
 	 * A new pipeline (tokenize, ssplit, parse, sentiment) is built on every
 	 * call, before the argument is inspected.
 	 *
 	 * @param expression the text to analyze; may be null or empty
 	 * @return the predicted class of the longest sentence, or 0 when the text
 	 *         is null, empty or yields no sentence.
 	 *         NOTE(review): a predicted class of 0 would be indistinguishable
 	 *         from this default -- confirm with callers.
 	 */
 	public float getNumericSentimentValue(String expression) {
 		Properties config = new Properties();
 		config.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
 		StanfordCoreNLP sentimentPipeline = new StanfordCoreNLP(config);

 		if (expression == null || expression.length() == 0) {
 			return 0;
 		}

 		int dominantSentiment = 0;
 		int longestLength = 0;
 		Annotation analyzed = sentimentPipeline.process(expression);
 		for (CoreMap sentence : analyzed.get(CoreAnnotations.SentencesAnnotation.class)) {
 			int predicted = RNNCoreAnnotations.getPredictedClass(sentence.get(SentimentAnnotatedTree.class));
 			int length = sentence.toString().length();
 			if (length > longestLength) {
 				longestLength = length;
 				dominantSentiment = predicted;
 			}
 		}
 		return dominantSentiment;
 	}
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package opennlp.tools.parse_thicket.opinion_processor;

	import java.io.IOException;
	import java.util.List;

	import edu.stanford.nlp.util.logging.Redwood;

	import java.util.Iterator;
	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.FileOutputStream;
	import java.io.PrintStream;
	import java.text.DecimalFormat;
	import java.text.NumberFormat;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.List;
	import java.util.Properties;
	import java.util.logging.Logger;

	import org.ejml.simple.SimpleMatrix;

	import edu.stanford.nlp.io.IOUtils;
	import edu.stanford.nlp.ling.CoreAnnotations;
	import edu.stanford.nlp.ling.CoreLabel;
	import edu.stanford.nlp.ling.Label;
	import edu.stanford.nlp.ling.LabeledWord;
	import edu.stanford.nlp.ling.TaggedWord;
	import edu.stanford.nlp.ling.WordLemmaTag;
	import edu.stanford.nlp.ling.WordTag;
	import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
	import edu.stanford.nlp.pipeline.Annotation;
	import edu.stanford.nlp.pipeline.StanfordCoreNLP;
	import edu.stanford.nlp.sentiment.SentimentCoreAnnotations.SentimentAnnotatedTree;
	import edu.stanford.nlp.sentiment.SentimentUtils;
	import edu.stanford.nlp.trees.MemoryTreebank;
	import edu.stanford.nlp.trees.Tree;
	import edu.stanford.nlp.trees.TreeCoreAnnotations;
	import edu.stanford.nlp.util.CoreMap;
	import edu.stanford.nlp.util.Generics;
	import edu.stanford.nlp.ling.CoreAnnotations;
	import edu.stanford.nlp.pipeline.Annotation;
	import edu.stanford.nlp.pipeline.StanfordCoreNLP;
	import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
	import edu.stanford.nlp.trees.Tree;
	import edu.stanford.nlp.util.CoreMap;

	public class DefaultSentimentProcessor {
	/** A logger for this class */
	private static final Logger log = Logger
	.getLogger("opennlp.tools.parse_thicket.opinion_processor.DefaultSentimentProcessor");

	/** Renders each printed vector or score entry with exactly four decimal places. */
	private static final NumberFormat NF = new DecimalFormat("0.0000");

	/**
	 * Output styles understood by {@code outputTree}: PENNTREES prints the tree
	 * relabeled with predicted classes, VECTORS prints the index-labeled tree
	 * followed by each node's vector, ROOT prints the sentence-level sentiment
	 * class, and PROBABILITIES prints the index-labeled tree followed by each
	 * node's prediction scores.
	 */
	enum Output {
	PENNTREES, VECTORS, ROOT, PROBABILITIES
	}

	/**
	 * Input kinds understood by {@code getAnnotations}: TEXT is a raw text file
	 * that is run through the tokenizer pipeline, TREES is a file of trees.
	 */
	enum Input {
	TEXT, TREES
	}

	/**
	 * Relabels every non-leaf node of the tree with its predicted sentiment
	 * class, written as a decimal string, so that {@code Tree.toString()}
	 * shows the predictions. Children are relabeled before their parent;
	 * leaves are left untouched.
	 *
	 * @param tree the tree to relabel in place
	 * @throws IllegalArgumentException if a non-leaf node does not carry a CoreLabel
	 */
	static void setSentimentLabels(Tree tree) {
		if (!tree.isLeaf()) {
			for (Tree subtree : tree.children()) {
				setSentimentLabels(subtree);
			}

			Label nodeLabel = tree.label();
			if (nodeLabel instanceof CoreLabel) {
				int predictedClass = RNNCoreAnnotations.getPredictedClass(tree);
				((CoreLabel) nodeLabel).setValue(Integer.toString(predictedClass));
			} else {
				throw new IllegalArgumentException("Required a tree with CoreLabels");
			}
		}
	}

	/**
	 * Replaces the label value of every non-leaf node with its node index.
	 * Numbering is pre-order: a node receives its index before any of its
	 * children do, and leaves do not consume an index.
	 *
	 * @param tree  the (sub)tree to relabel in place
	 * @param index the index to assign to {@code tree} itself
	 * @return the next unused index after this subtree has been numbered
	 */
	static int setIndexLabels(Tree tree, int index) {
		if (!tree.isLeaf()) {
			tree.label().setValue(Integer.toString(index));
			int nextIndex = index + 1;
			for (Tree subtree : tree.children()) {
				nextIndex = setIndexLabels(subtree, nextIndex);
			}
			return nextIndex;
		}
		return index;
	}

	/**
	 * Prints one line per non-leaf node: the node index followed by every
	 * element of the node vector, formatted with {@code NF}. Nodes are
	 * numbered in the same order as {@code setIndexLabels}.
	 *
	 * @param out   the stream to print to
	 * @param tree  the (sub)tree whose vectors are printed
	 * @param index the index of {@code tree} itself
	 * @return the next unused index after this subtree
	 */
	static int outputTreeVectors(PrintStream out, Tree tree, int index) {
		if (!tree.isLeaf()) {
			out.print(" " + index + ":");
			SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
			int position = 0;
			while (position < nodeVector.getNumElements()) {
				out.print(" " + NF.format(nodeVector.get(position)));
				position++;
			}
			out.println();

			int nextIndex = index + 1;
			for (Tree subtree : tree.children()) {
				nextIndex = outputTreeVectors(out, subtree, nextIndex);
			}
			return nextIndex;
		}
		return index;
	}

	/**
	 * Prints one line per non-leaf node: the node index followed by every
	 * element of the node's prediction matrix, formatted with {@code NF}.
	 * Nodes are numbered in the same order as {@code setIndexLabels}.
	 *
	 * @param out   the stream to print to
	 * @param tree  the (sub)tree whose predictions are printed
	 * @param index the index of {@code tree} itself
	 * @return the next unused index after this subtree
	 */
	static int outputTreeScores(PrintStream out, Tree tree, int index) {
		if (!tree.isLeaf()) {
			out.print(" " + index + ":");
			SimpleMatrix scores = RNNCoreAnnotations.getPredictions(tree);
			int position = 0;
			while (position < scores.getNumElements()) {
				out.print(" " + NF.format(scores.get(position)));
				position++;
			}
			out.println();

			int nextIndex = index + 1;
			for (Tree subtree : tree.children()) {
				nextIndex = outputTreeScores(out, subtree, nextIndex);
			}
			return nextIndex;
		}
		return index;
	}

	/**
	 * Renders a single word-like object as a String without supplying a
	 * word/tag separator.
	 *
	 * @param o         the object to render
	 * @param justValue passed through to the three-argument overload
	 * @return the rendered word
	 */
	public static <T> String wordToString(T o, final boolean justValue) {
		final String noSeparator = null;
		return wordToString(o, justValue, noSeparator);
	}

	/**
	 * Renders a single word-like object as a String.
	 * <ul>
	 * <li>A CoreLabel is rendered as its {@code value()}, or its {@code word()} if
	 *     the value is null. Unless {@code justValue} is set, a non-null tag is
	 *     appended after the separator ({@code CoreLabel.TAG_SEPARATOR} when
	 *     {@code separator} is null).</li>
	 * <li>Any other Label is rendered as its {@code value()} when
	 *     {@code justValue} is set.</li>
	 * <li>Otherwise, with a non-null separator, TaggedWord, WordLemmaTag and
	 *     WordTag are rendered through their {@code toString(separator)}.</li>
	 * <li>Everything else falls back to {@code toString()}.</li>
	 * </ul>
	 *
	 * @param o         the object to render
	 * @param justValue if true, Labels are rendered as their bare value
	 * @param separator string placed between word and tag; may be null
	 * @return the rendered word
	 */
	public static <T> String wordToString(T o, final boolean justValue,
			final String separator) {
		if (o instanceof CoreLabel) {
			CoreLabel coreLabel = (CoreLabel) o;
			String text = coreLabel.value();
			if (text == null) {
				text = coreLabel.word();
			}
			if (justValue || coreLabel.tag() == null) {
				return text;
			}
			if (separator == null) {
				return text + CoreLabel.TAG_SEPARATOR + coreLabel.tag();
			}
			return text + separator + coreLabel.tag();
		}

		if (justValue && o instanceof Label) {
			return ((Label) o).value();
		}

		if (separator != null) {
			// These legacy word types have no shared interface, so each is
			// dispatched on individually.
			if (o instanceof TaggedWord) {
				return ((TaggedWord) o).toString(separator);
			}
			if (o instanceof LabeledWord) {
				// NOTE(review): the separator is not passed on for LabeledWord -- confirm this is intended.
				return ((LabeledWord) o).toString();
			}
			if (o instanceof WordLemmaTag) {
				return ((WordLemmaTag) o).toString(separator);
			}
			if (o instanceof WordTag) {
				return ((WordTag) o).toString(separator);
			}
		}
		return o.toString();
	}


	/**
	 * Joins the items of a tokenized sentence with single spaces, rendering
	 * each Label item as its {@code value()}. Equivalent to
	 * {@code listToString(list, true)}.
	 *
	 * @param list The tokenized sentence to print out
	 * @return The tokenized sentence as a String
	 */
	public static <T> String listToString(List<T> list) {
		return listToString(list, true, null);
	}
	/**
	 * Joins the items of a tokenized sentence with single spaces, without
	 * supplying a word/tag separator. Items need not be Labels.
	 *
	 * @param list The tokenized sentence to print out
	 * @param justValue If {@code true}, Label items are rendered as just their
	 *                  {@code value()}; see {@code wordToString} for how
	 *                  other items are rendered.
	 * @return The sentence in String form
	 */
	public static <T> String listToString(List<T> list, final boolean justValue) {
		final String noSeparator = null;
		return listToString(list, justValue, noSeparator);
	}

	/**
	 * Joins the items of a tokenized sentence with single spaces, rendering
	 * each item through {@code wordToString(item, justValue, separator)}.
	 *
	 * @param list The tokenized sentence to print out
	 * @param justValue passed through to {@code wordToString}
	 * @param separator The string used to separate Word and Tag
	 *                  in TaggedWord, etc; may be null
	 * @return The sentence in String form; empty for an empty list
	 */
	public static <T> String listToString(List<T> list, final boolean justValue,
			final String separator) {
		StringBuilder joined = new StringBuilder();
		boolean first = true;
		for (T word : list) {
			if (!first) {
				joined.append(' ');
			}
			joined.append(wordToString(word, justValue, separator));
			first = false;
		}
		return joined.toString();
	}

	/**
	 * Prints the sentiment analysis of one sentence in each requested style,
	 * in the order the styles are listed.
	 *
	 * @param out           the stream to print to
	 * @param sentence      a sentence carrying a sentiment-annotated tree (and,
	 *                      for ROOT, a sentiment class annotation)
	 * @param outputFormats the styles to print
	 * @throws IllegalArgumentException if a style is not handled
	 */
	static void outputTree(PrintStream out, CoreMap sentence, List<Output> outputFormats) {
		final Tree sentimentTree = sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class);
		for (Output format : outputFormats) {
			switch (format) {
			case ROOT:
				out.println(" " + sentence.get(SentimentCoreAnnotations.SentimentClass.class));
				break;
			case PENNTREES: {
				// Work on a copy so the annotated tree keeps its original labels.
				Tree labeled = sentimentTree.deepCopy();
				setSentimentLabels(labeled);
				out.println(labeled);
				break;
			}
			case VECTORS:
			case PROBABILITIES: {
				// Both styles print the index-labeled copy first, then the
				// per-node numbers taken from the original tree.
				Tree indexed = sentimentTree.deepCopy();
				setIndexLabels(indexed, 0);
				out.println(indexed);
				if (format == Output.VECTORS) {
					outputTreeVectors(out, sentimentTree, 0);
				} else {
					outputTreeScores(out, sentimentTree, 0);
				}
				break;
			}
			default:
				throw new IllegalArgumentException("Unknown output format " + format);
			}
		}
	}

	/**
	 * Reads the given file and returns one Annotation per sentence (TEXT) or
	 * per tree (TREES).
	 *
	 * @param tokenizer     pipeline used to split TEXT input into sentences;
	 *                      not used for TREES input
	 * @param inputFormat   how to interpret the file
	 * @param filename      the file to read
	 * @param filterUnknown for TREES input, read trees with gold labels and
	 *                      filter them with {@code SentimentUtils.filterUnknownRoots}
	 * @return the annotations, each holding exactly one sentence
	 * @throws IllegalArgumentException if the input format is not handled
	 */
	public static List<Annotation> getAnnotations(StanfordCoreNLP tokenizer, Input inputFormat, String filename, boolean filterUnknown) {
		switch (inputFormat) {
		case TEXT:
			return annotationsFromTextFile(tokenizer, filename);
		case TREES:
			return annotationsFromTreeFile(filename, filterUnknown);
		default:
			throw new IllegalArgumentException("Unknown format " + inputFormat);
		}
	}

	/** Tokenizes the whole file and wraps each resulting sentence in its own Annotation. */
	private static List<Annotation> annotationsFromTextFile(StanfordCoreNLP tokenizer, String filename) {
		Annotation document = new Annotation(IOUtils.slurpFileNoExceptions(filename));
		tokenizer.annotate(document);

		List<Annotation> perSentence = Generics.newArrayList();
		for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
			Annotation single = new Annotation(sentence.get(CoreAnnotations.TextAnnotation.class));
			single.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
			perSentence.add(single);
		}
		return perSentence;
	}

	/** Loads the trees in the file and wraps each one as a single-sentence Annotation. */
	private static List<Annotation> annotationsFromTreeFile(String filename, boolean filterUnknown) {
		List<Tree> loaded;
		if (!filterUnknown) {
			loaded = Generics.newArrayList();
			MemoryTreebank treebank = new MemoryTreebank("utf-8");
			treebank.loadPath(filename, null);
			for (Tree tree : treebank) {
				loaded.add(tree);
			}
		} else {
			loaded = SentimentUtils.filterUnknownRoots(SentimentUtils.readTreesWithGoldLabels(filename));
		}

		List<Annotation> perTree = Generics.newArrayList();
		for (Tree tree : loaded) {
			// The sentence text is rebuilt from the tree's leaves.
			CoreMap sentence = new Annotation(listToString(tree.yield()));
			sentence.set(TreeCoreAnnotations.TreeAnnotation.class, tree);
			Annotation wrapper = new Annotation("");
			List<CoreMap> sentences = Collections.singletonList(sentence);
			wrapper.set(CoreAnnotations.SentencesAnnotation.class, sentences);
			perTree.add(wrapper);
		}
		return perTree;
	}

	/**
	 * Runs the tree-based sentiment model on input selected by command-line
	 * style arguments.
	 * <p>
	 * Recognized flags (case-insensitive): {@code -sentimentModel <path>},
	 * {@code -parserModel <path>}, {@code -file <path>},
	 * {@code -fileList <comma-separated paths>}, {@code -stdin},
	 * {@code -input <TEXT|TREES>}, {@code -output <comma-separated Output names>},
	 * {@code -filterUnknown}, {@code -tlppClass <class name>} and {@code -help}
	 * (which terminates the JVM through {@code System.exit(0)}).
	 * Exactly one of {@code -file}, {@code -fileList} or {@code -stdin} must be
	 * given. Results for {@code -file} and {@code -stdin} are printed to
	 * {@code System.out}; for {@code -fileList} the results of each file are
	 * written to {@code <file>.out}.
	 *
	 * @param args the flags described above
	 * @throws IOException if an output file cannot be written or stdin cannot be read
	 * @throws IllegalArgumentException if a flag is unknown or lacks its value,
	 *         if the input source is missing or ambiguous, or if {@code -stdin}
	 *         is combined with {@code -input TREES}
	 */
	public void processTextWithArgs(String[] args) throws IOException {
		String parserModel = null;
		String sentimentModel = null;

		String filename = null;
		String fileList = null;
		boolean stdin = false;

		boolean filterUnknown = false;

		List<Output> outputFormats = Collections.singletonList(Output.ROOT);
		Input inputFormat = Input.TEXT;

		// The binarizer loads this class by name, so the default must be a real
		// class name rather than a placeholder string.
		String tlppClass = "edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams";

		for (int argIndex = 0; argIndex < args.length; ) {
			String flag = args[argIndex];
			if (flag.equalsIgnoreCase("-sentimentModel")) {
				sentimentModel = requireFlagValue(args, argIndex);
				argIndex += 2;
			} else if (flag.equalsIgnoreCase("-parserModel")) {
				parserModel = requireFlagValue(args, argIndex);
				argIndex += 2;
			} else if (flag.equalsIgnoreCase("-file")) {
				filename = requireFlagValue(args, argIndex);
				argIndex += 2;
			} else if (flag.equalsIgnoreCase("-fileList")) {
				fileList = requireFlagValue(args, argIndex);
				argIndex += 2;
			} else if (flag.equalsIgnoreCase("-stdin")) {
				stdin = true;
				argIndex++;
			} else if (flag.equalsIgnoreCase("-input")) {
				// Locale.ROOT keeps the enum lookup independent of the default locale.
				inputFormat = Input.valueOf(requireFlagValue(args, argIndex).toUpperCase(java.util.Locale.ROOT));
				argIndex += 2;
			} else if (flag.equalsIgnoreCase("-output")) {
				String[] formats = requireFlagValue(args, argIndex).split(",");
				outputFormats = new ArrayList<>();
				for (String format : formats) {
					outputFormats.add(Output.valueOf(format.toUpperCase(java.util.Locale.ROOT)));
				}
				argIndex += 2;
			} else if (flag.equalsIgnoreCase("-filterUnknown")) {
				filterUnknown = true;
				argIndex++;
			} else if (flag.equalsIgnoreCase("-tlppClass")) {
				tlppClass = requireFlagValue(args, argIndex);
				argIndex += 2;
			} else if (flag.equalsIgnoreCase("-help")) {
				System.exit(0);
			} else {
				// Report the offending argument itself, not the one after it.
				log.info("Unknown argument " + flag);
				throw new IllegalArgumentException("Unknown argument " + flag);
			}
		}

		// We construct two pipelines. One handles tokenization, if
		// necessary. The other takes tokenized sentences and converts
		// them to sentiment trees.
		Properties pipelineProps = new Properties();
		Properties tokenizerProps = null;
		if (sentimentModel != null) {
			pipelineProps.setProperty("sentiment.model", sentimentModel);
		}
		if (parserModel != null) {
			pipelineProps.setProperty("parse.model", parserModel);
		}
		if (inputFormat == Input.TREES) {
			pipelineProps.setProperty("annotators", "binarizer, sentiment");
			pipelineProps.setProperty("customAnnotatorClass.binarizer", "edu.stanford.nlp.pipeline.BinarizerAnnotator");
			pipelineProps.setProperty("binarizer.tlppClass", tlppClass);
			pipelineProps.setProperty("enforceRequirements", "false");
		} else {
			pipelineProps.setProperty("annotators", "parse, sentiment");
			pipelineProps.setProperty("enforceRequirements", "false");
			tokenizerProps = new Properties();
			tokenizerProps.setProperty("annotators", "tokenize, ssplit");
		}

		if (stdin && tokenizerProps != null) {
			tokenizerProps.setProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "true");
		}

		int count = 0;
		if (filename != null) count++;
		if (fileList != null) count++;
		if (stdin) count++;
		if (count > 1) {
			throw new IllegalArgumentException("Please only specify one of -file, -fileList or -stdin");
		}
		if (count == 0) {
			throw new IllegalArgumentException("Please specify either -file, -fileList or -stdin");
		}
		if (stdin && tokenizerProps == null) {
			// Stdin lines are tokenized before analysis, and no tokenizer is
			// built for TREES input.
			throw new IllegalArgumentException("-stdin is only supported with -input TEXT");
		}

		StanfordCoreNLP tokenizer = (tokenizerProps == null) ? null : new StanfordCoreNLP(tokenizerProps);
		StanfordCoreNLP pipeline = new StanfordCoreNLP(pipelineProps);

		if (filename != null) {
			// Process a file. The pipeline will do tokenization, which
			// means it will split it into sentences as best as possible
			// with the tokenizer.
			List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, filename, filterUnknown);
			for (Annotation annotation : annotations) {
				pipeline.annotate(annotation);

				for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
					System.out.println(sentence);
					outputTree(System.out, sentence, outputFormats);
				}
			}
		} else if (fileList != null) {
			// Process multiple files. Output will go to filename.out
			// for each file.
			for (String file : fileList.split(",")) {
				List<Annotation> annotations = getAnnotations(tokenizer, inputFormat, file, filterUnknown);
				// try-with-resources closes the output file even when annotation fails.
				try (PrintStream pout = new PrintStream(new FileOutputStream(file + ".out"))) {
					for (Annotation annotation : annotations) {
						pipeline.annotate(annotation);

						for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
							pout.println(sentence);
							outputTree(pout, sentence, outputFormats);
						}
					}
					// PrintStream swallows write failures; surface them to the caller.
					if (pout.checkError()) {
						throw new IOException("Failed writing sentiment output to " + file + ".out");
					}
				}
			}
		} else {
			// Process stdin. Each line will be treated as a single sentence.
			log.info("Reading in text from stdin.");
			log.info("Please enter one sentence per line.");
			log.info("Processing will end when EOF is reached.");
			BufferedReader reader = IOUtils.readerFromStdin("utf-8");

			for (String line; (line = reader.readLine()) != null; ) {
				line = line.trim();
				if ( ! line.isEmpty()) {
					Annotation annotation = tokenizer.process(line);
					pipeline.annotate(annotation);
					for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
						outputTree(System.out, sentence, outputFormats);
					}
				} else {
					// Output blank lines for blank lines so the tool can be
					// used for line-by-line text processing
					System.out.println();
				}
			}

		}
	}

	/** Returns the value that follows the flag at {@code flagIndex}, failing clearly if the flag is the last argument. */
	private static String requireFlagValue(String[] args, int flagIndex) {
		if (flagIndex + 1 >= args.length) {
			throw new IllegalArgumentException("Missing value for argument " + args[flagIndex]);
		}
		return args[flagIndex + 1];
	}

	/**
	 * Returns the predicted sentiment class of the longest sentence in the
	 * given text, where length is measured on the sentence's
	 * {@code toString()}. When several sentences share the greatest length,
	 * the earliest one wins.
	 * <p>
	 * A new pipeline (tokenize, ssplit, parse, sentiment) is built on every
	 * call, before the argument is inspected.
	 *
	 * @param expression the text to analyze; may be null or empty
	 * @return the predicted class of the longest sentence, or 0 when the text
	 *         is null, empty or yields no sentence.
	 *         NOTE(review): a predicted class of 0 would be indistinguishable
	 *         from this default -- confirm with callers.
	 */
	public float getNumericSentimentValue(String expression) {
		Properties config = new Properties();
		config.setProperty("annotators", "tokenize, ssplit, parse, sentiment");
		StanfordCoreNLP sentimentPipeline = new StanfordCoreNLP(config);

		if (expression == null || expression.length() == 0) {
			return 0;
		}

		int dominantSentiment = 0;
		int longestLength = 0;
		Annotation analyzed = sentimentPipeline.process(expression);
		for (CoreMap sentence : analyzed.get(CoreAnnotations.SentencesAnnotation.class)) {
			int predicted = RNNCoreAnnotations.getPredictedClass(sentence.get(SentimentAnnotatedTree.class));
			int length = sentence.toString().length();
			if (length > longestLength) {
				longestLength = length;
				dominantSentiment = predicted;
			}
		}
		return dominantSentiment;
	}
	}