// blob: ba8a14039f639d4926db04a5cb9067758fbcb6fe [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.opinion_processor;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import opennlp.tools.jsmlearning.ProfileReaderWriter;
import opennlp.tools.parse_thicket.ParseTreeNode;
import opennlp.tools.textsimilarity.ParseTreeChunk;
/**
 * Stand-alone runner that pulls review texts out of a JSON-like review dump, runs
 * {@link NamedEntityExtractor#extractEntities} on each text and writes the extracted
 * sentiment phrases to a CSV report.
 *
 * <p>The input location ({@code reviewSource}) is a hard-coded absolute path, and the report
 * file name is passed to {@link ProfileReaderWriter#writeReport} as a bare file name.
 */
public class AbstractEngineRunner {
    /** Files gathered by {@link #addFiles(File)}. Initialised eagerly so collection never hits a null list. */
    private List<File> queue = new ArrayList<File>();

    /** Review dump that {@link #processJSONfileWithReviews()} reads. */
    private final static String reviewSource = "/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/opinions/macbook_pro.txt";

    NamedEntityExtractor neExtractor = new NamedEntityExtractor();

    /**
     * Reads the review dump, extracts every substring located between the
     * {@code reviewText": "} and {@code ", "overall} markers, and for each such review adds
     * four rows to the report: the review text, the extracted phrases as plain words, the same
     * phrases rendered via {@code ParseTreeNode.toString()}, and a separator row.
     *
     * <p>If the file cannot be read, or contains no review markers, the method reports the
     * problem and returns without writing a report (previously both cases ended in a
     * {@link NullPointerException}).
     */
    public void processJSONfileWithReviews() {
        List<String[]> report = new ArrayList<String[]>();
        report.add(new String[] { "text", "phrases of potential interest list" });

        String content;
        try {
            // Read explicitly as UTF-8 rather than relying on the platform default charset.
            content = FileUtils.readFileToString(new File(reviewSource), "UTF-8");
        } catch (IOException e) {
            e.printStackTrace();
            return; // nothing to process without the input
        }

        // substringsBetween returns null (not an empty array) when nothing matches.
        String[] texts = StringUtils.substringsBetween(content, "reviewText\": \"", "\", \"overall");
        if (texts == null) {
            System.out.println("No review texts found in " + reviewSource);
            return;
        }

        for (String text : texts) {
            EntityExtractionResult result = neExtractor.extractEntities(text);
            report.add(new String[] { text });

            List<String> stringPhrases = new ArrayList<String>();
            List<String> nodePhrases = new ArrayList<String>();
            for (List<ParseTreeNode> chList : result.extractedSentimentPhrases) {
                StringBuilder words = new StringBuilder();
                StringBuilder nodes = new StringBuilder();
                for (ParseTreeNode ch : chList) {
                    words.append(ch.getWord()).append(' ');
                    nodes.append(ch.toString()).append(' ');
                }
                stringPhrases.add(words.toString().trim());
                nodePhrases.add(nodes.toString().trim());
            }
            report.add(stringPhrases.toArray(new String[0]));
            report.add(nodePhrases.toArray(new String[0]));
            report.add(new String[] { "-----------------------------" });

            // NOTE(review): the report is rewritten after every review, presumably so partial
            // results survive a failure on a later review - confirm before hoisting out of the loop.
            ProfileReaderWriter.writeReport(report, "nameEntitiesTopicsOfInterestExtracted.csv");
        }
    }

    /**
     * Recursively collects regular files under {@code file} into {@code queue}.
     * Entries whose name starts with {@code "."} are skipped when walking a directory.
     *
     * @param file a file to enqueue, or a directory to walk; a non-existent path is reported
     *             and ignored
     */
    private void addFiles(File file) {
        if (!file.exists()) {
            System.out.println(file + " does not exist.");
            return; // do not enqueue a path that is not there
        }
        if (file.isDirectory()) {
            // listFiles() returns null on an I/O error or when the directory cannot be read.
            File[] children = file.listFiles();
            if (children == null) {
                System.out.println(file + " could not be listed.");
                return;
            }
            for (File f : children) {
                if (f.getName().startsWith(".")) {
                    continue;
                }
                addFiles(f);
                System.out.println(f.getName());
            }
        } else {
            queue.add(file);
        }
    }

    /**
     * Entry point: builds a runner and processes the hard-coded review file.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        AbstractEngineRunner runner = new AbstractEngineRunner();
        runner.processJSONfileWithReviews();
    }
}
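/*
 * Disabled directory-based variant, retained for reference only (not compiled).
 * It calls addFiles(File) and an extractEntities(String) method that the class above does not define.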
public void processDirectory(String path){
List<String[]> report = new ArrayList<String[]>();
report.add(new String[] { "filename", "named entity list", "phrases of potential interest list" });
List<String> allNamedEntities = new ArrayList<String>();
addFiles(new File(path));
for(File f: queue){
List<String> entities = (List<String>) extractEntities(f.getAbsolutePath()).getFirst();
List<String> opinions = (List<String>) extractEntities(f.getAbsolutePath()).getSecond();
report.add(new String[]{ f.getName(), entities.toString(), opinions.toString()});
ProfileReaderWriter.writeReport(report, "nameEntitiesExtracted.csv");
allNamedEntities.addAll(entities);
allNamedEntities = new ArrayList<String>(new HashSet<String> (allNamedEntities ));
}
ProfileReaderWriter.writeReport(report, "nameEntitiesTopicsOfInterestExtracted.csv");
}
} */