/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.pattern_structure;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import junit.framework.TestCase;
import opennlp.tools.fca.ConceptLattice;
import opennlp.tools.fca.FcaWriter;
import opennlp.tools.fca.FormalConcept;
import opennlp.tools.similarity.apps.BingWebQueryRunner;
import opennlp.tools.similarity.apps.HitBase;
import opennlp.tools.textsimilarity.ParseTreeChunk;
import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;

public class PatternStructureTest extends TestCase {
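  // Shared fixtures: a chunker that turns a paragraph into grouped phrase chunks, the
  // linguistic pattern structure under test, and a Bing query runner used by the news tests.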
  ParserChunker2MatcherProcessor chunk_maker = ParserChunker2MatcherProcessor.getInstance();
  LinguisticPatternStructure ps = new LinguisticPatternStructure(0, 0);
  BingWebQueryRunner bqr = new BingWebQueryRunner();
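
  // Builds a pattern structure from six short texts about rental income and business
  // expenses, projects it to a binary context, and checks the resulting concept lattice.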
  public void test6texts() {
    String text1 = "I rent an office space. This office is for my business. I can deduct office rental expense from my business profit to calculate net income.";
    String text2 = "To run my business, I have to rent an office. The net business profit is calculated as follows. Rental expense needs to be subtracted from revenue.";
    String text3 = "To store goods for my retail business I rent some space. When I calculate the net income, I take revenue and subtract business expenses such as office rent.";
    String text4 = "I rent out a first floor unit of my house to a travel business. I need to add the rental income to my profit. However, when I repair my house, I can deduct the repair expense from my rental income.";
    String text5 = "I receive rental income from my office. I have to claim it as a profit in my tax forms. I need to add my rental income to my profits, but subtract rental expenses such as repair from it.";
    String text6 = "I advertised my property as a business rental. Advertisement and repair expenses can be subtracted from the rental income. Remaining rental income needs to be added to my profit and be reported as taxable profit. ";
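
    // Chunk each text into grouped phrases (List<List<ParseTreeChunk>>).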
    List<List<ParseTreeChunk>> chunks1 = chunk_maker.formGroupedPhrasesFromChunksForPara(text1);
    List<List<ParseTreeChunk>> chunks2 = chunk_maker.formGroupedPhrasesFromChunksForPara(text2);
    List<List<ParseTreeChunk>> chunks3 = chunk_maker.formGroupedPhrasesFromChunksForPara(text3);
    List<List<ParseTreeChunk>> chunks4 = chunk_maker.formGroupedPhrasesFromChunksForPara(text4);
    List<List<ParseTreeChunk>> chunks5 = chunk_maker.formGroupedPhrasesFromChunksForPara(text5);
    List<List<ParseTreeChunk>> chunks6 = chunk_maker.formGroupedPhrasesFromChunksForPara(text6);
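
    // Add each text's chunks as an intent; the corresponding extent is the singleton object index {i}.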
    List<List<List<ParseTreeChunk>>> allChunks = Arrays.asList(chunks1, chunks2, chunks3, chunks4, chunks5, chunks6);
    for (int i = 0; i < allChunks.size(); i++) {
      LinkedHashSet<Integer> obj = new LinkedHashSet<Integer>();
      obj.add(i);
      ps.AddIntent(allChunks.get(i), obj, 0);
    }

    ps.logStability();
    ps.printLatticeExtended();
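
    // Convert the pattern structure to a binary context and print it row by row.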
    int[][] binaryContext = ps.toContext(3);
    for (int i = 0; i < binaryContext.length; i++) {
      System.out.println(Arrays.toString(binaryContext[i]));
    }
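
    // Rebuild a concept lattice from the binary context and check its size and dimensions.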
    ConceptLattice new_cl = new ConceptLattice(binaryContext.length, binaryContext[0].length, binaryContext, true);
    new_cl.printLatticeStats();
    new_cl.printLatticeFull();

    assertEquals(7, new_cl.getLattice().size());
    assertEquals(21, new_cl.getAttributesCount());
    assertEquals(3, new_cl.getObjectCount());
  }

  // Test 2: build a pattern structure and concept lattice from web search results for a news query.
  public void testQueryNews() {
    List<List<ParseTreeChunk>> chunks = null;
    BingWebQueryRunner bq = new BingWebQueryRunner();
    String q = "angela merkel";
    // Alternative queries: "barack obama", "lady gaga", "putin"
    ArrayList<HitBase> hb = (ArrayList<HitBase>) bq.runSearch(q, 10);
    int cnt = 0;
    for (HitBase news : hb) {
      LinkedHashSet<Integer> obj = new LinkedHashSet<Integer>();
      obj.add(cnt);
      chunks = chunk_maker.formGroupedPhrasesFromChunksForPara(news.getAbstractText());
      System.out.println(chunks);
      ps.AddIntent(chunks, obj, 0);
      cnt++;
    }
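
    // Print the lattice, project it to a binary context, rebuild it as a concept lattice, and export it in CXT format.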
    ps.logStability();
    System.out.println("LATTICE");
    ps.printLatticeExtended();

    int[][] binaryContext = ps.toContext(cnt);
    for (int i = 0; i < binaryContext.length; i++) {
      System.out.println(Arrays.toString(binaryContext[i]));
    }

    ConceptLattice new_cl = new ConceptLattice(binaryContext.length, binaryContext[0].length, binaryContext, true);
    new_cl.printLatticeStats();
    new_cl.printLatticeFull();

    FcaWriter wr = new FcaWriter();
    wr.WriteAsCxt("res.cxt", new_cl);

    System.out.println("Extent PS " + ps.conceptList.size());
    // for (int i = 0; i < ps.conceptList.size(); i++) {
    //   System.out.println(ps.conceptList.get(i).extent);
    // }
    System.out.println("Extent CL " + new_cl.getLattice().size());
    // for (int i = 0; i < new_cl.getLattice().size(); i++) {
    //   System.out.println(new_cl.getLattice().get(i).getExtent());
    // }
  }
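
  // Test 3: search Yahoo News for "merkel", strip the leading dateline from each abstract,
  // build the pattern structure, and write lattice statistics and the lattice itself to files.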
  public void testNews() {
    List<List<ParseTreeChunk>> chunks = null;
    ArrayList<HitBase> result = (ArrayList<HitBase>) bqr.runSearch("site:http://news.yahoo.com " + "merkel", 10);
    System.out.println(" ResultSize " + result.size());
    int ind = -1;
    String text_result = "";
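
    // For each hit, strip the text up to the ") -" delimiter (a typical news dateline) from the abstract,
    // chunk the remainder, and add it to the pattern structure under the hit's index.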
    for (int i = 0; i < result.size(); i++) {
      System.out.println(result.get(i).getAbstractText());
      ind = result.get(i).getAbstractText().indexOf(") -");
      if (ind < 0) {
        ind = result.get(i).getAbstractText().indexOf(") �");
      }
      if (ind > 0) {
        text_result = result.get(i).getAbstractText().substring(ind + 3);
      } else {
        text_result = result.get(i).getAbstractText();
      }
      LinkedHashSet<Integer> obj = new LinkedHashSet<Integer>();
      obj.add(i);
      chunks = chunk_maker.formGroupedPhrasesFromChunksForPara(text_result);
      ps.AddIntent(chunks, obj, 0);
    }
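
    // Log stability, build the concept lattice from the binary context, and write statistics
    // and the lattice to text, CSV, and CXT files.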
    ps.logStability();
    ps.printLatticeExtended();

    int[][] binaryContext = ps.toContext(result.size());
    ConceptLattice new_cl = new ConceptLattice(binaryContext.length, binaryContext[0].length, binaryContext, true);

    FcaWriter wt = new FcaWriter();
    wt.WriteStatsToTxt("merkel_stats.txt", new_cl, 0);
    wt.WriteStatsToCvs("merkel_stats.csv", new_cl, ps.conceptList.size());
    wt.WriteAsCxt("merkel_lattice.cxt", new_cl);

    PatternStructureWriter pswt = new PatternStructureWriter();
    pswt.WriteStatsToTxt("ps_res.txt", ps);

    System.out.println("Extent PS " + ps.conceptList.size());
    System.out.println("Extent CL " + new_cl.getLattice().size());
  }
}