// blob: 25d5ac5c7ccf5e442510a3ff27ef7cfdd9f0d07e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.pattern_structure;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import opennlp.tools.parse_thicket.ParseTreeNode;
import opennlp.tools.textsimilarity.ParseTreeChunk;
import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic;
/**
 * A lattice of {@link PhraseConcept}s whose intents are grouped phrase chunks
 * ({@code List<List<ParseTreeChunk>>}).
 *
 * <p>Concepts are stored in {@link #conceptList}; a concept is identified by
 * its index in that list, and every concept keeps the indices of its parents.
 * The lattice starts with a single "bottom" concept at position 0 and grows
 * through {@link #AddIntent(List, int)}.
 *
 * <p>Intent inclusion is tested with {@code containsAll} and intent
 * "intersection" is computed by
 * {@link ParseTreeMatcherDeterministic#matchTwoSentencesGroupedChunksDeterministic}.
 *
 * <p>This class does no synchronization of its own.
 */
public class PhrasePatternStructure {

  /** Number of objects; only reported by {@link #printLatticeStats()}. */
  int objectCount;

  /** Number of attributes; only reported by {@link #printLatticeStats()}. */
  int attributeCount;

  /** All concepts of the lattice; the list index is the concept's position. */
  public List<PhraseConcept> conceptList;

  /** Matcher used to generalize (intersect) two intents. */
  ParseTreeMatcherDeterministic md;

  /**
   * Creates a lattice that contains only the bottom concept at position 0.
   *
   * @param objectCounts    number of objects, kept for statistics only
   * @param attributeCounts number of attributes, kept for statistics only
   */
  public PhrasePatternStructure(int objectCounts, int attributeCounts) {
    objectCount = objectCounts;
    attributeCount = attributeCounts;
    conceptList = new ArrayList<PhraseConcept>();
    md = new ParseTreeMatcherDeterministic();

    // The bottom concept keeps whatever intent PhraseConcept's constructor
    // gives it; only its position is set here.
    PhraseConcept bottom = new PhraseConcept();
    bottom.setPosition(0);
    conceptList.add(bottom);
  }

  /**
   * Climbs from the given concept through parent links for as long as some
   * parent's intent still contains every element of {@code intent}.
   *
   * @param intent    the intent to look for
   * @param Generator index of the concept to start from
   * @return index of the concept reached when no parent contains
   *         {@code intent} any more (possibly {@code Generator} itself)
   */
  public int GetMaximalConcept(List<List<ParseTreeChunk>> intent, int Generator) {
    int current = Generator;
    boolean movedUp = true;
    while (movedUp) {
      movedUp = false;
      for (int parent : conceptList.get(current).parents) {
        if (conceptList.get(parent).intent.containsAll(intent)) {
          // Take the first matching parent and restart the scan from there.
          current = parent;
          movedUp = true;
          break;
        }
      }
    }
    return current;
  }

  /**
   * Inserts a concept with the given intent above {@code generator}, creating
   * recursively any intermediate concepts required by the generator's parents.
   *
   * <p>If the maximal concept reachable from {@code generator} already has an
   * intent equal to {@code intent}, nothing is added and that concept's index
   * is returned.
   *
   * <p>Progress messages are written to {@code System.out}.
   *
   * @param intent    intent of the concept to insert
   * @param generator index of a concept to start the search from
   * @return index in {@link #conceptList} of the concept carrying
   *         {@code intent}
   */
  public int AddIntent(List<List<ParseTreeChunk>> intent, int generator) {
    System.out.println("debug");
    System.out.println("called for " + intent);

    generator = GetMaximalConcept(intent, generator);
    if (conceptList.get(generator).intent.equals(intent)) {
      System.out.println("at generator:" + conceptList.get(generator).intent);
      System.out.println("to add:" + intent);
      System.out.println("already generated");
      return generator;
    }

    Set<Integer> generatorParents = conceptList.get(generator).parents;
    Set<Integer> newParents = new HashSet<Integer>();
    for (int candidate : generatorParents) {
      if (!intent.containsAll(conceptList.get(candidate).intent)) {
        // The candidate's intent is not covered by the new intent: replace the
        // candidate by the concept of their common generalization.
        List<List<ParseTreeChunk>> intersection =
            md.matchTwoSentencesGroupedChunksDeterministic(
                intent, conceptList.get(candidate).intent);
        System.out.println("recursive call (inclusion)");
        candidate = AddIntent(intersection, candidate);
      }

      boolean addParents = true;
      System.out.println("now iterating over parents");
      // An explicit iterator is needed because entries may be removed while
      // the set is being scanned.
      Iterator<Integer> iterator = newParents.iterator();
      while (iterator.hasNext()) {
        Integer parent = iterator.next();
        if (conceptList.get(parent).intent.containsAll(conceptList.get(candidate).intent)) {
          // An already collected parent covers the candidate: skip the candidate.
          addParents = false;
          break;
        }
        if (conceptList.get(candidate).intent.containsAll(conceptList.get(parent).intent)) {
          // The candidate covers this collected parent: drop the parent.
          iterator.remove();
        }
      }
      if (addParents) {
        newParents.add(candidate);
      }
    }

    System.out.println("size of lattice: " + conceptList.size());
    PhraseConcept newConcept = new PhraseConcept();
    newConcept.setIntent(intent);
    newConcept.setPosition(conceptList.size());
    conceptList.add(newConcept);

    // Re-link: the new concept becomes a parent of the generator and takes
    // over the collected parents from it.
    conceptList.get(generator).parents.add(newConcept.position);
    for (int newParent : newParents) {
      conceptList.get(generator).parents.remove(Integer.valueOf(newParent));
      newConcept.parents.add(newParent);
    }
    return newConcept.position;
  }

  /** Prints the stored object/attribute counts and the current number of concepts. */
  public void printLatticeStats() {
    System.out.println("Lattice stats");
    System.out.println("max_object_index = " + objectCount);
    System.out.println("max_attribute_index = " + attributeCount);
    System.out.println("Current concept count = " + conceptList.size());
  }

  /** Prints every concept of the lattice in position order. */
  public void printLattice() {
    for (int i = 0; i < conceptList.size(); ++i) {
      printConceptByPosition(i);
    }
  }

  /**
   * Prints the concept stored at the given position.
   *
   * @param index position of the concept in {@link #conceptList}
   */
  public void printConceptByPosition(int index) {
    System.out.println("Concept at position " + index);
    conceptList.get(index).printConcept();
  }

  /**
   * Converts phrases given as node lists into chunks and groups them by the
   * phrase type of their first node.
   *
   * <p>Phrases that are {@code null} or empty are skipped, as are phrases
   * whose type is not NP, VP or PP (including a {@code null} type). Each
   * processed phrase is echoed to {@code System.out}.
   *
   * @param phrs phrases, each a list of parse tree nodes; {@code null} is
   *             treated as no phrases
   * @return a list of exactly three lists, in this order: NP chunks, VP
   *         chunks, PP chunks
   */
  public List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(
      List<List<ParseTreeNode>> phrs) {
    List<ParseTreeChunk> nps = new ArrayList<ParseTreeChunk>();
    List<ParseTreeChunk> vps = new ArrayList<ParseTreeChunk>();
    List<ParseTreeChunk> pps = new ArrayList<ParseTreeChunk>();

    if (phrs != null) {
      for (List<ParseTreeNode> ps : phrs) {
        if (ps == null || ps.isEmpty()) {
          // Without a first node there is no phrase type to group by.
          continue;
        }
        ParseTreeChunk ch = convertNodeListIntoChunk(ps);
        String ptype = ps.get(0).getPhraseType();
        System.out.println(ps);
        // Constant-first equals keeps a null phrase type from throwing.
        if ("NP".equals(ptype)) {
          nps.add(ch);
        } else if ("VP".equals(ptype)) {
          vps.add(ch);
        } else if ("PP".equals(ptype)) {
          pps.add(ch);
        }
      }
    }

    List<List<ParseTreeChunk>> results = new ArrayList<List<ParseTreeChunk>>();
    results.add(nps);
    results.add(vps);
    results.add(pps);
    return results;
  }

  /**
   * Builds a chunk from the words and POS tags of the given nodes and sets
   * its main POS to the phrase type of the first node.
   *
   * @param ps nodes of one phrase; must contain at least one node
   * @return the chunk built from {@code ps}
   */
  private ParseTreeChunk convertNodeListIntoChunk(List<ParseTreeNode> ps) {
    List<String> lemmas = new ArrayList<String>();
    List<String> poss = new ArrayList<String>();
    for (ParseTreeNode n : ps) {
      lemmas.add(n.getWord());
      poss.add(n.getPos());
    }
    ParseTreeChunk ch = new ParseTreeChunk(lemmas, poss, 0, 0);
    ch.setMainPOS(ps.get(0).getPhraseType());
    return ch;
  }
}