blob: d910352ae7328c381f9a6671b9b6901236aafe57 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.pattern_structure;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import opennlp.tools.parse_thicket.ParseTreeNode;
import opennlp.tools.textsimilarity.ParseTreeChunk;
import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Incrementally built lattice of {@link PhraseConcept}s whose intents are grouped phrase chunks
 * ({@code List<List<ParseTreeChunk>>}).
 *
 * <p>Concepts are stored in {@link #conceptList}; a concept is referred to by its index in that
 * list, and the parent links of each concept are sets of such indices. The constructor seeds the
 * list with a single "bottom" concept at index 0. New concepts are inserted with
 * {@link #AddIntent(List, int)}, which uses {@link ParseTreeMatcherDeterministic} to compute the
 * common part of two intents.
 *
 * <p>NOTE(review): nothing in this class synchronizes access to {@link #conceptList}; presumably
 * instances are meant to be used from a single thread - confirm against callers.
 */
public class PhrasePatternStructure {

  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /** Object count supplied at construction; only reported by {@link #printLatticeStats()}. */
  final int objectCount;

  /** Attribute count supplied at construction; only reported by {@link #printLatticeStats()}. */
  final int attributeCount;

  /** All concepts created so far; the index of a concept in this list is its position. */
  public final List<PhraseConcept> conceptList;

  /** Matcher used by {@link #AddIntent(List, int)} to intersect two intents. */
  final ParseTreeMatcherDeterministic md;

  /**
   * Creates a structure that contains only the bottom concept (position 0).
   *
   * @param objectCounts value stored as {@link #objectCount}
   * @param attributeCounts value stored as {@link #attributeCount}
   */
  public PhrasePatternStructure(int objectCounts, int attributeCounts) {
    objectCount = objectCounts;
    attributeCount = attributeCounts;
    conceptList = new ArrayList<>();
    PhraseConcept bottom = new PhraseConcept();
    md = new ParseTreeMatcherDeterministic();
    bottom.setPosition(0);
    conceptList.add(bottom);
  }

  /**
   * Climbs the parent links starting at {@code Generator} for as long as some parent's intent
   * contains every element of {@code intent}.
   *
   * @param intent the intent being looked up
   * @param Generator index of the concept to start from
   * @return index of the last concept reached; {@code Generator} itself if none of its parents
   *         qualifies
   */
  public int GetMaximalConcept(List<List<ParseTreeChunk>> intent, int Generator) {
    boolean parentIsMaximal = true;
    while (parentIsMaximal) {
      parentIsMaximal = false;
      for (int parent : conceptList.get(Generator).parents) {
        if (conceptList.get(parent).intent.containsAll(intent)) {
          // Move one level up and rescan the parents of the new generator.
          Generator = parent;
          parentIsMaximal = true;
          break;
        }
      }
    }
    return Generator;
  }

  /**
   * Inserts a concept with the given intent, or finds the existing one, and returns its position.
   *
   * <p>Steps, in order:
   * <ol>
   *   <li>Replace {@code generator} by the result of {@link #GetMaximalConcept(List, int)}.</li>
   *   <li>If that concept's intent equals {@code intent}, return its position unchanged.</li>
   *   <li>For every parent of the generator whose intent is not fully contained in
   *       {@code intent}, recursively add the matcher's intersection of the two intents and use
   *       the resulting concept as the candidate instead of the parent.</li>
   *   <li>Collect the candidates into the new concept's parent set, dropping any candidate (or
   *       previously collected parent) whose intent is contained in another's.</li>
   *   <li>Append the new concept, link it as a parent of the generator, and move the collected
   *       parents from the generator to the new concept.</li>
   * </ol>
   *
   * @param intent intent of the concept to add
   * @param generator index of the concept from which the search starts
   * @return position in {@link #conceptList} of the concept holding {@code intent}
   */
  public int AddIntent(List<List<ParseTreeChunk>> intent, int generator) {
    LOG.debug("debug called for {}", intent);
    generator = GetMaximalConcept(intent, generator);
    // conceptList is append-only, so this reference stays valid across the recursive calls below.
    final PhraseConcept generatorConcept = conceptList.get(generator);
    if (generatorConcept.intent.equals(intent)) {
      LOG.debug("at generator: {}", generatorConcept.intent);
      LOG.debug("to add: {}", intent);
      LOG.debug("already generated");
      return generator;
    }

    final Set<Integer> generatorParents = generatorConcept.parents;
    final Set<Integer> newParents = new HashSet<>();
    for (int generatorParent : generatorParents) {
      int candidate = generatorParent;
      final List<List<ParseTreeChunk>> parentIntent = conceptList.get(candidate).intent;
      if (!intent.containsAll(parentIntent)) {
        // The parent's intent is not covered by the new intent: insert their common part
        // above the parent and continue with that concept as the candidate.
        final List<List<ParseTreeChunk>> intersection =
            md.matchTwoSentencesGroupedChunksDeterministic(intent, parentIntent);
        LOG.debug("recursive call (inclusion)");
        candidate = AddIntent(intersection, candidate);
      }

      LOG.debug("now iterating over parents");
      final List<List<ParseTreeChunk>> candidateIntent = conceptList.get(candidate).intent;
      boolean addParents = true;
      final Iterator<Integer> iterator = newParents.iterator();
      while (iterator.hasNext()) {
        final List<List<ParseTreeChunk>> acceptedIntent = conceptList.get(iterator.next()).intent;
        if (acceptedIntent.containsAll(candidateIntent)) {
          // An already accepted parent covers the candidate: the candidate is redundant.
          addParents = false;
          break;
        }
        if (candidateIntent.containsAll(acceptedIntent)) {
          // The candidate covers an accepted parent: drop that parent.
          // Iterator.remove is required here; removing through the set would fail the iteration.
          iterator.remove();
        }
      }
      if (addParents) {
        newParents.add(candidate);
      }
    }

    LOG.debug("size of lattice: {}", conceptList.size());
    final PhraseConcept newConcept = new PhraseConcept();
    newConcept.setIntent(intent);
    newConcept.setPosition(conceptList.size());
    conceptList.add(newConcept);
    generatorConcept.parents.add(newConcept.position);

    final Set<Integer> newConceptParents = conceptList.get(newConcept.position).parents;
    for (Integer newParent : newParents) {
      // Set.remove is a no-op when the element is absent, so no contains() check is needed.
      generatorConcept.parents.remove(newParent);
      newConceptParents.add(newParent);
    }
    return newConcept.position;
  }

  /** Logs the stored object/attribute counts and the current number of concepts at INFO level. */
  public void printLatticeStats() {
    LOG.info("Lattice stats:");
    LOG.info("max_object_index = {}", objectCount);
    LOG.info("max_attribute_index = {}", attributeCount);
    LOG.info("Current concept count = {}", conceptList.size());
  }

  /** Calls {@link #printConceptByPosition(int)} for every concept, in list order. */
  public void printLattice() {
    for (int i = 0; i < conceptList.size(); ++i) {
      printConceptByPosition(i);
    }
  }

  /**
   * Logs the position at DEBUG level and delegates to {@code PhraseConcept.printConcept()}.
   *
   * @param index position of the concept in {@link #conceptList}
   */
  public void printConceptByPosition(int index) {
    LOG.debug("Concept at position {}", index);
    conceptList.get(index).printConcept();
  }

  /**
   * Converts phrases given as node lists into chunks and groups them by the phrase type of each
   * phrase's first node.
   *
   * <p>{@code null} or empty phrases are skipped, and phrases whose type is {@code null} or is
   * none of {@code "NP"}, {@code "VP"}, {@code "PP"} are not put into any group.
   *
   * @param phrs phrases, each a list of parse tree nodes
   * @return a list of exactly three lists, in this order: NP chunks, VP chunks, PP chunks
   */
  public List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(
      List<List<ParseTreeNode>> phrs) {
    final List<ParseTreeChunk> nps = new ArrayList<>();
    final List<ParseTreeChunk> vps = new ArrayList<>();
    final List<ParseTreeChunk> pps = new ArrayList<>();
    for (List<ParseTreeNode> ps : phrs) {
      if (ps == null || ps.isEmpty()) {
        // No first node to take the phrase type from; get(0) would throw.
        continue;
      }
      final ParseTreeChunk ch = convertNodeListIntoChunk(ps);
      final String ptype = ps.get(0).getPhraseType();
      LOG.debug("{}", ps);
      // Constant-first comparison: a node without a phrase type must not cause an NPE.
      if ("NP".equals(ptype)) {
        nps.add(ch);
      } else if ("VP".equals(ptype)) {
        vps.add(ch);
      } else if ("PP".equals(ptype)) {
        pps.add(ch);
      }
    }
    final List<List<ParseTreeChunk>> results = new ArrayList<>(3);
    results.add(nps);
    results.add(vps);
    results.add(pps);
    return results;
  }

  /**
   * Builds a chunk from the words and POS tags of the given nodes; the chunk's main POS is the
   * phrase type of the first node.
   *
   * @param ps nodes of one phrase; must contain at least one node
   * @return the chunk built from {@code ps}
   */
  private ParseTreeChunk convertNodeListIntoChunk(List<ParseTreeNode> ps) {
    final List<String> lemmas = new ArrayList<>(ps.size());
    final List<String> poss = new ArrayList<>(ps.size());
    for (ParseTreeNode n : ps) {
      lemmas.add(n.getWord());
      poss.add(n.getPos());
    }
    final ParseTreeChunk ch = new ParseTreeChunk(lemmas, poss, 0, 0);
    ch.setMainPOS(ps.get(0).getPhraseType());
    return ch;
  }
}