blob: ebf44fb20fd023ec7baf55adfc962d1d7d74afd2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.pattern_structure;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import opennlp.tools.textsimilarity.ParseTreeChunk;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A {@link PhrasePatternStructure} whose concepts additionally carry an extent
 * (a set of integer object ids) alongside their phrase-based intent.
 *
 * <p>The lattice itself lives in the inherited {@code conceptList}; concepts refer to
 * each other by their index in that list.
 */
public class LinguisticPatternStructure extends PhrasePatternStructure {

  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /**
   * Creates the structure by delegating both counts to the superclass constructor.
   *
   * @param objectCounts    forwarded unchanged to {@link PhrasePatternStructure}
   * @param attributeCounts forwarded unchanged to {@link PhrasePatternStructure}
   */
  public LinguisticPatternStructure(int objectCounts, int attributeCounts) {
    super(objectCounts, attributeCounts);
  }

  /**
   * Adds {@code extent} to every ancestor of the concept at {@code curNode}, walking the
   * {@code parents} links recursively. The concept at {@code curNode} itself is not modified.
   *
   * @param extent  object ids to add to each ancestor
   * @param curNode index in {@code conceptList} of the concept whose ancestors are updated
   */
  public void AddExtentToAncestors(LinkedHashSet<Integer> extent, int curNode) {
    // An empty parent set simply ends the recursion.
    for (int parent : conceptList.get(curNode).parents) {
      conceptList.get(parent).addExtents(extent);
      AddExtentToAncestors(extent, parent);
    }
  }

  /**
   * Inserts a concept with the given intent and extent into the lattice, or reuses an
   * existing concept that already has exactly this intent.
   *
   * <p>The search starts from the concept returned by
   * {@code GetMaximalConcept(intent, generator)}. If that concept's intent equals
   * {@code intent}, only the extents of its ancestors are extended and its index is returned.
   * Otherwise a new concept is appended to {@code conceptList} and linked between the
   * generator and the computed set of new parents; intersections with the generator's
   * parents may be inserted recursively along the way.
   *
   * @param intent    intent of the concept to add
   * @param extent    object ids associated with {@code intent}
   * @param generator index of the concept to start the search from
   * @return index in {@code conceptList} of the concept that carries {@code intent}
   */
  public int AddIntent(List<List<ParseTreeChunk>> intent, LinkedHashSet<Integer> extent,
      int generator) {
    LOG.debug("debug called for {}", intent);
    generator = GetMaximalConcept(intent, generator);
    if (conceptList.get(generator).intent.equals(intent)) {
      LOG.debug("at generator: {}", conceptList.get(generator).intent);
      LOG.debug("to add: {}", intent);
      LOG.debug("already generated");
      AddExtentToAncestors(extent, generator);
      return generator;
    }

    Set<Integer> generatorParents = conceptList.get(generator).parents;
    Set<Integer> newParents = new HashSet<>();
    for (int generatorParent : generatorParents) {
      int candidate = generatorParent;
      if (!intent.containsAll(conceptList.get(candidate).intent)) {
        List<List<ParseTreeChunk>> intersection =
            md.matchTwoSentencesGroupedChunksDeterministic(
                intent, conceptList.get(candidate).intent);
        LinkedHashSet<Integer> mergedExtent = new LinkedHashSet<>();
        mergedExtent.addAll(conceptList.get(candidate).extent);
        mergedExtent.addAll(extent);
        if (intent.size() != intersection.size()) {
          LOG.debug("recursive call (inclusion)");
          LOG.debug("{}----{}", intent, intersection);
          // The recursive call yields the concept that replaces the candidate.
          candidate = AddIntent(intersection, mergedExtent, candidate);
        }
      }

      // Skip the candidate if an already collected parent's intent contains all of the
      // candidate's intent; drop collected parents whose intent the candidate contains.
      boolean addParents = true;
      Iterator<Integer> iterator = newParents.iterator();
      while (iterator.hasNext()) {
        Integer parent = iterator.next();
        if (conceptList.get(parent).intent.containsAll(conceptList.get(candidate).intent)) {
          addParents = false;
          break;
        }
        if (conceptList.get(candidate).intent.containsAll(conceptList.get(parent).intent)) {
          iterator.remove();
        }
      }
      if (addParents) {
        newParents.add(candidate);
      }
    }

    LOG.debug("size of lattice: {}", conceptList.size());
    PhraseConcept newConcept = new PhraseConcept();
    newConcept.setIntent(intent);
    LinkedHashSet<Integer> newExtent = new LinkedHashSet<>();
    newExtent.addAll(conceptList.get(generator).extent);
    newExtent.addAll(extent);
    newConcept.addExtents(newExtent);
    newConcept.setPosition(conceptList.size());
    conceptList.add(newConcept);

    // Hook the new concept in directly above the generator.
    conceptList.get(generator).parents.add(newConcept.position);
    conceptList.get(newConcept.position).children.add(generator);
    for (int newParent : newParents) {
      // The new concept now sits between the generator and this parent.
      if (conceptList.get(generator).parents.contains(newParent)) {
        conceptList.get(generator).parents.remove(newParent);
        conceptList.get(newParent).children.remove(generator);
      }
      conceptList.get(newConcept.position).parents.add(newParent);
      conceptList.get(newParent).addExtents(newExtent);
      AddExtentToAncestors(newExtent, newParent);
      conceptList.get(newParent).children.add(newConcept.position);
    }
    return newConcept.position;
  }

  /** Prints every concept of the lattice via {@link #printConceptByPositionExtended(int)}. */
  public void printLatticeExtended() {
    for (int i = 0; i < conceptList.size(); ++i) {
      printConceptByPositionExtended(i);
    }
  }

  /**
   * Logs the position and then delegates to {@code printConceptExtended()} of that concept.
   *
   * @param index index of the concept in {@code conceptList}
   */
  public void printConceptByPositionExtended(int index) {
    LOG.debug("Concept at position {}", index);
    conceptList.get(index).printConceptExtended();
  }

  /**
   * Builds a binary object/concept matrix from the lattice.
   *
   * <p>The first concept whose intent size equals {@code attributeCount} is left out; every
   * remaining concept becomes one column, in {@code conceptList} order. Cell {@code [i][j]}
   * is {@code 1} when object id {@code i} belongs to the extent of column {@code j}'s
   * concept and {@code 0} otherwise.
   *
   * <p>NOTE(review): an earlier revision also called {@code remove(Integer)} on the concept
   * copy for each parent of the excluded concept. That call resolved to
   * {@code List.remove(Object)} and never removed anything, so it has been dropped without
   * changing the result. If removing those parents by index was intended, it needs to be
   * reintroduced deliberately.
   *
   * @param extentCardinality number of rows; every object id found in an extent must be
   *                          smaller than this value
   * @return matrix with {@code extentCardinality} rows and one column per retained concept
   */
  public int[][] toContext(int extentCardinality) {
    List<PhraseConcept> cList = new ArrayList<>(conceptList);
    for (int k = 0; k < conceptList.size(); k++) {
      if (conceptList.get(k).intent.size() == attributeCount) {
        // Index-based removal: cList still mirrors conceptList at this point.
        cList.remove(k);
        break;
      }
    }

    int newAttrCount = cList.size();
    int[][] binaryContext = new int[extentCardinality][newAttrCount];
    for (int j = 0; j < newAttrCount; j++) {
      for (Integer i : cList.get(j).extent) {
        binaryContext[i][j] = 1;
      }
    }
    return binaryContext;
  }

  /**
   * Computes two stability values for every concept and stores them on the concept.
   *
   * <p>For each child, {@code delta} is the concept's extent size minus the child's extent
   * size. {@code intLogStabilityBottom} is set to {@code -log2(sum of 2^-delta)} and
   * {@code intLogStabilityUp} to the smallest {@code delta}. For a concept without children
   * the sum is zero, so the former becomes positive infinity and the latter
   * {@link Integer#MAX_VALUE}.
   */
  public void logStability() {
    for (PhraseConcept pc : conceptList) {
      int minDelta = Integer.MAX_VALUE;
      // Accumulate in double: a float accumulator drops small 2^-delta terms.
      double sum = 0.0;
      for (Integer child : pc.children) {
        int delta = pc.extent.size() - conceptList.get(child).extent.size();
        minDelta = Math.min(minDelta, delta);
        sum += Math.pow(2, -delta);
      }
      pc.intLogStabilityBottom = -(Math.log(sum) / Math.log(2.0));
      pc.intLogStabilityUp = minDelta;
    }
  }
}