blob: a2cb6af00baaa5c3ea8723a72baba3ae51ad4d12 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package opennlp.tools.parse_thicket;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class ParseTreeNode implements IGeneralizer<ParseTreeNode>{
String word; // word in normal form, lemma
// this is the POS tag of the token
String pos;
// this is the NER label of the token
String ne;
Integer id;
//PhraseType
String phraseType;
Map<String, Object> attributes;
String normalizedWord;
String syntacticDependence;
String originalWord; //what actually occurs in a sentence
String head;
String label;
String modifier;
public String getOriginalWord() {
return originalWord;
}
public void setOriginalWord(String originalWord) {
this.originalWord = originalWord;
}
public String getHead() {
return head;
}
public void setHead(String head) {
this.head = head;
}
public String getLabel() {
return label;
}
public void setLabel(String label) {
this.label = label;
}
public String getModifier() {
return modifier;
}
public void setModifier(String modifier) {
this.modifier = modifier;
}
public String getNormalizedWord() {
return normalizedWord;
}
public void setNormalizedWord(String normalizedWord) {
this.normalizedWord = normalizedWord;
}
public String getSyntacticDependence() {
return syntacticDependence;
}
public void setSyntacticDependence(String syntacticDependence) {
this.syntacticDependence = syntacticDependence;
}
public Map<String, Object> getAttributes() {
return attributes;
}
public void setAttributes(Map<String, Object> attributes) {
this.attributes = attributes;
}
public enum PhraseType {NP("NP"), VP("VP"), PRP("PRP");
PhraseType(final String text) {
this.text = text;
}
private final String text;
}
public ParseTreeNode(String word, String pos, String ne, Integer id) {
super();
this.word = word;
this.pos = pos;
this.ne = ne;
this.id = id;
}
public ParseTreeNode(String word, String pos) {
super();
this.word = word;
this.pos = pos;
}
public String getPhraseType() {
return phraseType;
}
public void setPhraseType(String pt) {
this.phraseType=pt;
}
public String getWord() {
return word;
}
public void setWord(String word) {
this.word = word;
}
public String getPos() {
return pos;
}
public void setPos(String pos) {
this.pos = pos;
}
public String getNe() {
return ne;
}
public void setNe(String ne) {
this.ne = ne;
}
public Integer getId() {
return id;
}
public void setId(Integer id) {
this.id = id;
}
public String toString(){
StringBuilder buf = new StringBuilder();
if (id!=null)
buf.append("<").append(id).append(">");
if(phraseType!=null)
buf.append(phraseType);
if(word!=null)
buf.append("'").append(word).append("'");
if (pos!=null)
buf.append(":").append(pos);
return buf.toString();
}
public static String toTreeRepresentationString(List<ParseTreeNode> chList){
StringBuilder buf = new StringBuilder();
for(ParseTreeNode ch: chList){
if (ch.getPos().startsWith(".") || ch.getPos().startsWith(",") || ch.getPos().startsWith(";") || ch.getPos().startsWith("!"))
continue;
buf.append("(").append(ch.getWord()).append(" ").append(ch.getPos()).append(")");
}
return buf.toString().trim();
}
public static String toWordString(List<ParseTreeNode> chList){
StringBuilder buf = new StringBuilder();
for(ParseTreeNode ch: chList){
buf.append(ch.getWord()).append(" ");
}
return buf.toString().trim();
}
@Override
public List<ParseTreeNode> generalize(Object o1, Object o2) {
List<ParseTreeNode> result = new ArrayList<>();
ParseTreeNode w1 = (ParseTreeNode) o1;
ParseTreeNode w2 = (ParseTreeNode) o2;
String posGen = generalizePOS(w1.pos, w2.pos);
if (posGen ==null)
return result;
ParseTreeNode newNode = new ParseTreeNode(generalizeWord(w1.word, w2.word),
posGen, "O", -1);
result.add(newNode);
return result;
}
public String generalizeWord(String lemma1, String lemma2){
if (lemma1.equals(lemma2))
return lemma1;
if (lemma1.equals("*"))
return "*";
//TODO
return "*";
}
public String generalizePOS(String pos1, String pos2) {
if ((pos1.startsWith("NN") && pos2.equals("NP") || pos2.startsWith("NN")
&& pos1.equals("NP"))) {
return "NN";
}
if ((pos1.startsWith("NN") && pos2.equals("VBG") || pos2.startsWith("VBG")
&& pos1.equals("NN"))) {
return "NN";
}
if ((pos1.startsWith("NN") && pos2.equals("ADJP") || pos2.startsWith("NN")
&& pos1.equals("ADJP"))) {
return "NN";
}
if ((pos1.equals("IN") && pos2.equals("TO") || pos1.equals("TO")
&& pos2.equals("IN"))) {
return "IN";
}
// VBx vs VBx = VB (does not matter which form for verb)
if (pos1.startsWith("VB") && pos2.startsWith("VB")) {
return "VB";
}
// ABx vs ABy always gives AB
if (pos1.equalsIgnoreCase(pos2)) {
return pos1;
}
if (pos1.length() > 2) {
pos1 = pos1.substring(0, 2);
}
if (pos2.length() > 2) {
pos2 = pos2.substring(0, 2);
}
if (pos1.equalsIgnoreCase(pos2)) {
return pos1 + "*";
}
return null;
}
}