blob: 7adbf9cd62c5e7b7da9464925ffaab1eff717c91 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.entitytagging.impl;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;

import java.util.regex.Pattern;

import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Immutable value object describing a named entity that was selected by a
 * text annotation: the annotation node itself, the cleaned-up name and the
 * type of the entity.
 * <p>
 * Instances are only created via
 * {@link #createFromTextAnnotation(TripleCollection, NonLiteral)}.
 * {@link #equals(Object)} and {@link #hashCode()} consider the entity node
 * only; name and type do not take part in the comparison.
 */
public final class NamedEntity {

    private static final Logger log = LoggerFactory.getLogger(NamedEntity.class);

    /**
     * Matches a single character of the Unicode general category "P"
     * (punctuation). Compiled once, because {@link String#replaceAll(String, String)}
     * would otherwise recompile the expression on every call.
     */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{P}");

    private final NonLiteral entity;
    private final String name;
    private final UriRef type;

    private NamedEntity(NonLiteral entity, String name, UriRef type) {
        this.entity = entity;
        this.name = name;
        this.type = type;
    }

    /**
     * Getter for the Node providing the information about that entity
     * @return the entity
     */
    public NonLiteral getEntity() {
        return entity;
    }

    /**
     * Getter for the name. This is the selected text of the text annotation
     * after trimming and after punctuation was replaced by spaces.
     * @return the name
     */
    public String getName() {
        return name;
    }

    /**
     * Getter for the type
     * @return the type
     */
    public UriRef getType() {
        return type;
    }

    /**
     * The hash code of the entity node (name and type are ignored).
     */
    @Override
    public int hashCode() {
        return entity.hashCode();
    }

    /**
     * Two named entities are equal if their entity nodes are equal
     * (name and type are ignored).
     */
    @Override
    public boolean equals(Object o) {
        return o instanceof NamedEntity && entity.equals(((NamedEntity) o).entity);
    }

    @Override
    public String toString() {
        return String.format("NamedEntity %s (name=%s|type=%s)", entity, name, type);
    }

    /**
     * Extracts the information of an {@link NamedEntity} from an
     * {@link TechnicalClasses#ENHANCER_TEXTANNOTATION} instance.
     * <p>
     * The name is read from the {@code ENHANCER_SELECTED_TEXT} property,
     * trimmed and stripped of punctuation; the type is read from the
     * {@code DC_TYPE} property. If the selected text is missing, is empty
     * after trimming or consists of punctuation only, the reason is logged
     * on DEBUG level. A missing type is logged on WARN level.
     *
     * @param graph the graph with the information
     * @param textAnnotation the text annotation instance
     * @return the {@link NamedEntity} or <code>null</code> if the parsed
     * text annotation is missing required information.
     */
    public static NamedEntity createFromTextAnnotation(TripleCollection graph, NonLiteral textAnnotation) {
        String selected = EnhancementEngineHelper.getString(graph, textAnnotation, ENHANCER_SELECTED_TEXT);
        if (selected == null) {
            log.debug("Unable to create NamedEntity for TextAnnotation {} "
                    + "because property {} is not present", textAnnotation, ENHANCER_SELECTED_TEXT);
            return null;
        }
        String name = selected.trim();
        if (name.isEmpty()) {
            log.debug("Unable to process TextAnnotation {} because its selects "
                    + "an empty Stirng !", textAnnotation);
            return null;
        }
        // remove punctuation from the search string
        name = cleanupKeywords(name);
        if (name.isEmpty()) {
            // the untouched selected text is logged so the cause stays visible
            log.debug("Unable to process TextAnnotation {} because its selects "
                    + "an stirng with punktations only (selected: {})!",
                    textAnnotation, selected);
            return null;
        }
        UriRef type = EnhancementEngineHelper.getReference(graph, textAnnotation, DC_TYPE);
        if (type == null) {
            log.warn("Unable to process TextAnnotation {} because property {}"
                    + " is not present!", textAnnotation, DC_TYPE);
            return null;
        }
        return new NamedEntity(textAnnotation, name, type);
    }

    /**
     * Removes punctuation from a parsed string. Every punctuation character
     * is replaced by a single space (so consecutive replacements are not
     * collapsed) and the result is trimmed.
     *
     * @param keywords the string to clean up
     * @return the string without punctuation; empty if it contained nothing
     * but punctuation and whitespace
     */
    private static String cleanupKeywords(String keywords) {
        return PUNCTUATION.matcher(keywords).replaceAll(" ").trim();
    }
}