blob: a92953cb829297cf3624284fdb2e4d485b4b9daf [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta.action;
import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.ruta.RutaStream;
import org.apache.uima.ruta.expression.bool.IBooleanExpression;
import org.apache.uima.ruta.expression.bool.SimpleBooleanExpression;
import org.apache.uima.ruta.expression.number.INumberExpression;
import org.apache.uima.ruta.expression.string.IStringExpression;
import org.apache.uima.ruta.expression.type.ITypeExpression;
import org.apache.uima.ruta.resource.CSVTable;
import org.apache.uima.ruta.resource.RutaResourceLoader;
import org.apache.uima.ruta.resource.RutaTable;
import org.apache.uima.ruta.resource.RutaWordList;
import org.apache.uima.ruta.rule.MatchContext;
import org.apache.uima.ruta.rule.RuleElement;
import org.apache.uima.ruta.visitor.InferenceCrowd;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
/**
 * Ruta action that looks up the entries of one column of a CSV table in the document, creates a
 * feature structure of the configured type for every hit and fills its features from the
 * matching table row.
 *
 * <p>
 * The table is resolved by name through a {@link RutaResourceLoader} on every execution and kept
 * in a static cache that is shared by all instances of this action. A cached table is replaced
 * when the last-modified timestamp of the backing file is newer than the timestamp stored with
 * the cache entry.
 */
public class MarkTableReloadAction extends AbstractRutaAction {

  private static final Logger LOGGER = Logger.getLogger(MarkTableReloadAction.class.getName());

  /** Loaded tables keyed by the evaluated table name; shared by all instances. */
  private static final Map<String, TableCacheEntry> tableCache = new ConcurrentHashMap<String, TableCacheEntry>();

  private final ITypeExpression typeExpr;

  private final IStringExpression tableName;

  private final Map<IStringExpression, INumberExpression> featureMap;

  private final INumberExpression indexExpr;

  private final IBooleanExpression ignoreCase;

  private final INumberExpression ignoreLength;

  private final IStringExpression ignoreChar;

  private final INumberExpression maxIgnoreChar;

  private IBooleanExpression ignoreWS = new SimpleBooleanExpression(true);

  /**
   * A loaded table together with the file timestamp observed when it was loaded.
   *
   * <p>
   * Declared static so that entries held by the static cache do not keep a reference to the
   * action instance that happened to create them.
   */
  private static final class TableCacheEntry {

    private final RutaTable table;

    private final long lastModified;

    TableCacheEntry(RutaTable table, long lastModified) {
      this.table = table;
      this.lastModified = lastModified;
    }
  }

  /**
   * Creates the action.
   *
   * @param typeExpr
   *          type of the feature structures to create
   * @param indexExpr
   *          column used for the lookup (the code passes it unchanged to the word list and
   *          decremented by one to the row lookup, so it is presumably 1-based)
   * @param tableName
   *          name of the table resource
   * @param featureMap
   *          feature short name to column number
   * @param ignoreCase
   *          optional, treated as {@code false} when {@code null}
   * @param ignoreLength
   *          optional, treated as {@code 0} when {@code null}
   * @param ignoreChar
   *          optional, treated as the empty string when {@code null}
   * @param maxIgnoreChar
   *          optional, treated as {@code 0} when {@code null}
   */
  public MarkTableReloadAction(ITypeExpression typeExpr, INumberExpression indexExpr,
          IStringExpression tableName, Map<IStringExpression, INumberExpression> featureMap,
          IBooleanExpression ignoreCase, INumberExpression ignoreLength,
          IStringExpression ignoreChar, INumberExpression maxIgnoreChar) {
    super();
    this.typeExpr = typeExpr;
    this.indexExpr = indexExpr;
    this.tableName = tableName;
    this.featureMap = featureMap;
    this.ignoreCase = ignoreCase;
    this.ignoreLength = ignoreLength;
    this.ignoreChar = ignoreChar;
    this.maxIgnoreChar = maxIgnoreChar;
  }

  /**
   * Replaces the whitespace flag handed to the word list lookup. The default is a constant
   * {@code true} expression; {@code null} is evaluated as {@code false}.
   *
   * @param ignoreWS
   *          the new expression, may be {@code null}
   */
  public void setIgnoreWS(IBooleanExpression ignoreWS) {
    this.ignoreWS = ignoreWS;
  }

  /**
   * Resolves the table, searches the configured column in the stream and creates one feature
   * structure per hit. Annotations receive the offsets of the hit and are added through the
   * stream; every {@link TOP} gets its features filled from the table row and is added to the
   * indexes.
   *
   * <p>
   * If the table cannot be resolved or loaded the problem is logged and the action does nothing.
   */
  @Override
  public void execute(MatchContext context, RutaStream stream, InferenceCrowd crowd) {
    RutaTable table = loadTable(context, stream);
    if (table == null) {
      // The reason has already been logged by loadTable; without a table there is nothing to mark.
      return;
    }

    // The original code from the RUTA action
    int index = indexExpr.getIntegerValue(context, stream);
    Type type = typeExpr.getType(context, stream);
    Map<String, Integer> map = new HashMap<String, Integer>();
    for (Map.Entry<IStringExpression, INumberExpression> each : featureMap.entrySet()) {
      map.put(each.getKey().getStringValue(context, stream),
              each.getValue().getIntegerValue(context, stream));
    }

    boolean ignoreCaseValue = ignoreCase != null ? ignoreCase.getBooleanValue(context, stream)
            : false;
    int ignoreLengthValue = ignoreLength != null ? ignoreLength.getIntegerValue(context, stream)
            : 0;
    String ignoreCharValue = ignoreChar != null ? ignoreChar.getStringValue(context, stream) : "";
    int maxIgnoreCharValue = maxIgnoreChar != null
            ? maxIgnoreChar.getIntegerValue(context, stream)
            : 0;
    boolean ignoreWSValue = ignoreWS != null ? ignoreWS.getBooleanValue(context, stream) : false;

    RutaWordList wordList = table.getWordList(index, context.getParent());
    Collection<AnnotationFS> found = wordList.find(stream, ignoreCaseValue, ignoreLengthValue,
            ignoreCharValue.toCharArray(), maxIgnoreCharValue, ignoreWSValue);
    for (AnnotationFS annotationFS : found) {
      // HOTFIX: for feature assignment
      String candidate = removeIgnoredChars(stream.getVisibleCoveredText(annotationFS),
              ignoreCharValue, maxIgnoreCharValue);
      List<String> rowWhere = table.getRowWhere(index - 1, candidate, false);
      if (rowWhere.isEmpty() && ignoreCaseValue && candidate.length() > ignoreLengthValue) {
        // TODO: does not cover all variants
        rowWhere = table.getRowWhere(index - 1, candidate, true);
      }

      FeatureStructure newFS = stream.getCas().createFS(type);
      if (newFS instanceof Annotation) {
        Annotation a = (Annotation) newFS;
        a.setBegin(annotationFS.getBegin());
        a.setEnd(annotationFS.getEnd());
        stream.addAnnotation(a, context.getRuleMatch());
      }
      if (newFS instanceof TOP) {
        TOP newStructure = (TOP) newFS;
        fillFeatures(newStructure, map, annotationFS, context.getElement(), rowWhere, stream);
        newStructure.addToIndexes();
      }
    }
  }

  /**
   * Returns the table for the evaluated table name, taking it from the cache unless the backing
   * file carries a newer timestamp than the cached entry.
   *
   * @return the table, or {@code null} if the resource does not exist or the CSV cannot be read
   *         (both cases are logged)
   */
  private RutaTable loadTable(MatchContext context, RutaStream stream) {
    String tableNameValue = tableName.getStringValue(context, stream);
    ResourceLoader resourceLoader = new RutaResourceLoader(
            context.getParent().getEnvironment().getResourcePaths());
    Resource resource = resourceLoader.getResource(tableNameValue);
    if (!resource.exists()) {
      LOGGER.severe("Can't find resource: " + tableNameValue);
      return null;
    }

    // Without a file there is no timestamp to compare. Falling back to 0 means the table is
    // loaded once and then always served from the cache instead of failing on a null file.
    long lastModified = 0L;
    try {
      File resourceFile = resource.getFile();
      lastModified = resourceFile.lastModified();
    } catch (IOException e) {
      LOGGER.log(Level.SEVERE, "Unable to obtain file from resource: " + tableNameValue, e);
    }

    TableCacheEntry cacheEntry = tableCache.get(tableNameValue);
    if (cacheEntry != null && lastModified <= cacheEntry.lastModified) {
      return cacheEntry.table;
    }

    LOGGER.info("Creating Table Word List from resource: " + tableNameValue);
    try {
      RutaTable table = new CSVTable(resource, CSVTable.DEFAULT_CSV_SEPARATOR);
      // Only successful loads are cached, so a broken file is retried on the next execution
      // rather than leaving an unusable entry behind.
      tableCache.put(tableNameValue, new TableCacheEntry(table, lastModified));
      return table;
    } catch (IOException e) {
      LOGGER.log(Level.SEVERE, "Unable to load CSV file: " + tableNameValue, e);
      return null;
    }
  }

  /**
   * Removes the first {@code maxRemovals} characters of {@code text} that occur in
   * {@code ignoreChars}. The characters are compared literally, which matches the character
   * array handed to the word list lookup and avoids building a regular expression from them.
   *
   * @return {@code text} unchanged if {@code ignoreChars} is blank or {@code maxRemovals} is not
   *         positive
   */
  private static String removeIgnoredChars(String text, String ignoreChars, int maxRemovals) {
    if (maxRemovals <= 0 || StringUtils.isBlank(ignoreChars)) {
      return text;
    }
    StringBuilder kept = new StringBuilder(text.length());
    int removed = 0;
    for (int i = 0; i < text.length(); i++) {
      char c = text.charAt(i);
      if (removed < maxRemovals && ignoreChars.indexOf(c) >= 0) {
        removed++;
      } else {
        kept.append(c);
      }
    }
    return kept.toString();
  }

  /**
   * Copies row values into the features of {@code structure}. A feature is set when its short
   * name (the part after the first {@code ':'}) is mapped to a column number that exists in
   * {@code row} and its range is one of the CAS string, integer, double, float, byte, short,
   * long or boolean types. Features with any other range are left untouched.
   */
  private void fillFeatures(TOP structure, Map<String, Integer> map, AnnotationFS annotationFS,
          RuleElement element, List<String> row, RutaStream stream) {
    List<?> featuresList = structure.getType().getFeatures();
    for (Object each : featuresList) {
      Feature targetFeature = (Feature) each;
      String name = targetFeature.getName();
      String shortFName = name.substring(name.indexOf(':') + 1);
      Integer entryIndex = map.get(shortFName);
      // Column numbers are used as entryIndex - 1 below, so anything below 1 or beyond the row
      // cannot be resolved and is skipped.
      if (entryIndex == null || entryIndex < 1 || row.size() < entryIndex) {
        continue;
      }
      String value = row.get(entryIndex - 1);
      String rangeName = targetFeature.getRange().getName();
      if (rangeName.equals(CAS.TYPE_NAME_STRING)) {
        structure.setStringValue(targetFeature, value);
      } else if (rangeName.equals(CAS.TYPE_NAME_INTEGER)) {
        structure.setIntValue(targetFeature, Integer.parseInt(value));
      } else if (rangeName.equals(CAS.TYPE_NAME_DOUBLE)) {
        structure.setDoubleValue(targetFeature, Double.parseDouble(value));
      } else if (rangeName.equals(CAS.TYPE_NAME_FLOAT)) {
        structure.setFloatValue(targetFeature, Float.parseFloat(value));
      } else if (rangeName.equals(CAS.TYPE_NAME_BYTE)) {
        structure.setByteValue(targetFeature, Byte.parseByte(value));
      } else if (rangeName.equals(CAS.TYPE_NAME_SHORT)) {
        structure.setShortValue(targetFeature, Short.parseShort(value));
      } else if (rangeName.equals(CAS.TYPE_NAME_LONG)) {
        structure.setLongValue(targetFeature, Long.parseLong(value));
      } else if (rangeName.equals(CAS.TYPE_NAME_BOOLEAN)) {
        structure.setBooleanValue(targetFeature, Boolean.parseBoolean(value));
      }
    }
  }

  /** @return the expression for the type of the created structures */
  public ITypeExpression getTypeExpr() {
    return typeExpr;
  }

  /** @return the expression for the table name */
  public IStringExpression getTableExpr() {
    return tableName;
  }

  /** @return the mapping from feature name expressions to column number expressions */
  public Map<IStringExpression, INumberExpression> getFeatureMap() {
    return featureMap;
  }

  /** @return the expression for the lookup column */
  public INumberExpression getIndexExpr() {
    return indexExpr;
  }

  /** @return the ignore-case expression, may be {@code null} */
  public IBooleanExpression getIgnoreCase() {
    return ignoreCase;
  }

  /** @return the ignore-length expression, may be {@code null} */
  public INumberExpression getIgnoreLength() {
    return ignoreLength;
  }

  /** @return the expression for the characters to ignore, may be {@code null} */
  public IStringExpression getIgnoreChar() {
    return ignoreChar;
  }

  /** @return the expression for the maximum number of ignored characters, may be {@code null} */
  public INumberExpression getMaxIgnoreChar() {
    return maxIgnoreChar;
  }
}