blob: 3b02a2f1b4dfbaa2d4004e948e81f39f72275508 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.collection.impl.cpm.vinci;
import java.io.ByteArrayOutputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Vector;
import org.apache.uima.UIMAFramework;
import org.apache.uima.cas_data.CasData;
import org.apache.uima.cas_data.FeatureStructure;
import org.apache.uima.cas_data.FeatureValue;
import org.apache.uima.cas_data.PrimitiveValue;
import org.apache.uima.cas_data.impl.CasDataToXCas;
import org.apache.uima.cas_data.impl.FeatureStructureImpl;
import org.apache.uima.collection.impl.cpm.Constants;
import org.apache.uima.collection.impl.cpm.utils.CPMUtils;
import org.apache.uima.collection.impl.cpm.utils.ConfigurableFeature;
import org.apache.uima.collection.impl.cpm.utils.FeatureMap;
import org.apache.uima.collection.impl.cpm.utils.Filter;
import org.apache.uima.collection.impl.cpm.vinci.cas_data.VinciPrimitiveValue;
import org.apache.uima.internal.util.StringUtils;
import org.apache.uima.resource.metadata.NameValuePair;
import org.apache.uima.resource.metadata.impl.NameValuePair_impl;
import org.apache.uima.util.Level;
import org.apache.uima.util.XMLSerializer;
public class DATACasUtils {

  /**
   * Serializes the given CasData as XCAS XML and returns the document as a string.
   *
   * @param aCasData
   *          the CasData to serialize
   * @param keysToFilter
   *          type names handed to {@code CasDataToXCas.setTypesToFilter}
   * @return the generated XML
   * @throws Exception
   *           if XCAS generation or decoding of the produced bytes fails
   */
  public static String getXCASasString(CasData aCasData, String[] keysToFilter) throws Exception {
    CasDataToXCas generator = new CasDataToXCas();
    generator.setTypesToFilter(keysToFilter);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    XMLSerializer sax2xml = new XMLSerializer(baos);
    generator.setContentHandler(sax2xml.getContentHandler());
    generator.generateXCas(aCasData);
    // Decode with an explicit charset instead of the platform default, which would corrupt
    // non-ASCII text on JVMs whose default encoding differs from the serializer's.
    // NOTE(review): assumes XMLSerializer writes UTF-8 (its default output encoding) - confirm.
    return baos.toString("UTF-8");
  }

  /**
   * Creates a new feature structure of the given type holding a single feature and adds it to
   * the CasData.
   *
   * @param dataCas
   *          the CasData that receives the new feature structure
   * @param featureType
   *          type assigned to the new feature structure
   * @param featureName
   *          name of the feature set on the new feature structure
   * @param featureValue
   *          value of that feature
   */
  public static void addFeatureStructure(CasData dataCas, String featureType, String featureName,
          String featureValue) {
    FeatureStructure vfs = new FeatureStructureImpl();
    vfs.setType(featureType);
    PrimitiveValue pv = new VinciPrimitiveValue(featureValue);
    vfs.setFeatureValue(featureName, pv);
    dataCas.addFeatureStructure(vfs);
  }

  /**
   * Tells whether the CasData contains no feature structures at all.
   *
   * @param aDataCas
   *          the CasData to inspect
   * @return true if the data cas is empty
   */
  public static boolean isCasEmpty(CasData aDataCas) {
    return !aDataCas.getFeatureStructures().hasNext();
  }

  /**
   * Sets a feature on every feature structure in the CasData whose type equals
   * {@code featureType}. Nothing happens when no feature structure has that type.
   *
   * @param dataCas
   *          the CasData to modify
   * @param featureType
   *          type of the feature structures to update
   * @param featureName
   *          name of the feature to set
   * @param featureValue
   *          value to set
   */
  public static void addFeature(CasData dataCas, String featureType, String featureName,
          String featureValue) {
    Iterator it = dataCas.getFeatureStructures();
    while (it.hasNext()) {
      FeatureStructure fs = (FeatureStructure) it.next();
      if (fs.getType().equals(featureType)) {
        fs.setFeatureValue(featureName, new VinciPrimitiveValue(featureValue));
      }
    }
  }

  /**
   * Sums the lengths of the string forms of all non-null feature values in the CasData.
   * <p>
   * Note that the sum is built from {@code String.length()}, i.e. it counts characters of each
   * value's {@code toString()} result rather than encoded bytes.
   *
   * @param aDataCas
   *          the CasData to measure
   * @return the byte count
   * @throws Exception
   *           declared for callers; nothing in this method throws a checked exception itself
   */
  public static long getByteCount(CasData aDataCas) throws Exception {
    long byteCount = 0;
    Iterator it = aDataCas.getFeatureStructures();
    while (it.hasNext()) {
      FeatureStructure fs = (FeatureStructure) it.next();
      String[] keys = fs.getFeatureNames();
      // The other methods of this class allow for a null name array; do the same here.
      for (int i = 0; keys != null && i < keys.length; i++) {
        FeatureValue value = fs.getFeatureValue(keys[i]);
        if (value != null) {
          byteCount += value.toString().length();
        }
      }
    }
    return byteCount;
  }

  /**
   * Evaluates a list of filter expressions against the CasData. The CAS is accepted only if
   * every expression is satisfied; a null or empty list accepts everything.
   * <p>
   * The left part of each expression is resolved with
   * {@link #getFeatureValueByType(CasData, String)}. An expression without a right part is an
   * existence test: with no operand the value must be present and non-blank, with the
   * {@code "!"} operand the value must be absent. An expression with a right part compares the
   * value using the {@code "="} or {@code "!="} operand; any other operand is ignored.
   *
   * @param aCAS
   *          the CasData to test
   * @param aFilterList
   *          list of {@code Filter.Expression} objects, may be null
   * @return true if this cas should be analyzed
   */
  public static boolean shouldAnalyzeCAS(CasData aCAS, LinkedList aFilterList) {
    if (aFilterList == null) {
      return true;
    }
    // Iterate rather than index: LinkedList.get(i) walks the list on every call.
    Iterator filters = aFilterList.iterator();
    while (filters.hasNext()) {
      Filter.Expression filterExpression = (Filter.Expression) filters.next();
      String featureValue = getFeatureValueByType(aCAS, filterExpression.getLeftPart().get());
      boolean hasOperand = filterExpression.getOperand() != null;

      if (filterExpression.getRightPart() == null) {
        // Existence test. Each condition is guarded so that neither a null feature value nor a
        // null operand is dereferenced (the previous single expression threw a
        // NullPointerException in both situations).
        boolean requiredButMissing = !hasOperand && featureValue == null;
        boolean blank = featureValue != null && featureValue.trim().length() == 0;
        boolean forbiddenButPresent = hasOperand && featureValue != null
                && "!".equals(filterExpression.getOperand().getOperand());
        if (requiredButMissing || blank || forbiddenButPresent) {
          return false;
        }
      } else if (hasOperand) {
        if ("=".equals(filterExpression.getOperand().getOperand())) {
          // the feature must equal the specified value
          if (!filterExpression.getRightPart().get().equals(featureValue)) {
            return false;
          }
        } else if ("!=".equals(filterExpression.getOperand().getOperand())) {
          // the feature must differ from the specified value
          if (filterExpression.getRightPart().get().equals(featureValue)) {
            return false;
          }
        }
      }
    }
    return true;
  }

  /**
   * Tells whether a key appears in the drop list.
   * <p>
   * The scan stops at the first null entry of {@code dropKeyList}; entries after a null are not
   * examined.
   *
   * @param aKey
   *          the key to look for, may be null
   * @param dropKeyList
   *          the keys to drop, may be null
   * @return true if this key is in the dropKeyList
   */
  public static boolean dropIt(String aKey, String[] dropKeyList) {
    if (aKey == null || dropKeyList == null) {
      return false;
    }
    for (int i = 0; i < dropKeyList.length && dropKeyList[i] != null; i++) {
      if (dropKeyList[i].equals(aKey)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Tells whether a key appears in the list of types, after replacing the short dash and colon
   * terms in the key by their long forms (see {@code Constants}).
   * <p>
   * The scan stops at the first null entry of {@code typeList}; entries after a null are not
   * examined.
   *
   * @param aKey
   *          the key to look for, may be null
   * @param typeList
   *          the accepted type names, may be null
   * @return true if the converted key equals one of the examined entries of typeList
   */
  public static boolean isValidType(String aKey, String[] typeList) {
    // A null key cannot match anything; bail out before it is dereferenced below.
    if (aKey == null || typeList == null) {
      return false;
    }
    if (aKey.indexOf(Constants.SHORT_DASH_TERM) > -1) {
      aKey = StringUtils.replaceAll(aKey, Constants.SHORT_DASH_TERM, Constants.LONG_DASH_TERM);
    }
    if (aKey.indexOf(Constants.SHORT_COLON_TERM) > -1) {
      aKey = StringUtils.replaceAll(aKey, Constants.SHORT_COLON_TERM, Constants.LONG_COLON_TERM);
    }
    for (int i = 0; aKey != null && i < typeList.length && typeList[i] != null; i++) {
      if (typeList[i].equals(aKey)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Tells whether any feature structure in the CasData has a non-null value for the named
   * feature.
   *
   * @param aCAS
   *          the CasData to search
   * @param featureName
   *          name of the feature to look for
   * @return true if at least one feature structure has a value for featureName
   */
  public static boolean hasFeature(CasData aCAS, String featureName) {
    Iterator it = aCAS.getFeatureStructures();
    while (it.hasNext()) {
      Object object = it.next();
      if (object instanceof FeatureStructure
              && ((FeatureStructure) object).getFeatureValue(featureName) != null) {
        return true;
      }
    }
    return false;
  }

  /**
   * Tells whether the CasData contains a feature structure of the given type. Before the
   * comparison, the long colon term in each feature structure's type is replaced by the short
   * one, so {@code aName} is expected in the short form.
   *
   * @param aCAS
   *          the CasData to search
   * @param aName
   *          the type name to look for
   * @return true if a feature structure with that (converted) type exists
   */
  public static boolean hasFeatureStructure(CasData aCAS, String aName) {
    Iterator it = aCAS.getFeatureStructures();
    while (it.hasNext()) {
      Object object = it.next();
      if (object instanceof FeatureStructure) {
        String type = StringUtils.replaceAll(((FeatureStructure) object).getType(),
                Constants.LONG_COLON_TERM, Constants.SHORT_COLON_TERM);
        if (type.equals(aName)) {
          return true;
        }
      }
    }
    return false;
  }

  /**
   * Logs, at FINEST level, the type of every feature structure in the CasData together with the
   * name and value of each of its non-null features. Does nothing when FINEST is not loggable.
   *
   * @param aCAS
   *          the CasData to dump
   */
  public static void dumpFeatures(CasData aCAS) {
    // This method only produces log output, so evaluate the level check once up front instead
    // of once per feature structure and once per feature.
    if (!UIMAFramework.getLogger().isLoggable(Level.FINEST)) {
      return;
    }
    Iterator it = aCAS.getFeatureStructures();
    while (it.hasNext()) {
      Object object = it.next();
      if (!(object instanceof FeatureStructure)) {
        continue;
      }
      FeatureStructure fs = (FeatureStructure) object;
      UIMAFramework.getLogger(DATACasUtils.class).logrb(Level.FINEST,
              DATACasUtils.class.getName(), "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
              "UIMA_CPM_show_cas_fs_type__FINEST",
              new Object[] { Thread.currentThread().getName(), fs.getType() });

      String[] names = fs.getFeatureNames();
      for (int i = 0; names != null && i < names.length; i++) {
        FeatureValue fValue = fs.getFeatureValue(names[i]);
        if (fValue != null) {
          UIMAFramework.getLogger(DATACasUtils.class).logrb(Level.FINEST,
                  DATACasUtils.class.getName(), "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
                  "UIMA_CPM_show_cas_fs_value__FINEST",
                  new Object[] { Thread.currentThread().getName(), names[i], fValue.toString() });
        }
      }
    }
  }

  /**
   * Looks up a feature value in the feature structures whose type equals {@code featureName}.
   * <p>
   * For the first feature structure of that type:
   * <ul>
   * <li>if {@code featureName} is {@code "uima.cpm.DocumentText"} or
   * {@code "UTF8:UTF8Content"}, the {@code "value"} feature is returned (null if unset);</li>
   * <li>if it is {@code "Detag:DetagContent"}, the {@code "Doc:SpannedText"} feature is returned
   * (null if unset);</li>
   * <li>otherwise the feature named {@code featureName} is returned if set; if not, the search
   * continues with the next feature structure of that type.</li>
   * </ul>
   * When the system property {@code SHOWFEATURES} is set, the search is traced to the log.
   *
   * @param aCAS
   *          the CasData to search
   * @param featureName
   *          used both as the feature structure type to match and as the feature to read
   * @return the string form of the value found, or null if there is none
   */
  public static String getFeatureValueByType(CasData aCAS, String featureName) {
    // Both of these depend only on the arguments / JVM state, not on the current element.
    boolean showFeatures = System.getProperty("SHOWFEATURES") != null;
    String contentFeature = null;
    if ("uima.cpm.DocumentText".equals(featureName) || "UTF8:UTF8Content".equals(featureName)) {
      contentFeature = "value";
    } else if ("Detag:DetagContent".equals(featureName)) {
      contentFeature = "Doc:SpannedText";
    }

    Iterator it = aCAS.getFeatureStructures();
    while (it.hasNext()) {
      Object object = it.next();
      if (!(object instanceof FeatureStructure)) {
        continue;
      }
      FeatureStructure fs = (FeatureStructure) object;
      if (showFeatures) {
        UIMAFramework.getLogger(DATACasUtils.class).logrb(Level.FINEST,
                DATACasUtils.class.getName(), "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
                "UIMA_CPM_search_cas_by_value__FINEST",
                new Object[] { Thread.currentThread().getName(), featureName, fs.getType() });
      }
      if (!featureName.equals(fs.getType())) {
        continue;
      }
      if (showFeatures) {
        String[] names = fs.getFeatureNames();
        for (int i = 0; names != null && i < names.length; i++) {
          UIMAFramework.getLogger(DATACasUtils.class).logrb(Level.FINEST,
                  DATACasUtils.class.getName(), "process", CPMUtils.CPM_LOG_RESOURCE_BUNDLE,
                  "UIMA_CPM_show_type_value__FINEST",
                  new Object[] { Thread.currentThread().getName(), names[i], fs.getType() });
        }
      }
      if (contentFeature != null) {
        // Content types answer from the first matching feature structure, even with null.
        FeatureValue content = fs.getFeatureValue(contentFeature);
        return content == null ? null : content.toString();
      }
      FeatureValue fValue = fs.getFeatureValue(featureName);
      if (fValue != null) {
        return fValue.toString();
      }
    }
    return null;
  }

  /**
   * Collects the values of one feature from every feature structure of a given type.
   *
   * @param aCAS
   *          the CasData to search
   * @param featureStructureName
   *          type of the feature structures to read
   * @param featureName
   *          name of the feature whose values are collected
   * @return the string forms of all non-null values found, in iteration order; an empty array
   *         if there are none
   */
  public static String[] getFeatureStructureValues(CasData aCAS, String featureStructureName,
          String featureName) {
    Vector collected = new Vector();
    Iterator it = aCAS.getFeatureStructures();
    while (it.hasNext()) {
      Object object = it.next();
      if (!(object instanceof FeatureStructure)) {
        continue;
      }
      FeatureStructure fs = (FeatureStructure) object;
      if (!featureStructureName.equals(fs.getType())) {
        continue;
      }
      String[] names = fs.getFeatureNames();
      for (int i = 0; names != null && i < names.length; i++) {
        if (names[i].equals(featureName)) {
          FeatureValue fValue = fs.getFeatureValue(featureName);
          if (fValue != null) {
            collected.add(fValue.toString());
          }
        }
      }
    }
    String[] features = new String[collected.size()];
    collected.copyInto(features);
    return features;
  }

  /**
   * Returns the value of a feature from the first feature structure of the given type that has
   * a non-null value for it.
   *
   * @param aCAS
   *          the CasData to search
   * @param aFeatureStructure
   *          type of the feature structure to read
   * @param featureName
   *          name of the feature to read
   * @return the string form of the value, or null if no such value exists
   */
  public static String getFeatureValueByType(CasData aCAS, String aFeatureStructure,
          String featureName) {
    Iterator it = aCAS.getFeatureStructures();
    while (it.hasNext()) {
      Object object = it.next();
      // Test against the FeatureStructure interface, as every other method of this class does,
      // rather than against one concrete implementation.
      if (object instanceof FeatureStructure) {
        FeatureStructure fs = (FeatureStructure) object;
        if (fs.getType().equals(aFeatureStructure)) {
          FeatureValue fValue = fs.getFeatureValue(featureName);
          if (fValue != null) {
            return fValue.toString();
          }
        }
      }
    }
    return null;
  }

  /**
   * Renames feature structure types, and optionally their features, according to a feature
   * map. For every feature structure whose type has an entry in the map, the type is replaced
   * by the entry's new name; if the entry lists attributes, each listed feature's value is
   * also stored under its new name.
   *
   * @param aDataCas
   *          the CasData to modify
   * @param aFeatureMap
   *          the mapping from old to new names
   */
  public static void remapFeatureTypes(CasData aDataCas, FeatureMap aFeatureMap) {
    Iterator it = aDataCas.getFeatureStructures();
    while (it.hasNext()) {
      Object object = it.next();
      if (!(object instanceof FeatureStructure)) {
        continue;
      }
      FeatureStructure fs = (FeatureStructure) object;
      ConfigurableFeature cf = aFeatureMap.get(fs.getType());
      if (cf == null) {
        continue;
      }
      fs.setType(cf.getNewFeatureName());
      if (cf.attributeListSize() > 0) {
        String[] featureNameList = fs.getFeatureNames();
        for (int i = 0; featureNameList != null && i < featureNameList.length; i++) {
          if (cf.getOldAttributeValue(featureNameList[i]) != null) {
            FeatureValue fv = fs.getFeatureValue(featureNameList[i]);
            // The value is stored under the new name; the old feature is not removed here.
            fs.setFeatureValue(cf.getNewAttributeValue(featureNameList[i]), fv);
          }
        }
      }
    }
  }

  /**
   * Returns the features of a feature structure of the given type as name/value pairs, the
   * value being the string form of the feature value (left unset when the feature has no
   * value).
   * <p>
   * If several feature structures have the given type, the pairs of the last one iterated are
   * returned.
   *
   * @param aCasData
   *          the CasData to search
   * @param aFeatureStructureName
   *          type of the feature structure to read
   * @return the name/value pairs; an empty array if a matching feature structure reports no
   *         feature names; null if no feature structure has the given type
   */
  public static NameValuePair[] getCasDataFeatures(CasData aCasData, String aFeatureStructureName) {
    NameValuePair[] valuePairSet = null;
    Iterator it = aCasData.getFeatureStructures();
    while (it.hasNext()) {
      Object object = it.next();
      if (object instanceof FeatureStructure
              && ((FeatureStructure) object).getType().equals(aFeatureStructureName)) {
        FeatureStructure fs = (FeatureStructure) object;
        String[] featureNames = fs.getFeatureNames();
        if (featureNames == null) {
          // return empty set
          return new NameValuePair[0];
        }
        valuePairSet = new NameValuePair[featureNames.length];
        for (int i = 0; i < featureNames.length; i++) {
          valuePairSet[i] = new NameValuePair_impl();
          valuePairSet[i].setName(featureNames[i]);
          // A listed feature may have no value; do not dereference null.
          FeatureValue value = fs.getFeatureValue(featureNames[i]);
          if (value != null) {
            valuePairSet[i].setValue(value.toString());
          }
        }
      }
    }
    return valuePairSet;
  }
}