blob: 5375e616608e5eeffbed8bf40b2886d2f0ec2390 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.util;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.uima.UIMARuntimeException;
import org.apache.uima.cas.ArrayFS;
import org.apache.uima.cas.BooleanArrayFS;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.DoubleArrayFS;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.FloatArrayFS;
import org.apache.uima.cas.IntArrayFS;
import org.apache.uima.cas.LongArrayFS;
import org.apache.uima.cas.ShortArrayFS;
import org.apache.uima.cas.SofaFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.text.AnnotationFS;
/**
 * Utility class for doing deep copies of FeatureStructures from one CAS to another. To handle cases
 * where the source CAS has multiple references to the same FS, you can create one instance of
 * CasCopier and use it to copy multiple FeatureStructures. The CasCopier will remember previously
 * copied FeatureStructures, so if you later copy another FS that has a reference to a previously
 * copied FS, it will not duplicate the multiply-referenced FS.
 */
public class CasCopier {
  /** The CAS that FeatureStructures are copied from. */
  private final CAS mSrcCas;

  /** The CAS that FeatureStructures are copied into. */
  private final CAS mDestCas;

  /** Low-level view of the destination CAS, used to create FS instances by type code. */
  private final LowLevelCAS mLowLevelDestCas;

  /** The Sofa feature as looked up by full name in the destination type system. */
  private final Feature mDestSofaFeature;

  /** Maps each source FS that has been copied to its copy in the destination CAS. */
  private final Map<FeatureStructure, FeatureStructure> mFsMap =
          new HashMap<FeatureStructure, FeatureStructure>();

  /**
   * Creates a new CasCopier that can be used to copy FeatureStructures from one CAS to another.
   * Note that if you are merging data from multiple CASes, you must create a new CasCopier
   * for each source CAS.
   *
   * @param aSrcCas
   *          the CAS to copy from.
   * @param aDestCas
   *          the CAS to copy into.
   */
  public CasCopier(CAS aSrcCas, CAS aDestCas) {
    mSrcCas = aSrcCas;
    mDestCas = aDestCas;
    mLowLevelDestCas = aDestCas.getLowLevelCAS();
    mDestSofaFeature = aDestCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA);
  }

  /**
   * Does a complete deep copy of one CAS into another CAS. The contents of each view
   * in the source CAS will be copied to the same-named view in the destination CAS. If
   * the view does not already exist it will be created. All FeatureStructures that are
   * indexed in a view in the source CAS will become indexed in the same-named view in the
   * destination CAS.
   *
   * @param aSrcCas
   *          the CAS to copy from
   * @param aDestCas
   *          the CAS to copy to
   * @param aCopySofa
   *          if true, the sofa data and mimeType of each view will be copied. If false they will
   *          not.
   */
  public static void copyCas(CAS aSrcCas, CAS aDestCas, boolean aCopySofa) {
    // A single copier is shared by all views so that an FS referenced from several views
    // is only copied once.
    CasCopier copier = new CasCopier(aSrcCas, aDestCas);
    Iterator<SofaFS> sofaIter = aSrcCas.getSofaIterator();
    while (sofaIter.hasNext()) {
      SofaFS sofa = sofaIter.next();
      CAS view = aSrcCas.getView(sofa);
      copier.copyCasView(view, aCopySofa);
    }
  }

  /**
   * Does a deep copy of the contents of one CAS View into another CAS.
   * If a view with the same name as <code>aSrcCasView</code> exists in the destination CAS,
   * then it will be the target of the copy. Otherwise, a new view will be created with
   * that name and will become the target of the copy. All FeatureStructures that are indexed
   * in the source CAS view will become indexed in the target view.
   *
   * @param aSrcCasView
   *          the CAS to copy from
   * @param aCopySofa
   *          if true, the sofa data and mimeType will be copied. If false they will not.
   */
  public void copyCasView(CAS aSrcCasView, boolean aCopySofa) {
    // get or create the target view
    CAS targetView = getOrCreateView(mDestCas, aSrcCasView.getViewName());

    if (aCopySofa) {
      // can't copy the SofaFS - just copy the sofa data and mime type
      String sofaMime = aSrcCasView.getSofa().getSofaMime();
      if (aSrcCasView.getDocumentText() != null) {
        targetView.setSofaDataString(aSrcCasView.getDocumentText(), sofaMime);
      } else if (aSrcCasView.getSofaDataURI() != null) {
        targetView.setSofaDataURI(aSrcCasView.getSofaDataURI(), sofaMime);
      } else if (aSrcCasView.getSofaDataArray() != null) {
        targetView.setSofaDataArray(copyFs(aSrcCasView.getSofaDataArray()), sofaMime);
      }
    }

    // now copy indexed FS, but keep track so we don't index anything more than once
    Set<FeatureStructure> indexedFs = new HashSet<FeatureStructure>();
    Iterator<FSIndex<FeatureStructure>> indexes = aSrcCasView.getIndexRepository().getIndexes();
    while (indexes.hasNext()) {
      FSIndex<FeatureStructure> index = indexes.next();
      Iterator<FeatureStructure> iter = index.iterator();
      while (iter.hasNext()) {
        FeatureStructure fs = iter.next();
        if (indexedFs.contains(fs)) {
          continue;
        }
        indexedFs.add(fs);

        FeatureStructure copyOfFs = copyFs(fs);
        if (copyOfFs == null) {
          // copyFs returns null only when the destination view has no DocumentAnnotation to
          // reuse; there is nothing to index in that case.
          continue;
        }

        // check for annotations with null Sofa reference - this can happen
        // if the annotations were created with the Low Level CAS API. If the
        // Sofa reference isn't set, attempting to add the FS to the indexes
        // will fail.
        if (fs instanceof AnnotationFS
                && copyOfFs.getFeatureValue(mDestSofaFeature) == null) {
          copyOfFs.setFeatureValue(mDestSofaFeature, targetView.getSofa());
        }

        // also don't index the DocumentAnnotation (it's indexed by default)
        if (!isDocumentAnnotation(copyOfFs)) {
          targetView.addFsToIndexes(copyOfFs);
        }
      }
    }
  }

  /**
   * Copies an FS from the source CAS to the destination CAS. Also copies any referenced FS, except
   * that previously copied FS will not be copied again.
   *
   * @param aFS
   *          the FS to copy. Must be contained within the source CAS.
   * @return the copy of <code>aFS</code> in the target CAS.
   * @throws UIMARuntimeException
   *           if the type of <code>aFS</code> is not defined in the destination type system.
   */
  public FeatureStructure copyFs(FeatureStructure aFS) {
    // FS must be in the source CAS
    assert ((CASImpl) aFS.getCAS()).getBaseCAS() == ((CASImpl) mSrcCas).getBaseCAS();

    // check if we already copied this FS
    FeatureStructure copy = mFsMap.get(aFS);
    if (copy != null) {
      return copy;
    }

    // get the type of the FS
    Type srcType = aFS.getType();

    // Certain types need to be handled specially

    // Sofa - cannot be created by normal methods. Instead, we return the Sofa with the
    // same Sofa ID in the target CAS. If it does not exist it will be created.
    if (aFS instanceof SofaFS) {
      String sofaId = ((SofaFS) aFS).getSofaID();
      return getOrCreateView(mDestCas, sofaId).getSofa();
    }

    // DocumentAnnotation - instead of creating a new instance, reuse the automatically created
    // instance in the destination view.
    if (isDocumentAnnotation(aFS)) {
      String viewName = ((AnnotationFS) aFS).getView().getViewName();
      // The destination view may not exist yet (e.g. the DocumentAnnotation is reached through
      // a cross-view reference before its own view has been copied), so create it on demand
      // just as is done for Sofas above.
      CAS destView = getOrCreateView(mDestCas, viewName);
      FeatureStructure destDocAnnot = destView.getDocumentAnnotation();
      if (destDocAnnot != null) {
        // Register before copying features so that a reference cycle leading back to the
        // DocumentAnnotation terminates, and so its features are only copied once.
        mFsMap.put(aFS, destDocAnnot);
        copyFeatures(aFS, destDocAnnot);
      }
      return destDocAnnot;
    }

    // Arrays - need to be created and populated differently than "normal" FS
    if (srcType.isArray()) {
      copy = copyArray(aFS);
      mFsMap.put(aFS, copy);
      return copy;
    }

    // create a new FS of the same type in the target CAS
    Type destType;
    if (mDestCas.getTypeSystem() == mSrcCas.getTypeSystem()) {
      // optimize type lookup if type systems are identical
      destType = srcType;
    } else {
      destType = mDestCas.getTypeSystem().getType(srcType.getName());
    }
    if (destType == null) {
      throw new UIMARuntimeException(UIMARuntimeException.TYPE_NOT_FOUND_DURING_CAS_COPY,
              new Object[] { srcType.getName() });
    }

    // We need to use the LowLevel CAS interface to create the FS, because the usual
    // CAS.createFS() call doesn't allow us to create subtypes of AnnotationBase from
    // a base CAS. In any case we don't need the Sofa reference to be automatically
    // set because we'll set it manually in the copyFeatures method.
    int typeCode = mLowLevelDestCas.ll_getTypeSystem().ll_getCodeForType(destType);
    int destFsAddr = mLowLevelDestCas.ll_createFS(typeCode);
    FeatureStructure destFs = mLowLevelDestCas.ll_getFSForRef(destFsAddr);

    // add to map (before copying features) so we don't try to copy this more than once
    mFsMap.put(aFS, destFs);
    copyFeatures(aFS, destFs);
    return destFs;
  }

  /**
   * Copy feature values from one FS to another. For reference-valued features, this does a deep
   * copy.
   *
   * @param aSrcFS
   *          FeatureStructure to copy from
   * @param aDestFS
   *          FeatureStructure to copy to
   * @throws UIMARuntimeException
   *           if a feature of the source type has no same-named feature on the destination type.
   */
  private void copyFeatures(FeatureStructure aSrcFS, FeatureStructure aDestFS) {
    Type srcType = aSrcFS.getType();
    Type destType = aDestFS.getType();
    for (Feature srcFeat : srcType.getFeatures()) {
      Feature destFeat;
      if (destType == srcType) {
        // sharing same type system, so destFeat == srcFeat
        destFeat = srcFeat;
      } else {
        // not sharing same type system, so do a name lookup in destination type system
        destFeat = destType.getFeatureByBaseName(srcFeat.getShortName());
        if (destFeat == null) {
          throw new UIMARuntimeException(UIMARuntimeException.TYPE_NOT_FOUND_DURING_CAS_COPY,
                  new Object[] { srcFeat.getName() });
        }
      }

      // copy primitive values using their string representation
      // TODO: could be optimized but this code would be very messy if we have to
      // enumerate all possible primitive types. Maybe LowLevel CAS API could help?
      if (srcFeat.getRange().isPrimitive()) {
        aDestFS.setFeatureValueFromString(destFeat, aSrcFS.getFeatureValueAsString(srcFeat));
      } else {
        // recursive copy; null references are left unset on the destination
        FeatureStructure refFS = aSrcFS.getFeatureValue(srcFeat);
        if (refFS != null) {
          aDestFS.setFeatureValue(destFeat, copyFs(refFS));
        }
      }
    }
  }

  /**
   * Returns whether the given FS has already been copied using this CasCopier.
   *
   * @param aFS
   *          a FeatureStructure from the source CAS
   * @return true if this CasCopier has recorded a copy for <code>aFS</code>, false otherwise
   */
  public boolean alreadyCopied(FeatureStructure aFS) {
    return mFsMap.containsKey(aFS);
  }

  /**
   * Creates an array in the destination CAS with the same length and array kind as the given
   * source array and fills it with the source contents. Elements of primitive and String arrays
   * are copied by value; elements of FS arrays are deep-copied via {@link #copyFs}.
   *
   * @param aSrcFs
   *          the source array FS
   * @return the newly created array in the destination CAS
   */
  private FeatureStructure copyArray(FeatureStructure aSrcFs) {
    // TODO: there should be a way to do this without enumerating all the array types!
    if (aSrcFs instanceof StringArrayFS) {
      StringArrayFS arrayFs = (StringArrayFS) aSrcFs;
      int len = arrayFs.size();
      StringArrayFS destFS = mDestCas.createStringArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof IntArrayFS) {
      IntArrayFS arrayFs = (IntArrayFS) aSrcFs;
      int len = arrayFs.size();
      IntArrayFS destFS = mDestCas.createIntArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof ByteArrayFS) {
      ByteArrayFS arrayFs = (ByteArrayFS) aSrcFs;
      int len = arrayFs.size();
      ByteArrayFS destFS = mDestCas.createByteArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof ShortArrayFS) {
      ShortArrayFS arrayFs = (ShortArrayFS) aSrcFs;
      int len = arrayFs.size();
      ShortArrayFS destFS = mDestCas.createShortArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof LongArrayFS) {
      LongArrayFS arrayFs = (LongArrayFS) aSrcFs;
      int len = arrayFs.size();
      LongArrayFS destFS = mDestCas.createLongArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof FloatArrayFS) {
      FloatArrayFS arrayFs = (FloatArrayFS) aSrcFs;
      int len = arrayFs.size();
      FloatArrayFS destFS = mDestCas.createFloatArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof DoubleArrayFS) {
      DoubleArrayFS arrayFs = (DoubleArrayFS) aSrcFs;
      int len = arrayFs.size();
      DoubleArrayFS destFS = mDestCas.createDoubleArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof BooleanArrayFS) {
      BooleanArrayFS arrayFs = (BooleanArrayFS) aSrcFs;
      int len = arrayFs.size();
      BooleanArrayFS destFS = mDestCas.createBooleanArrayFS(len);
      for (int i = 0; i < len; i++) {
        destFS.set(i, arrayFs.get(i));
      }
      return destFS;
    }
    if (aSrcFs instanceof ArrayFS) {
      ArrayFS arrayFs = (ArrayFS) aSrcFs;
      int len = arrayFs.size();
      ArrayFS destFS = mDestCas.createArrayFS(len);
      // Register the array before copying its elements: if an element refers back to this
      // array (directly or transitively), the recursive copyFs call must resolve to this same
      // destination array rather than creating a second copy or recursing without end.
      mFsMap.put(aSrcFs, destFS);
      for (int i = 0; i < len; i++) {
        FeatureStructure srcElem = arrayFs.get(i);
        if (srcElem != null) {
          destFS.set(i, copyFs(srcElem));
        }
      }
      return destFS;
    }
    assert false; // the set of array types should be exhaustive, so we should never get here
    return null;
  }

  /**
   * Gets the named view; if the view doesn't exist it will be created.
   *
   * @param aCas
   *          the CAS in which to look up or create the view
   * @param aViewName
   *          the name of the view
   * @return the existing view with that name, or a newly created one
   */
  private static CAS getOrCreateView(CAS aCas, String aViewName) {
    // TODO: there should be some way to do this without the try...catch
    try {
      return aCas.getView(aViewName);
    } catch (CASRuntimeException e) {
      // getView failed, which is taken to mean the view does not exist yet - create it
      return aCas.createView(aViewName);
    }
  }

  /**
   * Determines whether the given FS is the DocumentAnnotation for its view.
   * This is more than just a type check; we actually check if it is the one "special"
   * DocumentAnnotation that CAS.getDocumentAnnotation() would return.
   *
   * @param aFS
   *          the FS to test; may be null, in which case false is returned
   * @return true if <code>aFS</code> equals the DocumentAnnotation of the view it belongs to
   */
  private static boolean isDocumentAnnotation(FeatureStructure aFS) {
    return (aFS instanceof AnnotationFS)
            && aFS.equals(((AnnotationFS) aFS).getView().getDocumentAnnotation());
  }
}