/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.TestCase;
import org.apache.uima.UIMAFramework;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.FSIndexRepository;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.LongArrayFS;
import org.apache.uima.cas.Marker;
import org.apache.uima.cas.ShortArrayFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.admin.CASAdminException;
import org.apache.uima.cas.admin.CASFactory;
import org.apache.uima.cas.admin.CASMgr;
import org.apache.uima.cas.admin.TypeSystemMgr;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.cas.impl.CASSerializer;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.impl.Serialization;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas_data.impl.CasComparer;
import org.apache.uima.internal.util.TextStringTokenizer;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.TypePriorities_impl;
import org.apache.uima.test.junit_extension.JUnitExtension;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;
/**
* Test for binary serialization and deserialization (no compression)
*
*/
public class SerializationReinitTest extends TestCase {
// ---- Names of the types and features created programmatically in initCAS() ----
public static final String TOKEN_TYPE = "Token";
// Base name of the Token "type" feature.
public static final String TOKEN_TYPE_FEAT = "type";
// Fully qualified feature name, e.g. "Token:type".
public static final String TOKEN_TYPE_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
+ TOKEN_TYPE_FEAT;
public static final String TOKEN_TYPE_TYPE = "TokenType";
public static final String WORD_TYPE = "Word";
public static final String SEP_TYPE = "Separator";
public static final String EOS_TYPE = "EndOfSentence";
public static final String SENT_TYPE = "Sentence";
public static final String STRING_SUBTYPE_1 = "StringSubtype1";
// Allowed values for the string subtype.
public static final String[] STR_1_VALS = { "test1", "test2" };
// "theType" annotation type and its features; used to exercise the
// string, byte, short and long heaps during (de)serialization.
public static final String OSTR_TYPE = "theType";
public static final String OSTR_TYPE_FEAT = "theString";
public static final String OBYTE_TYPE_FEAT = "theByte";
public static final String OSHORT_TYPE_FEAT = "theShort";
public static final String OBYTEA_TYPE_FEAT = "theByteArray";
public static final String OSHORTA_TYPE_FEAT = "theShortArray";
public static final String OLONGA_TYPE_FEAT = "theLongArray";
public static final String OLONG_TYPE_FEAT = "theLong";
// ---- Per-test handles into the CAS built by initCAS(); populated in setUp() ----
private CASMgr casMgr;
private CAS cas;
private Type wordType;
private Type separatorType;
private Type eosType;
private Type tokenType;
private Feature tokenTypeFeature;
private Type sentenceType;
private Feature startFeature;
private Feature endFeature;
private Type strSub1;
private Type theTypeType;
private Feature theStringFeature;
private Feature theByteFeature;
private Feature theShortFeature;
private Feature theByteArrayFeature;
private Feature theShortArrayFeature;
private Feature theLongArrayFeature;
private Feature theLongFeature;
// ---- Parsed from ExampleCas/testTypeSystem.xml and testIndexes.xml in setUp() ----
private FsIndexDescription[] indexes;
private TypeSystemDescription typeSystem;
/**
 * JUnit 3 style constructor.
 *
 * @param arg the name of the test method to run
 */
public SerializationReinitTest(String arg) {
super(arg);
}
/**
 * Sets up two type systems: one defined via API calls in {@link #initCAS()}
 * (stored into {@link #casMgr} / {@link #cas}, with handles to its types and
 * features cached in fields), and one parsed from ExampleCas/testTypeSystem.xml
 * and testIndexes.xml (stored into {@link #typeSystem} and {@link #indexes}).
 *
 * @see junit.framework.TestCase#setUp()
 */
public void setUp() throws Exception {
super.setUp();
casMgr = initCAS();
cas = (CASImpl)casMgr;
TypeSystem ts = cas.getTypeSystem();
// Cache handles to the programmatically defined types and features.
wordType = ts.getType(WORD_TYPE);
separatorType = ts.getType(SEP_TYPE);
eosType = ts.getType(EOS_TYPE);
tokenType = ts.getType(TOKEN_TYPE);
tokenTypeFeature = ts.getFeatureByFullName(TOKEN_TYPE_FEAT_Q);
startFeature = ts.getFeatureByFullName(CAS.FEATURE_FULL_NAME_BEGIN);
endFeature = ts.getFeatureByFullName(CAS.FEATURE_FULL_NAME_END);
sentenceType = ts.getType(SENT_TYPE);
strSub1 = ts.getType(STRING_SUBTYPE_1);
assertTrue(strSub1 != null);
// Handles to "theType" and its heap-exercising features.
theTypeType = ts.getType(OSTR_TYPE);
theStringFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSTR_TYPE_FEAT);
theByteFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OBYTE_TYPE_FEAT);
theByteArrayFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OBYTEA_TYPE_FEAT);
theShortFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSHORT_TYPE_FEAT);
theShortArrayFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSHORTA_TYPE_FEAT);
theLongFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OLONG_TYPE_FEAT);
theLongArrayFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OLONGA_TYPE_FEAT);
// Parse the XML-defined type system and index definitions.
File typeSystemFile = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
File indexesFile = JUnitExtension.getFile("ExampleCas/testIndexes.xml");
typeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
new XMLInputSource(typeSystemFile));
indexes = UIMAFramework.getXMLParser().parseFsIndexCollection(new XMLInputSource(indexesFile))
.getFsIndexes();
}
/**
 * Clears all per-test fields so the (large) CAS objects can be garbage
 * collected between tests (JUnit 3 keeps TestCase instances alive for the
 * whole suite run).
 */
public void tearDown() {
  casMgr = null;
  cas = null;
  wordType = null;
  separatorType = null;
  eosType = null;
  tokenType = null;
  tokenTypeFeature = null;
  startFeature = null;
  endFeature = null;
  sentenceType = null;
  strSub1 = null;
  // Also release the "theType" handles assigned in setUp(); these were
  // previously left set, keeping the committed type system reachable.
  theTypeType = null;
  theStringFeature = null;
  theByteFeature = null;
  theShortFeature = null;
  theByteArrayFeature = null;
  theShortArrayFeature = null;
  theLongArrayFeature = null;
  theLongFeature = null;
  indexes = null;
  typeSystem = null;
}
/**
 * Builds and commits a fresh CAS whose type system contains the token /
 * sentence test types plus "theType" with its heap-exercising features,
 * then returns the CAS's initial view as a CASMgr.
 *
 * @return the initial view of the newly created CAS
 * @throws CASException on CAS creation problems
 */
private static CASMgr initCAS() throws CASException {
  CASMgr newCas = CASFactory.createCAS();
  try {
    CasCreationUtils.setupTypeSystem(newCas, (TypeSystemDescription) null);
  } catch (ResourceInitializationException e) {
    e.printStackTrace();
  }
  // Build the writable type system.  NOTE: the order of addType/addFeature
  // calls is preserved from the original; it determines internal type codes.
  TypeSystemMgr tsMgr = newCas.getTypeSystemMgr();
  Type top = tsMgr.getTopType();
  Type annot = tsMgr.getType(CAS.TYPE_NAME_ANNOTATION);
  tsMgr.addType(SENT_TYPE, annot);
  Type token = tsMgr.addType(TOKEN_TYPE, annot);
  Type tokenKind = tsMgr.addType(TOKEN_TYPE_TYPE, top);
  tsMgr.addType(WORD_TYPE, tokenKind);
  tsMgr.addType(SEP_TYPE, tokenKind);
  tsMgr.addType(EOS_TYPE, tokenKind);
  tsMgr.addFeature(TOKEN_TYPE_FEAT, token, tokenKind);
  tsMgr.addStringSubtype(STRING_SUBTYPE_1, STR_1_VALS);
  // Built-in range types for "theType" features.
  Type stringT = tsMgr.getType(CAS.TYPE_NAME_STRING);
  Type byteT = tsMgr.getType(CAS.TYPE_NAME_BYTE);
  Type byteArrayT = tsMgr.getType(CAS.TYPE_NAME_BYTE_ARRAY);
  Type shortT = tsMgr.getType(CAS.TYPE_NAME_SHORT);
  Type shortArrayT = tsMgr.getType(CAS.TYPE_NAME_SHORT_ARRAY);
  Type longArrayT = tsMgr.getType(CAS.TYPE_NAME_LONG_ARRAY);
  Type longT = tsMgr.getType(CAS.TYPE_NAME_LONG);
  Type theType = tsMgr.addType(OSTR_TYPE, annot);
  tsMgr.addFeature(OSTR_TYPE_FEAT, theType, stringT);
  tsMgr.addFeature(OBYTE_TYPE_FEAT, theType, byteT);
  tsMgr.addFeature(OSHORT_TYPE_FEAT, theType, shortT);
  tsMgr.addFeature(OBYTEA_TYPE_FEAT, theType, byteArrayT);
  tsMgr.addFeature(OSHORTA_TYPE_FEAT, theType, shortArrayT);
  tsMgr.addFeature(OLONGA_TYPE_FEAT, theType, longArrayT);
  tsMgr.addFeature(OLONG_TYPE_FEAT, theType, longT);
  // Commit the type system, create the base indexes, commit the index repo.
  ((CASImpl) newCas).commitTypeSystem();
  try {
    newCas.initCASIndexes();
  } catch (CASException e) {
    e.printStackTrace();
  }
  newCas.getIndexRepositoryMgr().commit();
  // Creating/fetching the current view also creates the default text Sofa.
  return (CASMgr) newCas.getCAS().getCurrentView();
}
/**
 * Checks that reset() works normally, that reset() throws
 * CASAdminException.FLUSH_DISABLED while reset is disabled, and that
 * re-enabling restores normal behavior.
 */
public void testReset() {
  cas.reset();
  casMgr.enableReset(false);
  boolean sawFlushDisabled = false;
  try {
    cas.reset();
  } catch (CASAdminException e) {
    assertEquals(CASAdminException.FLUSH_DISABLED, e.getError());
    sawFlushDisabled = true;
  }
  assertTrue(sawFlushDisabled);
  casMgr.enableReset(true);
  cas.reset();
}
/**
 * Tokenizes the current document text into Token annotations and adds each
 * to the index repository.  Each token's "type" feature is set to one
 * shared Word / Separator / EndOfSentence FS according to the tokenizer's
 * classification; whitespace tokens are created but left untyped.
 *
 * @throws Exception declared for signature compatibility; an unknown token
 *           type code raises an IllegalStateException naming the code.
 */
private void tokenize() throws Exception {
  // One shared FS per token kind, referenced by every token of that kind.
  FeatureStructure wordFS = cas.createFS(wordType);
  FeatureStructure sepFS = cas.createFS(separatorType);
  FeatureStructure eosFS = cas.createFS(eosType);
  String text = cas.getDocumentText();
  TextStringTokenizer tokenizer = new TextStringTokenizer(text);
  tokenizer.setSeparators("/-*&@");
  tokenizer.addWhitespaceChars(",");
  tokenizer.setEndOfSentenceChars(".!?");
  tokenizer.setShowWhitespace(false);
  while (tokenizer.isValid()) {
    AnnotationFS tokenAnnot = cas.createAnnotation(tokenType, tokenizer.getTokenStart(),
            tokenizer.getTokenEnd());
    int tokenTypeCode = tokenizer.getTokenType();
    switch (tokenTypeCode) {
      case TextStringTokenizer.EOS:
        tokenAnnot.setFeatureValue(tokenTypeFeature, eosFS);
        break;
      case TextStringTokenizer.SEP:
        tokenAnnot.setFeatureValue(tokenTypeFeature, sepFS);
        break;
      case TextStringTokenizer.WSP:
        // Whitespace tokens get no token-type feature.
        break;
      case TextStringTokenizer.WCH:
        tokenAnnot.setFeatureValue(tokenTypeFeature, wordFS);
        break;
      default:
        // Was a generic Exception("Something went wrong..."); report the code.
        throw new IllegalStateException("Unexpected token type code: " + tokenTypeCode);
    }
    cas.getIndexRepository().addFS(tokenAnnot);
    tokenizer.setToNext();
  }
}
/**
 * Very primitive sentence detection: a sentence starts at the first token
 * (or the first token after an end-of-sentence token) and ends at the next
 * token whose type feature is EndOfSentence.  Each detected sentence is
 * created as a Sentence FS and added to the index repository.
 *
 * @throws CASException declared for signature compatibility
 */
private void createSentences() throws CASException {
  FSIndexRepository indexRepository = cas.getIndexRepository();
  Iterator<String> labelIt = indexRepository.getLabels();
  assertTrue(labelIt != null);
  // Iterate over all tokens in the standard annotation index.
  FSIndex<AnnotationFS> tokenIndex = cas.getAnnotationIndex(tokenType);
  FSIterator<AnnotationFS> it = tokenIndex.iterator();
  it.moveToFirst();
  boolean lookForStart = true;
  int start = 0; // initialized only to pacify the compiler
  while (it.isValid()) {
    if (lookForStart) {
      // Start of a sentence: the start offset of the current token.
      start = it.get().getIntValue(startFeature);
      lookForStart = false;
    } else {
      // End of a sentence: a token whose type feature is EndOfSentence.
      FeatureStructure tokenFS = it.get();
      if (tokenFS.getFeatureValue(tokenTypeFeature).getType() == eosType) {
        int end = tokenFS.getIntValue(endFeature);
        FeatureStructure sentFS = cas.createFS(sentenceType);
        sentFS.setIntValue(startFeature, start);
        sentFS.setIntValue(endFeature, end);
        cas.getIndexRepository().addFS(sentFS);
        lookForStart = true;
      }
    }
    it.moveToNext();
  }
}
// (?m) MULTILINE makes $ match just before each line terminator as well as
// at end of input, so the reluctant (.*?$) matches one line at a time
// (without its terminator).
private static final Pattern nlPattern = Pattern.compile("(?m)(.*?$)");
/**
 * End-to-end driver: splits data/moby.txt into documents (at lines starting
 * with ".. &lt;p"), and for each document tokenizes, then repeatedly binary
 * serializes the CAS and reinitializes it from the serialized form, checking
 * that token and sentence counts survive each round trip.
 *
 * @throws Exception passed through to the JUnit runner
 */
public void testMain() throws Exception {
  // Read the test corpus into a String.
  File textFile = JUnitExtension.getFile("data/moby.txt");
  String moby = FileUtils.file2String(textFile);
  // Split into per-section documents at lines starting with ".. <p".
  // StringBuffer (not StringBuilder) is required by Matcher.appendTail here.
  StringBuffer buf = new StringBuffer(10000);
  List<String> docs = new ArrayList<String>();
  Matcher m = nlPattern.matcher(moby);
  while (m.find()) {
    String line = m.group();
    if (line.startsWith(".. <p")) {
      docs.add(buf.toString());
      buf.setLength(0);
    } else {
      buf.append(line + "\n");
    }
  }
  // NOTE(review): no appendReplacement() was ever called, so appendTail()
  // copies from append position 0, i.e. the whole input -- verify intended.
  m.appendTail(buf);
  docs.add(buf.toString());
  buf = null; // release the builder before the serialization loop
  final int numDocs = docs.size();
  final int max = 30; // total number of documents to process
  int docCount = 0;
  while (docCount < max) {
    for (int i = 0; i < numDocs && docCount < max; i++) {
      cas.setDocumentText(docs.get(i));
      tokenize();
      int numTok = cas.getAnnotationIndex(tokenType).size();
      assertTrue(numTok > 0);
      // Round trip 1: serialize after tokenizing; token count must survive.
      CASSerializer cs = Serialization.serializeCAS(cas);
      cas = Serialization.createCAS(casMgr, cs);
      assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
      createSentences();
      int numSent = cas.getAnnotationIndex(sentenceType).size();
      assertTrue(numSent > 0);
      // Round trip 2: serialize after sentence creation.
      cs = Serialization.serializeCAS(cas);
      cas = Serialization.createCAS(casMgr, cs);
      assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
      assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
      // Round trip 3: serialize an unmodified CAS.
      cs = Serialization.serializeCAS(cas);
      cas = Serialization.createCAS(casMgr, cs);
      assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
      assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
      casMgr.reset();
      ++docCount;
    }
  }
}
/**
 * Tests that FSs, indexes, and the string / byte / short / long heaps
 * survive repeated blob (binary) serialization.  Each iteration adds two
 * FSs: the first sets its values through the standard API (string goes into
 * the string list), the second through the low-level API (string goes into
 * the string heap).  The CAS is then serialized, reset, deserialized, and
 * everything created so far is re-verified.
 *
 * @throws Exception passed through to the JUnit runner
 */
public void testBlob() throws Exception {
  String testString = "testString";
  cas.reset();
  LowLevelCAS ll_cas = cas.getLowLevelCAS();
  FSIndexRepository ir = cas.getIndexRepository();
  // Low-level codes for the features set via the low-level API below.
  // (The unused byte-array / short-array feature-code lookups were removed.)
  int ll_strfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theStringFeature);
  int ll_bytefeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theByteFeature);
  int ll_shortfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theShortFeature);
  int ll_longfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theLongFeature);
  for (int cycle = 0; cycle < 10; cycle += 2) {
    // FS 1: values set via the standard CAS API.
    FeatureStructure newFS1 = cas.createFS(theTypeType);
    newFS1.setIntValue(startFeature, cycle);
    newFS1.setIntValue(endFeature, cycle + 1);
    newFS1.setStringValue(theStringFeature, testString);
    newFS1.setByteValue(theByteFeature, (byte) cycle);
    newFS1.setShortValue(theShortFeature, (short) cycle);
    newFS1.setLongValue(theLongFeature, (long) cycle);
    ByteArrayFS newBA1 = cas.createByteArrayFS(1);
    ShortArrayFS newSA1 = cas.createShortArrayFS(1);
    newBA1.set(0, (byte) cycle);
    newSA1.set(0, (short) cycle);
    newFS1.setFeatureValue(theByteArrayFeature, newBA1);
    newFS1.setFeatureValue(theShortArrayFeature, newSA1);
    ir.addFS(newFS1);
    // FS 2: values set via the low-level API.
    FeatureStructure newFS2 = cas.createFS(theTypeType);
    ByteArrayFS newBA2 = cas.createByteArrayFS(1);
    ShortArrayFS newSA2 = cas.createShortArrayFS(1);
    newFS2.setIntValue(startFeature, cycle + 1);
    newFS2.setIntValue(endFeature, cycle + 2);
    ir.addFS(newFS2);
    final int llfs2 = ll_cas.ll_getFSRef(newFS2);
    final int llba2 = ll_cas.ll_getFSRef(newBA2);
    final int llsa2 = ll_cas.ll_getFSRef(newSA2);
    ll_cas.ll_setCharBufferValue(llfs2, ll_strfeatcode,
            testString.toCharArray(), 0, testString.length());
    ll_cas.ll_setByteValue(llfs2, ll_bytefeatcode, (byte) (cycle + 1));
    ll_cas.ll_setShortValue(llfs2, ll_shortfeatcode, (short) (cycle + 1));
    ll_cas.ll_setLongValue(llfs2, ll_longfeatcode, (long) (cycle + 1));
    ll_cas.ll_setByteArrayValue(llba2, 0, (byte) (cycle + 1));
    ll_cas.ll_setShortArrayValue(llsa2, 0, (short) (cycle + 1));
    newFS2.setFeatureValue(theByteArrayFeature, newBA2);
    newFS2.setFeatureValue(theShortArrayFeature, newSA2);
    // NOTE(review): newFS2 was already added above; the second add is kept
    // to preserve the original index contents -- verify it is intentional.
    ir.addFS(newFS2);
    // Serialize, reset, deserialize, then re-verify everything created so far.
    ByteArrayOutputStream fos = new ByteArrayOutputStream();
    Serialization.serializeCAS(cas, fos);
    cas.reset();
    ByteArrayInputStream fis = new ByteArrayInputStream(fos.toByteArray());
    Serialization.deserializeCAS(cas, fis);
    FSIndex<AnnotationFS> idx = cas.getAnnotationIndex(theTypeType);
    FSIterator<AnnotationFS> iter = idx.iterator();
    for (int tc = 0; tc < cycle + 1; tc++) {
      FeatureStructure testFS = iter.get();
      iter.moveToNext();
      assertTrue(tc == testFS.getIntValue(startFeature));
      assertTrue(testString.equals(testFS.getStringValue(theStringFeature)));
      assertTrue(tc == testFS.getByteValue(theByteFeature));
      assertTrue(tc == testFS.getShortValue(theShortFeature));
      assertTrue(tc == testFS.getLongValue(theLongFeature));
      ByteArrayFS ba = (ByteArrayFS) testFS.getFeatureValue(theByteArrayFeature);
      assertTrue(tc == ba.get(0));
      ShortArrayFS sa = (ShortArrayFS) testFS.getFeatureValue(theShortArrayFeature);
      assertTrue(tc == sa.get(0));
    }
  }
}
/**
 * Delta round-trip for array mods: cas (the shared test CAS) gets an FS
 * holding byte, short and long arrays; it is binary (not compressed)
 * serialized into cas2; the array contents are modified in cas2 below a
 * marker; the delta is serialized back into cas; finally cas2 is fully
 * serialized into cas3 and cas is compared against cas3.
 *
 * @throws Exception passed through to the JUnit runner
 */
public void testDeltaBinaryShortLongArrayMods() throws Exception {
CASImpl cas2 = (CASImpl) initCAS();
CASImpl cas3 = (CASImpl) initCAS();
// create byte, short and long arrays and attach them to a new FS
FeatureStructure newFS1 = cas.createFS(theTypeType);
ByteArrayFS newBA1 = cas.createByteArrayFS(1);
ShortArrayFS newSA1 = cas.createShortArrayFS(1);
LongArrayFS newLA1 = cas.createLongArrayFS(1);
newBA1.set(0, (byte)1);
newSA1.set(0, (short)2);
newLA1.set(0, (long)4);
newFS1.setFeatureValue(theByteArrayFeature, newBA1);
newFS1.setFeatureValue(theShortArrayFeature, newSA1);
newFS1.setFeatureValue(theLongArrayFeature, newLA1);
cas.getIndexRepository().addFS(newFS1);
//serialize binary, non compressed, not delta
ByteArrayOutputStream fos = new ByteArrayOutputStream();
Serialization.serializeCAS(cas, fos);
//deserialize into cas2
ByteArrayInputStream fis = new ByteArrayInputStream(fos.toByteArray());
Serialization.deserializeCAS(cas2, fis);
CasComparer.assertEquals(cas, cas2);
//=======================================================================
//create Marker, modify fs, and serialize in delta binary format.
Marker marker = cas2.createMarker();
// modify a value in each of the arrays (a below-the-marker modification)
Iterator<AnnotationFS> typeIterator = cas2.getAnnotationIndex(theTypeType).iterator();
assertTrue(typeIterator.hasNext());
FeatureStructure fsWithArrays = typeIterator.next();
((ByteArrayFS)fsWithArrays.getFeatureValue(theByteArrayFeature)).set(0, (byte) 11);
((ShortArrayFS)fsWithArrays.getFeatureValue(theShortArrayFeature)).set(0, (short) 22);
((LongArrayFS)fsWithArrays.getFeatureValue(theLongArrayFeature)).set(0, (long) 44);
// serialize cas2 in delta format
ByteArrayOutputStream fosDelta = new ByteArrayOutputStream();
Serialization.serializeCAS(cas2, fosDelta, marker);
//======================================================================
//deserialize delta binary into cas (cas1)
ByteArrayInputStream fisDelta = new ByteArrayInputStream(fosDelta.toByteArray());
Serialization.deserializeCAS(cas, fisDelta);
//======================================================================
//serialize complete cas and deserialize into cas3 and compare with cas1.
ByteArrayOutputStream fosFull = new ByteArrayOutputStream();
Serialization.serializeCAS(cas2, fosFull);
ByteArrayInputStream fisFull = new ByteArrayInputStream(fosFull.toByteArray());
Serialization.deserializeCAS(cas3, fisFull);
CasComparer.assertEquals(cas, cas3);
}
/**
 * setup cas1, binary (not compressed) serialize to cas2;
 * modify cas2, binary (not compressed) delta serialize back into cas1;
 * then serialize cas2 binary (not compressed) not delta into cas3, and
 * compare cas1 and cas3.
 *
 * @throws Exception passed through to the JUnit runner
 */
public void testDeltaBlobSerialization() throws Exception {
try {
CAS cas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
indexes);
CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
indexes);
CAS cas3 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
indexes);
// Look up the example-type-system types and features used below.
Type personType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.Person");
Feature componentIdFeat = personType.getFeatureByBaseName("componentId");
Feature confidenceFeat = personType.getFeatureByBaseName("confidence");
Type orgType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.Organization");
Type ownerType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.Owner");
Type entityAnnotType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.EntityAnnotation");
Feature mentionTypeFeat = entityAnnotType.getFeatureByBaseName("mentionType");
Feature argsFeat = ownerType.getFeatureByBaseName("relationArgs");
Type relArgsType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.BinaryRelationArgs");
Feature domainFeat = relArgsType.getFeatureByBaseName("domainValue");
Feature rangeFeat = relArgsType.getFeatureByBaseName("rangeValue");
Type entityType = cas1.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
Feature classesFeat = entityType.getFeatureByBaseName("classes");
Feature linksFeat = entityType.getFeatureByBaseName("links");
Feature canonicalFormFeat = entityType.getFeatureByBaseName("canonicalForm");
Type nonEmptyFsListType = cas1.getTypeSystem().getType(CAS.TYPE_NAME_NON_EMPTY_FS_LIST);
Type emptyFsListType = cas1.getTypeSystem().getType(CAS.TYPE_NAME_EMPTY_FS_LIST);
Feature headFeat = nonEmptyFsListType.getFeatureByBaseName("head");
Feature tailFeat = nonEmptyFsListType.getFeatureByBaseName("tail");
//cas1: initial set of feature structures.
// set document text for the initial view and create Annotations
cas1.setDocumentText("This is a test document in the initial view");
AnnotationFS anAnnot1 = cas1.createAnnotation(cas1.getAnnotationType(), 0, 4);
cas1.getIndexRepository().addFS(anAnnot1);
AnnotationFS anAnnot2 = cas1.createAnnotation(cas1.getAnnotationType(), 5, 6);
cas1.getIndexRepository().addFS(anAnnot2);
AnnotationFS anAnnot3 = cas1.createAnnotation(cas1.getAnnotationType(), 8, 13);
cas1.getIndexRepository().addFS(anAnnot3);
AnnotationFS anAnnot4 = cas1.createAnnotation(cas1.getAnnotationType(), 15, 30);
cas1.getIndexRepository().addFS(anAnnot4);
FSIndex<AnnotationFS> tIndex = cas1.getAnnotationIndex();
assertTrue(tIndex.size() == 5); //doc annot plus 4 annots
// entity with a string-array feature; only slot 0 is filled here, the
// rest are filled after the delta marker is set below
FeatureStructure entityFS = cas1.createFS(entityType);
cas1.getIndexRepository().addFS(entityFS);
StringArrayFS strArrayFS = cas1.createStringArrayFS(5);
strArrayFS.set(0, "class1");
entityFS.setFeatureValue(classesFeat, strArrayFS);
//create listFS and set the link feature
FeatureStructure emptyNode = cas1.createFS(emptyFsListType);
FeatureStructure secondNode = cas1.createFS(nonEmptyFsListType);
secondNode.setFeatureValue(headFeat, anAnnot2);
secondNode.setFeatureValue(tailFeat, emptyNode);
FeatureStructure firstNode = cas1.createFS(nonEmptyFsListType);
firstNode.setFeatureValue(headFeat, anAnnot1);
firstNode.setFeatureValue(tailFeat, secondNode);
entityFS.setFeatureValue(linksFeat, firstNode);
// create a view w/o setting document text (text is set post-marker below)
CAS view1 = cas1.createView("View1");
// create another view, with text and entity annotations
CAS preexistingView = cas1.createView("preexistingView");
String preexistingViewText = "John Smith blah blah blah";
preexistingView.setDocumentText(preexistingViewText);
AnnotationFS person1Annot = createPersonAnnot(preexistingView, 0, 10);
person1Annot.setStringValue(componentIdFeat, "deltacas1");
AnnotationFS person2Annot = createPersonAnnot(preexistingView, 0, 5);
AnnotationFS orgAnnot = preexistingView.createAnnotation(orgType, 16, 24);
preexistingView.addFsToIndexes(orgAnnot);
AnnotationFS ownerAnnot = preexistingView.createAnnotation(ownerType, 0, 24);
preexistingView.addFsToIndexes(ownerAnnot);
FeatureStructure relArgs = cas1.createFS(relArgsType);
relArgs.setFeatureValue(domainFeat, person1Annot);
ownerAnnot.setFeatureValue(argsFeat, relArgs);
//serialize binary, non compressed, not delta
ByteArrayOutputStream fos = new ByteArrayOutputStream();
Serialization.serializeCAS(cas1, fos);
//deserialize into cas2
ByteArrayInputStream fis = new ByteArrayInputStream(fos.toByteArray());
Serialization.deserializeCAS(cas2, fis);
CasComparer.assertEquals(cas1, cas2);
//=======================================================================
//create Marker, add/modify fs, and serialize in delta binary format.
Marker marker = cas2.createMarker();
FSIndex<AnnotationFS> cas2tIndex = cas2.getAnnotationIndex();
CAS cas2preexistingView = cas2.getView("preexistingView");
FSIndex<AnnotationFS> cas2personIndex = cas2preexistingView.getAnnotationIndex(personType);
FSIndex<AnnotationFS> cas2orgIndex = cas2preexistingView.getAnnotationIndex(orgType);
FSIndex<AnnotationFS> cas2ownerIndex = cas2preexistingView.getAnnotationIndex(ownerType);
// create an annotation and add to index
AnnotationFS cas2anAnnot5 = cas2.createAnnotation(cas2.getAnnotationType(), 6, 8);
cas2.getIndexRepository().addFS(cas2anAnnot5);
assertTrue(cas2tIndex.size() == 6); // prev annots and this new one
// set document text of View1
CAS cas2view1 = cas2.getView("View1");
cas2view1.setDocumentText("This is the View1 document.");
//create an annotation in View1
AnnotationFS cas2view1Annot = cas2view1.createAnnotation(cas2.getAnnotationType(), 1, 5);
cas2view1.getIndexRepository().addFS(cas2view1Annot);
FSIndex<AnnotationFS> cas2view1Index = cas2view1.getAnnotationIndex();
assertTrue(cas2view1Index.size() == 2); //document annot and this annot
//modify an existing annotation
Iterator<AnnotationFS> tIndexIter = cas2tIndex.iterator();
AnnotationFS docAnnot = tIndexIter.next(); //doc annot
AnnotationFS modAnnot1 = tIndexIter.next();
AnnotationFS delAnnot = tIndexIter.next();
//modify language feature
Feature languageF = cas2.getDocumentAnnotation().getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_LANGUAGE);
docAnnot.setStringValue(languageF, "en");
//index update - remove, modify the end feature, re-add (reindex)
cas2.getIndexRepository().removeFS(modAnnot1);
Feature endF = cas2.getAnnotationType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
modAnnot1.setIntValue(endF, 4);
cas2.getIndexRepository().addFS(modAnnot1);
//index update - remove annotation from index
cas2.getIndexRepository().removeFS(delAnnot);
//modify FS - string feature and FS feature.
Iterator<AnnotationFS> personIter = cas2personIndex.iterator();
AnnotationFS cas2person1 = personIter.next();
AnnotationFS cas2person2 = personIter.next();
cas2person1.setFloatValue(confidenceFeat, (float) 99.99);
cas2person1.setStringValue(mentionTypeFeat, "FULLNAME");
cas2person2.setStringValue(componentIdFeat, "delataCas2");
cas2person2.setStringValue(mentionTypeFeat, "FIRSTNAME");
Iterator<AnnotationFS> orgIter = cas2orgIndex.iterator();
AnnotationFS cas2orgAnnot = orgIter.next();
cas2orgAnnot.setStringValue(mentionTypeFeat, "ORGNAME");
//modify FS feature
Iterator<AnnotationFS> ownerIter = cas2ownerIndex.iterator();
AnnotationFS cas2ownerAnnot = ownerIter.next();
FeatureStructure cas2relArgs = cas2ownerAnnot.getFeatureValue(argsFeat);
cas2relArgs.setFeatureValue(rangeFeat, cas2orgAnnot);
//Test modification of a nonshared multivalued feature.
//This should serialize the encompassing FS.
Iterator<FeatureStructure> iter = cas2.getIndexRepository().getIndex("testEntityIndex").iterator();
FeatureStructure cas2EntityFS = iter.next();
StringArrayFS cas2strarrayFS = (StringArrayFS) cas2EntityFS.getFeatureValue(classesFeat);
cas2strarrayFS.set(1, "class2");
cas2strarrayFS.set(2, "class3");
cas2strarrayFS.set(3, "class4");
cas2strarrayFS.set(4, "class5");
//add a new (post-marker) node to the FSList
FeatureStructure cas2linksFS = cas2EntityFS.getFeatureValue(linksFeat);
FeatureStructure cas2secondNode = cas2linksFS.getFeatureValue(tailFeat);
FeatureStructure cas2emptyNode = cas2secondNode.getFeatureValue(tailFeat);
FeatureStructure cas2thirdNode = cas2.createFS(nonEmptyFsListType);
cas2thirdNode.setFeatureValue(headFeat, cas2anAnnot5);
cas2thirdNode.setFeatureValue(tailFeat, cas2emptyNode);
cas2secondNode.setFeatureValue(tailFeat, cas2thirdNode);
// serialize cas2 in delta format
ByteArrayOutputStream fosDelta = new ByteArrayOutputStream();
Serialization.serializeCAS(cas2, fosDelta, marker);
//======================================================================
//deserialize delta binary into cas1
ByteArrayInputStream fisDelta = new ByteArrayInputStream(fosDelta.toByteArray());
Serialization.deserializeCAS(cas1, fisDelta);
//======================================================================
//serialize complete cas and deserialize into cas3 and compare with cas1.
ByteArrayOutputStream fosFull = new ByteArrayOutputStream();
Serialization.serializeCAS(cas2, fosFull);
ByteArrayInputStream fisFull = new ByteArrayInputStream(fosFull.toByteArray());
Serialization.deserializeCAS(cas3, fisFull);
CasComparer.assertEquals(cas1, cas3);
} catch (Exception e) {
JUnitExtension.handleException(e);
}
}
/**
 * Verifies that delta binary serialization is rejected when the marker is stale:
 * a marker created before {@link CAS#reset()} is no longer valid, so
 * {@code serializeCAS(cas, out, marker)} must throw a {@link CASRuntimeException}.
 */
public void testDeltaBlobWithInvalidMarker() throws Exception {
  try {
    CAS cas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
        indexes);
    Marker mark1 = cas1.createMarker();
    // Marker mark2 = cas1.createMarker(); // multiple markers not supported, tested in other test case

    // Resetting the CAS invalidates any previously created marker.
    cas1.reset();

    boolean rejected;
    try {
      Serialization.serializeCAS(cas1, new ByteArrayOutputStream(), mark1);
      rejected = false; // serialization unexpectedly succeeded
    } catch (CASRuntimeException e) {
      rejected = true; // expected: stale marker refused
    }
    assertTrue(rejected);
    // serfailed = false;
    // try {
    // ByteArrayOutputStream fos = new ByteArrayOutputStream();
    // Serialization.serializeCAS(cas1, fos, mark2);
    // } catch (CASRuntimeException e) {
    // serfailed = true;
    // }
    // assertTrue(serfailed);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
/**
 * Creates a Person annotation over the given span, adds it to the CAS indexes,
 * and returns it.
 *
 * @param cas   the CAS in which to create the annotation
 * @param begin start offset of the annotation
 * @param end   end offset of the annotation
 * @return the newly created and indexed Person annotation
 */
private AnnotationFS createPersonAnnot(CAS cas, int begin, int end) {
  final Type type = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
  final AnnotationFS annot = cas.createAnnotation(type, begin, end);
  cas.addFsToIndexes(annot);
  return annot;
}
/**
 * Command-line entry point: runs this test class with the text-based JUnit runner.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  junit.textui.TestRunner.run(SerializationReinitTest.class);
}
/**
 * Test for setCAS() (disabled, kept for reference).
 * This test exercises nothing useful anymore: setCAS is no longer implemented
 * and behaves as a no-op, so the test body below is commented out.
 */
// public void testSetCAS() throws Exception {
//
// // Read the document into a String.
// File textFile = JUnitExtension.getFile("data/moby.txt");
// String moby = FileUtils.file2String(textFile);
// // String moby = file2String(System.getProperty("cas.data.test") + "moby.txt");
// String line;
//// BufferedReader br = new BufferedReader(new StringReader(moby));
// StringBuffer buf = new StringBuffer(10000);
// List<String> docs = new ArrayList<String>();
// Matcher m = nlPattern.matcher(moby);
// while (m.find()) {
// line = m.group();
// if (line.startsWith(".. <p")) {
// docs.add(buf.toString());
// buf.setLength(0);
// } else {
// buf.append(line + "\n");
// }
// }
//
//// while ((line = br.readLine()) != null) {
//// if (line.startsWith(".. <p")) {
//// docs.add(buf.toString());
//// buf = new StringBuffer();
//// } else {
//// buf.append(line + "\n");
//// }
//// }
//// docs.add(buf.toString());
// m.appendTail(buf);
// docs.add(buf.toString());
// buf = null;
//
// final int numDocs = docs.size();
// final int max = 30;
// int docCount = 0;
// long overallTime = System.currentTimeMillis();
// int numTok, numSent;
// while (docCount < max) {
// for (int i = 0; i < numDocs && docCount < max; i++) {
// // System.out.println("Processing document: " + i);
// // Set document text in first CAS.
// cas.setDocumentText(docs.get(i));
//
// tokenize();
// numTok = cas.getAnnotationIndex(tokenType).size();
// assertTrue(numTok > 0);
// // System.out.println(" Number of tokens: " + numTok);
//
// // System.out.println("Serializing...");
// // CASMgr casMgr = CASFactory.createCAS();
// // casMgr.setCAS(cas);
// // cas = (CAS) casMgr.getCAS();
// /* setCAS is no longer used or implemented
// * You cannot use this method to set up a new cas with a copy of
// * the contents of another cas, including its indexes
// CASMgr realCasMgr = CASFactory.createCAS(cas.getTypeSystem());
// realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
// cas = ((CASImpl) realCasMgr).getCurrentView();
// casMgr = (CASMgr) cas;
// */
//
// assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
//
// createSentences();
// numSent = cas.getAnnotationIndex(sentenceType).size();
// assertTrue(numSent > 0);
// // System.out.println(" Number of sentences: " + numSent);
//
// // System.out.println("Serializing...");
// // casMgr = CASFactory.createCAS();
// // casMgr.setCAS(cas);
// // cas = (CAS) casMgr.getCAS();
// /* setCAS is no longer used or implemented
// * You cannot use this method to set up a new cas with a copy of
// * the contents of another cas, including its indexes
// realCasMgr = CASFactory.createCAS();
// realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
// cas = ((CASImpl) realCasMgr).getCurrentView();
// casMgr = (CASMgr) cas;
// */
//
// assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
// assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
//
// // System.out.println("Serializing...");
// // casMgr = CASFactory.createCAS();
// // casMgr.setCAS(cas);
// // cas = (CAS) casMgr.getCAS();
// /* setCAS is no longer used or implemented
// * You cannot use this method to set up a new cas with a copy of
// * the contents of another cas, including its indexes
// realCasMgr = CASFactory.createCAS();
// realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
// cas = ((CASImpl) realCasMgr).getCurrentView();
// casMgr = (CASMgr) cas;
// */
//
// assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
// assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
// // System.out.println(" Verify: " + numTok + " tokens, " + numSent + " sentences.");
//
// casMgr.reset();
//
// ++docCount;
// }
// // System.out.println("Number of documents processed: " + docCount);
// }
// overallTime = System.currentTimeMillis() - overallTime;
// // System.out.println("Time taken over all: " + new TimeSpan(overallTime));
//
// }
}