/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.TestCase;
import org.apache.uima.UIMAFramework;
import org.apache.uima.cas.ByteArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.FSIndexRepository;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.LongArrayFS;
import org.apache.uima.cas.Marker;
import org.apache.uima.cas.ShortArrayFS;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.admin.CASAdminException;
import org.apache.uima.cas.admin.CASFactory;
import org.apache.uima.cas.admin.CASMgr;
import org.apache.uima.cas.admin.TypeSystemMgr;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.cas.impl.CASSerializer;
import org.apache.uima.cas.impl.LowLevelCAS;
import org.apache.uima.cas.impl.Serialization;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas_data.impl.CasComparer;
import org.apache.uima.internal.util.TextStringTokenizer;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.TypePriorities_impl;
import org.apache.uima.test.junit_extension.JUnitExtension;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;
/**
* Test for binary serialization and deserialization (no compression)
*
*/
public class SerializationReinitTest extends TestCase {
// ---- Names of the types and features created programmatically in initCAS() ----
public static final String TOKEN_TYPE = "Token";
// Base name of the Token "type" feature.
public static final String TOKEN_TYPE_FEAT = "type";
// Fully qualified feature name, e.g. "Token:type".
public static final String TOKEN_TYPE_FEAT_Q = TOKEN_TYPE + TypeSystem.FEATURE_SEPARATOR
+ TOKEN_TYPE_FEAT;
public static final String TOKEN_TYPE_TYPE = "TokenType";
public static final String WORD_TYPE = "Word";
public static final String SEP_TYPE = "Separator";
public static final String EOS_TYPE = "EndOfSentence";
public static final String SENT_TYPE = "Sentence";
public static final String STRING_SUBTYPE_1 = "StringSubtype1";
// Allowed values for the string subtype.
public static final String[] STR_1_VALS = { "test1", "test2" };
// "theType" annotation type and its features; used to exercise the
// string, byte, short and long heaps during (de)serialization.
public static final String OSTR_TYPE = "theType";
public static final String OSTR_TYPE_FEAT = "theString";
public static final String OBYTE_TYPE_FEAT = "theByte";
public static final String OSHORT_TYPE_FEAT = "theShort";
public static final String OBYTEA_TYPE_FEAT = "theByteArray";
public static final String OSHORTA_TYPE_FEAT = "theShortArray";
public static final String OLONGA_TYPE_FEAT = "theLongArray";
public static final String OLONG_TYPE_FEAT = "theLong";
// ---- Per-test handles into the CAS built by initCAS(); populated in setUp() ----
private CASMgr casMgr;
private CAS cas;
private Type wordType;
private Type separatorType;
private Type eosType;
private Type tokenType;
private Feature tokenTypeFeature;
private Type sentenceType;
private Feature startFeature;
private Feature endFeature;
private Type strSub1;
private Type theTypeType;
private Feature theStringFeature;
private Feature theByteFeature;
private Feature theShortFeature;
private Feature theByteArrayFeature;
private Feature theShortArrayFeature;
private Feature theLongArrayFeature;
private Feature theLongFeature;
// ---- Parsed from ExampleCas/testTypeSystem.xml and testIndexes.xml in setUp() ----
private FsIndexDescription[] indexes;
private TypeSystemDescription typeSystem;
/**
 * JUnit 3 style constructor.
 *
 * @param arg the name of the test method to run
 */
public SerializationReinitTest(String arg) {
super(arg);
}
/**
 * Sets up two type systems: one defined via API calls in {@link #initCAS()}
 * (stored into {@link #casMgr} / {@link #cas}, with handles to its types and
 * features cached in fields), and one parsed from ExampleCas/testTypeSystem.xml
 * and testIndexes.xml (stored into {@link #typeSystem} and {@link #indexes}).
 *
 * @see junit.framework.TestCase#setUp()
 */
public void setUp() throws Exception {
super.setUp();
casMgr = initCAS();
cas = (CASImpl)casMgr;
TypeSystem ts = cas.getTypeSystem();
// Cache handles to the programmatically defined types and features.
wordType = ts.getType(WORD_TYPE);
separatorType = ts.getType(SEP_TYPE);
eosType = ts.getType(EOS_TYPE);
tokenType = ts.getType(TOKEN_TYPE);
tokenTypeFeature = ts.getFeatureByFullName(TOKEN_TYPE_FEAT_Q);
startFeature = ts.getFeatureByFullName(CAS.FEATURE_FULL_NAME_BEGIN);
endFeature = ts.getFeatureByFullName(CAS.FEATURE_FULL_NAME_END);
sentenceType = ts.getType(SENT_TYPE);
strSub1 = ts.getType(STRING_SUBTYPE_1);
assertTrue(strSub1 != null);
// Handles to "theType" and its heap-exercising features.
theTypeType = ts.getType(OSTR_TYPE);
theStringFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSTR_TYPE_FEAT);
theByteFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OBYTE_TYPE_FEAT);
theByteArrayFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OBYTEA_TYPE_FEAT);
theShortFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSHORT_TYPE_FEAT);
theShortArrayFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OSHORTA_TYPE_FEAT);
theLongFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OLONG_TYPE_FEAT);
theLongArrayFeature = ts.getFeatureByFullName(OSTR_TYPE + TypeSystem.FEATURE_SEPARATOR + OLONGA_TYPE_FEAT);
// Parse the XML-defined type system and index definitions.
File typeSystemFile = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
File indexesFile = JUnitExtension.getFile("ExampleCas/testIndexes.xml");
typeSystem = UIMAFramework.getXMLParser().parseTypeSystemDescription(
new XMLInputSource(typeSystemFile));
indexes = UIMAFramework.getXMLParser().parseFsIndexCollection(new XMLInputSource(indexesFile))
.getFsIndexes();
}
/**
 * Clears all per-test fields so the (large) CAS objects can be garbage
 * collected between tests (JUnit 3 keeps TestCase instances alive for the
 * whole suite run).
 */
public void tearDown() {
  casMgr = null;
  cas = null;
  wordType = null;
  separatorType = null;
  eosType = null;
  tokenType = null;
  tokenTypeFeature = null;
  startFeature = null;
  endFeature = null;
  sentenceType = null;
  strSub1 = null;
  // Also release the "theType" handles assigned in setUp(); these were
  // previously left set, keeping the committed type system reachable.
  theTypeType = null;
  theStringFeature = null;
  theByteFeature = null;
  theShortFeature = null;
  theByteArrayFeature = null;
  theShortArrayFeature = null;
  theLongArrayFeature = null;
  theLongFeature = null;
  indexes = null;
  typeSystem = null;
}
/**
 * Builds and commits a fresh CAS whose type system contains the token /
 * sentence test types plus "theType" with its heap-exercising features,
 * then returns the CAS's initial view as a CASMgr.
 *
 * @return the initial view of the newly created CAS
 * @throws CASException on CAS creation problems
 */
private static CASMgr initCAS() throws CASException {
  CASMgr newCas = CASFactory.createCAS();
  try {
    CasCreationUtils.setupTypeSystem(newCas, (TypeSystemDescription) null);
  } catch (ResourceInitializationException e) {
    e.printStackTrace();
  }
  // Build the writable type system.  NOTE: the order of addType/addFeature
  // calls is preserved from the original; it determines internal type codes.
  TypeSystemMgr tsMgr = newCas.getTypeSystemMgr();
  Type top = tsMgr.getTopType();
  Type annot = tsMgr.getType(CAS.TYPE_NAME_ANNOTATION);
  tsMgr.addType(SENT_TYPE, annot);
  Type token = tsMgr.addType(TOKEN_TYPE, annot);
  Type tokenKind = tsMgr.addType(TOKEN_TYPE_TYPE, top);
  tsMgr.addType(WORD_TYPE, tokenKind);
  tsMgr.addType(SEP_TYPE, tokenKind);
  tsMgr.addType(EOS_TYPE, tokenKind);
  tsMgr.addFeature(TOKEN_TYPE_FEAT, token, tokenKind);
  tsMgr.addStringSubtype(STRING_SUBTYPE_1, STR_1_VALS);
  // Built-in range types for "theType" features.
  Type stringT = tsMgr.getType(CAS.TYPE_NAME_STRING);
  Type byteT = tsMgr.getType(CAS.TYPE_NAME_BYTE);
  Type byteArrayT = tsMgr.getType(CAS.TYPE_NAME_BYTE_ARRAY);
  Type shortT = tsMgr.getType(CAS.TYPE_NAME_SHORT);
  Type shortArrayT = tsMgr.getType(CAS.TYPE_NAME_SHORT_ARRAY);
  Type longArrayT = tsMgr.getType(CAS.TYPE_NAME_LONG_ARRAY);
  Type longT = tsMgr.getType(CAS.TYPE_NAME_LONG);
  Type theType = tsMgr.addType(OSTR_TYPE, annot);
  tsMgr.addFeature(OSTR_TYPE_FEAT, theType, stringT);
  tsMgr.addFeature(OBYTE_TYPE_FEAT, theType, byteT);
  tsMgr.addFeature(OSHORT_TYPE_FEAT, theType, shortT);
  tsMgr.addFeature(OBYTEA_TYPE_FEAT, theType, byteArrayT);
  tsMgr.addFeature(OSHORTA_TYPE_FEAT, theType, shortArrayT);
  tsMgr.addFeature(OLONGA_TYPE_FEAT, theType, longArrayT);
  tsMgr.addFeature(OLONG_TYPE_FEAT, theType, longT);
  // Commit the type system, create the base indexes, commit the index repo.
  ((CASImpl) newCas).commitTypeSystem();
  try {
    newCas.initCASIndexes();
  } catch (CASException e) {
    e.printStackTrace();
  }
  newCas.getIndexRepositoryMgr().commit();
  // Creating/fetching the current view also creates the default text Sofa.
  return (CASMgr) newCas.getCAS().getCurrentView();
}
/**
 * Checks that reset() works normally, that reset() throws
 * CASAdminException.FLUSH_DISABLED while reset is disabled, and that
 * re-enabling restores normal behavior.
 */
public void testReset() {
  cas.reset();
  casMgr.enableReset(false);
  boolean sawFlushDisabled = false;
  try {
    cas.reset();
  } catch (CASAdminException e) {
    assertEquals(CASAdminException.FLUSH_DISABLED, e.getError());
    sawFlushDisabled = true;
  }
  assertTrue(sawFlushDisabled);
  casMgr.enableReset(true);
  cas.reset();
}
/**
 * Tokenizes the current document text into Token annotations and adds each
 * to the index repository.  Each token's "type" feature is set to one
 * shared Word / Separator / EndOfSentence FS according to the tokenizer's
 * classification; whitespace tokens are created but left untyped.
 *
 * @throws Exception declared for signature compatibility; an unknown token
 *           type code raises an IllegalStateException naming the code.
 */
private void tokenize() throws Exception {
  // One shared FS per token kind, referenced by every token of that kind.
  FeatureStructure wordFS = cas.createFS(wordType);
  FeatureStructure sepFS = cas.createFS(separatorType);
  FeatureStructure eosFS = cas.createFS(eosType);
  String text = cas.getDocumentText();
  TextStringTokenizer tokenizer = new TextStringTokenizer(text);
  tokenizer.setSeparators("/-*&@");
  tokenizer.addWhitespaceChars(",");
  tokenizer.setEndOfSentenceChars(".!?");
  tokenizer.setShowWhitespace(false);
  while (tokenizer.isValid()) {
    AnnotationFS tokenAnnot = cas.createAnnotation(tokenType, tokenizer.getTokenStart(),
            tokenizer.getTokenEnd());
    int tokenTypeCode = tokenizer.getTokenType();
    switch (tokenTypeCode) {
      case TextStringTokenizer.EOS:
        tokenAnnot.setFeatureValue(tokenTypeFeature, eosFS);
        break;
      case TextStringTokenizer.SEP:
        tokenAnnot.setFeatureValue(tokenTypeFeature, sepFS);
        break;
      case TextStringTokenizer.WSP:
        // Whitespace tokens get no token-type feature.
        break;
      case TextStringTokenizer.WCH:
        tokenAnnot.setFeatureValue(tokenTypeFeature, wordFS);
        break;
      default:
        // Was a generic Exception("Something went wrong..."); report the code.
        throw new IllegalStateException("Unexpected token type code: " + tokenTypeCode);
    }
    cas.getIndexRepository().addFS(tokenAnnot);
    tokenizer.setToNext();
  }
}
/**
 * Very primitive sentence detection: a sentence starts at the first token
 * (or the first token after an end-of-sentence token) and ends at the next
 * token whose type feature is EndOfSentence.  Each detected sentence is
 * created as a Sentence FS and added to the index repository.
 *
 * @throws CASException declared for signature compatibility
 */
private void createSentences() throws CASException {
  FSIndexRepository indexRepository = cas.getIndexRepository();
  Iterator<String> labelIt = indexRepository.getLabels();
  assertTrue(labelIt != null);
  // Iterate over all tokens in the standard annotation index.
  FSIndex<AnnotationFS> tokenIndex = cas.getAnnotationIndex(tokenType);
  FSIterator<AnnotationFS> it = tokenIndex.iterator();
  it.moveToFirst();
  boolean lookForStart = true;
  int start = 0; // initialized only to pacify the compiler
  while (it.isValid()) {
    if (lookForStart) {
      // Start of a sentence: the start offset of the current token.
      start = it.get().getIntValue(startFeature);
      lookForStart = false;
    } else {
      // End of a sentence: a token whose type feature is EndOfSentence.
      FeatureStructure tokenFS = it.get();
      if (tokenFS.getFeatureValue(tokenTypeFeature).getType() == eosType) {
        int end = tokenFS.getIntValue(endFeature);
        FeatureStructure sentFS = cas.createFS(sentenceType);
        sentFS.setIntValue(startFeature, start);
        sentFS.setIntValue(endFeature, end);
        cas.getIndexRepository().addFS(sentFS);
        lookForStart = true;
      }
    }
    it.moveToNext();
  }
}
// (?m) MULTILINE makes $ match just before each line terminator as well as
// at end of input, so the reluctant (.*?$) matches one line at a time
// (without its terminator).
private static final Pattern nlPattern = Pattern.compile("(?m)(.*?$)");
/**
 * End-to-end driver: splits data/moby.txt into documents (at lines starting
 * with ".. &lt;p"), and for each document tokenizes, then repeatedly binary
 * serializes the CAS and reinitializes it from the serialized form, checking
 * that token and sentence counts survive each round trip.
 *
 * @throws Exception passed through to the JUnit runner
 */
public void testMain() throws Exception {
  // Read the test corpus into a String.
  File textFile = JUnitExtension.getFile("data/moby.txt");
  String moby = FileUtils.file2String(textFile);
  // Split into per-section documents at lines starting with ".. <p".
  // StringBuffer (not StringBuilder) is required by Matcher.appendTail here.
  StringBuffer buf = new StringBuffer(10000);
  List<String> docs = new ArrayList<String>();
  Matcher m = nlPattern.matcher(moby);
  while (m.find()) {
    String line = m.group();
    if (line.startsWith(".. <p")) {
      docs.add(buf.toString());
      buf.setLength(0);
    } else {
      buf.append(line + "\n");
    }
  }
  // NOTE(review): no appendReplacement() was ever called, so appendTail()
  // copies from append position 0, i.e. the whole input -- verify intended.
  m.appendTail(buf);
  docs.add(buf.toString());
  buf = null; // release the builder before the serialization loop
  final int numDocs = docs.size();
  final int max = 30; // total number of documents to process
  int docCount = 0;
  while (docCount < max) {
    for (int i = 0; i < numDocs && docCount < max; i++) {
      cas.setDocumentText(docs.get(i));
      tokenize();
      int numTok = cas.getAnnotationIndex(tokenType).size();
      assertTrue(numTok > 0);
      // Round trip 1: serialize after tokenizing; token count must survive.
      CASSerializer cs = Serialization.serializeCAS(cas);
      cas = Serialization.createCAS(casMgr, cs);
      assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
      createSentences();
      int numSent = cas.getAnnotationIndex(sentenceType).size();
      assertTrue(numSent > 0);
      // Round trip 2: serialize after sentence creation.
      cs = Serialization.serializeCAS(cas);
      cas = Serialization.createCAS(casMgr, cs);
      assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
      assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
      // Round trip 3: serialize an unmodified CAS.
      cs = Serialization.serializeCAS(cas);
      cas = Serialization.createCAS(casMgr, cs);
      assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
      assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
      casMgr.reset();
      ++docCount;
    }
  }
}
/**
 * Tests that FSs, indexes, and the string / byte / short / long heaps
 * survive repeated blob (binary) serialization.  Each iteration adds two
 * FSs: the first sets its values through the standard API (string goes into
 * the string list), the second through the low-level API (string goes into
 * the string heap).  The CAS is then serialized, reset, deserialized, and
 * everything created so far is re-verified.
 *
 * @throws Exception passed through to the JUnit runner
 */
public void testBlob() throws Exception {
  String testString = "testString";
  cas.reset();
  LowLevelCAS ll_cas = cas.getLowLevelCAS();
  FSIndexRepository ir = cas.getIndexRepository();
  // Low-level codes for the features set via the low-level API below.
  // (The unused byte-array / short-array feature-code lookups were removed.)
  int ll_strfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theStringFeature);
  int ll_bytefeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theByteFeature);
  int ll_shortfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theShortFeature);
  int ll_longfeatcode = ll_cas.ll_getTypeSystem().ll_getCodeForFeature(theLongFeature);
  for (int cycle = 0; cycle < 10; cycle += 2) {
    // FS 1: values set via the standard CAS API.
    FeatureStructure newFS1 = cas.createFS(theTypeType);
    newFS1.setIntValue(startFeature, cycle);
    newFS1.setIntValue(endFeature, cycle + 1);
    newFS1.setStringValue(theStringFeature, testString);
    newFS1.setByteValue(theByteFeature, (byte) cycle);
    newFS1.setShortValue(theShortFeature, (short) cycle);
    newFS1.setLongValue(theLongFeature, (long) cycle);
    ByteArrayFS newBA1 = cas.createByteArrayFS(1);
    ShortArrayFS newSA1 = cas.createShortArrayFS(1);
    newBA1.set(0, (byte) cycle);
    newSA1.set(0, (short) cycle);
    newFS1.setFeatureValue(theByteArrayFeature, newBA1);
    newFS1.setFeatureValue(theShortArrayFeature, newSA1);
    ir.addFS(newFS1);
    // FS 2: values set via the low-level API.
    FeatureStructure newFS2 = cas.createFS(theTypeType);
    ByteArrayFS newBA2 = cas.createByteArrayFS(1);
    ShortArrayFS newSA2 = cas.createShortArrayFS(1);
    newFS2.setIntValue(startFeature, cycle + 1);
    newFS2.setIntValue(endFeature, cycle + 2);
    ir.addFS(newFS2);
    final int llfs2 = ll_cas.ll_getFSRef(newFS2);
    final int llba2 = ll_cas.ll_getFSRef(newBA2);
    final int llsa2 = ll_cas.ll_getFSRef(newSA2);
    ll_cas.ll_setCharBufferValue(llfs2, ll_strfeatcode,
            testString.toCharArray(), 0, testString.length());
    ll_cas.ll_setByteValue(llfs2, ll_bytefeatcode, (byte) (cycle + 1));
    ll_cas.ll_setShortValue(llfs2, ll_shortfeatcode, (short) (cycle + 1));
    ll_cas.ll_setLongValue(llfs2, ll_longfeatcode, (long) (cycle + 1));
    ll_cas.ll_setByteArrayValue(llba2, 0, (byte) (cycle + 1));
    ll_cas.ll_setShortArrayValue(llsa2, 0, (short) (cycle + 1));
    newFS2.setFeatureValue(theByteArrayFeature, newBA2);
    newFS2.setFeatureValue(theShortArrayFeature, newSA2);
    // NOTE(review): newFS2 was already added above; the second add is kept
    // to preserve the original index contents -- verify it is intentional.
    ir.addFS(newFS2);
    // Serialize, reset, deserialize, then re-verify everything created so far.
    ByteArrayOutputStream fos = new ByteArrayOutputStream();
    Serialization.serializeCAS(cas, fos);
    cas.reset();
    ByteArrayInputStream fis = new ByteArrayInputStream(fos.toByteArray());
    Serialization.deserializeCAS(cas, fis);
    FSIndex<AnnotationFS> idx = cas.getAnnotationIndex(theTypeType);
    FSIterator<AnnotationFS> iter = idx.iterator();
    for (int tc = 0; tc < cycle + 1; tc++) {
      FeatureStructure testFS = iter.get();
      iter.moveToNext();
      assertTrue(tc == testFS.getIntValue(startFeature));
      assertTrue(testString.equals(testFS.getStringValue(theStringFeature)));
      assertTrue(tc == testFS.getByteValue(theByteFeature));
      assertTrue(tc == testFS.getShortValue(theShortFeature));
      assertTrue(tc == testFS.getLongValue(theLongFeature));
      ByteArrayFS ba = (ByteArrayFS) testFS.getFeatureValue(theByteArrayFeature);
      assertTrue(tc == ba.get(0));
      ShortArrayFS sa = (ShortArrayFS) testFS.getFeatureValue(theShortArrayFeature);
      assertTrue(tc == sa.get(0));
    }
  }
}
/**
 * Delta round-trip for array mods: cas (the shared test CAS) gets an FS
 * holding byte, short and long arrays; it is binary (not compressed)
 * serialized into cas2; the array contents are modified in cas2 below a
 * marker; the delta is serialized back into cas; finally cas2 is fully
 * serialized into cas3 and cas is compared against cas3.
 *
 * @throws Exception passed through to the JUnit runner
 */
public void testDeltaBinaryShortLongArrayMods() throws Exception {
CASImpl cas2 = (CASImpl) initCAS();
CASImpl cas3 = (CASImpl) initCAS();
// create byte, short and long arrays and attach them to a new FS
FeatureStructure newFS1 = cas.createFS(theTypeType);
ByteArrayFS newBA1 = cas.createByteArrayFS(1);
ShortArrayFS newSA1 = cas.createShortArrayFS(1);
LongArrayFS newLA1 = cas.createLongArrayFS(1);
newBA1.set(0, (byte)1);
newSA1.set(0, (short)2);
newLA1.set(0, (long)4);
newFS1.setFeatureValue(theByteArrayFeature, newBA1);
newFS1.setFeatureValue(theShortArrayFeature, newSA1);
newFS1.setFeatureValue(theLongArrayFeature, newLA1);
cas.getIndexRepository().addFS(newFS1);
//serialize binary, non compressed, not delta
ByteArrayOutputStream fos = new ByteArrayOutputStream();
Serialization.serializeCAS(cas, fos);
//deserialize into cas2
ByteArrayInputStream fis = new ByteArrayInputStream(fos.toByteArray());
Serialization.deserializeCAS(cas2, fis);
CasComparer.assertEquals(cas, cas2);
//=======================================================================
//create Marker, modify fs, and serialize in delta binary format.
Marker marker = cas2.createMarker();
// modify a value in each of the arrays (a below-the-marker modification)
Iterator<AnnotationFS> typeIterator = cas2.getAnnotationIndex(theTypeType).iterator();
assertTrue(typeIterator.hasNext());
FeatureStructure fsWithArrays = typeIterator.next();
((ByteArrayFS)fsWithArrays.getFeatureValue(theByteArrayFeature)).set(0, (byte) 11);
((ShortArrayFS)fsWithArrays.getFeatureValue(theShortArrayFeature)).set(0, (short) 22);
((LongArrayFS)fsWithArrays.getFeatureValue(theLongArrayFeature)).set(0, (long) 44);
// serialize cas2 in delta format
ByteArrayOutputStream fosDelta = new ByteArrayOutputStream();
Serialization.serializeCAS(cas2, fosDelta, marker);
//======================================================================
//deserialize delta binary into cas (cas1)
ByteArrayInputStream fisDelta = new ByteArrayInputStream(fosDelta.toByteArray());
Serialization.deserializeCAS(cas, fisDelta);
//======================================================================
//serialize complete cas and deserialize into cas3 and compare with cas1.
ByteArrayOutputStream fosFull = new ByteArrayOutputStream();
Serialization.serializeCAS(cas2, fosFull);
ByteArrayInputStream fisFull = new ByteArrayInputStream(fosFull.toByteArray());
Serialization.deserializeCAS(cas3, fisFull);
CasComparer.assertEquals(cas, cas3);
}
/**
 * setup cas1, binary (not compressed) serialize to cas2;
 * modify cas2, binary (not compressed) delta serialize back into cas1;
 * then serialize cas2 binary (not compressed) not delta into cas3, and
 * compare cas1 and cas3.
 *
 * @throws Exception passed through to the JUnit runner
 */
public void testDeltaBlobSerialization() throws Exception {
try {
CAS cas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
indexes);
CAS cas2 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
indexes);
CAS cas3 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
indexes);
// Look up the example-type-system types and features used below.
Type personType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.Person");
Feature componentIdFeat = personType.getFeatureByBaseName("componentId");
Feature confidenceFeat = personType.getFeatureByBaseName("confidence");
Type orgType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.Organization");
Type ownerType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.Owner");
Type entityAnnotType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.EntityAnnotation");
Feature mentionTypeFeat = entityAnnotType.getFeatureByBaseName("mentionType");
Feature argsFeat = ownerType.getFeatureByBaseName("relationArgs");
Type relArgsType = cas1.getTypeSystem().getType(
"org.apache.uima.testTypeSystem.BinaryRelationArgs");
Feature domainFeat = relArgsType.getFeatureByBaseName("domainValue");
Feature rangeFeat = relArgsType.getFeatureByBaseName("rangeValue");
Type entityType = cas1.getTypeSystem().getType("org.apache.uima.testTypeSystem.Entity");
Feature classesFeat = entityType.getFeatureByBaseName("classes");
Feature linksFeat = entityType.getFeatureByBaseName("links");
Feature canonicalFormFeat = entityType.getFeatureByBaseName("canonicalForm");
Type nonEmptyFsListType = cas1.getTypeSystem().getType(CAS.TYPE_NAME_NON_EMPTY_FS_LIST);
Type emptyFsListType = cas1.getTypeSystem().getType(CAS.TYPE_NAME_EMPTY_FS_LIST);
Feature headFeat = nonEmptyFsListType.getFeatureByBaseName("head");
Feature tailFeat = nonEmptyFsListType.getFeatureByBaseName("tail");
//cas1: initial set of feature structures.
// set document text for the initial view and create Annotations
cas1.setDocumentText("This is a test document in the initial view");
AnnotationFS anAnnot1 = cas1.createAnnotation(cas1.getAnnotationType(), 0, 4);
cas1.getIndexRepository().addFS(anAnnot1);
AnnotationFS anAnnot2 = cas1.createAnnotation(cas1.getAnnotationType(), 5, 6);
cas1.getIndexRepository().addFS(anAnnot2);
AnnotationFS anAnnot3 = cas1.createAnnotation(cas1.getAnnotationType(), 8, 13);
cas1.getIndexRepository().addFS(anAnnot3);
AnnotationFS anAnnot4 = cas1.createAnnotation(cas1.getAnnotationType(), 15, 30);
cas1.getIndexRepository().addFS(anAnnot4);
FSIndex<AnnotationFS> tIndex = cas1.getAnnotationIndex();
assertTrue(tIndex.size() == 5); //doc annot plus 4 annots
// entity with a string-array feature; only slot 0 is filled here, the
// rest are filled after the delta marker is set below
FeatureStructure entityFS = cas1.createFS(entityType);
cas1.getIndexRepository().addFS(entityFS);
StringArrayFS strArrayFS = cas1.createStringArrayFS(5);
strArrayFS.set(0, "class1");
entityFS.setFeatureValue(classesFeat, strArrayFS);
//create listFS and set the link feature
FeatureStructure emptyNode = cas1.createFS(emptyFsListType);
FeatureStructure secondNode = cas1.createFS(nonEmptyFsListType);
secondNode.setFeatureValue(headFeat, anAnnot2);
secondNode.setFeatureValue(tailFeat, emptyNode);
FeatureStructure firstNode = cas1.createFS(nonEmptyFsListType);
firstNode.setFeatureValue(headFeat, anAnnot1);
firstNode.setFeatureValue(tailFeat, secondNode);
entityFS.setFeatureValue(linksFeat, firstNode);
// create a view w/o setting document text (text is set post-marker below)
CAS view1 = cas1.createView("View1");
// create another view, with text and entity annotations
CAS preexistingView = cas1.createView("preexistingView");
String preexistingViewText = "John Smith blah blah blah";
preexistingView.setDocumentText(preexistingViewText);
AnnotationFS person1Annot = createPersonAnnot(preexistingView, 0, 10);
person1Annot.setStringValue(componentIdFeat, "deltacas1");
AnnotationFS person2Annot = createPersonAnnot(preexistingView, 0, 5);
AnnotationFS orgAnnot = preexistingView.createAnnotation(orgType, 16, 24);
preexistingView.addFsToIndexes(orgAnnot);
AnnotationFS ownerAnnot = preexistingView.createAnnotation(ownerType, 0, 24);
preexistingView.addFsToIndexes(ownerAnnot);
FeatureStructure relArgs = cas1.createFS(relArgsType);
relArgs.setFeatureValue(domainFeat, person1Annot);
ownerAnnot.setFeatureValue(argsFeat, relArgs);
//serialize binary, non compressed, not delta
ByteArrayOutputStream fos = new ByteArrayOutputStream();
Serialization.serializeCAS(cas1, fos);
//deserialize into cas2
ByteArrayInputStream fis = new ByteArrayInputStream(fos.toByteArray());
Serialization.deserializeCAS(cas2, fis);
CasComparer.assertEquals(cas1, cas2);
//=======================================================================
//create Marker, add/modify fs, and serialize in delta binary format.
Marker marker = cas2.createMarker();
FSIndex<AnnotationFS> cas2tIndex = cas2.getAnnotationIndex();
CAS cas2preexistingView = cas2.getView("preexistingView");
FSIndex<AnnotationFS> cas2personIndex = cas2preexistingView.getAnnotationIndex(personType);
FSIndex<AnnotationFS> cas2orgIndex = cas2preexistingView.getAnnotationIndex(orgType);
FSIndex<AnnotationFS> cas2ownerIndex = cas2preexistingView.getAnnotationIndex(ownerType);
// create an annotation and add to index
AnnotationFS cas2anAnnot5 = cas2.createAnnotation(cas2.getAnnotationType(), 6, 8);
cas2.getIndexRepository().addFS(cas2anAnnot5);
assertTrue(cas2tIndex.size() == 6); // prev annots and this new one
// set document text of View1
CAS cas2view1 = cas2.getView("View1");
cas2view1.setDocumentText("This is the View1 document.");
//create an annotation in View1
AnnotationFS cas2view1Annot = cas2view1.createAnnotation(cas2.getAnnotationType(), 1, 5);
cas2view1.getIndexRepository().addFS(cas2view1Annot);
FSIndex<AnnotationFS> cas2view1Index = cas2view1.getAnnotationIndex();
assertTrue(cas2view1Index.size() == 2); //document annot and this annot
//modify an existing annotation
Iterator<AnnotationFS> tIndexIter = cas2tIndex.iterator();
AnnotationFS docAnnot = tIndexIter.next(); //doc annot
AnnotationFS modAnnot1 = tIndexIter.next();
AnnotationFS delAnnot = tIndexIter.next();
//modify language feature
Feature languageF = cas2.getDocumentAnnotation().getType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_LANGUAGE);
docAnnot.setStringValue(languageF, "en");
//index update - remove, modify the end feature, re-add (reindex)
cas2.getIndexRepository().removeFS(modAnnot1);
Feature endF = cas2.getAnnotationType().getFeatureByBaseName(CAS.FEATURE_BASE_NAME_END);
modAnnot1.setIntValue(endF, 4);
cas2.getIndexRepository().addFS(modAnnot1);
//index update - remove annotation from index
cas2.getIndexRepository().removeFS(delAnnot);
//modify FS - string feature and FS feature.
Iterator<AnnotationFS> personIter = cas2personIndex.iterator();
AnnotationFS cas2person1 = personIter.next();
AnnotationFS cas2person2 = personIter.next();
cas2person1.setFloatValue(confidenceFeat, (float) 99.99);
cas2person1.setStringValue(mentionTypeFeat, "FULLNAME");
cas2person2.setStringValue(componentIdFeat, "delataCas2");
cas2person2.setStringValue(mentionTypeFeat, "FIRSTNAME");
Iterator<AnnotationFS> orgIter = cas2orgIndex.iterator();
AnnotationFS cas2orgAnnot = orgIter.next();
cas2orgAnnot.setStringValue(mentionTypeFeat, "ORGNAME");
//modify FS feature
Iterator<AnnotationFS> ownerIter = cas2ownerIndex.iterator();
AnnotationFS cas2ownerAnnot = ownerIter.next();
FeatureStructure cas2relArgs = cas2ownerAnnot.getFeatureValue(argsFeat);
cas2relArgs.setFeatureValue(rangeFeat, cas2orgAnnot);
//Test modification of a nonshared multivalued feature.
//This should serialize the encompassing FS.
Iterator<FeatureStructure> iter = cas2.getIndexRepository().getIndex("testEntityIndex").iterator();
FeatureStructure cas2EntityFS = iter.next();
StringArrayFS cas2strarrayFS = (StringArrayFS) cas2EntityFS.getFeatureValue(classesFeat);
cas2strarrayFS.set(1, "class2");
cas2strarrayFS.set(2, "class3");
cas2strarrayFS.set(3, "class4");
cas2strarrayFS.set(4, "class5");
//add a new (post-marker) node to the FSList
FeatureStructure cas2linksFS = cas2EntityFS.getFeatureValue(linksFeat);
FeatureStructure cas2secondNode = cas2linksFS.getFeatureValue(tailFeat);
FeatureStructure cas2emptyNode = cas2secondNode.getFeatureValue(tailFeat);
FeatureStructure cas2thirdNode = cas2.createFS(nonEmptyFsListType);
cas2thirdNode.setFeatureValue(headFeat, cas2anAnnot5);
cas2thirdNode.setFeatureValue(tailFeat, cas2emptyNode);
cas2secondNode.setFeatureValue(tailFeat, cas2thirdNode);
// serialize cas2 in delta format
ByteArrayOutputStream fosDelta = new ByteArrayOutputStream();
Serialization.serializeCAS(cas2, fosDelta, marker);
//======================================================================
//deserialize delta binary into cas1
ByteArrayInputStream fisDelta = new ByteArrayInputStream(fosDelta.toByteArray());
Serialization.deserializeCAS(cas1, fisDelta);
//======================================================================
//serialize complete cas and deserialize into cas3 and compare with cas1.
ByteArrayOutputStream fosFull = new ByteArrayOutputStream();
Serialization.serializeCAS(cas2, fosFull);
ByteArrayInputStream fisFull = new ByteArrayInputStream(fosFull.toByteArray());
Serialization.deserializeCAS(cas3, fisFull);
CasComparer.assertEquals(cas1, cas3);
} catch (Exception e) {
JUnitExtension.handleException(e);
}
}
/**
 * Verifies that delta binary serialization is rejected when the marker is stale:
 * a marker created before {@link CAS#reset()} is no longer valid, so
 * {@code serializeCAS(cas, out, marker)} must throw a {@link CASRuntimeException}.
 */
public void testDeltaBlobWithInvalidMarker() throws Exception {
  try {
    CAS cas1 = CasCreationUtils.createCas(typeSystem, new TypePriorities_impl(),
        indexes);
    Marker mark1 = cas1.createMarker();
    // Marker mark2 = cas1.createMarker(); // multiple markers not supported, tested in other test case

    // Resetting the CAS invalidates any previously created marker.
    cas1.reset();

    boolean rejected;
    try {
      Serialization.serializeCAS(cas1, new ByteArrayOutputStream(), mark1);
      rejected = false; // serialization unexpectedly succeeded
    } catch (CASRuntimeException e) {
      rejected = true; // expected: stale marker refused
    }
    assertTrue(rejected);
    // serfailed = false;
    // try {
    // ByteArrayOutputStream fos = new ByteArrayOutputStream();
    // Serialization.serializeCAS(cas1, fos, mark2);
    // } catch (CASRuntimeException e) {
    // serfailed = true;
    // }
    // assertTrue(serfailed);
  } catch (Exception e) {
    JUnitExtension.handleException(e);
  }
}
/**
 * Creates a Person annotation over the given span, adds it to the CAS indexes,
 * and returns it.
 *
 * @param cas   the CAS in which to create the annotation
 * @param begin start offset of the annotation
 * @param end   end offset of the annotation
 * @return the newly created and indexed Person annotation
 */
private AnnotationFS createPersonAnnot(CAS cas, int begin, int end) {
  final Type type = cas.getTypeSystem().getType("org.apache.uima.testTypeSystem.Person");
  final AnnotationFS annot = cas.createAnnotation(type, begin, end);
  cas.addFsToIndexes(annot);
  return annot;
}
/**
 * Command-line entry point: runs this test class with the text-based JUnit runner.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  junit.textui.TestRunner.run(SerializationReinitTest.class);
}
/**
 * Test for setCAS() (disabled, kept for reference).
 * This test exercises nothing useful anymore: setCAS is no longer implemented
 * and behaves as a no-op, so the test body below is commented out.
 */
// public void testSetCAS() throws Exception {
//
// // Read the document into a String.
// File textFile = JUnitExtension.getFile("data/moby.txt");
// String moby = FileUtils.file2String(textFile);
// // String moby = file2String(System.getProperty("cas.data.test") + "moby.txt");
// String line;
//// BufferedReader br = new BufferedReader(new StringReader(moby));
// StringBuffer buf = new StringBuffer(10000);
// List<String> docs = new ArrayList<String>();
// Matcher m = nlPattern.matcher(moby);
// while (m.find()) {
// line = m.group();
// if (line.startsWith(".. <p")) {
// docs.add(buf.toString());
// buf.setLength(0);
// } else {
// buf.append(line + "\n");
// }
// }
//
//// while ((line = br.readLine()) != null) {
//// if (line.startsWith(".. <p")) {
//// docs.add(buf.toString());
//// buf = new StringBuffer();
//// } else {
//// buf.append(line + "\n");
//// }
//// }
//// docs.add(buf.toString());
// m.appendTail(buf);
// docs.add(buf.toString());
// buf = null;
//
// final int numDocs = docs.size();
// final int max = 30;
// int docCount = 0;
// long overallTime = System.currentTimeMillis();
// int numTok, numSent;
// while (docCount < max) {
// for (int i = 0; i < numDocs && docCount < max; i++) {
// // System.out.println("Processing document: " + i);
// // Set document text in first CAS.
// cas.setDocumentText(docs.get(i));
//
// tokenize();
// numTok = cas.getAnnotationIndex(tokenType).size();
// assertTrue(numTok > 0);
// // System.out.println(" Number of tokens: " + numTok);
//
// // System.out.println("Serializing...");
// // CASMgr casMgr = CASFactory.createCAS();
// // casMgr.setCAS(cas);
// // cas = (CAS) casMgr.getCAS();
// /* setCAS is no longer used or implemented
// * You cannot use this method to set up a new cas with a copy of
// * the contents of another cas, including its indexes
// CASMgr realCasMgr = CASFactory.createCAS(cas.getTypeSystem());
// realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
// cas = ((CASImpl) realCasMgr).getCurrentView();
// casMgr = (CASMgr) cas;
// */
//
// assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
//
// createSentences();
// numSent = cas.getAnnotationIndex(sentenceType).size();
// assertTrue(numSent > 0);
// // System.out.println(" Number of sentences: " + numSent);
//
// // System.out.println("Serializing...");
// // casMgr = CASFactory.createCAS();
// // casMgr.setCAS(cas);
// // cas = (CAS) casMgr.getCAS();
// /* setCAS is no longer used or implemented
// * You cannot use this method to set up a new cas with a copy of
// * the contents of another cas, including its indexes
// realCasMgr = CASFactory.createCAS();
// realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
// cas = ((CASImpl) realCasMgr).getCurrentView();
// casMgr = (CASMgr) cas;
// */
//
// assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
// assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
//
// // System.out.println("Serializing...");
// // casMgr = CASFactory.createCAS();
// // casMgr.setCAS(cas);
// // cas = (CAS) casMgr.getCAS();
// /* setCAS is no longer used or implemented
// * You cannot use this method to set up a new cas with a copy of
// * the contents of another cas, including its indexes
// realCasMgr = CASFactory.createCAS();
// realCasMgr.setCAS(((CASImpl) cas).getBaseCAS());
// cas = ((CASImpl) realCasMgr).getCurrentView();
// casMgr = (CASMgr) cas;
// */
//
// assertTrue(numTok == cas.getAnnotationIndex(tokenType).size());
// assertTrue(numSent == cas.getAnnotationIndex(sentenceType).size());
// // System.out.println(" Verify: " + numTok + " tokens, " + numSent + " sentences.");
//
// casMgr.reset();
//
// ++docCount;
// }
// // System.out.println("Number of documents processed: " + docCount);
// }
// overallTime = System.currentTimeMillis() - overallTime;
// // System.out.println("Time taken over all: " + new TimeSpan(overallTime));
//
// }
}