blob: 50722673d9824ea5db17acd30bb1865c226c5210 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.impl;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.Marker;
/**
 * Serialization for CAS. This serializes the state of the CAS, assuming that the type and index
 * information remains constant. <code>CASSerializer</code> objects can be serialized with
 * standard Java serialization.
 *
 * <p>
 * Two styles of use are supported: the <code>addCAS(CASImpl)</code> /
 * <code>addCAS(CASImpl, boolean)</code> methods copy the CAS heaps into the public array fields
 * of this object, while the <code>addCAS(CASImpl, OutputStream ...)</code> methods write a binary
 * blob straight to a stream.
 *
 * @see org.apache.uima.cas.impl.CASMgrSerializer
 */
public class CASSerializer implements Serializable {

  static final long serialVersionUID = -7972011651957420295L;

  // Blob key: the bytes 'U' (85), 'I' (73), 'M' (77), 'A' (65) read as one big-endian int.
  private static final int BLOB_KEY = 0x55494D41;

  // Blob version written for a complete CAS.
  private static final int VERSION_FULL = 1;

  // Blob version written for a delta CAS.
  // Perhaps this should be split into 2 bytes for version and 2 bytes for format.
  private static final int VERSION_DELTA = 2;

  // The heap itself.
  public int[] heapArray = null;

  // Heap metadata. This is not strictly required, the heap can be deserialized
  // without. Must be null if not used.
  public int[] heapMetaData = null;

  // The string table for strings that are feature structure values. Note that
  // the 0th position in the string table should be null and will be ignored.
  public String[] stringTable;

  // All FSs in any index.
  public int[] fsIndex;

  // Copy of the used part of the 8-bit heap.
  public byte[] byteHeapArray;

  // Copy of the used part of the 16-bit heap.
  public short[] shortHeapArray;

  // Copy of the used part of the 64-bit heap.
  public long[] longHeapArray;

  /**
   * Constructor for CASSerializer.
   */
  public CASSerializer() {
    super();
  }

  /**
   * Serialize CAS data without heap-internal meta data. Currently used for serialization to C++.
   *
   * @param casImpl
   *          The CAS to be serialized.
   */
  public void addNoMetaData(CASImpl casImpl) {
    addCAS(casImpl, false);
  }

  /**
   * Add the CAS to be serialized, including the heap meta data. Note that we need the
   * implementation here, the interface is not enough.
   *
   * @param cas
   *          The CAS to be serialized.
   */
  public void addCAS(CASImpl cas) {
    addCAS(cas, true);
  }

  /**
   * Add the CAS to be serialized by copying its heaps, string table and indexed FSs into the
   * fields of this object. Note that we need the implementation here, the interface is not
   * enough.
   *
   * @param cas
   *          The CAS to be serialized.
   * @param addMetaData
   *          <code>true</code> to also capture the heap meta data; <code>false</code> leaves
   *          {@link #heapMetaData} <code>null</code>.
   */
  public void addCAS(CASImpl cas, boolean addMetaData) {
    this.fsIndex = cas.getIndexedFSs();

    final int heapSize = cas.getHeap().getCellsUsed();
    this.heapArray = new int[heapSize];
    System.arraycopy(cas.getHeap().heap, 0, this.heapArray, 0, heapSize);

    // Reset to null when meta data is not requested, so that a reused serializer does not
    // carry over the meta data of a previously added CAS.
    this.heapMetaData = addMetaData ? cas.getHeap().getMetaData() : null;

    this.stringTable = stringArrayListToArray(cas.getStringTable());

    final int byteHeapSize = cas.getByteHeap().getSize();
    this.byteHeapArray = new byte[byteHeapSize];
    System.arraycopy(cas.getByteHeap().heap, 0, this.byteHeapArray, 0, byteHeapSize);

    final int shortHeapSize = cas.getShortHeap().getSize();
    this.shortHeapArray = new short[shortHeapSize];
    System.arraycopy(cas.getShortHeap().heap, 0, this.shortHeapArray, 0, shortHeapSize);

    final int longHeapSize = cas.getLongHeap().getSize();
    this.longHeapArray = new long[longHeapSize];
    System.arraycopy(cas.getLongHeap().heap, 0, this.longHeapArray, 0, longHeapSize);
  }

  /**
   * Serializes the CAS data and writes it to the output stream in the blob format below.
   *
   * <pre>
   * Element Size  Number of
   * (bytes)       Elements   Description
   * ------------  ---------  --------------------------------------
   *      4            1      Blob key = "UIMA" in utf-8
   *      4            1      Version (currently = 1)
   *      4            1      size of 32-bit FS Heap array = s32H
   *      4          s32H     32-bit FS heap array
   *      4            1      size of 16-bit string Heap array = sSH
   *      2           sSH     16-bit string heap array
   *      4            1      size of string Ref Heap array = sSRH
   *      4          2*sSRH   string ref offsets and lengths
   *      4            1      size of FS index array = sFSI
   *      4          sFSI     FS index array
   *      4            1      size of 8-bit Heap array = s8H
   *      1           s8H     8-bit Heap array
   *      4            1      size of 16-bit Heap array = s16H
   *      2          s16H     16-bit Heap array
   *      4            1      size of 64-bit Heap array = s64H
   *      8          s64H     64-bit Heap array
   * </pre>
   *
   * All values are written through a {@link DataOutputStream}, i.e. high byte first. Byte
   * swapping may be needed by a reader if the blob is from C++ -- C++ blob serialization writes
   * data in native byte order. The 16-bit and 8-bit sections are zero-padded to a 4-byte boundary.
   *
   * <p>
   * As a side effect, {@link #fsIndex} is set to the indexed FSs of the CAS.
   *
   * @param cas
   *          The CAS to be serialized.
   * @param ostream
   *          The output stream.
   * @throws CASRuntimeException
   *           (BLOB_SERIALIZATION) if writing to the stream fails.
   */
  public void addCAS(CASImpl cas, OutputStream ostream) {
    try {
      final DataOutputStream dos = new DataOutputStream(ostream);

      // get the indexed FSs
      this.fsIndex = cas.getIndexedFSs();

      // output the key and version number
      writeHeader(dos, VERSION_FULL);

      // output the FS heap
      final int heapSize = cas.getHeap().getCellsUsed();
      dos.writeInt(heapSize);
      writeInts(dos, cas.getHeap().heap, 0, heapSize);

      // output the strings and the string ref heap
      writeStringHeap(cas, cas.getStringHeap().serialize(), dos);

      // output the index FSs
      writeCountedInts(dos, this.fsIndex);

      // 8bit, 16bit and 64bit heaps
      writeByteSection(dos, cas.getByteHeap().heap, 0, cas.getByteHeap().getSize());
      writeShortSection(dos, cas.getShortHeap().heap, 0, cas.getShortHeap().getSize());
      writeLongSection(dos, cas.getLongHeap().heap, 0, cas.getLongHeap().getSize());
    } catch (IOException e) {
      throw blobSerializationFailure(e);
    }
  }

  /**
   * Serializes only new and modified FS and index operations made after the tracking mark is
   * created. Serializes CAS data in the binary Delta format described below and writes it to the
   * output stream.
   *
   * <pre>
   * ElementSize NumberOfElements Description
   * ----------- ---------------- ---------------------------------------------------------
   *      4             1         Blob key = "UIMA" in utf-8 (byte order flag)
   *      4             1         Version (1 = complete cas, 2 = delta cas)
   *      4             1         size of 32-bit heap array = s32H
   *      4           s32H        32-bit FS heap array (new elements)
   *      4             1         size of 16-bit string Heap array = sSH
   *      2            sSH        16-bit string heap array (new strings)
   *      4             1         size of string Ref Heap array = sSRH
   *      4          2*sSRH       string ref offsets and lengths (for new strings)
   *      4             1         number of modified, preexisting 32-bit FS heap elements = sM32H
   *      4          2*sM32H      32-bit heap offset and value (preexisting cells modified)
   *      4             1         size of FS index array = sFSI
   *      4           sFSI        FS index array in Delta format
   *      4             1         size of 8-bit Heap array = s8H
   *      1            s8H        8-bit Heap array (new elements)
   *      4             1         size of 16-bit Heap array = s16H
   *      2           s16H        16-bit Heap array (new elements)
   *      4             1         size of 64-bit Heap array = s64H
   *      8           s64H        64-bit Heap array (new elements)
   *      4             1         number of modified, preexisting 8-bit heap elements = sM8H
   *      4           sM8H        8-bit heap offsets (preexisting cells modified)
   *      1           sM8H        8-bit heap values (preexisting cells modified)
   *      4             1         number of modified, preexisting 16-bit heap elements = sM16H
   *      4           sM16H       16-bit heap offsets (preexisting cells modified)
   *      2           sM16H       16-bit heap values (preexisting cells modified)
   *      4             1         number of modified, preexisting 64-bit heap elements = sM64H
   *      4           sM64H       64-bit heap offsets (preexisting cells modified)
   *      8           sM64H       64-bit heap values (preexisting cells modified)
   * </pre>
   *
   * Every 8-bit and 16-bit value section is zero-padded to a 4-byte boundary.
   *
   * <p>
   * As a side effect, {@link #fsIndex} is set to the delta indexed FSs of the CAS.
   *
   * @param cas
   *          The CAS to be serialized.
   * @param ostream
   *          The output stream.
   * @param trackingMark
   *          The marker after which additions and modifications are serialized; it is cast to
   *          <code>MarkerImpl</code>.
   * @throws CASRuntimeException
   *           (INVALID_MARKER) if the marker is not valid, or (BLOB_SERIALIZATION) if writing to
   *           the stream fails.
   */
  public void addCAS(CASImpl cas, OutputStream ostream, Marker trackingMark) {
    if (!trackingMark.isValid()) {
      throw new CASRuntimeException(CASRuntimeException.INVALID_MARKER,
              new String[] { "Invalid Marker." });
    }
    final MarkerImpl mark = (MarkerImpl) trackingMark;

    try {
      final DataOutputStream dos = new DataOutputStream(ostream);

      // get the indexed FSs
      this.fsIndex = cas.getDeltaIndexedFSs(mark);

      // output the key and version number
      writeHeader(dos, VERSION_DELTA);

      // output the new FS heap cells
      final int cellsUsed = cas.getHeap().getCellsUsed();
      dos.writeInt(cellsUsed - mark.nextFSId);
      writeInts(dos, cas.getHeap().heap, mark.nextFSId, cellsUsed);

      // output the new strings and their string ref heap
      writeStringHeap(cas, cas.getStringHeap().serialize(mark.nextStringHeapAddr), dos);

      // output modified FS Heap cells as (offset, value) pairs
      final int[] fsHeapModifiedAddrs = cas.getModifiedFSHeapAddrs().toArray();
      dos.writeInt(fsHeapModifiedAddrs.length); // num modified
      for (int i = 0; i < fsHeapModifiedAddrs.length; i++) {
        dos.writeInt(fsHeapModifiedAddrs[i]);
        dos.writeInt(cas.getHeapValue(fsHeapModifiedAddrs[i]));
      }

      // output the index FSs
      writeCountedInts(dos, this.fsIndex);

      // 8bit, 16bit and 64bit heaps: new elements only
      writeByteSection(dos, cas.getByteHeap().heap, mark.nextByteHeapAddr,
              cas.getByteHeap().getSize());
      writeShortSection(dos, cas.getShortHeap().heap, mark.nextShortHeapAddr,
              cas.getShortHeap().getSize());
      writeLongSection(dos, cas.getLongHeap().heap, mark.nextLongHeapAddr,
              cas.getLongHeap().getSize());

      // modified, preexisting cells of the 8bit, 16bit and 64bit heaps
      writeModifiedByteCells(cas, dos);
      writeModifiedShortCells(cas, dos);
      writeModifiedLongCells(cas, dos);
    } catch (IOException e) {
      throw blobSerializationFailure(e);
    }
  }

  /**
   * Writes the blob key followed by the given version number.
   */
  private static void writeHeader(DataOutputStream dos, int version) throws IOException {
    dos.writeInt(BLOB_KEY);
    dos.writeInt(version);
  }

  /**
   * Writes <code>values[from]</code> up to, but not including, <code>values[to]</code> as 4-byte
   * ints, without a length prefix.
   */
  private static void writeInts(DataOutputStream dos, int[] values, int from, int to)
          throws IOException {
    for (int i = from; i < to; i++) {
      dos.writeInt(values[i]);
    }
  }

  /**
   * Writes the array length followed by every element of the array.
   */
  private static void writeCountedInts(DataOutputStream dos, int[] values) throws IOException {
    dos.writeInt(values.length);
    writeInts(dos, values, 0, values.length);
  }

  /**
   * Writes the 16-bit string heap section followed by the string ref heap section.
   */
  private static void writeStringHeap(CASImpl cas, StringHeapDeserializationHelper shdh,
          DataOutputStream dos) throws IOException {
    // compute the total size of data in stringHeap
    // total size = char buffer length + length of strings in the string list
    final int stringHeapLength = shdh.charHeapPos;
    int stringListLength = 0;
    for (int i = 0; i < shdh.refHeap.length; i += 3) {
      final int ref = shdh.refHeap[i + StringHeapDeserializationHelper.STRING_LIST_ADDR_OFFSET];
      // a non-zero ref is a string in the string list:
      // add its length, plus one for the terminating null, to the total
      if (ref != 0) {
        stringListLength += 1 + cas.getStringHeap().getStringForCode(ref).length();
      }
    }
    // NOTE(review): stringListLength is counted in the size written below, but only the char
    // heap contents are emitted here -- confirm that string-list entries (ref != 0) cannot occur
    // with the current string heap implementation.

    int stringTotalLength = stringHeapLength + stringListLength;
    if (stringHeapLength == 0 && stringListLength > 0) {
      // nothing from stringHeap: add 1 for the null at the beginning
      stringTotalLength += 1;
    }
    dos.writeInt(stringTotalLength);

    // write the data in the stringheap, if there is any
    if (stringTotalLength > 0) {
      if (shdh.charHeapPos > 0) {
        dos.writeChars(String.valueOf(shdh.charHeap, 0, shdh.charHeapPos));
      } else if (stringListLength > 0) {
        // no stringheap data, but data in the string lists: write a leading 0
        dos.writeChar(0);
      }
      // word alignment
      if (stringTotalLength % 2 != 0) {
        dos.writeChar(0);
      }
    }

    // write out the string ref heap
    // each reference consists of an offset into the stringheap and a length;
    // the announced size has one extra cell for the leading 0 written first
    int refheapsz = ((shdh.refHeap.length - StringHeapDeserializationHelper.FIRST_CELL_REF)
            / StringHeapDeserializationHelper.REF_HEAP_CELL_SIZE) * 2;
    refheapsz++;
    dos.writeInt(refheapsz);
    dos.writeInt(0);
    for (int i = StringHeapDeserializationHelper.FIRST_CELL_REF; i < shdh.refHeap.length; i += 3) {
      dos.writeInt(shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_POINTER_OFFSET]);
      dos.writeInt(shdh.refHeap[i + StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET]);
    }
  }

  /**
   * Writes zero bytes so that a section of <code>count</code> single-byte values ends on a
   * 4-byte boundary.
   */
  private static void padByteSection(DataOutputStream dos, int count) throws IOException {
    final int align = (4 - (count % 4)) % 4;
    for (int i = 0; i < align; i++) {
      dos.writeByte(0);
    }
  }

  /**
   * Writes one zero short if a section of <code>count</code> 2-byte values would otherwise not
   * end on a 4-byte boundary.
   */
  private static void padShortSection(DataOutputStream dos, int count) throws IOException {
    if (count % 2 != 0) {
      dos.writeShort(0);
    }
  }

  /**
   * Writes the element count <code>to - from</code>, the bytes <code>heap[from..to)</code> and the
   * word-alignment padding.
   */
  private static void writeByteSection(DataOutputStream dos, byte[] heap, int from, int to)
          throws IOException {
    final int count = to - from;
    dos.writeInt(count);
    for (int i = from; i < to; i++) {
      dos.writeByte(heap[i]);
    }
    padByteSection(dos, count);
  }

  /**
   * Writes the element count <code>to - from</code>, the shorts <code>heap[from..to)</code> and
   * the word-alignment padding.
   */
  private static void writeShortSection(DataOutputStream dos, short[] heap, int from, int to)
          throws IOException {
    final int count = to - from;
    dos.writeInt(count);
    for (int i = from; i < to; i++) {
      dos.writeShort(heap[i]);
    }
    padShortSection(dos, count);
  }

  /**
   * Writes the element count <code>to - from</code> followed by the longs
   * <code>heap[from..to)</code>.
   */
  private static void writeLongSection(DataOutputStream dos, long[] heap, int from, int to)
          throws IOException {
    dos.writeInt(to - from);
    for (int i = from; i < to; i++) {
      dos.writeLong(heap[i]);
    }
  }

  /**
   * Writes the modified 8-bit heap cells: count, 4-byte offsets, 1-byte values, padding.
   */
  private static void writeModifiedByteCells(CASImpl cas, DataOutputStream dos)
          throws IOException {
    final int[] addrs = cas.getModifiedByteHeapAddrs().toArray();
    final byte[] values = new byte[addrs.length];
    dos.writeInt(addrs.length);
    for (int i = 0; i < addrs.length; i++) {
      dos.writeInt(addrs[i]);
      values[i] = cas.getByteHeap().getHeapValue(addrs[i]);
    }
    for (int i = 0; i < values.length; i++) {
      dos.writeByte(values[i]);
    }
    // word alignment: depends on the number of values just written
    padByteSection(dos, values.length);
  }

  /**
   * Writes the modified 16-bit heap cells: count, 4-byte offsets, 2-byte values, padding.
   */
  private static void writeModifiedShortCells(CASImpl cas, DataOutputStream dos)
          throws IOException {
    final int[] addrs = cas.getModifiedShortHeapAddrs().toArray();
    final short[] values = new short[addrs.length];
    dos.writeInt(addrs.length);
    for (int i = 0; i < addrs.length; i++) {
      // offsets are 4 bytes wide; writing them as shorts would truncate large heap addresses
      dos.writeInt(addrs[i]);
      values[i] = cas.getShortHeap().getHeapValue(addrs[i]);
    }
    for (int i = 0; i < values.length; i++) {
      dos.writeShort(values[i]);
    }
    // word alignment: depends on the number of values just written
    padShortSection(dos, values.length);
  }

  /**
   * Writes the modified 64-bit heap cells: count, 4-byte offsets, 8-byte values.
   */
  private static void writeModifiedLongCells(CASImpl cas, DataOutputStream dos)
          throws IOException {
    final int[] addrs = cas.getModifiedLongHeapAddrs().toArray();
    final long[] values = new long[addrs.length];
    dos.writeInt(addrs.length);
    for (int i = 0; i < addrs.length; i++) {
      // offsets are 4 bytes wide; writing them as shorts would truncate large heap addresses
      dos.writeInt(addrs[i]);
      values[i] = cas.getLongHeap().getHeapValue(addrs[i]);
    }
    for (int i = 0; i < values.length; i++) {
      dos.writeLong(values[i]);
    }
  }

  /**
   * Builds the exception reported when writing the blob to the stream fails.
   */
  private static CASRuntimeException blobSerializationFailure(IOException e) {
    return new CASRuntimeException(CASRuntimeException.BLOB_SERIALIZATION,
            new String[] { e.getMessage() });
  }

  /**
   * Copies the contents of a string list into a new array of the same size.
   *
   * @param arrayList
   *          The list to copy.
   * @return A new array holding the list elements in list order.
   */
  private String[] stringArrayListToArray(ArrayList<String> arrayList) {
    return arrayList.toArray(new String[arrayList.size()]);
  }

  /** @return the captured heap meta data, or <code>null</code> if none was captured. */
  int[] getHeapMetadata() {
    return this.heapMetaData;
  }

  /** @return the captured copy of the 32-bit FS heap. */
  int[] getHeapArray() {
    return this.heapArray;
  }

  /** @return the captured string table. */
  String[] getStringTable() {
    return this.stringTable;
  }

  /** @return the indexed FSs recorded by the most recent <code>addCAS</code> call. */
  int[] getFSIndex() {
    return this.fsIndex;
  }

  /** @return the captured copy of the 8-bit heap. */
  byte[] getByteArray() {
    return this.byteHeapArray;
  }

  /** @return the captured copy of the 16-bit heap. */
  short[] getShortArray() {
    return this.shortHeapArray;
  }

  /** @return the captured copy of the 64-bit heap. */
  long[] getLongArray() {
    return this.longHeapArray;
  }
}