// blob: 616554d9ca536415e83fed4c985e5c8e50d7d16e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.impl;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import org.apache.uima.cas.CASRuntimeException;
/**
* Serialization for CAS. This serializes the state of the CAS, assuming that the type and index
* information remains constant. <code>CASSerializer</code> objects can be serialized with
* standard Java serialization.
*
* @see org.apache.uima.cas.impl.CASMgrSerializer
*
*
*/
public class CASSerializer implements Serializable {

  static final long serialVersionUID = -7972011651957420295L;

  /** Blob key: the bytes 'U', 'I', 'M', 'A' (85, 73, 77, 65) read as one big-endian int. */
  private static final int BLOB_KEY = 0x55494D41;

  /** Blob format version written directly after the key. */
  private static final int BLOB_VERSION = 1;

  // The heap itself.
  public int[] heapArray = null;

  // Heap metadata. This is not strictly required, the heap can be
  // deserialized without. Must be null if not used.
  public int[] heapMetaData = null;

  // The string table for strings that are feature structure values. Note that
  // the 0th position in the string table should be null and will be ignored.
  public String[] stringTable;

  // All FSs in any index.
  public int[] fsIndex;

  // Copies of the used portions of the 8-bit, 16-bit and 64-bit heaps.
  public byte[] byteHeapArray;

  public short[] shortHeapArray;

  public long[] longHeapArray;

  /**
   * Constructor for CASSerializer.
   */
  public CASSerializer() {
    super();
  }

  /**
   * Serialize CAS data without heap-internal meta data. Currently used for serialization to C++.
   * 
   * @param casImpl
   *          The CAS to be serialized.
   */
  public void addNoMetaData(CASImpl casImpl) {
    addCAS(casImpl, false);
  }

  /**
   * Add the CAS to be serialized, including the heap meta data. Note that we need the
   * implementation here, the interface is not enough.
   * 
   * @param cas
   *          The CAS to be serialized.
   */
  public void addCAS(CASImpl cas) {
    addCAS(cas, true);
  }

  /**
   * Add the CAS to be serialized by copying the used portion of each of its heaps, its string
   * table and its indexed FSs into the public fields of this object. Note that we need the
   * implementation here, the interface is not enough.
   * 
   * @param cas
   *          The CAS to be serialized.
   * @param addMetaData
   *          If <code>true</code>, the heap meta data is captured as well; if <code>false</code>,
   *          {@link #heapMetaData} is set to <code>null</code>.
   */
  public void addCAS(CASImpl cas, boolean addMetaData) {
    this.fsIndex = cas.getIndexedFSs();

    final int heapSize = cas.getHeap().getCellsUsed();
    this.heapArray = new int[heapSize];
    System.arraycopy(cas.getHeap().heap, 0, this.heapArray, 0, heapSize);

    // Always assign: a serializer that is reused must not keep the meta data of a previously
    // added CAS, since the field is required to be null when meta data is not used.
    this.heapMetaData = addMetaData ? cas.getHeap().getMetaData() : null;

    this.stringTable = stringArrayListToArray(cas.getStringTable());

    final int byteHeapSize = cas.getByteHeap().getSize();
    this.byteHeapArray = new byte[byteHeapSize];
    System.arraycopy(cas.getByteHeap().heap, 0, this.byteHeapArray, 0, byteHeapSize);

    final int shortHeapSize = cas.getShortHeap().getSize();
    this.shortHeapArray = new short[shortHeapSize];
    System.arraycopy(cas.getShortHeap().heap, 0, this.shortHeapArray, 0, shortHeapSize);

    final int longHeapSize = cas.getLongHeap().getSize();
    this.longHeapArray = new long[longHeapSize];
    System.arraycopy(cas.getLongHeap().heap, 0, this.longHeapArray, 0, longHeapSize);
  }

  /**
   * Serializes the CAS data in blob format and writes it to the output stream. Apart from
   * {@link #fsIndex}, which is refreshed from the CAS, the fields of this object are neither read
   * nor modified. The stream is not closed.
   * 
   * <pre>
   * ---------------------------------------------------------------------
   * Blob Format
   *
   * Element Size  Number of  Description
   *   (bytes)     Elements
   * ------------  ---------  --------------------------------
   *      4            1      Blob key = "UIMA" in utf-8
   *      4            1      Version (currently = 1)
   *      4            1      size of 32-bit FS Heap array = s32H
   *      4           s32H    32-bit FS heap array
   *      4            1      size of 16-bit string Heap array = sSH
   *      2           sSH     16-bit string heap array
   *      4            1      size of string Ref Heap array = sSRH
   *      4           sSRH    a leading 0, then string ref offsets and lengths
   *      4            1      size of FS index array = sFSI
   *      4           sFSI    FS index array
   *
   *      4            1      size of 8-bit Heap array = s8H
   *      1           s8H     8-bit Heap array
   *      4            1      size of 16-bit Heap array = s16H
   *      2           s16H    16-bit Heap array
   *      4            1      size of 64-bit Heap array = s64H
   *      8           s64H    64-bit Heap array
   * ---------------------------------------------------------------------
   * </pre>
   * 
   * The string, 8-bit and 16-bit heap sections are zero-padded to a multiple of 4 bytes; the
   * padding is not included in the sizes above. All values are written through a
   * <code>DataOutputStream</code>, i.e. high byte first. C++ blob serialization writes data in
   * native byte order, so byte swapping may be needed when blobs are exchanged with C++.
   * 
   * @param cas
   *          The CAS to be serialized.
   * @param ostream
   *          The output stream.
   * @throws CASRuntimeException
   *           with <code>BLOB_SERIALIZATION</code> if writing to the stream fails.
   */
  public void addCAS(CASImpl cas, OutputStream ostream) {
    try {
      final DataOutputStream dos = new DataOutputStream(ostream);

      // get the indexed FSs
      this.fsIndex = cas.getIndexedFSs();

      // output the key and version number
      dos.writeInt(BLOB_KEY);
      dos.writeInt(BLOB_VERSION);

      // output the FS heap
      final int heapSize = cas.getHeap().getCellsUsed();
      writeInts(dos, cas.getHeap().heap, heapSize);

      // output the strings, then the (re-targeted) string references
      final int[] refheap = writeStringHeap(dos, cas.getStringHeap());
      writeStringRefs(dos, refheap);

      // output the index FSs
      writeInts(dos, this.fsIndex, this.fsIndex.length);

      // 8bit heap, padded to a 4-byte boundary
      final int byteHeapSize = cas.getByteHeap().getSize();
      dos.writeInt(byteHeapSize);
      dos.write(cas.getByteHeap().heap, 0, byteHeapSize);
      final int bytePadding = (4 - (byteHeapSize % 4)) % 4;
      for (int i = 0; i < bytePadding; i++) {
        dos.writeByte(0);
      }

      // 16bit heap, padded to a 4-byte boundary
      final int shortHeapSize = cas.getShortHeap().getSize();
      final short[] shortHeap = cas.getShortHeap().heap;
      dos.writeInt(shortHeapSize);
      for (int i = 0; i < shortHeapSize; i++) {
        dos.writeShort(shortHeap[i]);
      }
      if (shortHeapSize % 2 != 0) {
        dos.writeShort(0);
      }

      // 64bit heap
      final int longHeapSize = cas.getLongHeap().getSize();
      final long[] longHeap = cas.getLongHeap().heap;
      dos.writeInt(longHeapSize);
      for (int i = 0; i < longHeapSize; i++) {
        dos.writeLong(longHeap[i]);
      }
    } catch (IOException e) {
      CASRuntimeException exception = new CASRuntimeException(
              CASRuntimeException.BLOB_SERIALIZATION, new String[] { e.getMessage() });
      throw exception;
    }
  }

  /**
   * Writes <code>count</code> followed by the first <code>count</code> elements of
   * <code>values</code>, each as a 4-byte int.
   */
  private static void writeInts(DataOutputStream dos, int[] values, int count)
          throws IOException {
    dos.writeInt(count);
    for (int i = 0; i < count; i++) {
      dos.writeInt(values[i]);
    }
  }

  /**
   * Writes the string heap section: the character heap followed by every string held in the
   * string list, each null-terminated, as if they were all stored in the character heap.
   * 
   * @return a copy of the used part of the string reference heap in which the character-heap
   *         pointer of every string-list entry has been set to the position at which that string
   *         was written. The string heap itself is not modified.
   */
  private static int[] writeStringHeap(DataOutputStream dos, StringHeap stringHeap)
          throws IOException {
    // local array to hold ref heap to be serialized
    final int[] refheap = new int[stringHeap.refHeapPos];
    System.arraycopy(stringHeap.refHeap, 0, refheap, 0, refheap.length);

    // total size = char buffer length + length of strings in the string list,
    // where each listed string is terminated with a null
    final int charHeapLength = stringHeap.charHeapPos;
    int stringListLength = 0;
    for (int i = 0; i < refheap.length; i += StringHeap.REF_HEAP_CELL_SIZE) {
      final int ref = refheap[i + StringHeap.STRING_LIST_ADDR_OFFSET];
      if (ref != 0) {
        stringListLength += 1 + ((String) stringHeap.stringList.get(ref)).length();
      }
    }
    int stringTotalLength = charHeapLength + stringListLength;
    if (charHeapLength == 0 && stringListLength > 0) {
      // nothing from the char heap: account for the null written at the beginning
      stringTotalLength += 1;
    }
    dos.writeInt(stringTotalLength);
    if (stringTotalLength <= 0) {
      return refheap;
    }

    if (charHeapLength > 0) {
      dos.writeChars(String.valueOf(stringHeap.stringHeap, 0, charHeapLength));
    } else if (stringListLength > 0) {
      // no char heap data, but string list data follows: write a leading 0
      dos.writeChar(0);
    }

    if (stringListLength > 0) {
      // listed strings are appended right behind the char heap data (or the leading 0)
      int pos = charHeapLength > 0 ? charHeapLength : 1;
      for (int i = 0; i < refheap.length; i += StringHeap.REF_HEAP_CELL_SIZE) {
        final int ref = refheap[i + StringHeap.STRING_LIST_ADDR_OFFSET];
        if (ref != 0) {
          final String listed = (String) stringHeap.stringList.get(ref);
          // point the local reference at the position the string is written to
          refheap[i + StringHeap.CHAR_HEAP_POINTER_OFFSET] = pos;
          dos.writeChars(listed);
          dos.writeChar(0); // null terminate each string
          pos += 1 + listed.length();
        }
      }
    }

    // word alignment: chars are 2 bytes, so an odd count needs one padding char
    if (stringTotalLength % 2 != 0) {
      dos.writeChar(0);
    }
    return refheap;
  }

  /**
   * Writes the string reference section: the number of ints that follow, a leading 0, and then an
   * (offset, length) pair for every reference cell starting at
   * <code>StringHeap.FIRST_CELL_REF</code>.
   */
  private static void writeStringRefs(DataOutputStream dos, int[] refheap) throws IOException {
    final int pairCount = (refheap.length - StringHeap.FIRST_CELL_REF)
            / StringHeap.REF_HEAP_CELL_SIZE;
    dos.writeInt(pairCount * 2 + 1);
    dos.writeInt(0);
    for (int i = StringHeap.FIRST_CELL_REF; i < refheap.length; i += StringHeap.REF_HEAP_CELL_SIZE) {
      dos.writeInt(refheap[i + StringHeap.CHAR_HEAP_POINTER_OFFSET]);
      dos.writeInt(refheap[i + StringHeap.CHAR_HEAP_STRLEN_OFFSET]);
    }
  }

  /**
   * Copies the elements of a list of strings into a new array of the same size.
   * 
   * @param arrayList
   *          The list to copy; every element must be a <code>String</code> or <code>null</code>.
   * @return String[] A new array holding the list elements in list order.
   */
  private String[] stringArrayListToArray(ArrayList arrayList) {
    return (String[]) arrayList.toArray(new String[arrayList.size()]);
  }

  /** @return the heap meta data field; <code>null</code> if no meta data was added. */
  int[] getHeapMetadata() {
    return this.heapMetaData;
  }

  /** @return the heap array field (not a copy). */
  int[] getHeapArray() {
    return this.heapArray;
  }

  /** @return the string table field (not a copy). */
  String[] getStringTable() {
    return this.stringTable;
  }

  /** @return the indexed-FS array field (not a copy). */
  int[] getFSIndex() {
    return this.fsIndex;
  }

  /** @return the 8-bit heap array field (not a copy). */
  byte[] getByteArray() {
    return this.byteHeapArray;
  }

  /** @return the 16-bit heap array field (not a copy). */
  short[] getShortArray() {
    return this.shortHeapArray;
  }

  /** @return the 64-bit heap array field (not a copy). */
  long[] getLongArray() {
    return this.longHeapArray;
  }
}