// blob: 19134080bb9ccf2c5d6fbf943bbebcaba24d4e4d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.cas.impl;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.Marker;
import org.apache.uima.cas.SerialFormat;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.admin.CASMgr;
import org.apache.uima.cas.impl.BinaryCasSerDes6.ReuseInfo;
import org.apache.uima.resource.ResourceInitializationException;
/**
* This class has no fields or instance methods, but instead
* has only static methods.
*
* In spite of its name, it has static methods for both serializing and deserializing CASes.
*
* It has 2 styles of Serialization / Deserialization
* - one which makes use of various custom binary serialization methods, and
* - one which just converts CAS and related objects into other objects which
* in turn are serializable by normal Java Object serialization.
*
* See also CasIOUtils, which has static methods for serialization and deserialization, including
* support for XMI and XCAS.
*
*/
public class Serialization {

  /* -------------------------------------------------------------------------
   * These methods convert a CAS to / from a serializable object and
   * vice-versa. Some also handle type system and index definitions.
   * ------------------------------------------------------------------------- */

  /**
   * Convert a CAS to a CASSerializer object.
   * This object is used in testing, and also to pass things via the CPP JNI interface
   * and the Vinci protocol.
   *
   * @param cas the CAS which serves as the source for a new CASSerializer object;
   *            it is cast to {@code CASImpl}, so any other implementation fails with a
   *            {@code ClassCastException}
   * @return a corresponding CASSerializer object
   */
  public static CASSerializer serializeCAS(CAS cas) {
    CASSerializer ser = new CASSerializer();
    ser.addCAS((CASImpl) cas);
    return ser;
  }

  /**
   * Convert a CAS to a CASSerializer object, excluding metadata about the CAS.
   * This object is used in testing.
   *
   * @param cas the source for a new CASSerializer object; must be a {@code CASImpl}
   * @return a corresponding CASSerializer object
   */
  public static CASSerializer serializeNoMetaData(CAS cas) {
    CASSerializer ser = new CASSerializer();
    ser.addNoMetaData((CASImpl) cas);
    return ser;
  }

  /**
   * Convert a Type System and Index Repository into a
   * CASMgrSerializer object which can be serialized.
   *
   * @param casMgr the type system and index repo definitions; the CAS it returns from
   *               {@code getCAS()} must be a {@code CASImpl}
   * @return a serializable object version of these
   */
  public static CASMgrSerializer serializeCASMgr(CASMgr casMgr) {
    CASMgrSerializer ser = new CASMgrSerializer();
    // Look the CAS up once; both definitions are read from the same instance.
    CAS cas = casMgr.getCAS();
    ser.addTypeSystem((TypeSystemImpl) cas.getTypeSystem());
    ser.addIndexRepository((FSIndexRepositoryImpl) ((CASImpl) cas).getBaseIndexRepository());
    return ser;
  }

  /**
   * Convert a CAS + the type system and index definitions into a
   * CASCompleteSerializer object.
   *
   * @param casMgr the source for a new CASCompleteSerializer object; must be a {@code CASImpl}
   * @return a Java Object which is serializable and has the type system, the index
   *         definitions, and the CAS contents
   */
  public static CASCompleteSerializer serializeCASComplete(CASMgr casMgr) {
    return new CASCompleteSerializer((CASImpl) casMgr);
  }

  /**
   * Deserialize the data in a CASCompleteSerializer into an existing CAS.
   *
   * @param casCompSer the source for deserialization
   * @param casMgr the CAS to receive the data; must be a {@code CASImpl}
   */
  public static void deserializeCASComplete(CASCompleteSerializer casCompSer, CASMgr casMgr) {
    ((CASImpl) casMgr).reinit(casCompSer);
  }

  /**
   * Deserialize a type system and index repository definition and use it to initialize
   * a new instance of a CAS.
   *
   * @param ser the serialized type system and index repository definitions
   * @return the initialized CAS loaded with the deserialized info about the CAS type system
   *         and index repositories
   */
  public static CASMgr createCASMgr(CASMgrSerializer ser) {
    return new CASImpl(ser);
  }

  /**
   * Deserialize the data in a CASSerializer into an existing CAS, and
   * return the current view in that CAS.
   *
   * @param casMgr the CAS Manager; must be a {@code CASImpl}
   * @param casSer the serializer
   * @return the initialized CAS loaded with the deserialized data
   */
  public static CAS createCAS(CASMgr casMgr, CASSerializer casSer) {
    CASImpl casImpl = (CASImpl) casMgr;
    casImpl.reinit(casSer);
    return casImpl.getCurrentView();
  }

  /* -------------------------------------------------------------------------
   * Methods from here on do some form of custom serialization /
   * deserialization with data streams, byte arrays, etc.
   * ------------------------------------------------------------------------- */

  /**
   * Serialize a CAS including what's indexed, to an output stream.
   * Uses uncompressed binary serialization.
   *
   * @param cas the CAS to serialize; must be a {@code CASImpl}
   * @param ostream the output stream
   */
  public static void serializeCAS(CAS cas, OutputStream ostream) {
    CASSerializer ser = new CASSerializer();
    ser.addCAS((CASImpl) cas, ostream);
  }

  /**
   * Deserialize a CAS, in various binary formats, into an existing CAS.
   * Note: this form supports deserializing the following binary representations:
   *   - plain (uncompressed)
   *   - compressed, no type filtering (form 4), Delta and not-delta
   *   - compressed, no type filtering (form 6), not-delta only.
   * If this form encounters a non-conforming kind of input, it will throw a runtime exception.
   *
   * @param cas the CAS to deserialize into; must be a {@code CASImpl}. If the incoming
   *            representation is a Delta Cas, then the receiving CAS is not reset, but is added to.
   * @param istream the input stream
   * @return The form of the serialized CAS (from its header)
   */
  public static SerialFormat deserializeCAS(CAS cas, InputStream istream) {
    return ((CASImpl) cas).reinit(istream);
  }

  /**
   * Serializes CAS data added or modified after the tracking Marker was created and writes it
   * to the output stream in Delta CAS format, using uncompressed binary format.
   *
   * @param cas the Cas to serialize; must be a {@code CASImpl}
   * @param ostream the output stream
   * @param mark the cas mark (for delta CASes); it is dereferenced unconditionally, so it
   *             must not be null
   * @throws CASRuntimeException (INVALID_MARKER) if {@code mark.isValid()} is false
   */
  public static void serializeCAS(CAS cas, OutputStream ostream, Marker mark) {
    if (!mark.isValid()) {
      throw new CASRuntimeException(CASRuntimeException.INVALID_MARKER);
    }
    CASSerializer ser = new CASSerializer();
    ser.addCAS((CASImpl) cas, ostream, mark);
  }

  /* -------------------------------------------------------------------------
   * Methods from here on use some form of compression
   * ------------------------------------------------------------------------- */

  /**
   * Serialize in compressed binary form 4.
   *
   * @param cas the CAS to serialize; must be a {@code CASImpl}
   * @param out an OutputStream, a DataOutputStream, or a File
   * @throws IOException if IO exception
   */
  public static void serializeWithCompression(CAS cas, Object out) throws IOException {
    new BinaryCasSerDes4(((CASImpl) cas).getTypeSystemImpl(), false).serialize(cas, out);
  }

  /**
   * Serialize in compressed binary form 4, only the delta part of a CAS.
   *
   * @param cas the CAS to serialize; must be a {@code CASImpl}
   * @param out an OutputStream, a DataOutputStream, or a File
   * @param marker identifying where the delta starts
   * @throws IOException if IO exception
   */
  public static void serializeWithCompression(CAS cas, Object out, Marker marker) throws IOException {
    new BinaryCasSerDes4(((CASImpl) cas).getTypeSystemImpl(), false).serialize(cas, out, marker);
  }

  /**
   * Serialize in compressed binary form via {@code BinaryCasSerDes6}, without supplying a
   * target type system (null is passed to the BinaryCasSerDes6 constructor along with
   * {@code includeTSI}).
   *
   * @param cas the CAS to serialize
   * @param out an OutputStream, a DataOutputStream, or a File
   * @param includeTSI forwarded unchanged to the {@code BinaryCasSerDes6} constructor.
   *        NOTE(review): presumably selects whether the type system and index definitions
   *        are written along with the CAS data - confirm against BinaryCasSerDes6.
   * @return information to be used on subsequent serializations (to save time) or deserializations (for receiving delta CASs), or reserializations (if sending delta CASs)
   * @throws IOException if IO exception
   * @throws ResourceInitializationException if raised while setting up BinaryCasSerDes6
   */
  public static ReuseInfo serializeWithCompression(CAS cas, Object out, boolean includeTSI) throws IOException, ResourceInitializationException {
    BinaryCasSerDes6 bcs = new BinaryCasSerDes6(cas, null, includeTSI);
    bcs.serialize(out);
    return bcs.getReuseInfo();
  }

  /**
   * Serialize in compressed binary with type filtering.
   * This method can use type filtering to omit sending those types and/or features not present in the target type system.
   *   - To omit type filtering, use null for the target type system
   * It also only sends those feature structures which are reachable either from an index or references from other reachable feature structures.
   *
   * @param cas the CAS to serialize
   * @param out an OutputStream, a DataOutputStream, or a File
   * @param tgtTypeSystem null or a target TypeSystem (a {@code TypeSystemImpl}), which must be mergable with this CAS's type system
   * @return information to be used on subsequent serializations (to save time) or deserializations (for receiving delta CASs), or reserializations (if sending delta CASs)
   * @throws IOException if IO exception
   * @throws ResourceInitializationException if target type system is incompatible with this CAS's type system
   */
  public static ReuseInfo serializeWithCompression(CAS cas, Object out, TypeSystem tgtTypeSystem) throws IOException, ResourceInitializationException {
    BinaryCasSerDes6 bcs = new BinaryCasSerDes6(cas, (TypeSystemImpl) tgtTypeSystem);
    bcs.serialize(out);
    return bcs.getReuseInfo();
  }

  /**
   * Delta Serialize in compressed form, with type filtering.
   * This method can use type filtering to omit sending those types and/or features not present in the target type system.
   *   - To omit type filtering, use null for the target type system
   * It also only sends those feature structures which are reachable either from an index or references from other reachable feature structures.
   *
   * @param cas the CAS to serialize
   * @param out an OutputStream, a DataOutputStream, or a File
   * @param tgtTypeSystem null or a target TypeSystem (a {@code TypeSystemImpl}), which must be mergable with this CAS's type system
   * @param mark null or where the mark is in the CAS (a {@code MarkerImpl}). If not null, indicates doing a delta CAS serialization
   * @param reuseInfo if mark is not null, this parameter is required
   *                  and must have been computed when the original deserialization (of the CAS now being serialized as a delta CAS) was done
   * @throws IOException if IO exception
   * @throws ResourceInitializationException if the target type system and the CAS's type system can't be merged
   */
  public static void serializeWithCompression(CAS cas, Object out, TypeSystem tgtTypeSystem, Marker mark, ReuseInfo reuseInfo) throws IOException, ResourceInitializationException {
    BinaryCasSerDes6 bcs = new BinaryCasSerDes6(cas, (MarkerImpl) mark, (TypeSystemImpl) tgtTypeSystem, reuseInfo);
    bcs.serialize(out);
  }

  /**
   * Deserialize a CAS, in various binary formats, into an existing CAS.
   * Note: this form supports deserializing the following binary representations:
   *   - compressed, type filtering (form 6), delta and not-delta.
   *
   * @param cas the CAS to deserialize into. If the incoming representation is a Delta Cas, then the receiving CAS is not reset, but is added to.
   * @param istream the input stream
   * @param tgtTypeSystem The typeSystem of the serialized form of the CAS; must be compatible with the type system of the receiving cas.
   * @param reuseInfo If delta CAS is being received and form 6 compression is being used, then this must be the reuseInfo captured when the
   *                  original CAS (being updated by the delta coming in) was sent out.
   * @return The instance of BinaryCasSerDes6 used for deserialization
   * @throws IOException if IO exception
   * @throws ResourceInitializationException if the target type system and the CAS's type system can't be merged
   */
  public static BinaryCasSerDes6 deserializeCAS(CAS cas, InputStream istream, TypeSystem tgtTypeSystem, ReuseInfo reuseInfo) throws IOException, ResourceInitializationException {
    // No marker is supplied (null) when constructing the deserializer.
    BinaryCasSerDes6 bcs = new BinaryCasSerDes6(cas, null, (TypeSystemImpl) tgtTypeSystem, reuseInfo);
    bcs.deserialize(istream);
    return bcs;
  }

  /**
   * Deserialize a CAS, in various binary formats, into an existing CAS.
   * Note: this form supports deserializing the following binary representations:
   *   - compressed, type filtering (form 6), delta and not-delta.
   *
   * @param cas the CAS to deserialize into. If the incoming representation is a Delta Cas, then the receiving CAS is not reset, but is added to.
   * @param istream the input stream
   * @param tgtTypeSystem The typeSystem of the serialized form of the CAS; must be compatible with the type system of the receiving cas.
   * @param reuseInfo If delta CAS is being received and form 6 compression is being used, then this must be the reuseInfo captured when the
   *                  original CAS (being updated by the delta coming in) was sent out.
   * @param allowPreexisting used to control what happens when a delta cas is modifying Feature Structures below the line
   * @return The instance of BinaryCasSerDes6 used for deserialization
   * @throws IOException if IO exception
   * @throws ResourceInitializationException if the target type system and the CAS's type system can't be merged
   */
  public static BinaryCasSerDes6 deserializeCAS(CAS cas, InputStream istream, TypeSystem tgtTypeSystem, ReuseInfo reuseInfo, AllowPreexistingFS allowPreexisting) throws IOException, ResourceInitializationException {
    // No marker is supplied (null) when constructing the deserializer.
    BinaryCasSerDes6 bcs = new BinaryCasSerDes6(cas, null, (TypeSystemImpl) tgtTypeSystem, reuseInfo);
    bcs.deserialize(istream, allowPreexisting);
    return bcs;
  }
}