blob: 8e52262670064cbc258c31859f6fde00060d0e48 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.json.flexjson;
import static com.fasterxml.jackson.core.JsonEncoding.UTF8;
import static java.util.Arrays.asList;
import static java.util.Arrays.sort;
import static java.util.Collections.sort;
import static java.util.Collections.unmodifiableSet;
import static java.util.Comparator.comparing;
import static java.util.stream.Collectors.toList;
import static org.apache.uima.json.flexjson.FlexJsonCasSerializer.FeatureStructuresMode.AS_ARRAY;
import static org.apache.uima.json.flexjson.FlexJsonCasSerializer.ViewsMode.INLINE;
import static org.apache.uima.json.flexjson.FlexJsonCasSerializer.ViewsMode.SEPARATE;
import static org.apache.uima.json.flexjson.FlexJsonNames.COMPONENT_TYPE_FIELD;
import static org.apache.uima.json.flexjson.FlexJsonNames.FEATURE_STRUCTURES_FIELD;
import static org.apache.uima.json.flexjson.FlexJsonNames.FLAG_DOCUMENT_ANNOTATION;
import static org.apache.uima.json.flexjson.FlexJsonNames.ID_FIELD;
import static org.apache.uima.json.flexjson.FlexJsonNames.REF;
import static org.apache.uima.json.flexjson.FlexJsonNames.SUPER_TYPE_FIELD;
import static org.apache.uima.json.flexjson.FlexJsonNames.TYPES_FIELD;
import static org.apache.uima.json.flexjson.FlexJsonNames.TYPE_FIELD;
import static org.apache.uima.json.flexjson.FlexJsonNames.VIEWS_FIELD;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.StreamSupport;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.impl.CASImpl;
import org.apache.uima.jcas.cas.TOP;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
@Deprecated
public class FlexJsonCasSerializer {
/**
 * Layout used for the feature structures section of the JSON output.
 */
public enum FeatureStructuresMode {
  /**
   * Feature structures are written as a single JSON object whose field names are the feature
   * structure references.
   */
  AS_OBJECT,

  /**
   * Feature structures are written as a JSON array of objects, each of which carries its
   * reference in an ID field.
   */
  AS_ARRAY
}
/**
 * Controls how the membership of feature structures in views is represented in the JSON output.
 */
public enum ViewsMode {
  /**
   * A dedicated views section is written that maps each view name to an array of feature
   * structure references.
   */
  SEPARATE,

  /**
   * No dedicated views section is written; instead each feature structure lists the names of
   * the views it was found in.
   */
  INLINE
}
/**
 * Names of the types that are excluded when the type system is written. The set is immutable
 * and the same for every serializer, so it is a class-level constant rather than a per-instance
 * field that would be rebuilt for each serializer.
 */
private static final Set<String> BUILT_IN_TYPES = unmodifiableSet(new HashSet<>(asList(
    CAS.TYPE_NAME_ANNOTATION, CAS.TYPE_NAME_ANNOTATION_BASE, CAS.TYPE_NAME_ARRAY_BASE,
    CAS.TYPE_NAME_BOOLEAN, CAS.TYPE_NAME_BOOLEAN_ARRAY, CAS.TYPE_NAME_BYTE,
    CAS.TYPE_NAME_BYTE_ARRAY, CAS.TYPE_NAME_DOCUMENT_ANNOTATION, CAS.TYPE_NAME_DOUBLE,
    CAS.TYPE_NAME_DOUBLE_ARRAY, CAS.TYPE_NAME_EMPTY_FLOAT_LIST, CAS.TYPE_NAME_EMPTY_FS_LIST,
    CAS.TYPE_NAME_EMPTY_INTEGER_LIST, CAS.TYPE_NAME_EMPTY_STRING_LIST, CAS.TYPE_NAME_FLOAT,
    CAS.TYPE_NAME_FLOAT_ARRAY, CAS.TYPE_NAME_FLOAT_LIST, CAS.TYPE_NAME_FS_ARRAY,
    CAS.TYPE_NAME_FS_LIST, CAS.TYPE_NAME_INTEGER, CAS.TYPE_NAME_INTEGER_ARRAY,
    CAS.TYPE_NAME_INTEGER_LIST, CAS.TYPE_NAME_LIST_BASE, CAS.TYPE_NAME_LONG,
    CAS.TYPE_NAME_LONG_ARRAY, CAS.TYPE_NAME_NON_EMPTY_FLOAT_LIST,
    CAS.TYPE_NAME_NON_EMPTY_FS_LIST, CAS.TYPE_NAME_NON_EMPTY_INTEGER_LIST,
    CAS.TYPE_NAME_NON_EMPTY_STRING_LIST, CAS.TYPE_NAME_SHORT, CAS.TYPE_NAME_SHORT_ARRAY,
    CAS.TYPE_NAME_SOFA, CAS.TYPE_NAME_STRING, CAS.TYPE_NAME_STRING_ARRAY,
    CAS.TYPE_NAME_STRING_LIST, CAS.TYPE_NAME_TOP)));

/** Generator receiving all JSON output; assigned once in the constructor. */
private final JsonGenerator jg;

/** Layout of the feature structures section. */
private FeatureStructuresMode featureStructuresMode = AS_ARRAY;

/** Representation of view membership. */
private ViewsMode viewsMode = INLINE;

/** Factory asked for a fresh feature structure reference generator by each write of a CAS. */
private Supplier<Function<FeatureStructure, String>> idRefGeneratorSupplier;

/** Reference generator used by the write currently in progress. */
private Function<FeatureStructure, String> idRefGenerator;

/** References already handed out to feature structures during the current write. */
private Map<FeatureStructure, String> idRefCache;

/** Factory asked for a fresh type reference generator by each write of a CAS. */
private Supplier<Function<Type, String>> typeRefGeneratorSupplier;

/** Type reference generator used by the write currently in progress. */
private Function<Type, String> typeRefGenerator;

/** References already handed out to types during the current write. */
private Map<Type, String> typeRefCache;

/** Names of the views each feature structure was found in; filled in INLINE views mode. */
private Map<FeatureStructure, Set<String>> fsToViewsCache;
/**
 * Creates a serializer that writes to the given generator. By default feature structures are
 * referenced through {@link SequentialIdRefGenerator} and types through
 * {@link FullyQualifiedTypeRefGenerator}.
 *
 * @param aJg
 *          the generator that receives the JSON output.
 */
public FlexJsonCasSerializer(JsonGenerator aJg) {
  jg = aJg;
  // Assign the defaults directly instead of going through the public setters: this class is
  // not final, and a setter overridden in a subclass would otherwise be invoked on an object
  // whose subclass state has not been initialized yet.
  idRefGeneratorSupplier = SequentialIdRefGenerator::new;
  typeRefGeneratorSupplier = FullyQualifiedTypeRefGenerator::new;
}
/**
 * Selects how view membership is represented in the output.
 *
 * @param aViewsMode
 *          the views mode to use.
 */
public void setViewsMode(ViewsMode aViewsMode) {
  this.viewsMode = aViewsMode;
}
/**
 * @return the currently configured views mode.
 */
public ViewsMode getViewsMode() {
  return this.viewsMode;
}
/**
 * @return the currently configured layout of the feature structures section.
 */
public FeatureStructuresMode getFeatureStructuresMode() {
  return this.featureStructuresMode;
}
/**
 * Selects the layout of the feature structures section.
 *
 * @param aFeatureStructuresMode
 *          the layout to use.
 */
public void setFeatureStructuresMode(FeatureStructuresMode aFeatureStructuresMode) {
  this.featureStructuresMode = aFeatureStructuresMode;
}
/**
 * Sets the factory for the function that assigns references to feature structures. The factory
 * is consulted at the start of every {@link #write(CAS)} call.
 *
 * @param aIdRefGeneratorSupplier
 *          the supplier of feature structure reference generators.
 */
public void setIdRefGeneratorSupplier(
    Supplier<Function<FeatureStructure, String>> aIdRefGeneratorSupplier) {
  this.idRefGeneratorSupplier = aIdRefGeneratorSupplier;
}
/**
 * Sets the factory for the function that assigns references to types. The factory is consulted
 * at the start of every {@link #write(CAS)} call.
 *
 * @param aTypeRefGeneratorSupplier
 *          the supplier of type reference generators.
 */
public void setTypeRefGeneratorSupplier(
    Supplier<Function<Type, String>> aTypeRefGeneratorSupplier) {
  this.typeRefGeneratorSupplier = aTypeRefGeneratorSupplier;
}
/**
 * Serializes the given CAS as one JSON object to the generator of this serializer.
 * <p>
 * The object contains, in this order: the type system section, the views section (only in
 * {@link ViewsMode#SEPARATE} mode and only if the CAS has views) and the feature structures
 * section (only if any feature structure was found). Fresh reference generators and caches are
 * set up for every call and released again when the call ends.
 *
 * @param aCas
 *          the CAS to serialize.
 * @throws IOException
 *           if writing to the generator fails or the configured feature structures mode is not
 *           supported.
 */
public void write(CAS aCas) throws IOException {
  idRefGenerator = idRefGeneratorSupplier.get();
  typeRefGenerator = typeRefGeneratorSupplier.get();
  idRefCache = new HashMap<>();
  typeRefCache = new HashMap<>();
  fsToViewsCache = new IdentityHashMap<>();
  try {
    jg.writeStartObject(aCas);
    writeTypeSystem(aCas.getTypeSystem());
    writeOrIndexViews(aCas);
    writeAllFeatureStructures(findAllFeatureStructures(aCas));
    jg.writeEndObject();
  } finally {
    // The caches are keyed by feature structures and types of the CAS. Drop them so that a
    // serializer that outlives this call does not keep that data reachable.
    idRefGenerator = null;
    typeRefGenerator = null;
    idRefCache = null;
    typeRefCache = null;
    fsToViewsCache = null;
  }
}

/**
 * Handles the views of the CAS in view-name order: writes the views section in SEPARATE mode,
 * or records for every feature structure the views it was found in when in INLINE mode.
 */
private void writeOrIndexViews(CAS aCas) throws IOException {
  List<CAS> views = new ArrayList<>();
  aCas.getViewIterator().forEachRemaining(views::add);
  if (views.isEmpty()) {
    return;
  }

  // Sorting by view name keeps the output order independent of the iteration order.
  sort(views, comparing(CAS::getViewName));

  if (viewsMode == SEPARATE) {
    jg.writeObjectFieldStart(VIEWS_FIELD);
    for (CAS view : views) {
      jg.writeFieldName(view.getViewName());
      writeView(view);
    }
    jg.writeEndObject();
  }

  if (viewsMode == INLINE) {
    for (CAS view : views) {
      for (FeatureStructure fs : view.select()) {
        fsToViewsCache.computeIfAbsent(fs, _fs -> new HashSet<>()).add(view.getViewName());
      }
    }
  }
}

/**
 * Writes the feature structures section in the configured layout; writes nothing if the given
 * set is empty.
 */
private void writeAllFeatureStructures(Set<FeatureStructure> aAllFSes) throws IOException {
  if (aAllFSes.isEmpty()) {
    return;
  }

  switch (featureStructuresMode) {
    case AS_ARRAY:
      // Array layout: the reference is stored as an ID field inside each object.
      jg.writeArrayFieldStart(FEATURE_STRUCTURES_FIELD);
      for (FeatureStructure fs : aAllFSes) {
        jg.writeStartObject(fs);
        jg.writeStringField(ID_FIELD, fsRef(fs));
        writeFeatureStructure(fs);
        jg.writeEndObject();
      }
      jg.writeEndArray();
      break;
    case AS_OBJECT:
      // Object layout: the reference is the field name under which the object is stored.
      jg.writeObjectFieldStart(FEATURE_STRUCTURES_FIELD);
      for (FeatureStructure fs : aAllFSes) {
        jg.writeFieldName(fsRef(fs));
        jg.writeStartObject(fs);
        writeFeatureStructure(fs);
        jg.writeEndObject();
      }
      jg.writeEndObject();
      break;
    default:
      throw new IOException("Unsupported feature structures serialization mode: ["
          + featureStructuresMode + "]");
  }
}
/**
 * Collects the feature structures reported by {@code CASImpl.walkReachablePlusFSsSorted},
 * keeping them in the order in which they are reported.
 *
 * @param aCas
 *          the CAS to inspect; must be a {@code CASImpl}.
 * @return the collected feature structures in encounter order.
 */
private Set<FeatureStructure> findAllFeatureStructures(CAS aCas) {
  Set<FeatureStructure> collected = new LinkedHashSet<>();
  CASImpl casImpl = (CASImpl) aCas;
  casImpl.walkReachablePlusFSsSorted(collected::add, null, null, null);
  return collected;
}
/**
 * Returns the reference of the given feature structure, asking the reference generator only
 * the first time a feature structure is seen.
 */
private String fsRef(FeatureStructure aFs) {
  String ref = idRefCache.computeIfAbsent(aFs, idRefGenerator);
  return ref;
}
/**
 * Returns the reference of the given type, asking the type reference generator only the first
 * time a type is seen.
 */
private String typeRef(Type aType) {
  String ref = typeRefCache.computeIfAbsent(aType, typeRefGenerator);
  return ref;
}
/**
 * Writes the fields of one feature structure into the JSON object that the caller has already
 * opened: the type reference, in INLINE views mode the sorted names of the views it belongs to,
 * the flags (if any) and finally all features of its type.
 *
 * @param aFs
 *          the feature structure to write.
 * @throws IOException
 *           if writing to the generator fails.
 */
private void writeFeatureStructure(FeatureStructure aFs) throws IOException {
  Type fsType = aFs.getType();
  jg.writeStringField(TYPE_FIELD, typeRef(fsType));

  if (viewsMode == INLINE) {
    Set<String> memberOf = fsToViewsCache.get(aFs);
    if (memberOf != null && !memberOf.isEmpty()) {
      jg.writeArrayFieldStart(VIEWS_FIELD);
      // View names are emitted in their natural order.
      for (String viewName : memberOf.stream().sorted().collect(toList())) {
        jg.writeString(viewName);
      }
      jg.writeEndArray();
    }
  }

  // The document annotation is the only flag; the flags field is omitted when it does not apply.
  boolean isDocumentAnnotation = ((CASImpl) aFs.getCAS()).getDocumentAnnotationNoCreate() == aFs;
  if (isDocumentAnnotation) {
    jg.writeArrayFieldStart(FlexJsonNames.FLAGS_FIELD);
    jg.writeString(FLAG_DOCUMENT_ANNOTATION);
    jg.writeEndArray();
  }

  for (Feature feature : fsType.getFeatures()) {
    writeFeature(aFs, feature);
  }
}
/**
 * Writes one feature of a feature structure as a JSON field.
 * <ul>
 * <li>Features with a non-primitive range are written as a reference to the target feature
 * structure, with the field name prefixed by {@code REF}; unset features are omitted.</li>
 * <li>String features are written as JSON strings; {@code null} values are omitted.</li>
 * <li>Boolean features are written as JSON booleans; byte, short, integer, long, float and
 * double features are written as JSON numbers.</li>
 * </ul>
 *
 * @param aFs
 *          the feature structure owning the value.
 * @param aFeature
 *          the feature to write.
 * @throws IOException
 *           if writing to the generator fails or the primitive range type is not supported.
 */
private void writeFeature(FeatureStructure aFs, Feature aFeature) throws IOException {
  Type range = aFeature.getRange();
  String fieldName = aFeature.getShortName();

  if (!range.isPrimitive()) {
    FeatureStructure target = aFs.getFeatureValue(aFeature);
    if (target != null) {
      // Reuse the value fetched above instead of looking it up a second time.
      jg.writeFieldName(REF + fieldName);
      jg.writeString(fsRef(target));
    }
    return;
  }

  if (range.isStringOrStringSubtype()) {
    String value = aFs.getStringValue(aFeature);
    if (value != null) {
      jg.writeFieldName(fieldName);
      jg.writeString(value);
    }
    return;
  }

  // The field name is written together with the value, so an unsupported range type is
  // rejected before a field name without a value has been emitted.
  String rangeTypeName = range.getName();
  switch (rangeTypeName) {
    case CAS.TYPE_NAME_BOOLEAN:
      jg.writeBooleanField(fieldName, aFs.getBooleanValue(aFeature));
      break;
    case CAS.TYPE_NAME_BYTE:
      jg.writeNumberField(fieldName, (int) aFs.getByteValue(aFeature));
      break;
    case CAS.TYPE_NAME_SHORT:
      jg.writeNumberField(fieldName, (int) aFs.getShortValue(aFeature));
      break;
    case CAS.TYPE_NAME_INTEGER:
      jg.writeNumberField(fieldName, aFs.getIntValue(aFeature));
      break;
    case CAS.TYPE_NAME_LONG:
      jg.writeNumberField(fieldName, aFs.getLongValue(aFeature));
      break;
    case CAS.TYPE_NAME_FLOAT:
      jg.writeNumberField(fieldName, aFs.getFloatValue(aFeature));
      break;
    case CAS.TYPE_NAME_DOUBLE:
      jg.writeNumberField(fieldName, aFs.getDoubleValue(aFeature));
      break;
    default:
      throw new IOException("Unsupported primitive type [" + rangeTypeName + "]");
  }
}
/**
 * Writes one view as a JSON array of feature structure references: first the reference of the
 * view's sofa, then the references of the feature structures indexed in the view.
 *
 * @param aView
 *          the view to write.
 * @throws IOException
 *           if writing to the generator fails.
 */
private void writeView(CAS aView) throws IOException {
  jg.writeStartArray();

  String sofaRef = fsRef(aView.getSofa());
  jg.writeString(sofaRef);

  for (TOP member : aView.getIndexedFSs()) {
    String memberRef = fsRef(member);
    jg.writeString(memberRef);
  }

  jg.writeEndArray();
}
/**
 * Writes the types section: one entry per type whose name is not in {@code BUILT_IN_TYPES},
 * ordered by type name. Nothing is written if no such type exists.
 *
 * @param aTypeSystem
 *          the type system to write.
 * @throws IOException
 *           if writing to the generator fails.
 */
private void writeTypeSystem(TypeSystem aTypeSystem) throws IOException {
  List<Type> customTypes = StreamSupport.stream(aTypeSystem.spliterator(), false)
      .filter(candidate -> !BUILT_IN_TYPES.contains(candidate.getName()))
      .sorted(comparing(candidate -> candidate.getName()))
      .collect(toList());

  if (!customTypes.isEmpty()) {
    jg.writeFieldName(TYPES_FIELD);
    jg.writeStartObject(aTypeSystem);
    for (Type customType : customTypes) {
      writeType(aTypeSystem, customType);
    }
    jg.writeEndObject();
  }
}
/**
 * Writes the description of one type as a JSON object stored under the type's reference. The
 * object holds the full type name (only if it differs from the reference), the name of the
 * parent type, the name of the component type (if any) and one field per feature that is
 * introduced by this type, mapping the feature's short name to the name of its range type.
 *
 * @param aTypeSystem
 *          the type system used to look up the parent type.
 * @param aType
 *          the type to describe.
 * @throws IOException
 *           if writing to the generator fails.
 */
private void writeType(TypeSystem aTypeSystem, Type aType) throws IOException {
  String ref = typeRef(aType);
  String fullName = aType.getName();

  jg.writeFieldName(ref);
  jg.writeStartObject(aType);

  if (!ref.equals(fullName)) {
    jg.writeStringField(FlexJsonNames.NAME_FIELD, fullName);
  }

  Type superType = aTypeSystem.getParent(aType);
  if (superType != null) {
    jg.writeStringField(SUPER_TYPE_FIELD, superType.getName());
  }

  Type componentType = aType.getComponentType();
  if (componentType != null) {
    jg.writeStringField(COMPONENT_TYPE_FIELD, componentType.getName());
  }

  // Inherited features are described on the type that declares them, so only features whose
  // domain is this very type are listed here.
  for (Feature feature : aType.getFeatures()) {
    if (feature.getDomain() == aType) {
      jg.writeStringField(feature.getShortName(), feature.getRange().getName());
    }
  }

  jg.writeEndObject();
}
/**
 * Creates a new builder preconfigured with the default settings.
 *
 * @return a fresh {@link Builder}.
 */
public static Builder builder() {
  Builder freshBuilder = new Builder();
  return freshBuilder;
}
/**
 * Fluent configuration for a {@link FlexJsonCasSerializer}. The defaults are the array layout
 * for feature structures, inline view membership, sequential feature structure references and
 * fully qualified type names as type references.
 */
public static class Builder {
  private Supplier<Function<FeatureStructure, String>> idRefGeneratorSupplier;
  private Supplier<Function<Type, String>> typeRefGeneratorSupplier;
  private FeatureStructuresMode featureStructuresMode = AS_ARRAY;
  private ViewsMode viewsMode = INLINE;

  /**
   * Creates a builder with the default reference generators.
   */
  public Builder() {
    this.setIdRefGeneratorSupplier(SequentialIdRefGenerator::new);
    this.setTypeRefGeneratorSupplier(FullyQualifiedTypeRefGenerator::new);
  }

  /**
   * @param aFeatureStructuresMode
   *          the layout of the feature structures section.
   * @return this builder.
   */
  public Builder setFeatureStructuresMode(FeatureStructuresMode aFeatureStructuresMode) {
    this.featureStructuresMode = aFeatureStructuresMode;
    return this;
  }

  /**
   * @param aViewsMode
   *          the representation of view membership.
   * @return this builder.
   */
  public Builder setViewsMode(ViewsMode aViewsMode) {
    this.viewsMode = aViewsMode;
    return this;
  }

  /**
   * @param aIdRefGeneratorSupplier
   *          the supplier of feature structure reference generators.
   * @return this builder.
   */
  public Builder setIdRefGeneratorSupplier(
      Supplier<Function<FeatureStructure, String>> aIdRefGeneratorSupplier) {
    this.idRefGeneratorSupplier = aIdRefGeneratorSupplier;
    return this;
  }

  /**
   * @param aTypeRefGeneratorSupplier
   *          the supplier of type reference generators.
   * @return this builder.
   */
  public Builder setTypeRefGeneratorSupplier(
      Supplier<Function<Type, String>> aTypeRefGeneratorSupplier) {
    this.typeRefGeneratorSupplier = aTypeRefGeneratorSupplier;
    return this;
  }

  /**
   * Creates a serializer that writes to the given generator and carries the settings of this
   * builder.
   *
   * @param jg
   *          the generator that receives the JSON output.
   * @return the configured serializer.
   */
  public FlexJsonCasSerializer build(JsonGenerator jg) {
    FlexJsonCasSerializer serializer = new FlexJsonCasSerializer(jg);
    serializer.setFeatureStructuresMode(this.featureStructuresMode);
    serializer.setViewsMode(this.viewsMode);
    serializer.setIdRefGeneratorSupplier(this.idRefGeneratorSupplier);
    serializer.setTypeRefGeneratorSupplier(this.typeRefGeneratorSupplier);
    return serializer;
  }

  /**
   * Serializes the CAS to the given file as pretty-printed, UTF-8 encoded JSON using the
   * settings of this builder.
   *
   * @param aCas
   *          the CAS to serialize.
   * @param aTargetFile
   *          the file to write to.
   * @throws IOException
   *           if the file cannot be written.
   */
  public void write(CAS aCas, File aTargetFile) throws IOException {
    JsonFactory factory = new JsonFactory();
    factory.setCodec(new ObjectMapper());

    // The generator is closed by try-with-resources once serialization has finished.
    try (JsonGenerator generator = factory.createGenerator(aTargetFile, UTF8)
        .useDefaultPrettyPrinter()) {
      build(generator).write(aCas);
    }
  }
}
/**
 * Serializes the CAS to the given file as pretty-printed, UTF-8 encoded JSON using a serializer
 * with default settings.
 *
 * @param aCas
 *          the CAS to serialize.
 * @param aTargetFile
 *          the file to write to.
 * @throws IOException
 *           if the file cannot be written.
 */
public static void write(CAS aCas, File aTargetFile) throws IOException {
  JsonFactory factory = new JsonFactory();
  factory.setCodec(new ObjectMapper());

  // The generator is closed by try-with-resources once serialization has finished.
  try (JsonGenerator generator = factory.createGenerator(aTargetFile, UTF8)
      .useDefaultPrettyPrinter()) {
    new FlexJsonCasSerializer(generator).write(aCas);
  }
}
/**
 * Reference generator that hands out consecutive decimal numbers starting at {@code 0}, one
 * per invocation, regardless of the feature structure passed in.
 */
public static class SequentialIdRefGenerator implements Function<FeatureStructure, String> {
  private int nextId = 0;

  @Override
  public String apply(FeatureStructure aT) {
    int assigned = nextId;
    nextId = assigned + 1;
    return Integer.toString(assigned);
  }
}
/**
 * Type reference generator that uses the name returned by {@code Type.getName()} as the
 * reference.
 */
public static class FullyQualifiedTypeRefGenerator implements Function<Type, String> {
  @Override
  public String apply(Type aType) {
    String qualifiedName = aType.getName();
    return qualifiedName;
  }
}
/**
 * Type reference generator that uses the short name of a type as the reference. If a short
 * name has already been handed out, the suffix {@code -1}, {@code -2}, ... is appended until an
 * unused reference is found.
 */
public static class ShortTypeRefGenerator implements Function<Type, String> {
  private final Set<String> usedNames = new HashSet<>();

  @Override
  public String apply(Type aType) {
    String shortName = aType.getShortName();

    // Set.add reports whether the name was new, which replaces a separate contains check.
    if (usedNames.add(shortName)) {
      return shortName;
    }

    int suffix = 1;
    String candidate = shortName + "-" + suffix;
    while (!usedNames.add(candidate)) {
      suffix++;
      candidate = shortName + "-" + suffix;
    }
    return candidate;
  }
}
}