/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.nlp.json;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeSerializer;
import org.apache.stanbol.enhancer.nlp.json.valuetype.ValueTypeSerializerRegistry;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.Span;
import org.apache.stanbol.enhancer.nlp.model.Span.SpanTypeEnum;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.ObjectNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Serializes an {@link AnalysedText} instance as JSON.
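 * <p>
 * The resulting JSON document contains a single <code>"spans"</code> array holding
 * the {@link AnalysedText} itself (a span of type <code>Text</code>) followed by all
 * enclosed {@link Span}s. A rough, purely illustrative sketch of the output:
 * <pre>{@code
 * {
 *   "spans" : [
 *     { "type" : "Text", "start" : 0, "end" : 96 },
 *     { "type" : "Sentence", "start" : 0, "end" : 43 },
 *     { "type" : "Token", "start" : 0, "end" : 5 }
 *   ]
 * }
 * }</pre>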
* @author Rupert Westenthaler
*
*/
@Component(immediate=true,policy=ConfigurationPolicy.IGNORE)
@Service(value=AnalyzedTextSerializer.class)
public class AnalyzedTextSerializer {
    Logger log = LoggerFactory.getLogger(AnalyzedTextSerializer.class);
    private final static Charset UTF8 = Charset.forName("UTF-8");
    private static AnalyzedTextSerializer defaultInstance;
    protected ObjectMapper mapper = new ObjectMapper();
    /**
     * Can be used when running outside of OSGi to obtain the default (singleton)
     * instance.
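     * <p>
     * A minimal usage sketch (the {@code analysedText} variable and the target
     * {@code outputStream} are illustrative only):
     * <pre>{@code
     * AnalyzedTextSerializer serializer = AnalyzedTextSerializer.getDefaultInstance();
     * serializer.serialize(analysedText, outputStream, null); //null Charset defaults to UTF-8
     * }</pre>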
     * @return the default (singleton) {@link AnalyzedTextSerializer} instance
     */
    public static final AnalyzedTextSerializer getDefaultInstance(){
        if(defaultInstance == null){
            defaultInstance = new AnalyzedTextSerializer(ValueTypeSerializerRegistry.getInstance());
        }
        return defaultInstance;
    }
    /**
     * Default constructor used by OSGi
     */
    public AnalyzedTextSerializer() {}
    /**
     * Constructs a new serializer instance for the given {@link ValueTypeSerializerRegistry}
     * instance. Typically this constructor should not be used, as usages within
     * an OSGi environment MUST look up the service via the service registry.
     * Usages outside an OSGi environment should prefer {@link #getDefaultInstance()}
     * to obtain the singleton instance.
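     * <p>
     * A minimal sketch of such a service registry lookup (the {@code bundleContext}
     * variable is assumed to be an available OSGi {@code BundleContext}; injection
     * via Declarative Services works as well):
     * <pre>{@code
     * ServiceReference ref = bundleContext.getServiceReference(AnalyzedTextSerializer.class.getName());
     * AnalyzedTextSerializer serializer = (AnalyzedTextSerializer) bundleContext.getService(ref);
     * }</pre>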
     * @param vtsr the {@link ValueTypeSerializerRegistry} to use. MUST NOT be <code>null</code>
     */
    public AnalyzedTextSerializer(ValueTypeSerializerRegistry vtsr){
        if(vtsr == null){
            throw new IllegalArgumentException("The parsed ValueTypeSerializerRegistry MUST NOT be NULL!");
        }
        this.valueTypeSerializerRegistry = vtsr;
    }
    @Reference
    protected ValueTypeSerializerRegistry valueTypeSerializerRegistry;
    /**
     * Serializes the given {@link AnalysedText} to the {@link OutputStream}
     * using the given {@link Charset}.
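     * <p>
     * A minimal usage sketch (the {@code analysedText} variable is assumed to be an
     * existing {@link AnalysedText} instance):
     * <pre>{@code
     * ByteArrayOutputStream out = new ByteArrayOutputStream();
     * AnalyzedTextSerializer.getDefaultInstance().serialize(analysedText, out, Charset.forName("UTF-8"));
     * String json = new String(out.toByteArray(), Charset.forName("UTF-8"));
     * }</pre>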
     * @param at the {@link AnalysedText} to serialize
     * @param out the {@link OutputStream} to write to
     * @param charset the {@link Charset} to use. UTF-8 is used as default if
     * <code>null</code> is passed
     * @throws IOException if writing to the {@link OutputStream} fails
     */
    public void serialize(AnalysedText at, OutputStream out, Charset charset) throws IOException {
        if(at == null){
            throw new IllegalArgumentException("The parsed AnalysedText MUST NOT be NULL!");
        }
        if(out == null){
            throw new IllegalArgumentException("The parsed OutputStream MUST NOT be NULL");
        }
        if(charset == null){
            charset = UTF8;
        }
        JsonFactory jsonFactory = mapper.getJsonFactory();
        JsonGenerator jg = jsonFactory.createJsonGenerator(new OutputStreamWriter(out, charset));
        jg.useDefaultPrettyPrinter();
        jg.writeStartObject();
        jg.writeArrayFieldStart("spans");
        jg.writeTree(writeSpan(at));
        for(Iterator<Span> it = at.getEnclosed(EnumSet.allOf(SpanTypeEnum.class));it.hasNext();){
            jg.writeTree(writeSpan(it.next()));
        }
        jg.writeEndArray();
        jg.writeEndObject();
        jg.close();
    }
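    /**
     * Writes a single {@link Span} as an {@link ObjectNode} holding the
     * <code>"type"</code>, <code>"start"</code> and <code>"end"</code> of the span
     * plus one field per annotation key registered on the span. Single values are
     * written directly, multiple values for the same key as a JSON array (see
     * {@link #writeValue(Value)}).
     * @param span the span to serialize
     * @return the JSON representation of the span
     * @throws IOException on any error while creating the JSON tree
     */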
    private ObjectNode writeSpan(Span span) throws IOException {
        log.trace("write {}",span);
        ObjectNode jSpan = mapper.createObjectNode();
        jSpan.put("type", span.getType().name());
        jSpan.put("start", span.getStart());
        jSpan.put("end", span.getEnd());
        for(String key : span.getKeys()){
            List<Value<?>> values = span.getValues(key);
            if(values.size() == 1){
                jSpan.put(key, writeValue(values.get(0)));
            } else {
                ArrayNode jValues = jSpan.putArray(key);
                for(Value<?> value : values){
                    jValues.add(writeValue(value));
                }
            }
        }
        log.trace(" ... {}",jSpan);
        return jSpan;
    }
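    /**
     * Writes a single {@link Value} as an {@link ObjectNode}. The object always
     * holds the <code>"class"</code> (fully qualified Java type of the value) and,
     * if known, the <code>"prob"</code> (probability). The value itself is written
     * by the {@link ValueTypeSerializer} registered for its type or, if none is
     * present, by the default Jackson mapping under the <code>"value"</code> field.
     * @param value the value to serialize
     * @return the JSON representation of the value
     */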
    @SuppressWarnings({"rawtypes", "unchecked"})
    private ObjectNode writeValue(Value<?> value) {
        ObjectNode jValue;
        Class<?> valueType = value.value().getClass();
        ValueTypeSerializer vts = valueTypeSerializerRegistry.getSerializer(valueType);
        if(vts != null){
            jValue = vts.serialize(mapper,value.value());
            //TODO assert that jValue does not define "class" and "prob"!
        } else { //use the default Jackson binding and the "value" field
            jValue = mapper.createObjectNode();
            jValue.put("value", mapper.valueToTree(value.value()));
        }
        jValue.put("class",valueType.getName());
        if(value.probability() != Value.UNKNOWN_PROBABILITY){
            jValue.put("prob", value.probability());
        }
        return jValue;
    }
}