// blob: c597b1403bfba6417002c31945e49e2b181ce645 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.camel.dataformat.any23;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.any23.Any23;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.configuration.ModifiableConfiguration;
import org.apache.any23.source.ByteArrayDocumentSource;
import org.apache.any23.writer.TripleHandler;
import org.apache.camel.Exchange;
import org.apache.camel.dataformat.any23.utils.Any23Utils;
import org.apache.camel.dataformat.any23.writer.RDF4JModelWriter;
import org.apache.camel.spi.DataFormat;
import org.apache.camel.spi.DataFormatName;
import org.apache.camel.spi.annotations.Dataformat;
import org.apache.camel.support.service.ServiceSupport;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.Rio;
/**
 * Data format backed by Apache Any23. It is intended to convert HTML from a
 * site (or file) into RDF.
 * <p>
 * {@link #unmarshal(Exchange, InputStream)} runs the Any23 extraction over the
 * incoming stream, while {@link #marshal(Exchange, Object, OutputStream)}
 * writes an RDF4J {@link Model} as JSON-LD embedded in a minimal HTML page.
 * The Any23 engine is built in {@link #doStart()}, so the data format must be
 * started before {@code unmarshal} is used.
 */
@Dataformat("any23")
public class Any23DataFormat extends ServiceSupport implements DataFormat, DataFormatName {

    /** Extraction engine; (re)created on every {@link #doStart()}. */
    private Any23 any23;
    /** Optional Any23 properties applied on top of a copy of the default configuration. */
    private Map<String, String> configurations;
    /** Optional extractor names handed to the {@link Any23} constructor. */
    private List<String> extractors;
    /** Output produced by unmarshal; defaults to {@link Any23OutputFormat#RDF4JMODEL} at start. */
    private Any23OutputFormat outputFormat;
    /** Document URI passed to the document source used during extraction. */
    private String baseURI;

    /**
     * Creates an unconfigured data format; use the setters to configure it.
     */
    public Any23DataFormat() {
    }

    /**
     * @param baseURI document URI passed to the extraction source
     */
    public Any23DataFormat(String baseURI) {
        this.baseURI = baseURI;
    }

    /**
     * @param outputFormat output produced by unmarshal
     * @param baseURI      document URI passed to the extraction source
     */
    public Any23DataFormat(Any23OutputFormat outputFormat, String baseURI) {
        this.outputFormat = outputFormat;
        this.baseURI = baseURI;
    }

    /**
     * @param configurations Any23 properties applied over the default configuration
     * @param outputFormat   output produced by unmarshal
     * @param baseURI        document URI passed to the extraction source
     */
    public Any23DataFormat(Map<String, String> configurations, Any23OutputFormat outputFormat, String baseURI) {
        this.configurations = configurations;
        this.outputFormat = outputFormat;
        this.baseURI = baseURI;
    }

    /**
     * @param configurations Any23 properties applied over the default configuration
     * @param extractors     extractor names handed to the Any23 engine
     * @param outputFormat   output produced by unmarshal
     * @param baseURI        document URI passed to the extraction source
     */
    public Any23DataFormat(Map<String, String> configurations, List<String> extractors, Any23OutputFormat outputFormat, String baseURI) {
        this.configurations = configurations;
        this.extractors = extractors;
        this.outputFormat = outputFormat;
        this.baseURI = baseURI;
    }

    @Override
    public String getDataFormatName() {
        return "any23";
    }

    /**
     * Marshals an RDF4J {@link Model} as JSON-LD wrapped in an HTML
     * {@code <script type="application/ld+json">} element.
     * <p>
     * The stream is closed when this method returns, whether it succeeds or
     * fails.
     *
     * @param exchange     the current exchange (not used)
     * @param object       the message body; must be a {@link Model}
     * @param outputStream the stream the HTML document is written to
     * @throws ClassCastException if {@code object} is not a {@link Model}
     * @throws Exception          if writing to the stream or serializing the model fails
     */
    @Override
    public void marshal(Exchange exchange, Object object, OutputStream outputStream) throws Exception {
        // Cast before anything is written so an unsupported body does not leave
        // a half-written document on the stream.
        Model mdl = (Model) object;
        // Explicit UTF-8 instead of the platform default so the HTML wrapper uses
        // the same encoding as the JSON-LD payload.
        // NOTE(review): assumes Rio's OutputStream-based writer emits UTF-8 - confirm.
        // Closing the writer also closes the wrapped output stream.
        try (OutputStreamWriter outputStreamWriter = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)) {
            outputStreamWriter.write("<html><script type=\"application/ld+json\">\n");
            // Rio writes straight to the underlying stream, so the prefix has to be
            // flushed first to keep the bytes in order.
            outputStreamWriter.flush();
            Rio.write(mdl, outputStream, RDFFormat.JSONLD);
            outputStreamWriter.write("\n</script></html>");
            outputStreamWriter.flush();
        }
    }

    /**
     * Unmarshals the incoming document by running the Any23 extraction on it.
     *
     * @param exchange    the current exchange (not used)
     * @param inputStream the document to extract RDF from
     * @return the extracted {@link Model} when the output format is
     *         {@link Any23OutputFormat#RDF4JMODEL}; otherwise the serialized
     *         output of the triple handler as a {@link String}
     * @throws IllegalStateException if the data format has not been started
     * @throws Exception             if reading the input or the extraction fails
     */
    @Override
    public Object unmarshal(Exchange exchange, InputStream inputStream) throws Exception {
        if (any23 == null) {
            throw new IllegalStateException("Any23 engine is not initialized; the data format must be started before unmarshalling");
        }
        ByteArrayDocumentSource source = new ByteArrayDocumentSource(inputStream, this.baseURI, null);
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        TripleHandler handler = Any23Utils.obtainHandler(outputFormat, out);
        try {
            any23.extract(source, handler);
        } finally {
            // Close the handler even when extraction fails.
            handler.close();
        }
        if (outputFormat == Any23OutputFormat.RDF4JMODEL) {
            return ((RDF4JModelWriter) handler).getModel();
        }
        // Decode explicitly instead of relying on the platform default charset.
        // NOTE(review): assumes the handlers from Any23Utils write UTF-8 - confirm.
        return new String(out.toByteArray(), StandardCharsets.UTF_8);
    }

    /**
     * Builds the {@link Any23} engine from the configured properties and
     * extractors and applies the default output format when none was set.
     * <p>
     * The engine is always rebuilt here, replacing any instance previously
     * supplied through {@link #setAny23(Any23)}.
     */
    @Override
    protected void doStart() throws Exception {
        String[] extrArray = null;
        if (extractors != null && !extractors.isEmpty()) {
            extrArray = extractors.toArray(new String[0]);
        }

        ModifiableConfiguration conf = null;
        if (configurations != null && !configurations.isEmpty()) {
            conf = DefaultConfiguration.copy();
            for (Entry<String, String> entry : configurations.entrySet()) {
                conf.setProperty(entry.getKey(), entry.getValue());
            }
        }

        if (outputFormat == null) {
            // Default output format
            outputFormat = Any23OutputFormat.RDF4JMODEL;
        }

        // Pick the constructor that matches what has actually been configured.
        if (conf == null) {
            any23 = extrArray == null ? new Any23() : new Any23(extrArray);
        } else {
            any23 = extrArray == null ? new Any23(conf) : new Any23(conf, extrArray);
        }
    }

    @Override
    protected void doStop() throws Exception {
        // noop
    }

    /**
     * @return the Any23 engine, or {@code null} if none has been set or built yet
     */
    public Any23 getAny23() {
        return any23;
    }

    /**
     * Sets the Any23 engine. Note that {@link #doStart()} replaces it with a
     * newly built instance.
     *
     * @return this data format, for chaining
     */
    public Any23DataFormat setAny23(Any23 any23) {
        this.any23 = any23;
        return this;
    }

    /**
     * @return the Any23 properties applied over the default configuration, may be {@code null}
     */
    public Map<String, String> getConfigurations() {
        return configurations;
    }

    /**
     * Sets the Any23 properties; they are read when the data format starts.
     *
     * @return this data format, for chaining
     */
    public Any23DataFormat setConfigurations(Map<String, String> configurations) {
        this.configurations = configurations;
        return this;
    }

    /**
     * @return the extractor names handed to the Any23 engine, may be {@code null}
     */
    public List<String> getExtractors() {
        return extractors;
    }

    /**
     * Sets the extractor names; they are read when the data format starts.
     *
     * @return this data format, for chaining
     */
    public Any23DataFormat setExtractors(List<String> extractors) {
        this.extractors = extractors;
        return this;
    }

    /**
     * @return the output format produced by unmarshal, may be {@code null} before start
     */
    public Any23OutputFormat getOutputFormat() {
        return outputFormat;
    }

    /**
     * Sets the output format produced by unmarshal.
     *
     * @return this data format, for chaining
     */
    public Any23DataFormat setOutputFormat(Any23OutputFormat outputFormat) {
        this.outputFormat = outputFormat;
        return this;
    }

    /**
     * @return the document URI passed to the extraction source, may be {@code null}
     */
    public String getBaseURI() {
        return baseURI;
    }

    /**
     * Sets the document URI passed to the extraction source.
     *
     * @return this data format, for chaining
     */
    public Any23DataFormat setBaseURI(String baseURI) {
        this.baseURI = baseURI;
        return this;
    }
}