blob: c5d11ecb8733686b6d7c4afaefb1848051d369fe [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.ontologymanager.sources.clerezza;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import org.apache.clerezza.commons.rdf.ImmutableGraph;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.rdf.core.access.TcProvider;
import org.apache.clerezza.rdf.core.serializedform.Parser;
import org.apache.clerezza.rdf.core.serializedform.UnsupportedFormatException;
import org.apache.stanbol.commons.indexedgraph.IndexedGraph;
import org.apache.stanbol.ontologymanager.servicesapi.io.Origin;
import org.apache.stanbol.ontologymanager.servicesapi.ontology.OntologyLoadingException;
import org.apache.stanbol.ontologymanager.servicesapi.util.OntologyUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* An ontology input source that returns a Clerezza {@link Graph} ({@link ImmutableGraph} or {@link Graph})
* after parsing its serialized content from an input stream.
*
* @author alexdma
*
*/
public class GraphContentInputSource extends AbstractClerezzaGraphInputSource {
protected Logger log = LoggerFactory.getLogger(getClass());
/**
* Creates a new graph input source by parsing <code>content</code>. Every supported format will be tried
* until one is parsed successfully. The resulting graph is created in-memory, and its triples will have
* to be manually added to a stored graph if necessary.
*
* @param content
* the serialized graph content.
*/
public GraphContentInputSource(InputStream content) {
this(content, (String) null);
}
/**
* Creates a new graph input source by parsing <code>content</code> assuming it has the given format. The
* resulting graph is created in-memory, and its triples will have to be manually added to a stored graph
* if necessary.
*
* @param content
* the serialized graph content.
* @param formatIdentifier
* the format to parse the content as.
*/
public GraphContentInputSource(InputStream content, String formatIdentifier) {
this(content, formatIdentifier, null);
}
/**
* Creates a new graph input source by parsing <code>content</code> into a graph created using the
* supplied {@link TcProvider}, assuming it has the given format.
*
* @param content
* the serialized graph content.
* @param formatIdentifier
* the format to parse the content as.
* @param tcProvider
* the provider that will create the graph where the triples will be stored.
*/
public GraphContentInputSource(InputStream content, String formatIdentifier, TcProvider tcProvider) {
this(content, formatIdentifier, tcProvider, Parser.getInstance());
}
/**
* Creates a new graph input source by parsing <code>content</code> (using the supplied {@link Parser})
* into a graph created using the supplied {@link TcProvider}, assuming it has the given format. An
* {@link OntologyLoadingException} will be thrown if the parser fails.
*
* @param content
* the serialized graph content.
* @param formatIdentifier
* the format to parse the content as. Blank or null values are allowed, but could cause
* exceptions to be thrown if the supplied input stream cannot be reset.
* @param tcProvider
* the provider that will create the graph where the triples will be stored. If null, an
* in-memory graph will be created, in which case any ontology collectors using this input
* source will most likely have to copy it to persistent storage.
* @param parser
* the parser to use for creating the graph. If null, the default one will be used. * @deprecated
*/
public GraphContentInputSource(InputStream content,
String formatIdentifier,
TcProvider tcProvider,
Parser parser) {
long before = System.currentTimeMillis();
if (content == null) throw new IllegalArgumentException("No content supplied");
if (parser == null) parser = Parser.getInstance();
boolean loaded = false;
// Check if we can make multiple attempts at parsing the data stream.
if (content.markSupported()) {
log.debug("Stream mark/reset supported. Can try multiple formats if necessary.");
content.mark(Integer.MAX_VALUE);
}
// Check for supported formats to try.
Collection<String> formats;
if (formatIdentifier == null || "".equals(formatIdentifier.trim())) formats = OntologyUtils
.getPreferredSupportedFormats(parser.getSupportedFormats());
else formats = Collections.singleton(formatIdentifier);
// TODO guess/lookahead the ontology ID and use it in the graph name.
IRI name = new IRI( /* "ontonet" + "::" + */
getClass().getCanonicalName() + "-time:" + System.currentTimeMillis());
Graph graph = null;
if (tcProvider != null && tcProvider != null) {
// ImmutableGraph directly stored in the TcProvider prior to using the source
graph = tcProvider.createGraph(name);
bindPhysicalOrigin(Origin.create(name));
// XXX if addition fails, should rollback procedures also delete the graph?
} else {
// In memory graph, will most likely have to be copied afterwards.
graph = new IndexedGraph();
bindPhysicalOrigin(null);
}
Iterator<String> itf = formats.iterator();
if (!itf.hasNext()) throw new OntologyLoadingException("No suitable format found or defined.");
do {
String f = itf.next();
log.debug("Parsing with format {}", f);
try {
parser.parse((Graph) graph, content, f);
loaded = true;
log.info("ImmutableGraph parsed, has {} triples", graph.size());
} catch (UnsupportedFormatException e) {
log.debug("Parsing format {} failed.", f);
} catch (Exception e) {
log.debug("Error parsing format " + f, e);
} finally {
if (!loaded && content.markSupported()) try {
content.reset();
} catch (IOException e) {
log.debug("Failed to reset data stream while parsing format {}.", f);
// No way to retry if the stream cannot be reset. Must recreate it.
break;
}
}
} while (!loaded && itf.hasNext());
if (loaded) {
bindRootOntology(graph);
log.debug("Root ontology is a {}.", getRootOntology().getClass().getCanonicalName());
} else {
// Rollback graph creation, if any
if (tcProvider != null && tcProvider != null) {
tcProvider.deleteGraph(name);
log.error("Parsing failed. Deleting triple collection {}", name);
}
throw new OntologyLoadingException("Parsing failed. Giving up.");
}
log.debug("Input source initialization completed in {} ms.", (System.currentTimeMillis() - before));
}
/**
* Creates a new graph input source by parsing <code>content</code> into a graph created using the
* supplied {@link TcProvider}. Every supported format will be tried until one is parsed successfully.
*
* @param content
* the serialized graph content.
* @param tcProvider
* the provider that will create the graph where the triples will be stored.
*
*/
public GraphContentInputSource(InputStream content, TcProvider tcProvider) {
this(content, null, tcProvider);
}
@Override
public String toString() {
return "<GRAPH_CONTENT>" + getOrigin();
}
}