blob: 2824e2c159a27f3d1024b11f1ad0a60707ddecaa [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<title>Apache Jena - Working with RDF Streams in Apache Jena</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
<link href="/css/bootstrap-extension.css" rel="stylesheet" type="text/css">
<link href="/css/jena.css" rel="stylesheet" type="text/css">
<link rel="shortcut icon" href="/images/favicon.ico" />
<script src="https://code.jquery.com/jquery-2.2.4.min.js"
integrity="sha256-BbhdlvQf/xTY9gja0Dq3HiwQF8LaCRTXxZKRutelT44="
crossorigin="anonymous"></script>
<script src="/js/jena-navigation.js" type="text/javascript"></script>
<script src="/js/bootstrap.min.js" type="text/javascript"></script>
<script src="/js/improve.js" type="text/javascript"></script>
</head>
<body>
<nav class="navbar navbar-default" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-ex1-collapse">
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a class="navbar-brand" href="/index.html">
<img class="logo-menu" src="/images/jena-logo/jena-logo-notext-small.png" alt="jena logo">Apache Jena</a>
</div>
<div class="collapse navbar-collapse navbar-ex1-collapse">
<ul class="nav navbar-nav">
<li id="homepage"><a href="/index.html"><span class="glyphicon glyphicon-home"></span> Home</a></li>
<li id="download"><a href="/download/index.cgi"><span class="glyphicon glyphicon-download-alt"></span> Download</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown"><span class="glyphicon glyphicon-book"></span> Learn <b class="caret"></b></a>
<ul class="dropdown-menu">
<li class="dropdown-header">Tutorials</li>
<li><a href="/tutorials/index.html">Overview</a></li>
<li><a href="/documentation/fuseki2/index.html">Fuseki Triplestore</a></li>
<li><a href="/documentation/notes/index.html">How-To's</a></li>
<li><a href="/documentation/query/manipulating_sparql_using_arq.html">Manipulating SPARQL using ARQ</a></li>
<li><a href="/tutorials/rdf_api.html">RDF core API tutorial</a></li>
<li><a href="/tutorials/sparql.html">SPARQL tutorial</a></li>
<li><a href="/tutorials/using_jena_with_eclipse.html">Using Jena with Eclipse</a></li>
<li class="divider"></li>
<li class="dropdown-header">References</li>
<li><a href="/documentation/index.html">Overview</a></li>
<li><a href="/documentation/query/index.html">ARQ (SPARQL)</a></li>
<li><a href="/documentation/assembler/index.html">Assembler</a></li>
<li><a href="/documentation/tools/index.html">Command-line tools</a></li>
<li><a href="/documentation/rdfs/">Data with RDFS Inferencing</a></li>
<li><a href="/documentation/geosparql/index.html">GeoSPARQL</a></li>
<li><a href="/documentation/inference/index.html">Inference API</a></li>
<li><a href="/documentation/javadoc.html">Javadoc</a></li>
<li><a href="/documentation/ontology/">Ontology API</a></li>
<li><a href="/documentation/permissions/index.html">Permissions</a></li>
<li><a href="/documentation/extras/querybuilder/index.html">Query Builder</a></li>
<li><a href="/documentation/rdf/index.html">RDF API</a></li>
<li><a href="/documentation/rdfconnection/">RDF Connection - SPARQL API</a></li>
<li><a href="/documentation/io/">RDF I/O</a></li>
<li><a href="/documentation/rdfstar/index.html">RDF-star</a></li>
<li><a href="/documentation/shacl/index.html">SHACL</a></li>
<li><a href="/documentation/shex/index.html">ShEx</a></li>
<li><a href="/documentation/jdbc/index.html">SPARQL over JDBC</a></li>
<li><a href="/documentation/tdb/index.html">TDB</a></li>
<li><a href="/documentation/tdb2/index.html">TDB2</a></li>
<li><a href="/documentation/query/text-query.html">Text Search</a></li>
</ul>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown"><span class="glyphicon glyphicon-book"></span> Javadoc <b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="/documentation/javadoc.html">All Javadoc</a></li>
<li><a href="/documentation/javadoc/arq/">ARQ</a></li>
<li><a href="/documentation/javadoc_elephas.html">Elephas</a></li>
<li><a href="/documentation/javadoc/fuseki2/">Fuseki</a></li>
<li><a href="/documentation/javadoc/geosparql/">GeoSPARQL</a></li>
<li><a href="/documentation/javadoc/jdbc/">JDBC</a></li>
<li><a href="/documentation/javadoc/jena/">Jena Core</a></li>
<li><a href="/documentation/javadoc/permissions/">Permissions</a></li>
<li><a href="/documentation/javadoc/extras/querybuilder/">Query Builder</a></li>
<li><a href="/documentation/javadoc/shacl/">SHACL</a></li>
<li><a href="/documentation/javadoc/tdb/">TDB</a></li>
<li><a href="/documentation/javadoc/text/">Text Search</a></li>
</ul>
</li>
<li id="ask"><a href="/help_and_support/index.html"><span class="glyphicon glyphicon-question-sign"></span> Ask</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown"><span class="glyphicon glyphicon-bullhorn"></span> Get involved <b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a href="/getting_involved/index.html">Contribute</a></li>
<li><a href="/help_and_support/bugs_and_suggestions.html">Report a bug</a></li>
<li class="divider"></li>
<li class="dropdown-header">Project</li>
<li><a href="/about_jena/about.html">About Jena</a></li>
<li><a href="/about_jena/architecture.html">Architecture</a></li>
<li><a href="/about_jena/citing.html">Citing</a></li>
<li><a href="/about_jena/team.html">Project team</a></li>
<li><a href="/about_jena/contributions.html">Related projects</a></li>
<li><a href="/about_jena/roadmap.html">Roadmap</a></li>
<li class="divider"></li>
<li class="dropdown-header">ASF</li>
<li><a href="http://www.apache.org/">Apache Software Foundation</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html">Become a Sponsor</a></li>
<li><a href="http://www.apache.org/licenses/LICENSE-2.0">License</a></li>
<li><a href="http://www.apache.org/security/">Security</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html">Thanks</a></li>
</ul>
</li>
<li id="edit"><a href="https://github.com/apache/jena-site/edit/main/source/documentation/io/streaming-io.md" title="Edit this page on GitHub"><span class="glyphicon glyphicon-pencil"></span> Edit this page</a></li>
</ul>
</div>
</div>
</nav>
<div class="container">
<div class="row">
<div class="col-md-12">
<div id="breadcrumbs">
<ol class="breadcrumb">
<li><a href='/documentation'>DOCUMENTATION</a></li>
<li><a href='/documentation/io'>IO</a></li>
<li class="active">STREAMING IO</li>
</ol>
</div>
<h1 class="title">Working with RDF Streams in Apache Jena</h1>
<p>Jena has operations useful in processing RDF in a streaming
fashion. Streaming can be used for manipulating RDF at scale. Jena
provides high performance readers and writers for all standard RDF formats,
and it can be extended with custom formats.</p>
<p>The <a href="rdf-binary.html">RDF Binary</a> format provides the highest
input parsing performance. N-Triples/N-Quads provide the highest
input parsing performance of the W3C standard formats.</p>
<p>Files ending in <code>.gz</code> are assumed to be gzip-compressed. Input and output
to such files take this into account, including looking at the remaining file
extension to determine the syntax. <code>data.nt.gz</code> is parsed as a gzip-compressed N-Triples file.</p>
<p>Jena does not support all possible compression formats itself; only
GZip and BZip2 are supported directly. If you want to use an
alternative compression format, you can do so by adding suitable dependencies
to your project and passing an appropriate <code>InputStream</code>/<code>OutputStream</code>
implementation to Jena code, for example:</p>
<pre><code>InputStream input = new ZstdCompressorInputStream(....);
Graph graph = RDFParser.source(input).lang(Lang.NQ).toGraph();
</code></pre>
<h2 id="streamrdf">StreamRDF</h2>
<p>The central abstraction is
<a href="/documentation/javadoc/arq/org/apache/jena/riot/system/StreamRDF.html"><code>StreamRDF</code></a>
which is an interface for streamed RDF data. It covers triples and quads,
and also parser events for prefix settings and base URI declarations.</p>
<pre><code>public interface StreamRDF {
/** Start processing */
public void start() ;
/** Triple emitted */
public void triple(Triple triple) ;
/** Quad emitted */
public void quad(Quad quad) ;
/** base declaration seen */
public void base(String base) ;
/** prefix declaration seen */
public void prefix(String prefix, String iri) ;
/** Finish processing */
public void finish() ;
}
</code></pre>
<p>There are utilities to help:</p>
<ul>
<li><a href="/documentation/javadoc/arq/org/apache/jena/riot/system/StreamRDFLib.html"><code>StreamRDFLib</code></a> – create <code>StreamRDF</code> objects</li>
<li><a href="/documentation/javadoc/arq/org/apache/jena/riot/system/StreamRDFOps.html"><code>StreamRDFOps</code></a> – helpers for sending RDF data to <code>StreamRDF</code> objects</li>
</ul>
<h2 id="reading-data">Reading data</h2>
<p>All parsers of RDF syntaxes provided by RIOT are streaming with the
exception of JSON-LD. A JSON object can have members in any order so the
parser may need the whole top-level object in order to have the information
needed for parsing.</p>
<p>The <a href="/documentation/javadoc/arq/org/apache/jena/riot/RDFDataMgr.html#parse%28org.apache.jena.riot.system.StreamRDF%2C%20java.io.InputStream%2C%20org.apache.jena.riot.Lang%29"><code>parse</code> functions</a>
of <a href="/documentation/javadoc/arq/org/apache/jena/riot/RDFDataMgr.html">RDFDataMgr</a>
direct the output of the parser to a <code>StreamRDF</code>. For example:</p>
<pre><code>StreamRDF destination = ...
RDFDataMgr.parse(destination, &quot;http://example/data.ttl&quot;) ;
</code></pre>
<p>The above code reads the remote URL, with content negotiation, and sends the
triples to the <code>destination</code>.</p>
<h2 id="writing-data">Writing data</h2>
<p>Not all RDF formats are suitable for writing as a stream. Formats that
provide pretty printing (for example the default <code>RDFFormat</code> for each of
Turtle, TriG and RDF/XML) require analysis of the entire model in order
to determine nestable structures of blank nodes and for using specific
syntax for RDF lists.</p>
<p>These languages can be used for streaming output but with an appearance
that is necessarily &ldquo;less pretty&rdquo;.
See <a href="rdf-output.html#streamed-block-formats">&ldquo;Streamed Block Formats&rdquo;</a>
for details.</p>
<p>The <a href="/documentation/javadoc/arq/org/apache/jena/riot/system/StreamRDFWriter.html"><code>StreamRDFWriter</code></a>
class has functions that write graphs and datasets
using a streaming writer and also provides for the creation of
a <code>StreamRDF</code> backed by a stream-based writer:</p>
<pre><code>StreamRDFWriter.write(output, model.getGraph(), lang) ;
</code></pre>
<p>which can be done as:</p>
<pre><code>StreamRDF writer = StreamRDFWriter.getWriterStream(output, lang) ;
StreamRDFOps.graphToStream(writer, model.getGraph()) ;
</code></pre>
<p>N-Triples and N-Quads are always written as a stream.</p>
<h2 id="rdfformat-and-lang">RDFFormat and Lang</h2>
<table>
<thead>
<tr>
<th><a href="/documentation/javadoc/arq/org/apache/jena/riot/RDFFormat.html">RDFFormat</a></th>
<th><a href="/documentation/javadoc/arq/org/apache/jena/riot/Lang.html">Lang</a> shortcut</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>RDFFormat.TURTLE_BLOCKS</code></td>
<td><code>Lang.TURTLE</code></td>
</tr>
<tr>
<td><code>RDFFormat.TURTLE_FLAT</code></td>
<td></td>
</tr>
<tr>
<td><code>RDFFormat.TRIG_BLOCKS</code></td>
<td><code>Lang.TRIG</code></td>
</tr>
<tr>
<td><code>RDFFormat.TRIG_FLAT</code></td>
<td></td>
</tr>
<tr>
<td><code>RDFFormat.NTRIPLES_UTF8</code></td>
<td><code>Lang.NTRIPLES</code></td>
</tr>
<tr>
<td><code>RDFFormat.NTRIPLES_ASCII</code></td>
<td></td>
</tr>
<tr>
<td><code>RDFFormat.NQUADS_UTF8</code></td>
<td><code>Lang.NQUADS</code></td>
</tr>
<tr>
<td><code>RDFFormat.NQUADS_ASCII</code></td>
<td></td>
</tr>
<tr>
<td><code>RDFFormat.TRIX</code></td>
<td><code>Lang.TRIX</code></td>
</tr>
<tr>
<td><code>RDFFormat.RDF_THRIFT</code></td>
<td><code>Lang.RDFTHRIFT</code></td>
</tr>
<tr>
<td><code>RDFFormat.RDF_PROTO</code></td>
<td><code>Lang.RDFPROTO</code></td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<footer class="footer">
<div class="container" style="font-size:80%" >
<p>
Copyright &copy; 2011&ndash;2022 The Apache Software Foundation, Licensed under the
<a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
</p>
<p>
Apache Jena, Jena, the Apache Jena project logo, Apache and the Apache feather logos are trademarks of
The Apache Software Foundation.
<br/>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html"
>Apache Software Foundation Privacy Policy</a>.
</p>
</div>
</footer>
<script type="text/javascript">
// Find every anchor whose href attribute exactly equals the current URL path
// and add the "active" class to all of its enclosing <li> and <ul> ancestors.
var link = $('a[href="' + window.location.pathname + '"]');
// $() always returns a jQuery collection (never undefined), so the meaningful
// test is whether the selector matched at least one element.
if (link.length > 0) {
    link.parents('li,ul').addClass('active');
}
</script>
</body>
</html>