blob: 4aff96832e6c34b091881b2f9176ddae199efea3 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<title>Apache Jena - Working with RDF Streams in Apache Jena</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
<link href="/css/bootstrap-icons.css" rel="stylesheet" media="screen"><link rel="stylesheet" type="text/css" href="https://jena.apache.org/sass/jena.1b17c39a117e22b46db4c66f6395dc27c134a60377d87d2d5745b8600eb69722.css" integrity="sha256-GxfDmhF&#43;IrRttMZvY5XcJ8E0pgN32H0tV0W4YA62lyI=">
<link rel="shortcut icon" href="/images/favicon.ico" />
</head>
<body>
<nav class="navbar navbar-expand-lg bg-body-tertiary" role="navigation">
<div class="container">
<div class="navbar-header">
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<a class="navbar-brand" href="/index.html">
<img class="logo-menu" src="/images/jena-logo/jena-logo-notext-small.png" alt="jena logo">Apache Jena</a>
</div>
<div class="collapse navbar-collapse" id="navbarNav">
<ul class="navbar-nav me-auto mb-2 mb-lg-0">
<li id="homepage" class="nav-item"><a class="nav-link" href="/index.html"><span class="bi-house"></span> Home</a></li>
<li id="download" class="nav-item"><a class="nav-link" href="/download/index.cgi"><span class="bi-download"></span> Download</a></li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-journal"></span> Learn <b class="caret"></b></a>
<ul class="dropdown-menu">
<li class="dropdown-header">Tutorials</li>
<li><a class="dropdown-item" href="/tutorials/index.html">Overview</a></li>
<li><a class="dropdown-item" href="/documentation/fuseki2/index.html">Fuseki Triplestore</a></li>
<li><a class="dropdown-item" href="/documentation/notes/index.html">How-To's</a></li>
<li><a class="dropdown-item" href="/documentation/query/manipulating_sparql_using_arq.html">Manipulating SPARQL using ARQ</a></li>
<li><a class="dropdown-item" href="/tutorials/rdf_api.html">RDF core API tutorial</a></li>
<li><a class="dropdown-item" href="/tutorials/sparql.html">SPARQL tutorial</a></li>
<li><a class="dropdown-item" href="/tutorials/using_jena_with_eclipse.html">Using Jena with Eclipse</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">References</li>
<li><a class="dropdown-item" href="/documentation/index.html">Overview</a></li>
<li><a class="dropdown-item" href="/documentation/query/index.html">ARQ (SPARQL)</a></li>
<li><a class="dropdown-item" href="/documentation/io/">RDF I/O</a></li>
<li><a class="dropdown-item" href="/documentation/assembler/index.html">Assembler</a></li>
<li><a class="dropdown-item" href="/documentation/tools/index.html">Command-line tools</a></li>
<li><a class="dropdown-item" href="/documentation/rdfs/">Data with RDFS Inferencing</a></li>
<li><a class="dropdown-item" href="/documentation/geosparql/index.html">GeoSPARQL</a></li>
<li><a class="dropdown-item" href="/documentation/inference/index.html">Inference API</a></li>
<li><a class="dropdown-item" href="/documentation/ontology/">Ontology API</a></li>
<li><a class="dropdown-item" href="/documentation/permissions/index.html">Permissions</a></li>
<li><a class="dropdown-item" href="/documentation/extras/querybuilder/index.html">Query Builder</a></li>
<li><a class="dropdown-item" href="/documentation/rdf/index.html">RDF API</a></li>
<li><a class="dropdown-item" href="/documentation/rdfconnection/">RDF Connection - SPARQL API</a></li>
<li><a class="dropdown-item" href="/documentation/rdfstar/index.html">RDF-star</a></li>
<li><a class="dropdown-item" href="/documentation/shacl/index.html">SHACL</a></li>
<li><a class="dropdown-item" href="/documentation/shex/index.html">ShEx</a></li>
<li><a class="dropdown-item" href="/documentation/tdb/index.html">TDB</a></li>
<li><a class="dropdown-item" href="/documentation/tdb2/index.html">TDB2</a></li>
<li><a class="dropdown-item" href="/documentation/query/text-query.html">Text Search</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-journal-code"></span> Javadoc <b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/documentation/javadoc.html">All Javadoc</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/arq/">ARQ</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/fuseki2/">Fuseki</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/geosparql/">GeoSPARQL</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/jena/">Jena Core</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/permissions/">Permissions</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/extras/querybuilder/">Query Builder</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/shacl/">SHACL</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/tdb/">TDB</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/text/">Text Search</a></li>
</ul>
</li>
</ul>
<form class="d-flex" role="search" action="/search" method="GET">
<div class="input-group">
<input class="form-control border-end-0 border m-0" type="search" name="q" id="search-query" placeholder="Search...." aria-label="Search" style="width: 10rem;">
<button class="btn btn-outline-secondary border-start-0 border" type="submit" aria-label="Search">
<i class="bi-search" aria-hidden="true"></i>
</button>
</div>
</form>
<ul class="navbar-nav">
<li id="ask" class="nav-item"><a class="nav-link" href="/help_and_support/index.html" title="Ask"><span class="bi-patch-question"></span><span class="text-body d-none d-xxl-inline"> Ask</span></a></li>
<li class="nav-item dropdown">
<a href="#" title="Get involved" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-megaphone"></span><span class="text-body d-none d-xxl-inline"> Get involved </span><b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/getting_involved/index.html">Contribute</a></li>
<li><a class="dropdown-item" href="/help_and_support/bugs_and_suggestions.html">Report a bug</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">Project</li>
<li><a class="dropdown-item" href="/about_jena/about.html">About Jena</a></li>
<li><a class="dropdown-item" href="/about_jena/architecture.html">Architecture</a></li>
<li><a class="dropdown-item" href="/about_jena/citing.html">Citing</a></li>
<li><a class="dropdown-item" href="/about_jena/team.html">Project team</a></li>
<li><a class="dropdown-item" href="/about_jena/contributions.html">Related projects</a></li>
<li><a class="dropdown-item" href="/about_jena/roadmap.html">Roadmap</a></li>
<li><a class="dropdown-item" href="/about_jena/security-advisories.html">Security Advisories</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">ASF</li>
<li><a class="dropdown-item" href="https://www.apache.org/">Apache Software Foundation</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/sponsorship.html">Become a Sponsor</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/licenses/LICENSE-2.0">License</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
</ul>
</li>
<li class="nav-item" id="edit"><a class="nav-link" href="https://github.com/apache/jena-site/edit/main/source/documentation/io/streaming-io.md" title="Edit this page on GitHub"><span class="bi-pencil-square"></span><span class="text-body d-none d-xxl-inline"> Edit this page</span></a></li>
</ul>
</div>
</div>
</nav>
<div class="container">
<div class="row">
<div class="col-md-12">
<div id="breadcrumbs">
<ol class="breadcrumb mt-4 p-2 bg-body-tertiary">
<li class="breadcrumb-item"><a href='/documentation'>DOCUMENTATION</a></li>
<li class="breadcrumb-item"><a href='/documentation/io'>IO</a></li>
<li class="breadcrumb-item active">STREAMING IO</li>
</ol>
</div>
<h1 class="title">Working with RDF Streams in Apache Jena</h1>
<main class="d-flex flex-xl-row flex-column">
<aside class="text-muted align-self-start mb-3 p-0 d-xl-none d-block">
<h2 class="h6 sticky-top m-0 p-2 bg-body-tertiary">On this page</h2>
<nav id="TableOfContents">
<ul>
<li><a href="#streamrdf">StreamRDF</a></li>
<li><a href="#reading-data">Reading data</a></li>
<li><a href="#writing-data">Writing data</a></li>
<li><a href="#rdfformat-and-lang">RDFFormat and Lang</a></li>
</ul>
</nav>
</aside>
<article class="flex-column me-lg-4">
<p>Jena has operations useful in processing RDF in a streaming
fashion. Streaming can be used for manipulating RDF at scale. Jena
provides high performance readers and writers for all standard RDF formats,
and it can be extended with custom formats.</p>
<p>The <a href="rdf-binary.html">RDF Binary</a> format provides the highest
input parsing performance. Among the W3C standard formats, N-Triples and N-Quads
provide the highest input parsing performance.</p>
<p>Files ending in <code>.gz</code> are assumed to be gzip-compressed. Input and output
to such files take this into account, including looking at the other file
extension: for example, <code>data.nt.gz</code> is parsed as a gzip-compressed N-Triples file.</p>
<p>Jena does not support all possible compression formats itself; only
GZip and BZip2 are supported directly. If you want to use an
alternative compression format, you can do so by adding suitable dependencies
to your project and passing an appropriate <code>InputStream</code>/<code>OutputStream</code>
implementation to Jena code, for example:</p>
<pre><code>InputStream input = new ZstdCompressorInputStream(....);
Graph graph = RDFParser.source(input).lang(Lang.NQ).toGraph();
</code></pre>
<h2 id="streamrdf">StreamRDF</h2>
<p>The central abstraction is
<a href="/documentation/javadoc/arq/org.apache.jena.arq/org/apache/jena/riot/system/StreamRDF.html"><code>StreamRDF</code></a>
which is an interface for streamed RDF data. It covers triples and quads,
and also parser events for prefix settings and base URI declarations.</p>
<pre><code>public interface StreamRDF {
/** Start processing */
public void start() ;
/** Triple emitted */
public void triple(Triple triple) ;
/** Quad emitted */
public void quad(Quad quad) ;
/** base declaration seen */
public void base(String base) ;
/** prefix declaration seen */
public void prefix(String prefix, String iri) ;
/** Finish processing */
public void finish() ;
}
</code></pre>
<p>There are utilities to help:</p>
<ul>
<li><a href="/documentation/javadoc/arq/org.apache.jena.arq/org/apache/jena/riot/system/StreamRDFLib.html"><code>StreamRDFLib</code></a> – create <code>StreamRDF</code> objects</li>
<li><a href="/documentation/javadoc/arq/org.apache.jena.arq/org/apache/jena/riot/system/StreamRDFOps.html"><code>StreamRDFOps</code></a> – helpers for sending RDF data to <code>StreamRDF</code> objects</li>
</ul>
<h2 id="reading-data">Reading data</h2>
<p>All parsers of RDF syntaxes provided by RIOT are streaming with the
exception of JSON-LD. A JSON object can have members in any order so the
parser may need the whole top-level object in order to have the information
needed for parsing.</p>
<p>The <a href="/documentation/javadoc/arq/org.apache.jena.arq/org/apache/jena/riot/RDFDataMgr.html#parse%28org.apache.jena.riot.system.StreamRDF%2C%20java.io.InputStream%2C%20org.apache.jena.riot.Lang%29"><code>parse</code> functions</a>
of <a href="/documentation/javadoc/arq/org.apache.jena.arq/org/apache/jena/riot/RDFDataMgr.html">RDFDataMgr</a>
direct the output of the parser to a <code>StreamRDF</code>. For example:</p>
<pre><code>StreamRDF destination = ...
RDFDataMgr.parse(destination, &quot;http://example/data.ttl&quot;) ;
</code></pre>
<p>The above code reads the remote URL, with content negotiation, and sends the
triples to the <code>destination</code>.</p>
<h2 id="writing-data">Writing data</h2>
<p>Not all RDF formats are suitable for writing as a stream. Formats that
provide pretty printing (for example the default <code>RDFFormat</code> for each of
Turtle, TriG and RDF/XML) require analysis of the entire model in order
to determine nestable structures of blank nodes and for using specific
syntax for RDF lists.</p>
<p>These languages can be used for streaming output but with an appearance
that is necessarily &ldquo;less pretty&rdquo;.
See <a href="rdf-output.html#streamed-block-formats">&ldquo;Streamed Block Formats&rdquo;</a>
for details.</p>
<p>The <a href="/documentation/javadoc/arq/org.apache.jena.arq/org/apache/jena/riot/system/StreamRDFWriter.html"><code>StreamRDFWriter</code></a>
class has functions that write graphs and datasets
using a streaming writer and also provides for the creation of
a <code>StreamRDF</code> backed by a stream-based writer. For example:</p>
<pre><code>StreamRDFWriter.write(output, model.getGraph(), lang) ;
</code></pre>
<p>This can also be written as:</p>
<pre><code>StreamRDF writer = StreamRDFWriter.getWriterStream(output, lang) ;
StreamRDFOps.graphToStream(model.getGraph(), writer) ;
</code></pre>
<p>N-Triples and N-Quads are always written as a stream.</p>
<h2 id="rdfformat-and-lang">RDFFormat and Lang</h2>
<table>
<thead>
<tr>
<th><a href="/documentation/javadoc/arq/org.apache.jena.arq/org/apache/jena/riot/RDFFormat.html">RDFFormat</a></th>
<th><a href="/documentation/javadoc/arq/org.apache.jena.arq/org/apache/jena/riot/Lang.html">Lang</a> shortcut</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>RDFFormat.TURTLE_BLOCKS</code></td>
<td><code>Lang.TURTLE</code></td>
</tr>
<tr>
<td><code>RDFFormat.TURTLE_FLAT</code></td>
<td></td>
</tr>
<tr>
<td><code>RDFFormat.TRIG_BLOCKS</code></td>
<td><code>Lang.TRIG</code></td>
</tr>
<tr>
<td><code>RDFFormat.TRIG_FLAT</code></td>
<td></td>
</tr>
<tr>
<td><code>RDFFormat.NTRIPLES_UTF8</code></td>
<td><code>Lang.NTRIPLES</code></td>
</tr>
<tr>
<td><code>RDFFormat.NTRIPLES_ASCII</code></td>
<td></td>
</tr>
<tr>
<td><code>RDFFormat.NQUADS_UTF8</code></td>
<td><code>Lang.NQUADS</code></td>
</tr>
<tr>
<td><code>RDFFormat.NQUADS_ASCII</code></td>
<td></td>
</tr>
<tr>
<td><code>RDFFormat.TRIX</code></td>
<td><code>Lang.TRIX</code></td>
</tr>
<tr>
<td><code>RDFFormat.RDF_THRIFT</code></td>
<td><code>Lang.RDFTHRIFT</code></td>
</tr>
<tr>
<td><code>RDFFormat.RDF_PROTO</code></td>
<td><code>Lang.RDFPROTO</code></td>
</tr>
</tbody>
</table>
</article>
<aside class="text-muted align-self-start mb-3 mb-xl-5 p-0 d-none d-xl-flex flex-column sticky-top">
<h2 class="h6 sticky-top m-0 p-2 bg-body-tertiary">On this page</h2>
<nav id="TableOfContents">
<ul>
<li><a href="#streamrdf">StreamRDF</a></li>
<li><a href="#reading-data">Reading data</a></li>
<li><a href="#writing-data">Writing data</a></li>
<li><a href="#rdfformat-and-lang">RDFFormat and Lang</a></li>
</ul>
</nav>
</aside>
</main>
</div>
</div>
</div>
<footer class="bd-footer py-4 py-md-5 mt-4 mt-lg-5 bg-body-tertiary">
<div class="container" style="font-size:80%" >
<p>
Copyright &copy; 2011&ndash;2024 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
</p>
<p>
Apache Jena, Jena, the Apache Jena project logo, Apache and the Apache feather logos are trademarks of
The Apache Software Foundation.
<br/>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html"
>Apache Software Foundation Privacy Policy</a>.
</p>
</div>
</footer>
<script src="/js/popper.min.js.js" type="text/javascript"></script>
<script src="/js/bootstrap.min.js" type="text/javascript"></script>
<script src="/js/improve.js" type="text/javascript"></script>
<script type="text/javascript">
// Highlight the navigation entries for the current page.
//
// Every anchor whose href equals the current location path gets the 'active'
// class. From each such anchor the code then walks up through the enclosing
// UL/LI ancestors and also marks the first anchor found inside each LI, so
// that a containing menu entry (e.g. a dropdown toggle) is highlighted too.
(function() {
  'use strict'
  // querySelectorAll always returns a (possibly empty) NodeList, so the
  // result can be iterated directly without an undefined/null check.
  const links = document.querySelectorAll(`a[href="${window.location.pathname}"]`)
  for (const link of links) {
    link.classList.add('active')
    let parentElement = link.parentElement
    let count = 0
    const levelsLimit = 4
    // Climb only while the ancestor is a list element and the level limit
    // has not been exceeded; stop if the ancestor chain runs out.
    while (parentElement !== null && ['UL', 'LI'].includes(parentElement.tagName) && count <= levelsLimit) {
      if (parentElement.tagName === 'LI') {
        // querySelector returns null when the LI holds no matching anchor;
        // skip it instead of throwing a TypeError on `.classList`.
        const firstLink = parentElement.querySelector('a:first-child')
        if (firstLink !== null) {
          firstLink.classList.add('active')
        }
      }
      parentElement = parentElement.parentElement
      count++
    }
  }
})()
</script>
</body>
</html>