blob: 7bce74c2972af61be811c86979d2b1e99f085b21 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<title>Apache Jena - RDF Binary using Apache Thrift</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
<link href="/css/bootstrap-icons.css" rel="stylesheet" media="screen"><link rel="stylesheet" type="text/css" href="https://jena.apache.org/sass/jena.1b17c39a117e22b46db4c66f6395dc27c134a60377d87d2d5745b8600eb69722.css" integrity="sha256-GxfDmhF&#43;IrRttMZvY5XcJ8E0pgN32H0tV0W4YA62lyI=">
<link rel="shortcut icon" href="/images/favicon.ico" />
</head>
<body>
<nav class="navbar navbar-expand-lg bg-body-tertiary" role="navigation">
<div class="container">
<div class="navbar-header">
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<a class="navbar-brand" href="/index.html">
<img class="logo-menu" src="/images/jena-logo/jena-logo-notext-small.png" alt="jena logo">Apache Jena</a>
</div>
<div class="collapse navbar-collapse" id="navbarNav">
<ul class="navbar-nav me-auto mb-2 mb-lg-0">
<li id="homepage" class="nav-item"><a class="nav-link" href="/index.html"><span class="bi-house"></span> Home</a></li>
<li id="download" class="nav-item"><a class="nav-link" href="/download/index.cgi"><span class="bi-download"></span> Download</a></li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-journal"></span> Learn <b class="caret"></b></a>
<ul class="dropdown-menu">
<li class="dropdown-header">Tutorials</li>
<li><a class="dropdown-item" href="/tutorials/index.html">Overview</a></li>
<li><a class="dropdown-item" href="/documentation/fuseki2/index.html">Fuseki Triplestore</a></li>
<li><a class="dropdown-item" href="/documentation/notes/index.html">How-To's</a></li>
<li><a class="dropdown-item" href="/documentation/query/manipulating_sparql_using_arq.html">Manipulating SPARQL using ARQ</a></li>
<li><a class="dropdown-item" href="/tutorials/rdf_api.html">RDF core API tutorial</a></li>
<li><a class="dropdown-item" href="/tutorials/sparql.html">SPARQL tutorial</a></li>
<li><a class="dropdown-item" href="/tutorials/using_jena_with_eclipse.html">Using Jena with Eclipse</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">References</li>
<li><a class="dropdown-item" href="/documentation/index.html">Overview</a></li>
<li><a class="dropdown-item" href="/documentation/query/index.html">ARQ (SPARQL)</a></li>
<li><a class="dropdown-item" href="/documentation/io/">RDF I/O</a></li>
<li><a class="dropdown-item" href="/documentation/assembler/index.html">Assembler</a></li>
<li><a class="dropdown-item" href="/documentation/tools/index.html">Command-line tools</a></li>
<li><a class="dropdown-item" href="/documentation/rdfs/">Data with RDFS Inferencing</a></li>
<li><a class="dropdown-item" href="/documentation/geosparql/index.html">GeoSPARQL</a></li>
<li><a class="dropdown-item" href="/documentation/inference/index.html">Inference API</a></li>
<li><a class="dropdown-item" href="/documentation/ontology/">Ontology API</a></li>
<li><a class="dropdown-item" href="/documentation/permissions/index.html">Permissions</a></li>
<li><a class="dropdown-item" href="/documentation/extras/querybuilder/index.html">Query Builder</a></li>
<li><a class="dropdown-item" href="/documentation/rdf/index.html">RDF API</a></li>
<li><a class="dropdown-item" href="/documentation/rdfconnection/">RDF Connection - SPARQL API</a></li>
<li><a class="dropdown-item" href="/documentation/rdfstar/index.html">RDF-star</a></li>
<li><a class="dropdown-item" href="/documentation/shacl/index.html">SHACL</a></li>
<li><a class="dropdown-item" href="/documentation/shex/index.html">ShEx</a></li>
<li><a class="dropdown-item" href="/documentation/tdb/index.html">TDB</a></li>
<li><a class="dropdown-item" href="/documentation/tdb2/index.html">TDB2</a></li>
<li><a class="dropdown-item" href="/documentation/query/text-query.html">Text Search</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-journal-code"></span> Javadoc <b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/documentation/javadoc.html">All Javadoc</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/arq/">ARQ</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/fuseki2/">Fuseki</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/geosparql/">GeoSPARQL</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/jena/">Jena Core</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/permissions/">Permissions</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/extras/querybuilder/">Query Builder</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/shacl/">SHACL</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/tdb/">TDB</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/text/">Text Search</a></li>
</ul>
</li>
</ul>
<form class="d-flex" role="search" action="/search" method="GET">
<div class="input-group">
<input class="form-control border-end-0 border m-0" type="search" name="q" id="search-query" placeholder="Search...." aria-label="Search" style="width: 10rem;">
<button class="btn btn-outline-secondary border-start-0 border" type="submit">
<i class="bi-search"></i>
</button>
</div>
</form>
<ul class="navbar-nav">
<li id="ask" class="nav-item"><a class="nav-link" href="/help_and_support/index.html" title="Ask"><span class="bi-patch-question"></span><span class="text-body d-none d-xxl-inline"> Ask</span></a></li>
<li class="nav-item dropdown">
<a href="#" title="Get involved" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-megaphone"></span><span class="text-body d-none d-xxl-inline"> Get involved </span><b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/getting_involved/index.html">Contribute</a></li>
<li><a class="dropdown-item" href="/help_and_support/bugs_and_suggestions.html">Report a bug</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">Project</li>
<li><a class="dropdown-item" href="/about_jena/about.html">About Jena</a></li>
<li><a class="dropdown-item" href="/about_jena/architecture.html">Architecture</a></li>
<li><a class="dropdown-item" href="/about_jena/citing.html">Citing</a></li>
<li><a class="dropdown-item" href="/about_jena/team.html">Project team</a></li>
<li><a class="dropdown-item" href="/about_jena/contributions.html">Related projects</a></li>
<li><a class="dropdown-item" href="/about_jena/roadmap.html">Roadmap</a></li>
<li><a class="dropdown-item" href="/about_jena/security-advisories.html">Security Advisories</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">ASF</li>
<li><a class="dropdown-item" href="https://www.apache.org/">Apache Software Foundation</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/sponsorship.html">Become a Sponsor</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/licenses/LICENSE-2.0">License</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
</ul>
</li>
<li class="nav-item" id="edit"><a class="nav-link" href="https://github.com/apache/jena-site/edit/main/source/documentation/io/rdf-binary.md" title="Edit this page on GitHub"><span class="bi-pencil-square"></span><span class="text-body d-none d-xxl-inline"> Edit this page</span></a></li>
</ul>
</div>
</div>
</nav>
<div class="container">
<div class="row">
<div class="col-md-12">
<div id="breadcrumbs">
<ol class="breadcrumb mt-4 p-2 bg-body-tertiary">
<li class="breadcrumb-item"><a href='/documentation'>DOCUMENTATION</a></li>
<li class="breadcrumb-item"><a href='/documentation/io'>IO</a></li>
<li class="breadcrumb-item active">RDF BINARY</li>
</ol>
</div>
<h1 class="title">RDF Binary using Apache Thrift</h1>
<main class="d-flex flex-xl-row flex-column">
<aside class="text-muted align-self-start mb-3 p-0 d-xl-none d-block">
<h2 class="h6 sticky-top m-0 p-2 bg-body-tertiary">On this page</h2>
<nav id="TableOfContents">
<ul>
<li><a href="#encoding-terms-thrift">Thrift encoding of RDF Terms</a>
<ul>
<li><a href="#rdf-terms">RDF terms</a></li>
<li><a href="#encoding-thrift-tuples">Thrift encoding of Triples, Quads and rows.</a></li>
<li><a href="#encoding-thrift-graphs-datasets">Thrift encoding of RDF Graphs and RDF Datasets</a></li>
<li><a href="#encoding-thrift-result-sets">Thrift encoding of SPARQL Result Sets</a></li>
</ul>
</li>
<li><a href="#encoding-terms-protobuf">Protobuf encoding of RDF Terms</a></li>
</ul>
</nav>
</aside>
<article class="flex-column me-lg-4">
<p>&ldquo;RDF Binary&rdquo; is an efficient format for RDF and RDF-related data using
<a href="https://thrift.apache.org/">Apache Thrift</a>
or <a href="https://developers.google.com/protocol-buffers">Google Protocol Buffers</a>
as the binary data encoding.</p>
<p>The W3C standard RDF syntaxes are text or XML based. These incur costs in
parsing; the most human-readable formats also incur high costs to write, and
have limited scalability due to the need to analyse the data for pretty
printing rather than simply stream to output.</p>
<p>Binary formats are faster to process - they do not incur the parsing
costs of text-based formats. &ldquo;RDF Binary&rdquo; defines basic encoding for RDF
terms, then builds data formats for RDF graphs, RDF datasets, and for
SPARQL result sets. This gives a basis for high-performance linked data
systems.</p>
<p><a href="https://thrift.apache.org/">Thrift</a> and
<a href="https://developers.google.com/protocol-buffers">Protobuf</a> provide efficient,
widely-used, binary encoding layers each with a large number of language
bindings.</p>
<p>For more details, see <a href="https://afs.github.io/rdf-thrift/">RDF Thrift</a>.</p>
<h2 id="encoding-terms-thrift">Thrift encoding of RDF Terms</h2>
<p>RDF Thrift uses the Thrift compact protocol.</p>
<p>Source: <a href="https://github.com/apache/jena/blob/main/jena-arq/Grammar/RDF-Thrift/BinaryRDF.thrift">BinaryRDF.thrift</a></p>
<h3 id="rdf-terms">RDF terms</h3>
<pre><code>struct RDF_IRI {
1: required string iri
}
# A prefix name (abbrev for an IRI)
struct RDF_PrefixName {
1: required string prefix ;
2: required string localName ;
}
struct RDF_BNode {
1: required string label
}
struct RDF_Literal {
1: required string lex ;
2: optional string langtag ;
3: optional string datatype ;
4: optional RDF_PrefixName dtPrefix ;
}
struct RDF_Decimal {
1: required i64 value ;
2: required i32 scale ;
}
struct RDF_VAR {
1: required string name ;
}
struct RDF_ANY { }
struct RDF_UNDEF { }
struct RDF_REPEAT { }
union RDF_Term {
1: RDF_IRI iri
2: RDF_BNode bnode
3: RDF_Literal literal
4: RDF_PrefixName prefixName
5: RDF_VAR variable
6: RDF_ANY any
7: RDF_UNDEF undefined
8: RDF_REPEAT repeat
9: RDF_Triple tripleTerm # RDF-star
# Value forms of literals.
10: i64 valInteger
11: double valDouble
12: RDF_Decimal valDecimal
}
</code></pre>
<h3 id="encoding-thrift-tuples">Thrift encoding of Triples, Quads and rows.</h3>
<pre><code>struct RDF_Triple {
1: required RDF_Term S
2: required RDF_Term P
3: required RDF_Term O
}
struct RDF_Quad {
1: required RDF_Term S
2: required RDF_Term P
3: required RDF_Term O
4: optional RDF_Term G
}
struct RDF_PrefixDecl {
1: required string prefix ;
2: required string uri ;
}
</code></pre>
<h3 id="encoding-thrift-graphs-datasets">Thrift encoding of RDF Graphs and RDF Datasets</h3>
<pre><code>union RDF_StreamRow {
1: RDF_PrefixDecl prefixDecl
2: RDF_Triple triple
3: RDF_Quad quad
}
</code></pre>
<p>RDF Graphs are encoded as a stream of <code>RDF_Triple</code> and <code>RDF_PrefixDecl</code>.</p>
<p>RDF Datasets are encoded as a stream of <code>RDF_Triple</code>, <code>RDF_Quad</code> and <code>RDF_PrefixDecl</code>.</p>
<h3 id="encoding-thrift-result-sets">Thrift encoding of SPARQL Result Sets</h3>
<p>A SPARQL Result Set is encoded as a list of variables (the header), then
a stream of rows (the results).</p>
<pre><code>struct RDF_VarTuple {
1: list&lt;RDF_VAR&gt; vars
}
struct RDF_DataTuple {
1: list&lt;RDF_Term&gt; row
}
</code></pre>
<h2 id="encoding-terms-protobuf">Protobuf encoding of RDF Terms</h2>
<p>The Protobuf schema is similar.</p>
<p>Source:
<a href="https://github.com/apache/jena/blob/main/jena-arq/Grammar/RDF-Protobuf/binary-rdf.proto">binary-rdf.proto</a></p>
<p>Streaming is used to allow for graphs of arbitrary size. Therefore the stream items
(<code>RDF_StreamRow</code> below) are written with an initial length (<code>writeDelimitedTo</code>
in the Java API).</p>
<p>See
<a href="https://developers.google.com/protocol-buffers/docs/techniques#streaming">Protobuf Techniques Streaming</a>.</p>
<pre tabindex="0"><code>syntax = &#34;proto3&#34;;
option java_package = &#34;org.apache.jena.riot.protobuf.wire&#34; ;
// Prefer one file with static inner classes.
option java_outer_classname = &#34;PB_RDF&#34; ;
// Optimize for speed (default)
option optimize_for = SPEED ;
//option java_multiple_files = true;
// ==== RDF Term Definitions
message RDF_IRI {
string iri = 1 ;
}
// A prefix name (abbrev for an IRI)
message RDF_PrefixName {
string prefix = 1 ;
string localName = 2 ;
}
message RDF_BNode {
string label = 1 ;
// 2 * fixed64
}
// Common abbreviations for datatypes and other URIs?
// union with additional values.
message RDF_Literal {
string lex = 1 ;
oneof literalKind {
bool simple = 9 ;
string langtag = 2 ;
string datatype = 3 ;
RDF_PrefixName dtPrefix = 4 ;
}
}
message RDF_Decimal {
sint64 value = 1 ;
sint32 scale = 2 ;
}
message RDF_Var {
string name = 1 ;
}
message RDF_ANY { }
message RDF_UNDEF { }
message RDF_REPEAT { }
message RDF_Term {
oneof term {
RDF_IRI iri = 1 ;
RDF_BNode bnode = 2 ;
RDF_Literal literal = 3 ;
RDF_PrefixName prefixName = 4 ;
RDF_Var variable = 5 ;
RDF_Triple tripleTerm = 6 ;
RDF_ANY any = 7 ;
RDF_UNDEF undefined = 8 ;
RDF_REPEAT repeat = 9 ;
// Value forms of literals.
sint64 valInteger = 20 ;
double valDouble = 21 ;
RDF_Decimal valDecimal = 22 ;
}
}
// === StreamRDF items
message RDF_Triple {
RDF_Term S = 1 ;
RDF_Term P = 2 ;
RDF_Term O = 3 ;
}
message RDF_Quad {
RDF_Term S = 1 ;
RDF_Term P = 2 ;
RDF_Term O = 3 ;
RDF_Term G = 4 ;
}
// Prefix declaration
message RDF_PrefixDecl {
string prefix = 1;
string uri = 2 ;
}
// StreamRDF
message RDF_StreamRow {
oneof row {
RDF_PrefixDecl prefixDecl = 1 ;
RDF_Triple triple = 2 ;
RDF_Quad quad = 3 ;
RDF_IRI base = 4 ;
}
}
message RDF_Stream {
repeated RDF_StreamRow row = 1 ;
}
// ==== SPARQL Result Sets
message RDF_VarTuple {
repeated RDF_Var vars = 1 ;
}
message RDF_DataTuple {
repeated RDF_Term row = 1 ;
}
// ==== RDF Graph
message RDF_Graph {
repeated RDF_Triple triple = 1 ;
}
</code></pre>
</article>
<aside class="text-muted align-self-start mb-3 mb-xl-5 p-0 d-none d-xl-flex flex-column sticky-top">
<h2 class="h6 sticky-top m-0 p-2 bg-body-tertiary">On this page</h2>
<nav id="TableOfContents">
<ul>
<li><a href="#encoding-terms-thrift">Thrift encoding of RDF Terms</a>
<ul>
<li><a href="#rdf-terms">RDF terms</a></li>
<li><a href="#encoding-thrift-tuples">Thrift encoding of Triples, Quads and rows.</a></li>
<li><a href="#encoding-thrift-graphs-datasets">Thrift encoding of RDF Graphs and RDF Datasets</a></li>
<li><a href="#encoding-thrift-result-sets">Thrift encoding of SPARQL Result Sets</a></li>
</ul>
</li>
<li><a href="#encoding-terms-protobuf">Protobuf encoding of RDF Terms</a></li>
</ul>
</nav>
</aside>
</main>
</div>
</div>
</div>
<footer class="bd-footer py-4 py-md-5 mt-4 mt-lg-5 bg-body-tertiary">
<div class="container" style="font-size:80%" >
<p>
Copyright &copy; 2011&ndash;2024 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
</p>
<p>
Apache Jena, Jena, the Apache Jena project logo, Apache and the Apache feather logos are trademarks of
The Apache Software Foundation.
<br/>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html"
>Apache Software Foundation Privacy Policy</a>.
</p>
</div>
</footer>
<script src="/js/popper.min.js.js" type="text/javascript"></script>
<script src="/js/bootstrap.min.js" type="text/javascript"></script>
<script src="/js/improve.js" type="text/javascript"></script>
<script type="text/javascript">
(function() {
  'use strict'
  // Mark every link that points at the current page as "active", and also
  // mark the first-child anchor found under each enclosing <li> (for example
  // the dropdown toggle of the menu that contains the link).
  //
  // querySelectorAll always returns a NodeList (possibly empty), so no
  // null/undefined check is needed before iterating.
  const links = document.querySelectorAll(`a[href="${window.location.pathname}"]`)
  // The ancestor walk stops once `count` exceeds this limit, i.e. it visits
  // at most levelsLimit + 1 ancestors per link.
  const levelsLimit = 4
  for (const link of links) {
    link.classList.add('active')
    let parentElement = link.parentElement
    let count = 0
    // Climb only through list markup (UL/LI); any other element ends the walk.
    while (parentElement && ['UL', 'LI'].includes(parentElement.tagName) && count <= levelsLimit) {
      if (parentElement.tagName === 'LI') {
        // querySelector returns null when no anchor under this <li> is the
        // first element child of its parent; skip instead of throwing so the
        // remaining links are still processed.
        const anchor = parentElement.querySelector('a:first-child')
        if (anchor !== null) {
          anchor.classList.add('active')
        }
      }
      parentElement = parentElement.parentElement
      count++
    }
  }
})()
</script>
</body>
</html>