blob: 0ddc65d8909f91f42edeb332c353fb0e4d70218a [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<title>Apache Jena - TDB Transactions</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link href="/css/bootstrap.min.css" rel="stylesheet" media="screen">
<link href="/css/bootstrap-icons.css" rel="stylesheet" media="screen"><link rel="stylesheet" type="text/css" href="https://jena.apache.org/sass/jena.1b17c39a117e22b46db4c66f6395dc27c134a60377d87d2d5745b8600eb69722.css" integrity="sha256-GxfDmhF&#43;IrRttMZvY5XcJ8E0pgN32H0tV0W4YA62lyI=">
<link rel="shortcut icon" href="/images/favicon.ico" />
</head>
<body>
<nav class="navbar navbar-expand-lg bg-body-tertiary" role="navigation">
<div class="container">
<div class="navbar-header">
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbarNav" aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<a class="navbar-brand" href="/index.html">
<img class="logo-menu" src="/images/jena-logo/jena-logo-notext-small.png" alt="jena logo">Apache Jena</a>
</div>
<div class="collapse navbar-collapse" id="navbarNav">
<ul class="navbar-nav me-auto mb-2 mb-lg-0">
<li id="homepage" class="nav-item"><a class="nav-link" href="/index.html"><span class="bi-house"></span> Home</a></li>
<li id="download" class="nav-item"><a class="nav-link" href="/download/index.cgi"><span class="bi-download"></span> Download</a></li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-journal"></span> Learn <b class="caret"></b></a>
<ul class="dropdown-menu">
<li class="dropdown-header">Tutorials</li>
<li><a class="dropdown-item" href="/tutorials/index.html">Overview</a></li>
<li><a class="dropdown-item" href="/documentation/fuseki2/index.html">Fuseki Triplestore</a></li>
<li><a class="dropdown-item" href="/documentation/notes/index.html">How-To's</a></li>
<li><a class="dropdown-item" href="/documentation/query/manipulating_sparql_using_arq.html">Manipulating SPARQL using ARQ</a></li>
<li><a class="dropdown-item" href="/tutorials/rdf_api.html">RDF core API tutorial</a></li>
<li><a class="dropdown-item" href="/tutorials/sparql.html">SPARQL tutorial</a></li>
<li><a class="dropdown-item" href="/tutorials/using_jena_with_eclipse.html">Using Jena with Eclipse</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">References</li>
<li><a class="dropdown-item" href="/documentation/index.html">Overview</a></li>
<li><a class="dropdown-item" href="/documentation/query/index.html">ARQ (SPARQL)</a></li>
<li><a class="dropdown-item" href="/documentation/io/">RDF I/O</a></li>
<li><a class="dropdown-item" href="/documentation/assembler/index.html">Assembler</a></li>
<li><a class="dropdown-item" href="/documentation/tools/index.html">Command-line tools</a></li>
<li><a class="dropdown-item" href="/documentation/rdfs/">Data with RDFS Inferencing</a></li>
<li><a class="dropdown-item" href="/documentation/geosparql/index.html">GeoSPARQL</a></li>
<li><a class="dropdown-item" href="/documentation/inference/index.html">Inference API</a></li>
<li><a class="dropdown-item" href="/documentation/ontology/">Ontology API</a></li>
<li><a class="dropdown-item" href="/documentation/permissions/index.html">Permissions</a></li>
<li><a class="dropdown-item" href="/documentation/extras/querybuilder/index.html">Query Builder</a></li>
<li><a class="dropdown-item" href="/documentation/rdf/index.html">RDF API</a></li>
<li><a class="dropdown-item" href="/documentation/rdfconnection/">RDF Connection - SPARQL API</a></li>
<li><a class="dropdown-item" href="/documentation/rdfstar/index.html">RDF-star</a></li>
<li><a class="dropdown-item" href="/documentation/shacl/index.html">SHACL</a></li>
<li><a class="dropdown-item" href="/documentation/shex/index.html">ShEx</a></li>
<li><a class="dropdown-item" href="/documentation/tdb/index.html">TDB</a></li>
<li><a class="dropdown-item" href="/documentation/tdb2/index.html">TDB2</a></li>
<li><a class="dropdown-item" href="/documentation/query/text-query.html">Text Search</a></li>
</ul>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-journal-code"></span> Javadoc <b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/documentation/javadoc.html">All Javadoc</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/arq/">ARQ</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/fuseki2/">Fuseki</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/geosparql/">GeoSPARQL</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/jena/">Jena Core</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/permissions/">Permissions</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/extras/querybuilder/">Query Builder</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/shacl/">SHACL</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/tdb/">TDB</a></li>
<li><a class="dropdown-item" href="/documentation/javadoc/text/">Text Search</a></li>
</ul>
</li>
</ul>
<!-- Site search: the text field and the icon-only submit button each carry an explicit accessible name. -->
<form class="d-flex" role="search" action="/search" method="GET">
  <div class="input-group">
    <input class="form-control border-end-0 border m-0" type="search" name="q" id="search-query" placeholder="Search...." aria-label="Search" style="width: 10rem;">
    <!-- The button holds only a decorative icon, so aria-label provides its name. -->
    <button class="btn btn-outline-secondary border-start-0 border" type="submit" aria-label="Submit search">
      <i class="bi-search" aria-hidden="true"></i>
    </button>
  </div>
</form>
<ul class="navbar-nav">
<li id="ask" class="nav-item"><a class="nav-link" href="/help_and_support/index.html" title="Ask"><span class="bi-patch-question"></span><span class="text-body d-none d-xxl-inline"> Ask</span></a></li>
<li class="nav-item dropdown">
<a href="#" title="Get involved" class="nav-link dropdown-toggle" role="button" data-bs-toggle="dropdown" aria-expanded="false"><span class="bi-megaphone"></span><span class="text-body d-none d-xxl-inline"> Get involved </span><b class="caret"></b></a>
<ul class="dropdown-menu">
<li><a class="dropdown-item" href="/getting_involved/index.html">Contribute</a></li>
<li><a class="dropdown-item" href="/help_and_support/bugs_and_suggestions.html">Report a bug</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">Project</li>
<li><a class="dropdown-item" href="/about_jena/about.html">About Jena</a></li>
<li><a class="dropdown-item" href="/about_jena/architecture.html">Architecture</a></li>
<li><a class="dropdown-item" href="/about_jena/citing.html">Citing</a></li>
<li><a class="dropdown-item" href="/about_jena/team.html">Project team</a></li>
<li><a class="dropdown-item" href="/about_jena/contributions.html">Related projects</a></li>
<li><a class="dropdown-item" href="/about_jena/roadmap.html">Roadmap</a></li>
<li><a class="dropdown-item" href="/about_jena/security-advisories.html">Security Advisories</a></li>
<li class="dropdown-divider"></li>
<li class="dropdown-header">ASF</li>
<li><a class="dropdown-item" href="https://www.apache.org/">Apache Software Foundation</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/sponsorship.html">Become a Sponsor</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/licenses/LICENSE-2.0">License</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/security/">Security</a></li>
<li><a class="dropdown-item" href="https://www.apache.org/foundation/thanks.html">Thanks</a></li>
</ul>
</li>
<li class="nav-item" id="edit"><a class="nav-link" href="https://github.com/apache/jena-site/edit/main/source/documentation/tdb/tdb_transactions.md" title="Edit this page on GitHub"><span class="bi-pencil-square"></span><span class="text-body d-none d-xxl-inline"> Edit this page</span></a></li>
</ul>
</div>
</div>
</nav>
<div class="container">
<div class="row">
<div class="col-md-12">
<div id="breadcrumbs">
<ol class="breadcrumb mt-4 p-2 bg-body-tertiary">
<li class="breadcrumb-item"><a href='/documentation'>DOCUMENTATION</a></li>
<li class="breadcrumb-item"><a href='/documentation/tdb'>TDB</a></li>
<li class="breadcrumb-item active">TDB TRANSACTIONS</li>
</ol>
</div>
<h1 class="title">TDB Transactions</h1>
<main class="d-flex flex-xl-row flex-column">
<aside class="text-muted align-self-start mb-3 p-0 d-xl-none d-block">
<h2 class="h6 sticky-top m-0 p-2 bg-body-tertiary">On this page</h2>
<nav id="TableOfContents">
<ul>
<li><a href="#overview">Overview</a></li>
<li><a href="#limitations">Limitations</a></li>
<li><a href="#api-for-transactions">API for Transactions</a>
<ul>
<li><a href="#read-transactions">Read transactions</a></li>
<li><a href="#write-transactions">Write transactions</a></li>
</ul>
</li>
<li><a href="#multi-threaded-use">Multi-threaded use</a></li>
<li><a href="#multi-jvm">Multi JVM</a></li>
<li><a href="#bulk-loading">Bulk loading</a></li>
</ul>
</nav>
</aside>
<article class="flex-column me-lg-4">
<p>TDB provides
<a href="http://en.wikipedia.org/wiki/ACID">ACID</a>
transaction support through the use of
<a href="http://en.wikipedia.org/wiki/Write-ahead_logging">write-ahead-logging</a> in TDB1
and copy-on-write MVCC structures in TDB2.</p>
<p>Use of transactions protects a TDB dataset against data corruption, unexpected
process termination and system crashes.</p>
<p>Non-transactional use of TDB1 should be avoided; TDB2 only operates with transactions.</p>
<h2 id="overview">Overview</h2>
<p>TDB2 uses <a href="https://en.wikipedia.org/wiki/Multiversion_concurrency_control">MVCC</a>
via a copy-on-write mechanism. Update transactions can be of any size.</p>
<p>The TDB1 transaction mechanism is based on
<a href="http://en.wikipedia.org/wiki/Write-ahead_logging">write-ahead-logging</a>. All
changes made inside a write-transaction are written to
<a href="http://en.wikipedia.org/wiki/Journaling_file_system">journals</a>, then propagated
to the main database at a suitable moment. Transactions in TDB1 are limited in
size to a few tens of millions of triples because they retain data in-memory until
indexes can be updated.</p>
<p>Transactional TDB supports one active write transaction, and
multiple read transactions at the same time. Read-transactions
started before a write-transaction commits see the database in a
state without any changes visible. Any transaction starting after a
write-transaction commits sees the database with the changes
visible, whether fully propagated back to the database or not.
There can be active read transactions seeing the state of the
database before the updates, and read transactions seeing the state
of the database after the updates running at the same time.</p>
<p>Transactional TDB works with SPARQL Query, SPARQL Update, SPARQL
Graph Store Update as well as the full Jena API.</p>
<p>TDB provides
<a href="http://en.wikipedia.org/wiki/Isolation_(database_systems)#SERIALIZABLE">Serializable</a>
transactions, the highest
<a href="http://en.wikipedia.org/wiki/Isolation_(database_systems)">isolation level</a>.</p>
<h2 id="limitations">Limitations</h2>
<p>(some of these limitations may be removed in later versions)</p>
<ul>
<li>Bulk loads: the TDB bulk loader is not transactional</li>
<li><a href="http://en.wikipedia.org/wiki/Nested_transaction">Nested transactions</a>
are not supported.</li>
</ul>
<p>TDB2 removed the limitations of TDB1:</p>
<ul>
<li>Some active transaction state is held exclusively in-memory,
limiting scalability.</li>
<li>Long-running transactions. Read-transactions cause a build-up
of pending changes;</li>
</ul>
<p>If a single read transaction runs for a long time when there are
many updates, the TDB1 system will consume a lot of temporary
resources.</p>
<h2 id="api-for-transactions">API for Transactions</h2>
<p>This section uses the primitives of the transaction mechanism.</p>
<p>Better APIs are described in <a href="/documentation/txn/">the transaction API
documentation</a>.</p>
<h3 id="read-transactions">Read transactions</h3>
<p>These are used for SPARQL queries and for code using Jena API
actions that do not change the data. The general pattern is:</p>
<pre><code> dataset.begin(ReadWrite.READ) ;
try {
...
} finally { dataset.end() ; }
</code></pre>
<p>The <code>dataset.end()</code> declares the end of the read transaction. Applications may also call
<code>dataset.commit()</code> or <code>dataset.abort()</code> which all have the same effect for a read transaction.</p>
<pre><code> Location location = ... ;
Dataset dataset = ... ;
dataset.begin(ReadWrite.READ) ;
String qs1 = &quot;SELECT * {?s ?p ?o} LIMIT 10&quot; ;
try(QueryExecution qExec = QueryExecution.dataset(dataset).query(qs1).build() ) {
ResultSet rs = qExec.execSelect() ;
ResultSetFormatter.out(rs) ;
}
String qs2 = &quot;SELECT * {?s ?p ?o} OFFSET 10 LIMIT 10&quot; ;
try(QueryExecution qExec = QueryExecution.dataset(dataset).query(qs2).build() ) {
ResultSet rs = qExec.execSelect() ;
ResultSetFormatter.out(rs) ;
}
dataset.end() ;
</code></pre>
<h3 id="write-transactions">Write transactions</h3>
<p>These are used for SPARQL queries, SPARQL updates and any Jena API
actions that modify the data. Beware that large <code>model.read</code>
operations consume large amounts of temporary space.</p>
<p>The general pattern is:</p>
<pre><code> dataset.begin(ReadWrite.WRITE) ;
try {
...
dataset.commit() ;
} finally {
dataset.end() ;
}
</code></pre>
<p>The <code>dataset.end()</code> will abort the transaction if there was no call to
<code>dataset.commit()</code> or <code>dataset.abort()</code> inside the write transaction.</p>
<p>Once <code>dataset.commit()</code> or <code>dataset.abort()</code> is called, the application
needs to start a new transaction to perform further operations on the
dataset.</p>
<pre><code> Location location = ... ;
Dataset dataset = ... ;
dataset.begin(ReadWrite.WRITE) ;
try {
Model model = dataset.getDefaultModel() ;
// API calls to a model in the dataset
model.add( ... )
// A SPARQL query will see the new statement added.
try (QueryExecution qExec = QueryExecution.dataset(dataset)
.query(&quot;SELECT (count(*) AS ?count) { ?s ?p ?o} LIMIT 10&quot;)
.build() ) {
ResultSet rs = qExec.execSelect() ;
ResultSetFormatter.out(rs) ;
}
// ... perform a SPARQL Update
String sparqlUpdateString = StrUtils.strjoinNL(
&quot;PREFIX : &lt;http://example/&gt;&quot;,
&quot;INSERT { :s :p ?now } WHERE { BIND(now() AS ?now) }&quot;
) ;
UpdateRequest request = UpdateFactory.create(sparqlUpdateString) ;
UpdateExecution.dataset(dataset).update(request).execute();
// Finally, commit the transaction.
dataset.commit() ;
// Or call .abort()
} finally {
dataset.end() ;
}
</code></pre>
<h2 id="multi-threaded-use">Multi-threaded use</h2>
<p>Each dataset object has one transaction active at a time per thread.
A dataset object can be used by different threads, with independent transactions.</p>
<p>The usual idiom within multi-threaded applications is to have
one dataset per thread, and so there is one transaction per thread.</p>
<p>Either:</p>
<pre><code> // Create a dataset and keep it globally.
Dataset dataset = TDBFactory.createDataset(location) ;
</code></pre>
<p>Thread 1:</p>
<pre><code> dataset.begin(ReadWrite.WRITE) ;
try {
...
dataset.commit() ;
} finally { dataset.end() ; }
</code></pre>
<p>Thread 2:</p>
<pre><code> dataset.begin(ReadWrite.READ) ;
try {
...
} finally { dataset.end() ; }
</code></pre>
<p>or create a dataset object on the thread:</p>
<p>Thread 1:</p>
<pre><code> Dataset dataset = TDBFactory.createDataset(location) ;
dataset.begin(ReadWrite.WRITE) ;
try {
...
dataset.commit() ;
} finally { dataset.end() ; }
</code></pre>
<p>Thread 2:</p>
<pre><code> Dataset dataset = TDBFactory.createDataset(location) ;
dataset.begin(ReadWrite.READ) ;
try {
...
} finally { dataset.end() ; }
</code></pre>
<p>Each thread has a separate <code>dataset</code> object; these safely share the
same storage. In both cases, the transactions are independent.</p>
<h2 id="multi-jvm">Multi JVM</h2>
<p>Multiple applications, running in multiple JVMs, using the same
file databases is not supported and has a high risk of data corruption. Once corrupted a database cannot be repaired
and must be rebuilt from the original source data. Therefore there <strong>must</strong> be a single JVM
controlling the database directory and files. TDB includes automatic prevention against multi-JVM usage
which prevents this under most circumstances.</p>
<p>Use <a href="../fuseki2/">Fuseki</a> to provide a database server for multiple
applications. Fuseki supports <a href="http://www.w3.org/TR/sparql11-query/">SPARQL
Query</a>, <a href="http://www.w3.org/TR/sparql11-update/">SPARQL
Update</a> and the <a href="http://www.w3.org/TR/sparql11-http-rdf-update/">SPARQL Graph Store
protocol</a>.</p>
<h2 id="bulk-loading">Bulk loading</h2>
<p>Bulk loaders are not transactional.</p>
</article>
<aside class="text-muted align-self-start mb-3 mb-xl-5 p-0 d-none d-xl-flex flex-column sticky-top">
<h2 class="h6 sticky-top m-0 p-2 bg-body-tertiary">On this page</h2>
<nav id="TableOfContents">
<ul>
<li><a href="#overview">Overview</a></li>
<li><a href="#limitations">Limitations</a></li>
<li><a href="#api-for-transactions">API for Transactions</a>
<ul>
<li><a href="#read-transactions">Read transactions</a></li>
<li><a href="#write-transactions">Write transactions</a></li>
</ul>
</li>
<li><a href="#multi-threaded-use">Multi-threaded use</a></li>
<li><a href="#multi-jvm">Multi JVM</a></li>
<li><a href="#bulk-loading">Bulk loading</a></li>
</ul>
</nav>
</aside>
</main>
</div>
</div>
</div>
<footer class="bd-footer py-4 py-md-5 mt-4 mt-lg-5 bg-body-tertiary">
<div class="container" style="font-size:80%" >
<p>
Copyright &copy; 2011&ndash;2024 The Apache Software Foundation, Licensed under the
<a href="https://www.apache.org/licenses/LICENSE-2.0">Apache License, Version 2.0</a>.
</p>
<p>
Apache Jena, Jena, the Apache Jena project logo, Apache and the Apache feather logos are trademarks of
The Apache Software Foundation.
<br/>
<a href="https://privacy.apache.org/policies/privacy-policy-public.html"
>Apache Software Foundation Privacy Policy</a>.
</p>
</div>
</footer>
<script src="/js/popper.min.js.js" type="text/javascript"></script>
<script src="/js/bootstrap.min.js" type="text/javascript"></script>
<script src="/js/improve.js" type="text/javascript"></script>
<script type="text/javascript">
(function() {
  'use strict'
  // Mark every link whose href equals the current path as active, and also
  // mark the first link of each list item that encloses it (in this page's
  // navbar that is the dropdown toggle of the menu containing the link).

  // The ancestor walk runs while count <= levelsLimit, i.e. it visits at most
  // levelsLimit + 1 ancestors of each matching link.
  const levelsLimit = 4

  // querySelectorAll always returns a NodeList (possibly empty), so it can be
  // iterated directly without a null/undefined check.
  const links = document.querySelectorAll(`a[href="${window.location.pathname}"]`)
  for (const link of links) {
    link.classList.add('active')
    let parentElement = link.parentElement
    let count = 0
    // Climb only through directly nested UL/LI ancestors; any other element
    // (or running out of ancestors) ends the walk.
    while (parentElement !== null && ['UL', 'LI'].includes(parentElement.tagName) && count <= levelsLimit) {
      if (parentElement.tagName === 'LI') {
        // querySelector returns null when no anchor below this LI is the
        // first child of its parent; skip it rather than throwing, so the
        // remaining links are still processed.
        const firstAnchor = parentElement.querySelector('a:first-child')
        if (firstAnchor !== null) {
          firstAnchor.classList.add('active')
        }
      }
      parentElement = parentElement.parentElement
      count++
    }
  }
})()
</script>
</body>
</html>