<!-- blob: 4dc0d5dda9a9420c61ff22438e673d8525bc5e5f [file] [log] [blame] -->
<!DOCTYPE HTML>
<html lang="en">
<head>
  <!-- Generated by javadoc (17) -->

  <!-- Encoding is declared first so it is known before any text content is parsed. -->
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>DistinctDataBag (Apache Jena ARQ)</title>
  <meta name="description" content="declaration: module: org.apache.jena.arq, package: org.apache.jena.atlas.data, class: DistinctDataBag">
  <meta name="generator" content="javadoc/ClassWriterImpl">

  <!-- Style sheets: keep this order, later sheets win ties in the cascade. -->
  <link rel="stylesheet" type="text/css" href="../../../../../../stylesheet.css" title="Style">
  <link rel="stylesheet" type="text/css" href="../../../../../../script-dir/jquery-ui.min.css" title="Style">
  <link rel="stylesheet" type="text/css" href="../../../../../../jquery-ui.overrides.css" title="Style">

  <!-- Scripts carry no async/defer, so they run in this order before <body> is parsed.
       NOTE(review): the inline script at the top of <body> calls loadScripts(), which is
       presumably defined in script.js; confirm before changing how these are loaded. -->
  <script type="text/javascript" src="../../../../../../script.js"></script>
  <script type="text/javascript" src="../../../../../../script-dir/jquery-3.6.1.min.js"></script>
  <script type="text/javascript" src="../../../../../../script-dir/jquery-ui.min.js"></script>
</head>
<body class="class-declaration-page">
<script type="text/javascript">
// Page-level settings published as globals.
// NOTE(review): presumably read by the external scripts loaded in the document
// head; confirm there before renaming or re-scoping any of them.
var evenRowColor = "even-row-color",
    oddRowColor = "odd-row-color",
    tableTab = "table-tab",
    activeTableTab = "active-table-tab",
    // Same relative prefix this page uses for its root-level style sheets and scripts.
    pathtoroot = "../../../../../../";

// loadScripts is not defined in this page; it has to come from a script loaded earlier.
loadScripts(document, 'script');
</script>
<noscript>
<div>JavaScript is disabled in your browser.</div>
</noscript>
<div class="flex-box">
<header role="banner" class="flex-header">
<nav role="navigation">
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="top-nav" id="navbar-top">
<div class="skip-nav"><a href="#skip-navbar-top" title="Skip navigation links">Skip navigation links</a></div>
<ul id="navbar-top-firstrow" class="nav-list" title="Navigation">
<li><a href="../../../../../module-summary.html">Module</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="nav-bar-cell1-rev">Class</li>
<li><a href="class-use/DistinctDataBag.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../../../index-all.html">Index</a></li>
<li><a href="../../../../../../help-doc.html#class">Help</a></li>
</ul>
</div>
<div class="sub-nav">
<div>
<ul class="sub-nav-list">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor-summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method-summary">Method</a></li>
</ul>
<ul class="sub-nav-list">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor-detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method-detail">Method</a></li>
</ul>
</div>
<div class="nav-list-search"><label for="search-input">SEARCH:</label>
<input type="text" id="search-input" value="search" disabled="disabled">
<input type="reset" id="reset-button" value="reset" disabled="disabled">
</div>
</div>
<!-- ========= END OF TOP NAVBAR ========= -->
<span class="skip-nav" id="skip-navbar-top"></span></nav>
</header>
<div class="flex-content">
<main role="main">
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="sub-title"><span class="module-label-in-type">Module</span>&nbsp;<a href="../../../../../module-summary.html">org.apache.jena.arq</a></div>
<div class="sub-title"><span class="package-label-in-type">Package</span>&nbsp;<a href="package-summary.html">org.apache.jena.atlas.data</a></div>
<h1 title="Class DistinctDataBag" class="title">Class DistinctDataBag&lt;E&gt;</h1>
</div>
<div class="inheritance" title="Inheritance Tree"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html" title="class or interface in java.lang" class="external-link">java.lang.Object</a>
<div class="inheritance"><a href="AbstractDataBag.html" title="class in org.apache.jena.atlas.data">org.apache.jena.atlas.data.AbstractDataBag</a>&lt;E&gt;
<div class="inheritance"><a href="SortedDataBag.html" title="class in org.apache.jena.atlas.data">org.apache.jena.atlas.data.SortedDataBag</a>&lt;E&gt;
<div class="inheritance">org.apache.jena.atlas.data.DistinctDataBag&lt;E&gt;</div>
</div>
</div>
</div>
<section class="class-description" id="class-description">
<dl class="notes">
<dt>All Implemented Interfaces:</dt>
<dd><code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Iterable.html" title="class or interface in java.lang" class="external-link">Iterable</a>&lt;E&gt;</code>, <code><a href="DataBag.html" title="interface in org.apache.jena.atlas.data">DataBag</a>&lt;E&gt;</code>, <code>org.apache.jena.atlas.lib.Closeable</code>, <code>org.apache.jena.atlas.lib.Sink&lt;E&gt;</code></dd>
</dl>
<dl class="notes">
<dt>Direct Known Subclasses:</dt>
<dd><code><a href="DistinctDataNet.html" title="class in org.apache.jena.atlas.data">DistinctDataNet</a></code></dd>
</dl>
<hr>
<div class="type-signature"><span class="modifiers">public class </span><span class="element-name type-name-label">DistinctDataBag&lt;E&gt;</span>
<span class="extends-implements">extends <a href="SortedDataBag.html" title="class in org.apache.jena.atlas.data">SortedDataBag</a>&lt;E&gt;</span></div>
<div class="block"><p>
This data bag will gather distinct items in memory until a size threshold is passed, at which point it will write
out all of the items to disk using the supplied serializer.
</p>
<p>
After adding is finished, call <a href="#iterator()"><code>iterator()</code></a> to set up the data bag for reading back items and iterating over them.
The iterator will retrieve only distinct items.
</p>
<p>
IMPORTANT: You may not add any more items after this call. You may subsequently call <a href="#iterator()"><code>iterator()</code></a> multiple
times, which will give you a new iterator for each invocation. If you do not consume the entire iterator, you should
call <code>Iter.close(Iterator)</code> to close any FileInputStreams associated with the iterator.
</p>
<p>
Additionally, make sure to call <a href="SortedDataBag.html#close()"><code>SortedDataBag.close()</code></a> when you are finished to free any system resources (preferably in a finally block).
</p>
<p>
Implementation Notes: Incoming data is stored without duplicates in a HashSet. When it is time to spill,
that data is sorted and written to disk. An iterator that eliminates adjacent duplicates is used in conjunction
with the SortedDataBag's iterator.
</p></div>
</section>
<section class="summary">
<ul class="summary-list">
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<li>
<section class="constructor-summary" id="constructor-summary">
<h2>Constructor Summary</h2>
<div class="caption"><span>Constructors</span></div>
<div class="summary-table two-column-summary">
<div class="table-header col-first">Constructor</div>
<div class="table-header col-last">Description</div>
<div class="col-constructor-name even-row-color"><code><a href="#%3Cinit%3E(org.apache.jena.atlas.data.ThresholdPolicy,org.apache.jena.atlas.data.SerializationFactory,java.util.Comparator)" class="member-name-link">DistinctDataBag</a><wbr>(<a href="ThresholdPolicy.html" title="interface in org.apache.jena.atlas.data">ThresholdPolicy</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;&nbsp;policy,
<a href="SerializationFactory.html" title="interface in org.apache.jena.atlas.data">SerializationFactory</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;&nbsp;serializerFactory,
<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/Comparator.html" title="class or interface in java.util" class="external-link">Comparator</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;&nbsp;comparator)</code></div>
<div class="col-last even-row-color">&nbsp;</div>
</div>
</section>
</li>
<!-- ========== METHOD SUMMARY =========== -->
<li>
<section class="method-summary" id="method-summary">
<h2>Method Summary</h2>
<div id="method-summary-table">
<div class="table-tabs" role="tablist" aria-orientation="horizontal"><button id="method-summary-table-tab0" role="tab" aria-selected="true" aria-controls="method-summary-table.tabpanel" tabindex="0" onkeydown="switchTab(event)" onclick="show('method-summary-table', 'method-summary-table', 3)" class="active-table-tab">All Methods</button><button id="method-summary-table-tab2" role="tab" aria-selected="false" aria-controls="method-summary-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('method-summary-table', 'method-summary-table-tab2', 3)" class="table-tab">Instance Methods</button><button id="method-summary-table-tab4" role="tab" aria-selected="false" aria-controls="method-summary-table.tabpanel" tabindex="-1" onkeydown="switchTab(event)" onclick="show('method-summary-table', 'method-summary-table-tab4', 3)" class="table-tab">Concrete Methods</button></div>
<div id="method-summary-table.tabpanel" role="tabpanel">
<div class="summary-table three-column-summary" aria-labelledby="method-summary-table-tab0">
<div class="table-header col-first">Modifier and Type</div>
<div class="table-header col-second">Method</div>
<div class="table-header col-last">Description</div>
<div class="col-first even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code>boolean</code></div>
<div class="col-second even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="#isDistinct()" class="member-name-link">isDistinct</a>()</code></div>
<div class="col-last even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4">
<div class="block">Find out if the bag is distinct.</div>
</div>
<div class="col-first odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code>boolean</code></div>
<div class="col-second odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="#isSorted()" class="member-name-link">isSorted</a>()</code></div>
<div class="col-last odd-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4">
<div class="block">Find out if the bag is sorted.</div>
</div>
<div class="col-first even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/Iterator.html" title="class or interface in java.util" class="external-link">Iterator</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;</code></div>
<div class="col-second even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4"><code><a href="#iterator()" class="member-name-link">iterator</a>()</code></div>
<div class="col-last even-row-color method-summary-table method-summary-table-tab2 method-summary-table-tab4">
<div class="block">Returns an iterator over a set of elements of type E.</div>
</div>
</div>
</div>
</div>
<div class="inherited-list">
<h3 id="methods-inherited-from-class-org.apache.jena.atlas.data.SortedDataBag">Methods inherited from class&nbsp;org.apache.jena.atlas.data.<a href="SortedDataBag.html" title="class in org.apache.jena.atlas.data">SortedDataBag</a></h3>
<code><a href="SortedDataBag.html#add(E)">add</a>, <a href="SortedDataBag.html#cancel()">cancel</a>, <a href="SortedDataBag.html#close()">close</a>, <a href="SortedDataBag.html#flush()">flush</a>, <a href="SortedDataBag.html#isCancelled()">isCancelled</a>, <a href="SortedDataBag.html#isClosed()">isClosed</a></code></div>
<div class="inherited-list">
<h3 id="methods-inherited-from-class-org.apache.jena.atlas.data.AbstractDataBag">Methods inherited from class&nbsp;org.apache.jena.atlas.data.<a href="AbstractDataBag.html" title="class in org.apache.jena.atlas.data">AbstractDataBag</a></h3>
<code><a href="AbstractDataBag.html#isEmpty()">isEmpty</a>, <a href="AbstractDataBag.html#send(E)">send</a>, <a href="AbstractDataBag.html#size()">size</a></code></div>
<div class="inherited-list">
<h3 id="methods-inherited-from-class-java.lang.Object">Methods inherited from class&nbsp;java.lang.<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html" title="class or interface in java.lang" class="external-link">Object</a></h3>
<code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#equals(java.lang.Object)" title="class or interface in java.lang" class="external-link">equals</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#getClass()" title="class or interface in java.lang" class="external-link">getClass</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#hashCode()" title="class or interface in java.lang" class="external-link">hashCode</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#notify()" title="class or interface in java.lang" class="external-link">notify</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#notifyAll()" title="class or interface in java.lang" class="external-link">notifyAll</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#toString()" title="class or interface in java.lang" class="external-link">toString</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#wait()" title="class or interface in java.lang" class="external-link">wait</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#wait(long)" title="class or interface in java.lang" class="external-link">wait</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Object.html#wait(long,int)" title="class or interface in java.lang" class="external-link">wait</a></code></div>
<div class="inherited-list">
<h3 id="methods-inherited-from-class-org.apache.jena.atlas.data.DataBag">Methods inherited from interface&nbsp;org.apache.jena.atlas.data.<a href="DataBag.html" title="interface in org.apache.jena.atlas.data">DataBag</a></h3>
<code><a href="DataBag.html#addAll(java.lang.Iterable)">addAll</a>, <a href="DataBag.html#addAll(java.util.Iterator)">addAll</a></code></div>
<div class="inherited-list">
<h3 id="methods-inherited-from-class-java.lang.Iterable">Methods inherited from interface&nbsp;java.lang.<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Iterable.html" title="class or interface in java.lang" class="external-link">Iterable</a></h3>
<code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Iterable.html#forEach(java.util.function.Consumer)" title="class or interface in java.lang" class="external-link">forEach</a>, <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Iterable.html#spliterator()" title="class or interface in java.lang" class="external-link">spliterator</a></code></div>
</section>
</li>
</ul>
</section>
<section class="details">
<ul class="details-list">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<li>
<section class="constructor-details" id="constructor-detail">
<h2>Constructor Details</h2>
<ul class="member-list">
<li>
<section class="detail" id="&lt;init&gt;(org.apache.jena.atlas.data.ThresholdPolicy,org.apache.jena.atlas.data.SerializationFactory,java.util.Comparator)">
<h3>DistinctDataBag</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="element-name">DistinctDataBag</span><wbr><span class="parameters">(<a href="ThresholdPolicy.html" title="interface in org.apache.jena.atlas.data">ThresholdPolicy</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;&nbsp;policy,
<a href="SerializationFactory.html" title="interface in org.apache.jena.atlas.data">SerializationFactory</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;&nbsp;serializerFactory,
<a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/Comparator.html" title="class or interface in java.util" class="external-link">Comparator</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;&nbsp;comparator)</span></div>
</section>
</li>
</ul>
</section>
</li>
<!-- ============ METHOD DETAIL ========== -->
<li>
<section class="method-details" id="method-detail">
<h2>Method Details</h2>
<ul class="member-list">
<li>
<section class="detail" id="isSorted()">
<h3>isSorted</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="return-type">boolean</span>&nbsp;<span class="element-name">isSorted</span>()</div>
<div class="block"><span class="descfrm-type-label">Description copied from interface:&nbsp;<code><a href="DataBag.html#isSorted()">DataBag</a></code></span></div>
<div class="block">Find out if the bag is sorted.</div>
<dl class="notes">
<dt>Specified by:</dt>
<dd><code><a href="DataBag.html#isSorted()">isSorted</a></code>&nbsp;in interface&nbsp;<code><a href="DataBag.html" title="interface in org.apache.jena.atlas.data">DataBag</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;</code></dd>
<dt>Overrides:</dt>
<dd><code><a href="SortedDataBag.html#isSorted()">isSorted</a></code>&nbsp;in class&nbsp;<code><a href="SortedDataBag.html" title="class in org.apache.jena.atlas.data">SortedDataBag</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;</code></dd>
<dt>Returns:</dt>
<dd>true if this is a sorted data bag, false otherwise.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="isDistinct()">
<h3>isDistinct</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="return-type">boolean</span>&nbsp;<span class="element-name">isDistinct</span>()</div>
<div class="block"><span class="descfrm-type-label">Description copied from interface:&nbsp;<code><a href="DataBag.html#isDistinct()">DataBag</a></code></span></div>
<div class="block">Find out if the bag is distinct.</div>
<dl class="notes">
<dt>Specified by:</dt>
<dd><code><a href="DataBag.html#isDistinct()">isDistinct</a></code>&nbsp;in interface&nbsp;<code><a href="DataBag.html" title="interface in org.apache.jena.atlas.data">DataBag</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;</code></dd>
<dt>Overrides:</dt>
<dd><code><a href="SortedDataBag.html#isDistinct()">isDistinct</a></code>&nbsp;in class&nbsp;<code><a href="SortedDataBag.html" title="class in org.apache.jena.atlas.data">SortedDataBag</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;</code></dd>
<dt>Returns:</dt>
<dd>true if the bag is a distinct bag, false otherwise.</dd>
</dl>
</section>
</li>
<li>
<section class="detail" id="iterator()">
<h3>iterator</h3>
<div class="member-signature"><span class="modifiers">public</span>&nbsp;<span class="return-type"><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/Iterator.html" title="class or interface in java.util" class="external-link">Iterator</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;</span>&nbsp;<span class="element-name">iterator</span>()</div>
<div class="block"><span class="descfrm-type-label">Description copied from class:&nbsp;<code><a href="SortedDataBag.html#iterator()">SortedDataBag</a></code></span></div>
<div class="block">Returns an iterator over a set of elements of type E. If you do not exhaust
the iterator, you should call
<code>Iter.close(Iterator)</code> to be sure any
open file handles are closed.</div>
<dl class="notes">
<dt>Specified by:</dt>
<dd><code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Iterable.html#iterator()" title="class or interface in java.lang" class="external-link">iterator</a></code>&nbsp;in interface&nbsp;<code><a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Iterable.html" title="class or interface in java.lang" class="external-link">Iterable</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;</code></dd>
<dt>Overrides:</dt>
<dd><code><a href="SortedDataBag.html#iterator()">iterator</a></code>&nbsp;in class&nbsp;<code><a href="SortedDataBag.html" title="class in org.apache.jena.atlas.data">SortedDataBag</a>&lt;<a href="DistinctDataBag.html" title="type parameter in DistinctDataBag">E</a>&gt;</code></dd>
<dt>Returns:</dt>
<dd>an Iterator</dd>
</dl>
</section>
</li>
</ul>
</section>
</li>
</ul>
</section>
<!-- ========= END OF CLASS DATA ========= -->
</main>
<footer role="contentinfo">
<hr>
<p class="legal-copy"><small>Licensed under the Apache License, Version 2.0</small></p>
</footer>
</div>
</div>
</body>
</html>