<!-- blob: b10af6c7a0233feb08f062ff7ae92aa563aa287d [file] [log] [blame] -->
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_40) on Thu Jul 20 13:17:29 PDT 2017 -->
<title>DatePartitionedNestedRetriever (gobblin-core 0.11.0 API)</title>
<meta name="date" content="2017-07-20">
<link rel="stylesheet" type="text/css" href="../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="DatePartitionedNestedRetriever (gobblin-core 0.11.0 API)";
}
}
catch(err) {
}
//-->
var methods = {"i0":10,"i1":10,"i2":10,"i3":10};
var tabs = {65535:["t0","All Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
var altColor = "altColor";
var rowColor = "rowColor";
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../index-all.html">Index</a></li>
<li><a href="../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../gobblin/source/DatePartitionedDailyAvroSource.html" title="class in gobblin.source"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../index.html?gobblin/source/DatePartitionedNestedRetriever.html" target="_top">Frames</a></li>
<li><a href="DatePartitionedNestedRetriever.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_top");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">gobblin.source</div>
<h2 title="Class DatePartitionedNestedRetriever" class="title">Class DatePartitionedNestedRetriever</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li><a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li>
<li>
<ul class="inheritance">
<li>gobblin.source.DatePartitionedNestedRetriever</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Implemented Interfaces:</dt>
<dd><a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source">PartitionAwareFileRetriever</a></dd>
</dl>
<hr>
<br>
<pre>public class <span class="typeNameLabel">DatePartitionedNestedRetriever</span>
extends <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a>
implements <a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source">PartitionAwareFileRetriever</a></pre>
<div class="block">PartitionRetriever that is optimized for nested directory structures where data is dumped on a regular basis
and most data has likely been processed by Gobblin already.
For example, if <a href="http://linkedin.github.io/gobblin/javadoc/0.11.0/gobblin-api/gobblin/configuration/ConfigurationKeys.html?is-external=true#SOURCE_FILEBASED_DATA_DIRECTORY" title="class or interface in gobblin.configuration"><code>ConfigurationKeys.SOURCE_FILEBASED_DATA_DIRECTORY</code></a> is set to /my/data/, then the class assumes
folders following the pattern /my/data/daily/[year]/[month]/[day] are present. It will iterate through all the data
under these folders starting from the date specified by <code>#DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE</code> until
either <code>#DATE_PARTITIONED_SOURCE_MAX_FILES_PER_JOB</code> files have been processed, or until there is no more data
to process. For example, if <code>#DATE_PARTITIONED_SOURCE_MIN_WATERMARK_VALUE</code> is set to 2015/01/01, then the job
will read from the folders /my/data/daily/2015/01/01/, /my/data/daily/2015/01/02/, /my/data/daily/2015/01/03/ etc.</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<ul class="blockList">
<li class="blockList"><a name="nested.classes.inherited.from.class.gobblin.source.PartitionAwareFileRetriever">
<!-- -->
</a>
<h3>Nested classes/interfaces inherited from interface&nbsp;gobblin.source.<a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source">PartitionAwareFileRetriever</a></h3>
<code><a href="../../gobblin/source/PartitionAwareFileRetriever.FileInfo.html" title="class in gobblin.source">PartitionAwareFileRetriever.FileInfo</a></code></li>
</ul>
</li>
</ul>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../gobblin/source/DatePartitionedNestedRetriever.html#DatePartitionedNestedRetriever-java.lang.String-">DatePartitionedNestedRetriever</a></span>(<a href="https://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;expectedExtension)</code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code><a href="https://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;<a href="../../gobblin/source/PartitionAwareFileRetriever.FileInfo.html" title="class in gobblin.source">PartitionAwareFileRetriever.FileInfo</a>&gt;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../gobblin/source/DatePartitionedNestedRetriever.html#getFilesToProcess-long-int-">getFilesToProcess</a></span>(long&nbsp;minWatermark,
int&nbsp;maxFilesToReturn)</code>
<div class="block">Return a list of files to process that have a watermark later than minWatermark.</div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code>long</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../gobblin/source/DatePartitionedNestedRetriever.html#getWatermarkFromString-java.lang.String-">getWatermarkFromString</a></span>(<a href="https://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;lowWaterMark)</code>&nbsp;</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code>long</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../gobblin/source/DatePartitionedNestedRetriever.html#getWatermarkIncrementMs--">getWatermarkIncrementMs</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../gobblin/source/DatePartitionedNestedRetriever.html#init-gobblin.configuration.SourceState-">init</a></span>(<a href="http://linkedin.github.io/gobblin/javadoc/0.11.0/gobblin-api/gobblin/configuration/SourceState.html?is-external=true" title="class or interface in gobblin.configuration">SourceState</a>&nbsp;state)</code>
<div class="block">Initialize the retriever with configuration parameters</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.<a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3>
<code><a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="https://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="DatePartitionedNestedRetriever-java.lang.String-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>DatePartitionedNestedRetriever</h4>
<pre>public&nbsp;DatePartitionedNestedRetriever(<a href="https://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;expectedExtension)</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="init-gobblin.configuration.SourceState-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>init</h4>
<pre>public&nbsp;void&nbsp;init(<a href="http://linkedin.github.io/gobblin/javadoc/0.11.0/gobblin-api/gobblin/configuration/SourceState.html?is-external=true" title="class or interface in gobblin.configuration">SourceState</a>&nbsp;state)</pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="../../gobblin/source/PartitionAwareFileRetriever.html#init-gobblin.configuration.SourceState-">PartitionAwareFileRetriever</a></code></span></div>
<div class="block">Initialize the retriever with configuration parameters</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="../../gobblin/source/PartitionAwareFileRetriever.html#init-gobblin.configuration.SourceState-">init</a></code>&nbsp;in interface&nbsp;<code><a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source">PartitionAwareFileRetriever</a></code></dd>
</dl>
</li>
</ul>
<a name="getFilesToProcess-long-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getFilesToProcess</h4>
<pre>public&nbsp;<a href="https://docs.oracle.com/javase/7/docs/api/java/util/List.html?is-external=true" title="class or interface in java.util">List</a>&lt;<a href="../../gobblin/source/PartitionAwareFileRetriever.FileInfo.html" title="class in gobblin.source">PartitionAwareFileRetriever.FileInfo</a>&gt;&nbsp;getFilesToProcess(long&nbsp;minWatermark,
int&nbsp;maxFilesToReturn)
throws <a href="https://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre>
<div class="block"><span class="descfrmTypeLabel">Description copied from interface:&nbsp;<code><a href="../../gobblin/source/PartitionAwareFileRetriever.html#getFilesToProcess-long-int-">PartitionAwareFileRetriever</a></code></span></div>
<div class="block">Return a list of files to process that have a watermark later than minWatermark. Generally, a FileRetriever should
find each valid partition after minWatermark, sorted by ascending time.
For each partition:
<ol>
<li>Add all files in the partition.</li>
<li>If the number of files in the return list is now greater than maxFilesToReturn, return immediately.</li>
<li>Otherwise, continue to the next partition until there are none left.</li>
</ol>
maxFilesToReturn is a soft cap - all files in a partition should be returned by getFilesToProcess().</div>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="../../gobblin/source/PartitionAwareFileRetriever.html#getFilesToProcess-long-int-">getFilesToProcess</a></code>&nbsp;in interface&nbsp;<code><a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source">PartitionAwareFileRetriever</a></code></dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="https://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
</dl>
</li>
</ul>
<a name="getWatermarkFromString-java.lang.String-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getWatermarkFromString</h4>
<pre>public&nbsp;long&nbsp;getWatermarkFromString(<a href="https://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>&nbsp;lowWaterMark)</pre>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="../../gobblin/source/PartitionAwareFileRetriever.html#getWatermarkFromString-java.lang.String-">getWatermarkFromString</a></code>&nbsp;in interface&nbsp;<code><a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source">PartitionAwareFileRetriever</a></code></dd>
</dl>
</li>
</ul>
<a name="getWatermarkIncrementMs--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>getWatermarkIncrementMs</h4>
<pre>public&nbsp;long&nbsp;getWatermarkIncrementMs()</pre>
<dl>
<dt><span class="overrideSpecifyLabel">Specified by:</span></dt>
<dd><code><a href="../../gobblin/source/PartitionAwareFileRetriever.html#getWatermarkIncrementMs--">getWatermarkIncrementMs</a></code>&nbsp;in interface&nbsp;<code><a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source">PartitionAwareFileRetriever</a></code></dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../index-all.html">Index</a></li>
<li><a href="../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../gobblin/source/DatePartitionedDailyAvroSource.html" title="class in gobblin.source"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../gobblin/source/PartitionAwareFileRetriever.html" title="interface in gobblin.source"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../index.html?gobblin/source/DatePartitionedNestedRetriever.html" target="_top">Frames</a></li>
<li><a href="DatePartitionedNestedRetriever.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_bottom");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li>Nested&nbsp;|&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
</body>
</html>