| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_221) on Tue Jan 19 12:27:55 PST 2021 --> |
| <title>AdaptiveFetchSchedule (apache-nutch 1.18 API)</title> |
| <meta name="date" content="2021-01-19"> |
| <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="AdaptiveFetchSchedule (apache-nutch 1.18 API)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":9,"i1":10,"i2":10}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/AdaptiveFetchSchedule.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html" title="class in org.apache.nutch.crawl"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/nutch/crawl/AdaptiveFetchSchedule.html" target="_top">Frames</a></li> |
| <li><a href="AdaptiveFetchSchedule.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.nutch.crawl</div> |
| <h2 title="Class AdaptiveFetchSchedule" class="title">Class AdaptiveFetchSchedule</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li><a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li><a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configured.html?is-external=true" title="class or interface in org.apache.hadoop.conf">org.apache.hadoop.conf.Configured</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html" title="class in org.apache.nutch.crawl">org.apache.nutch.crawl.AbstractFetchSchedule</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.nutch.crawl.AdaptiveFetchSchedule</li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Implemented Interfaces:</dt> |
| <dd><a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configurable.html?is-external=true" title="class or interface in org.apache.hadoop.conf">Configurable</a>, <a href="../../../../org/apache/nutch/crawl/FetchSchedule.html" title="interface in org.apache.nutch.crawl">FetchSchedule</a></dd> |
| </dl> |
| <dl> |
| <dt>Direct Known Subclasses:</dt> |
| <dd><a href="../../../../org/apache/nutch/crawl/MimeAdaptiveFetchSchedule.html" title="class in org.apache.nutch.crawl">MimeAdaptiveFetchSchedule</a></dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>public class <span class="typeNameLabel">AdaptiveFetchSchedule</span> |
| extends <a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html" title="class in org.apache.nutch.crawl">AbstractFetchSchedule</a></pre> |
| <div class="block">This class implements an adaptive re-fetch algorithm. This works as follows: |
| <ul> |
| <li>for pages that have changed since the last fetchTime, decrease their |
| fetchInterval by a factor of DEC_FACTOR (default value is 0.2f).</li> |
| <li>for pages that haven't changed since the last fetchTime, increase their |
| fetchInterval by a factor of INC_FACTOR (default value is 0.2f).<br> |
| If SYNC_DELTA property is true, then: |
| <ul> |
| <li>calculate a <code>delta = fetchTime - modifiedTime</code></li> |
| <li>try to synchronize with the time of change, by shifting the next |
| fetchTime by a fraction of the difference between the last modification time |
| and the last fetch time. I.e. the next fetch time will be set to |
| <code>fetchTime + fetchInterval - delta * SYNC_DELTA_RATE</code></li> |
| <li>if the adjusted fetch interval is bigger than the delta, then |
| <code>fetchInterval = delta</code>.</li> |
| </ul> |
| </li> |
| <li>the minimum value of fetchInterval may not be smaller than MIN_INTERVAL |
| (default is 1 minute).</li> |
| <li>the maximum value of fetchInterval may not be bigger than MAX_INTERVAL |
| (default is 365 days).</li> |
| </ul> |
| <p> |
| NOTE: values of DEC_FACTOR and INC_FACTOR higher than 0.4f may destabilize |
| the algorithm, so that the fetch interval either increases or decreases |
| infinitely, with little relevance to the page changes. Please use the |
| <a href="../../../../org/apache/nutch/crawl/AdaptiveFetchSchedule.html#main-java.lang.String:A-"><code>main(String[])</code></a> method to test the values before applying them in a |
| production system. |
| </p></div> |
| <dl> |
| <dt><span class="simpleTagLabel">Author:</span></dt> |
| <dd>Andrzej Bialecki</dd> |
| </dl> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- =========== FIELD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.summary"> |
| <!-- --> |
| </a> |
| <h3>Field Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Field Summary table, listing fields, and an explanation"> |
| <caption><span>Fields</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Field and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>protected float</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/nutch/crawl/AdaptiveFetchSchedule.html#DEC_RATE">DEC_RATE</a></span></code> </td> |
| </tr> |
| <tr class="rowColor"> |
| <td class="colFirst"><code>protected float</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/nutch/crawl/AdaptiveFetchSchedule.html#INC_RATE">INC_RATE</a></span></code> </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="fields.inherited.from.class.org.apache.nutch.crawl.AbstractFetchSchedule"> |
| <!-- --> |
| </a> |
| <h3>Fields inherited from class org.apache.nutch.crawl.<a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html" title="class in org.apache.nutch.crawl">AbstractFetchSchedule</a></h3> |
| <code><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#defaultInterval">defaultInterval</a>, <a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#maxInterval">maxInterval</a></code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="fields.inherited.from.class.org.apache.nutch.crawl.FetchSchedule"> |
| <!-- --> |
| </a> |
| <h3>Fields inherited from interface org.apache.nutch.crawl.<a href="../../../../org/apache/nutch/crawl/FetchSchedule.html" title="interface in org.apache.nutch.crawl">FetchSchedule</a></h3> |
| <code><a href="../../../../org/apache/nutch/crawl/FetchSchedule.html#SECONDS_PER_DAY">SECONDS_PER_DAY</a>, <a href="../../../../org/apache/nutch/crawl/FetchSchedule.html#STATUS_MODIFIED">STATUS_MODIFIED</a>, <a href="../../../../org/apache/nutch/crawl/FetchSchedule.html#STATUS_NOTMODIFIED">STATUS_NOTMODIFIED</a>, <a href="../../../../org/apache/nutch/crawl/FetchSchedule.html#STATUS_UNKNOWN">STATUS_UNKNOWN</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ======== CONSTRUCTOR SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.summary"> |
| <!-- --> |
| </a> |
| <h3>Constructor Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> |
| <caption><span>Constructors</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colOne" scope="col">Constructor and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../../org/apache/nutch/crawl/AdaptiveFetchSchedule.html#AdaptiveFetchSchedule--">AdaptiveFetchSchedule</a></span>()</code> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/nutch/crawl/AdaptiveFetchSchedule.html#main-java.lang.String:A-">main</a></span>(<a href="https://docs.oracle.com/javase/8/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args)</code> </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/nutch/crawl/AdaptiveFetchSchedule.html#setConf-org.apache.hadoop.conf.Configuration-">setConf</a></span>(<a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configuration.html?is-external=true" title="class or interface in org.apache.hadoop.conf">Configuration</a> conf)</code> </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl">CrawlDatum</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/nutch/crawl/AdaptiveFetchSchedule.html#setFetchSchedule-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-long-long-long-long-int-">setFetchSchedule</a></span>(<a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/io/Text.html?is-external=true" title="class or interface in org.apache.hadoop.io">Text</a> url, |
| <a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl">CrawlDatum</a> datum, |
| long prevFetchTime, |
| long prevModifiedTime, |
| long fetchTime, |
| long modifiedTime, |
| int state)</code> |
| <div class="block">Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a |
| successfully fetched page.</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.nutch.crawl.AbstractFetchSchedule"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class org.apache.nutch.crawl.<a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html" title="class in org.apache.nutch.crawl">AbstractFetchSchedule</a></h3> |
| <code><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#calculateLastFetchTime-org.apache.nutch.crawl.CrawlDatum-">calculateLastFetchTime</a>, <a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#forceRefetch-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-boolean-">forceRefetch</a>, <a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#initializeSchedule-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-">initializeSchedule</a>, <a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#setPageGoneSchedule-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-long-long-long-">setPageGoneSchedule</a>, <a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#setPageRetrySchedule-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-long-long-long-">setPageRetrySchedule</a>, <a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#shouldFetch-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-long-">shouldFetch</a></code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.hadoop.conf.Configured"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class org.apache.hadoop.conf.<a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configured.html?is-external=true" title="class or interface in org.apache.hadoop.conf">Configured</a></h3> |
| <code><a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configured.html?is-external=true#getConf--" title="class or interface in org.apache.hadoop.conf">getConf</a></code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.<a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3> |
| <code><a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.hadoop.conf.Configurable"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from interface org.apache.hadoop.conf.<a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configurable.html?is-external=true" title="class or interface in org.apache.hadoop.conf">Configurable</a></h3> |
| <code><a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configurable.html?is-external=true#getConf--" title="class or interface in org.apache.hadoop.conf">getConf</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ============ FIELD DETAIL =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.detail"> |
| <!-- --> |
| </a> |
| <h3>Field Detail</h3> |
| <a name="INC_RATE"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>INC_RATE</h4> |
| <pre>protected float INC_RATE</pre> |
| </li> |
| </ul> |
| <a name="DEC_RATE"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>DEC_RATE</h4> |
| <pre>protected float DEC_RATE</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ========= CONSTRUCTOR DETAIL ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.detail"> |
| <!-- --> |
| </a> |
| <h3>Constructor Detail</h3> |
| <a name="AdaptiveFetchSchedule--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>AdaptiveFetchSchedule</h4> |
| <pre>public AdaptiveFetchSchedule()</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="setConf-org.apache.hadoop.conf.Configuration-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setConf</h4> |
| <pre>public void setConf(<a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configuration.html?is-external=true" title="class or interface in org.apache.hadoop.conf">Configuration</a> conf)</pre> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configurable.html?is-external=true#setConf-org.apache.hadoop.conf.Configuration-" title="class or interface in org.apache.hadoop.conf">setConf</a></code> in interface <code><a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/conf/Configurable.html?is-external=true" title="class or interface in org.apache.hadoop.conf">Configurable</a></code></dd> |
| <dt><span class="overrideSpecifyLabel">Overrides:</span></dt> |
| <dd><code><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#setConf-org.apache.hadoop.conf.Configuration-">setConf</a></code> in class <code><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html" title="class in org.apache.nutch.crawl">AbstractFetchSchedule</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="setFetchSchedule-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-long-long-long-long-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setFetchSchedule</h4> |
| <pre>public <a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl">CrawlDatum</a> setFetchSchedule(<a href="https://hadoop.apache.org/docs/r3.1.3/api/org/apache/hadoop/io/Text.html?is-external=true" title="class or interface in org.apache.hadoop.io">Text</a> url, |
| <a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl">CrawlDatum</a> datum, |
| long prevFetchTime, |
| long prevModifiedTime, |
| long fetchTime, |
| long modifiedTime, |
| int state)</pre> |
| <div class="block"><span class="descfrmTypeLabel">Description copied from class: <code><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#setFetchSchedule-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-long-long-long-long-int-">AbstractFetchSchedule</a></code></span></div> |
| <div class="block">Sets the <code>fetchInterval</code> and <code>fetchTime</code> on a |
| successfully fetched page. NOTE: this implementation resets the retry |
| counter - extending classes should call super.setFetchSchedule() to |
| preserve this behavior.</div> |
| <dl> |
| <dt><span class="overrideSpecifyLabel">Specified by:</span></dt> |
| <dd><code><a href="../../../../org/apache/nutch/crawl/FetchSchedule.html#setFetchSchedule-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-long-long-long-long-int-">setFetchSchedule</a></code> in interface <code><a href="../../../../org/apache/nutch/crawl/FetchSchedule.html" title="interface in org.apache.nutch.crawl">FetchSchedule</a></code></dd> |
| <dt><span class="overrideSpecifyLabel">Overrides:</span></dt> |
| <dd><code><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html#setFetchSchedule-org.apache.hadoop.io.Text-org.apache.nutch.crawl.CrawlDatum-long-long-long-long-int-">setFetchSchedule</a></code> in class <code><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html" title="class in org.apache.nutch.crawl">AbstractFetchSchedule</a></code></dd> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>url</code> - url of the page</dd> |
| <dd><code>datum</code> - page description to be adjusted. NOTE: this instance, passed by |
| reference, may be modified inside the method.</dd> |
| <dd><code>prevFetchTime</code> - previous value of fetch time, or 0 if not available.</dd> |
| <dd><code>prevModifiedTime</code> - previous value of modifiedTime, or 0 if not available.</dd> |
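| <dd><code>fetchTime</code> - the most recent time at which the page was re-fetched. Most |
| FetchSchedule implementations should update the value in the |
| <a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl"><code>CrawlDatum</code></a> to something greater than this value.</dd> |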
| <dd><code>modifiedTime</code> - last time the content was modified. This information comes from |
| the protocol implementations, or is set to &lt; 0 if not available. |
| Most FetchSchedule implementations should update the value in the |
| <a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl"><code>CrawlDatum</code></a> to this value.</dd> |
| <dd><code>state</code> - if <a href="../../../../org/apache/nutch/crawl/FetchSchedule.html#STATUS_MODIFIED"><code>FetchSchedule.STATUS_MODIFIED</code></a>, then the content is considered to be |
| "changed" before the <code>fetchTime</code>, if |
| <a href="../../../../org/apache/nutch/crawl/FetchSchedule.html#STATUS_NOTMODIFIED"><code>FetchSchedule.STATUS_NOTMODIFIED</code></a> then the content is known to be |
| unchanged. This information may be obtained by comparing page |
| signatures before and after fetching. If this is set to |
| <a href="../../../../org/apache/nutch/crawl/FetchSchedule.html#STATUS_UNKNOWN"><code>FetchSchedule.STATUS_UNKNOWN</code></a>, then it is unknown whether the page was |
| changed; implementations are free to follow a sensible default |
| behavior.</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
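| <dd>adjusted page information, including all original information. |
| NOTE: this may be a different instance than the <a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl"><code>CrawlDatum</code></a> passed in, but |
| implementations should make sure that it contains at least all |
| information from that <code>CrawlDatum</code>.</dd> |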
| </dl> |
| </li> |
| </ul> |
| <a name="main-java.lang.String:A-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>main</h4> |
| <pre>public static void main(<a href="https://docs.oracle.com/javase/8/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args) |
| throws <a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></pre> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="https://docs.oracle.com/javase/8/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/AdaptiveFetchSchedule.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/nutch/crawl/AbstractFetchSchedule.html" title="class in org.apache.nutch.crawl"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/nutch/crawl/CrawlDatum.html" title="class in org.apache.nutch.crawl"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/nutch/crawl/AdaptiveFetchSchedule.html" target="_top">Frames</a></li> |
| <li><a href="AdaptiveFetchSchedule.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li>Nested | </li> |
| <li><a href="#field.summary">Field</a> | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li><a href="#field.detail">Field</a> | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <p class="legalCopy"><small>Copyright © 2021 The Apache Software Foundation</small></p> |
| </body> |
| </html> |