| <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> |
| <!-- NewPage --> |
| <html lang="en"> |
| <head> |
| <!-- Generated by javadoc (1.8.0_121) on Fri Apr 14 22:12:45 PDT 2017 --> |
| <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> |
| <title>SplitInput (Mahout Integration 0.13.0 API)</title> |
| <meta name="date" content="2017-04-14"> |
| <link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style"> |
| <script type="text/javascript" src="../../../../script.js"></script> |
| </head> |
| <body> |
| <script type="text/javascript"><!-- |
| try { |
| if (location.href.indexOf('is-external=true') == -1) { |
| parent.document.title="SplitInput (Mahout Integration 0.13.0 API)"; |
| } |
| } |
| catch(err) { |
| } |
| //--> |
| var methods = {"i0":9,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10,"i11":9,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":10,"i21":10,"i22":10,"i23":10,"i24":10,"i25":10,"i26":10,"i27":10,"i28":10,"i29":10,"i30":10}; |
| var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]}; |
| var altColor = "altColor"; |
| var rowColor = "rowColor"; |
| var tableTab = "tableTab"; |
| var activeTableTab = "activeTableTab"; |
| </script> |
| <noscript> |
| <div>JavaScript is disabled on your browser.</div> |
| </noscript> |
| <!-- ========= START OF TOP NAVBAR ======= --> |
| <div class="topNav"><a name="navbar.top"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.top.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/SplitInput.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/mahout/utils/SequenceFileDumper.html" title="class in org.apache.mahout.utils"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/mahout/utils/SplitInput.html" target="_top">Frames</a></li> |
| <li><a href="SplitInput.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_top"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_top"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li><a href="#fields.inherited.from.class.org.apache.mahout.common.AbstractJob">Field</a> | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.top"> |
| <!-- --> |
| </a></div> |
| <!-- ========= END OF TOP NAVBAR ========= --> |
| <!-- ======== START OF CLASS DATA ======== --> |
| <div class="header"> |
| <div class="subTitle">org.apache.mahout.utils</div> |
| <h2 title="Class SplitInput" class="title">Class SplitInput</h2> |
| </div> |
| <div class="contentContainer"> |
| <ul class="inheritance"> |
| <li><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.hadoop.conf.Configured</li> |
| <li> |
| <ul class="inheritance"> |
| <li><a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true" title="class or interface in org.apache.mahout.common">org.apache.mahout.common.AbstractJob</a></li> |
| <li> |
| <ul class="inheritance"> |
| <li>org.apache.mahout.utils.SplitInput</li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <div class="description"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <dl> |
| <dt>All Implemented Interfaces:</dt> |
| <dd>org.apache.hadoop.conf.Configurable, org.apache.hadoop.util.Tool</dd> |
| </dl> |
| <hr> |
| <br> |
| <pre>public class <span class="typeNameLabel">SplitInput</span> |
| extends <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true" title="class or interface in org.apache.mahout.common">AbstractJob</a></pre> |
| <div class="block">A utility for splitting files in the input format used by the Bayes |
| classifiers or anything else that has one item per line or SequenceFiles (key/value) |
| into training and test sets in order to perform cross-validation. |
| <p/> |
| <p/> |
| This class can be used to split directories of files or individual files into |
| training and test sets using a number of different methods. |
| <p/> |
| When executed via <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory-org.apache.hadoop.fs.Path-"><code>splitDirectory(Path)</code></a> or <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a>, |
| the lines read from one or more input files are written to files of the same |
| name into the directories specified by the |
| <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestOutputDirectory-org.apache.hadoop.fs.Path-"><code>setTestOutputDirectory(Path)</code></a> and |
| <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTrainingOutputDirectory-org.apache.hadoop.fs.Path-"><code>setTrainingOutputDirectory(Path)</code></a> methods. |
| <p/> |
| The composition of the test set is determined using one of the following |
| approaches: |
| <ul> |
| <li>A contiguous set of items can be chosen from the input file(s) using the |
| <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitSize-int-"><code>setTestSplitSize(int)</code></a> or <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitPct-int-"><code>setTestSplitPct(int)</code></a> methods. |
| <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitSize-int-"><code>setTestSplitSize(int)</code></a> allocates a fixed number of items, while |
| <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitPct-int-"><code>setTestSplitPct(int)</code></a> allocates a percentage of the original input, |
| rounded up to the nearest integer. <a href="../../../../org/apache/mahout/utils/SplitInput.html#setSplitLocation-int-"><code>setSplitLocation(int)</code></a> is used to |
| control the position in the input from which the test data is extracted and |
| is described further below.</li> |
| <li>A random sampling of items can be chosen from the input file(s) using |
| the <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestRandomSelectionSize-int-"><code>setTestRandomSelectionSize(int)</code></a> or |
| <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestRandomSelectionPct-int-"><code>setTestRandomSelectionPct(int)</code></a> methods, each choosing a fixed test |
| set size or percentage of the input set size as described above. The |
| <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/jet/random/sampling/RandomSampler.html?is-external=true" title="class or interface in org.apache.mahout.math.jet.random.sampling"><code>RandomSampler</code></a> class from <code>mahout-math</code> is used to create a sample |
| of the appropriate size.</li> |
| </ul> |
| <p/> |
| Any one of the methods above can be used to control the size of the test set. |
| If multiple methods are called, a runtime exception will be thrown at |
| execution time. |
| <p/> |
| The <a href="../../../../org/apache/mahout/utils/SplitInput.html#setSplitLocation-int-"><code>setSplitLocation(int)</code></a> method is passed an integer from 0 to 100 |
| (inclusive) which is translated into the position of the start of the test |
| data within the input file. |
| <p/> |
| Given: |
| <ul> |
| <li>an input file of 1500 lines</li> |
| <li>a desired test data size of 10 percent</li> |
| </ul> |
| <p/> |
| <ul> |
| <li>A split location of 0 will cause the first 150 items appearing in the |
| input set to be written to the test set.</li> |
| <li>A split location of 25 will cause items 375-525 to be written to the test |
| set.</li> |
| <li>A split location of 100 will cause the last 150 items in the input to be |
| written to the test set.</li> |
| </ul> |
| The start of the split will always be adjusted forwards in order to ensure |
| that the desired test set size is allocated. Split location has no effect if |
| random sampling is employed.</div> |
| </li> |
| </ul> |
| </div> |
| <div class="summary"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ======== NESTED CLASS SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="nested.class.summary"> |
| <!-- --> |
| </a> |
| <h3>Nested Class Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation"> |
| <caption><span>Nested Classes</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Class and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colFirst"><code>static interface </code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a></span></code> |
| <div class="block">Used to pass information back to a caller once a file has been split without the need for a data object</div> |
| </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- =========== FIELD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="field.summary"> |
| <!-- --> |
| </a> |
| <h3>Field Summary</h3> |
| <ul class="blockList"> |
| <li class="blockList"><a name="fields.inherited.from.class.org.apache.mahout.common.AbstractJob"> |
| <!-- --> |
| </a> |
| <h3>Fields inherited from class org.apache.mahout.common.<a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true" title="class or interface in org.apache.mahout.common">AbstractJob</a></h3> |
| <code><a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#argMap" title="class or interface in org.apache.mahout.common">argMap</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#inputFile" title="class or interface in org.apache.mahout.common">inputFile</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#inputPath" title="class or interface in org.apache.mahout.common">inputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#outputFile" title="class or interface in org.apache.mahout.common">outputFile</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#outputPath" title="class or interface in org.apache.mahout.common">outputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#tempPath" title="class or interface in org.apache.mahout.common">tempPath</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ======== CONSTRUCTOR SUMMARY ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.summary"> |
| <!-- --> |
| </a> |
| <h3>Constructor Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation"> |
| <caption><span>Constructors</span><span class="tabEnd"> </span></caption> |
| <tr> |
| <th class="colOne" scope="col">Constructor and Description</th> |
| </tr> |
| <tr class="altColor"> |
| <td class="colOne"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#SplitInput--">SplitInput</a></span>()</code> </td> |
| </tr> |
| </table> |
| </li> |
| </ul> |
| <!-- ========== METHOD SUMMARY =========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.summary"> |
| <!-- --> |
| </a> |
| <h3>Method Summary</h3> |
| <table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation"> |
| <caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd"> </span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd"> </span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd"> </span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd"> </span></span></caption> |
| <tr> |
| <th class="colFirst" scope="col">Modifier and Type</th> |
| <th class="colLast" scope="col">Method and Description</th> |
| </tr> |
| <tr id="i0" class="altColor"> |
| <td class="colFirst"><code>static int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#countLines-org.apache.hadoop.fs.FileSystem-org.apache.hadoop.fs.Path-java.nio.charset.Charset-">countLines</a></span>(org.apache.hadoop.fs.FileSystem fs, |
| org.apache.hadoop.fs.Path inputFile, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a> charset)</code> |
| <div class="block">Count the lines in the file specified as returned by <code>BufferedReader.readLine()</code></div> |
| </td> |
| </tr> |
| <tr id="i1" class="rowColor"> |
| <td class="colFirst"><code><a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getCallback--">getCallback</a></span>()</code> </td> |
| </tr> |
| <tr id="i2" class="altColor"> |
| <td class="colFirst"><code><a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a></code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getCharset--">getCharset</a></span>()</code> </td> |
| </tr> |
| <tr id="i3" class="rowColor"> |
| <td class="colFirst"><code>org.apache.hadoop.fs.Path</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getInputDirectory--">getInputDirectory</a></span>()</code> </td> |
| </tr> |
| <tr id="i4" class="altColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getSplitLocation--">getSplitLocation</a></span>()</code> </td> |
| </tr> |
| <tr id="i5" class="rowColor"> |
| <td class="colFirst"><code>org.apache.hadoop.fs.Path</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestOutputDirectory--">getTestOutputDirectory</a></span>()</code> </td> |
| </tr> |
| <tr id="i6" class="altColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestRandomSelectionPct--">getTestRandomSelectionPct</a></span>()</code> </td> |
| </tr> |
| <tr id="i7" class="rowColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestRandomSelectionSize--">getTestRandomSelectionSize</a></span>()</code> </td> |
| </tr> |
| <tr id="i8" class="altColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestSplitPct--">getTestSplitPct</a></span>()</code> </td> |
| </tr> |
| <tr id="i9" class="rowColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestSplitSize--">getTestSplitSize</a></span>()</code> </td> |
| </tr> |
| <tr id="i10" class="altColor"> |
| <td class="colFirst"><code>org.apache.hadoop.fs.Path</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTrainingOutputDirectory--">getTrainingOutputDirectory</a></span>()</code> </td> |
| </tr> |
| <tr id="i11" class="rowColor"> |
| <td class="colFirst"><code>static void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#main-java.lang.String:A-">main</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args)</code> </td> |
| </tr> |
| <tr id="i12" class="altColor"> |
| <td class="colFirst"><code>int</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#run-java.lang.String:A-">run</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args)</code> </td> |
| </tr> |
| <tr id="i13" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setCallback-org.apache.mahout.utils.SplitInput.SplitCallback-">setCallback</a></span>(<a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a> callback)</code> |
| <div class="block">Sets the callback used to inform the caller that an input file has been successfully split</div> |
| </td> |
| </tr> |
| <tr id="i14" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setCharset-java.nio.charset.Charset-">setCharset</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a> charset)</code> |
| <div class="block">Set the charset used to read and write files</div> |
| </td> |
| </tr> |
| <tr id="i15" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setInputDirectory-org.apache.hadoop.fs.Path-">setInputDirectory</a></span>(org.apache.hadoop.fs.Path inputDir)</code> |
| <div class="block">Set the directory from which input data will be read when the <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory--"><code>splitDirectory()</code></a> method is invoked</div> |
| </td> |
| </tr> |
| <tr id="i16" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setKeepPct-int-">setKeepPct</a></span>(int keepPct)</code> |
| <div class="block">Sets the percentage of the input data to keep in a map reduce split input job</div> |
| </td> |
| </tr> |
| <tr id="i17" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setMapRedOutputDirectory-org.apache.hadoop.fs.Path-">setMapRedOutputDirectory</a></span>(org.apache.hadoop.fs.Path mapRedOutputDirectory)</code> </td> |
| </tr> |
| <tr id="i18" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setSplitLocation-int-">setSplitLocation</a></span>(int splitLocation)</code> |
| <div class="block">Set the location of the start of the test/training data split.</div> |
| </td> |
| </tr> |
| <tr id="i19" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestOutputDirectory-org.apache.hadoop.fs.Path-">setTestOutputDirectory</a></span>(org.apache.hadoop.fs.Path testOutputDir)</code> |
| <div class="block">Set the directory to which test data will be written.</div> |
| </td> |
| </tr> |
| <tr id="i20" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestRandomSelectionPct-int-">setTestRandomSelectionPct</a></span>(int randomSelectionPct)</code> |
| <div class="block">Sets the number of random input samples that will be saved to the test set as a percentage of the size of the |
| input set.</div> |
| </td> |
| </tr> |
| <tr id="i21" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestRandomSelectionSize-int-">setTestRandomSelectionSize</a></span>(int testRandomSelectionSize)</code> |
| <div class="block">Sets the number of random input samples that will be saved to the test set.</div> |
| </td> |
| </tr> |
| <tr id="i22" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitPct-int-">setTestSplitPct</a></span>(int testSplitPct)</code> |
| <div class="block">Sets the percentage of the input data to allocate to the test split</div> |
| </td> |
| </tr> |
| <tr id="i23" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitSize-int-">setTestSplitSize</a></span>(int testSplitSize)</code> </td> |
| </tr> |
| <tr id="i24" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTrainingOutputDirectory-org.apache.hadoop.fs.Path-">setTrainingOutputDirectory</a></span>(org.apache.hadoop.fs.Path trainingOutputDir)</code> |
| <div class="block">Set the directory to which training data will be written.</div> |
| </td> |
| </tr> |
| <tr id="i25" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setUseMapRed-boolean-">setUseMapRed</a></span>(boolean useMapRed)</code> |
| <div class="block">Set to true to use map reduce to split the input</div> |
| </td> |
| </tr> |
| <tr id="i26" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory--">splitDirectory</a></span>()</code> |
| <div class="block">Perform a split on the directory specified by <a href="../../../../org/apache/mahout/utils/SplitInput.html#setInputDirectory-org.apache.hadoop.fs.Path-"><code>setInputDirectory(Path)</code></a> by calling <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a> |
| on each file found within that directory.</div> |
| </td> |
| </tr> |
| <tr id="i27" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory-org.apache.hadoop.conf.Configuration-org.apache.hadoop.fs.Path-">splitDirectory</a></span>(org.apache.hadoop.conf.Configuration conf, |
| org.apache.hadoop.fs.Path inputDir)</code> </td> |
| </tr> |
| <tr id="i28" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory-org.apache.hadoop.fs.Path-">splitDirectory</a></span>(org.apache.hadoop.fs.Path inputDir)</code> |
| <div class="block">Perform a split on the specified directory by calling <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a> on each file found within that |
| directory.</div> |
| </td> |
| </tr> |
| <tr id="i29" class="rowColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-">splitFile</a></span>(org.apache.hadoop.fs.Path inputFile)</code> |
| <div class="block">Perform a split on the specified input file.</div> |
| </td> |
| </tr> |
| <tr id="i30" class="altColor"> |
| <td class="colFirst"><code>void</code></td> |
| <td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#validate--">validate</a></span>()</code> |
| <div class="block">Validates that the current instance is in a consistent state</div> |
| </td> |
| </tr> |
| </table> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.org.apache.mahout.common.AbstractJob"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class org.apache.mahout.common.<a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true" title="class or interface in org.apache.mahout.common">AbstractJob</a></h3> |
| <code><a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addFlag-java.lang.String-java.lang.String-java.lang.String-" title="class or interface in org.apache.mahout.common">addFlag</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addInputOption--" title="class or interface in org.apache.mahout.common">addInputOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOption-org.apache.commons.cli2.Option-" title="class or interface in org.apache.mahout.common">addOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOption-java.lang.String-java.lang.String-java.lang.String-" title="class or interface in org.apache.mahout.common">addOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOption-java.lang.String-java.lang.String-java.lang.String-boolean-" title="class or interface in org.apache.mahout.common">addOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOption-java.lang.String-java.lang.String-java.lang.String-java.lang.String-" title="class or interface in org.apache.mahout.common">addOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOutputOption--" title="class or interface in org.apache.mahout.common">addOutputOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#buildOption-java.lang.String-java.lang.String-java.lang.String-boolean-boolean-java.lang.String-" title="class or interface in org.apache.mahout.common">buildOption</a>, <a 
href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#buildOption-java.lang.String-java.lang.String-java.lang.String-boolean-int-int-boolean-java.lang.String-" title="class or interface in org.apache.mahout.common">buildOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getAnalyzerClassFromOption--" title="class or interface in org.apache.mahout.common">getAnalyzerClassFromOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getCLIOption-java.lang.String-" title="class or interface in org.apache.mahout.common">getCLIOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getConf--" title="class or interface in org.apache.mahout.common">getConf</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getDimensions-org.apache.hadoop.fs.Path-" title="class or interface in org.apache.mahout.common">getDimensions</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getFloat-java.lang.String-" title="class or interface in org.apache.mahout.common">getFloat</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getFloat-java.lang.String-float-" title="class or interface in org.apache.mahout.common">getFloat</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getGroup--" title="class or interface in org.apache.mahout.common">getGroup</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getInputFile--" title="class or interface in org.apache.mahout.common">getInputFile</a>, <a 
href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getInputPath--" title="class or interface in org.apache.mahout.common">getInputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getInt-java.lang.String-" title="class or interface in org.apache.mahout.common">getInt</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getInt-java.lang.String-int-" title="class or interface in org.apache.mahout.common">getInt</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOption-java.util.Map-java.lang.String-" title="class or interface in org.apache.mahout.common">getOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOption-java.lang.String-" title="class or interface in org.apache.mahout.common">getOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOption-java.lang.String-java.lang.String-" title="class or interface in org.apache.mahout.common">getOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOptions-java.lang.String-" title="class or interface in org.apache.mahout.common">getOptions</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOutputFile--" title="class or interface in org.apache.mahout.common">getOutputFile</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOutputPath--" title="class or interface in org.apache.mahout.common">getOutputPath</a>, <a 
href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOutputPath-java.lang.String-" title="class or interface in org.apache.mahout.common">getOutputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getTempPath--" title="class or interface in org.apache.mahout.common">getTempPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getTempPath-java.lang.String-" title="class or interface in org.apache.mahout.common">getTempPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#hasOption-java.lang.String-" title="class or interface in org.apache.mahout.common">hasOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#keyFor-java.lang.String-" title="class or interface in org.apache.mahout.common">keyFor</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#maybePut-java.util.Map-org.apache.commons.cli2.CommandLine-org.apache.commons.cli2.Option...-" title="class or interface in org.apache.mahout.common">maybePut</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#parseArguments-java.lang.String:A-" title="class or interface in org.apache.mahout.common">parseArguments</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#parseArguments-java.lang.String:A-boolean-boolean-" title="class or interface in org.apache.mahout.common">parseArguments</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#parseDirectories-org.apache.commons.cli2.CommandLine-boolean-boolean-" title="class or 
interface in org.apache.mahout.common">parseDirectories</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-" title="class or interface in org.apache.mahout.common">prepareJob</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.String-" title="class or interface in org.apache.mahout.common">prepareJob</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-" title="class or interface in org.apache.mahout.common">prepareJob</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-" title="class or interface in org.apache.mahout.common">prepareJob</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#setConf-org.apache.hadoop.conf.Configuration-" title="class or interface in org.apache.mahout.common">setConf</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#setS3SafeCombinedInputPath-org.apache.hadoop.mapreduce.Job-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-" title="class or interface in 
org.apache.mahout.common">setS3SafeCombinedInputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#shouldRunNextPhase-java.util.Map-java.util.concurrent.atomic.AtomicInteger-" title="class or interface in org.apache.mahout.common">shouldRunNextPhase</a></code></li> |
| </ul> |
| <ul class="blockList"> |
| <li class="blockList"><a name="methods.inherited.from.class.java.lang.Object"> |
| <!-- --> |
| </a> |
| <h3>Methods inherited from class java.lang.<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3> |
| <code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| <div class="details"> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <!-- ========= CONSTRUCTOR DETAIL ======== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="constructor.detail"> |
| <!-- --> |
| </a> |
| <h3>Constructor Detail</h3> |
| <a name="SplitInput--"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>SplitInput</h4> |
| <pre>public SplitInput()</pre> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <!-- ============ METHOD DETAIL ========== --> |
| <ul class="blockList"> |
| <li class="blockList"><a name="method.detail"> |
| <!-- --> |
| </a> |
| <h3>Method Detail</h3> |
| <a name="run-java.lang.String:A-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>run</h4> |
| <pre>public int run(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></pre> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="main-java.lang.String:A-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>main</h4> |
| <pre>public static void main(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[] args) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></pre> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="splitDirectory--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>splitDirectory</h4> |
| <pre>public void splitDirectory() |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre> |
| <div class="block">Perform a split on the directory specified by <a href="../../../../org/apache/mahout/utils/SplitInput.html#setInputDirectory-org.apache.hadoop.fs.Path-"><code>setInputDirectory(Path)</code></a> by calling <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a> |
| on each file found within that directory.</div> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="splitDirectory-org.apache.hadoop.fs.Path-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>splitDirectory</h4> |
| <pre>public void splitDirectory(org.apache.hadoop.fs.Path inputDir) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre> |
| <div class="block">Perform a split on the specified directory by calling <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a> on each file found within that |
| directory.</div> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="splitDirectory-org.apache.hadoop.conf.Configuration-org.apache.hadoop.fs.Path-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>splitDirectory</h4> |
| <pre>public void splitDirectory(org.apache.hadoop.conf.Configuration conf, |
| org.apache.hadoop.fs.Path inputDir) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a>, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="splitFile-org.apache.hadoop.fs.Path-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>splitFile</h4> |
| <pre>public void splitFile(org.apache.hadoop.fs.Path inputFile) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre> |
| <div class="block">Perform a split on the specified input file. Results will be written to files of the same name in the specified |
| training and test output directories. The <a href="../../../../org/apache/mahout/utils/SplitInput.html#validate--"><code>validate()</code></a> method is called prior to executing the split.</div> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getTestSplitSize--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getTestSplitSize</h4> |
| <pre>public int getTestSplitSize()</pre> |
| </li> |
| </ul> |
| <a name="setTestSplitSize-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setTestSplitSize</h4> |
| <pre>public void setTestSplitSize(int testSplitSize)</pre> |
| </li> |
| </ul> |
| <a name="getTestSplitPct--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getTestSplitPct</h4> |
| <pre>public int getTestSplitPct()</pre> |
| </li> |
| </ul> |
| <a name="setTestSplitPct-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setTestSplitPct</h4> |
| <pre>public void setTestSplitPct(int testSplitPct)</pre> |
| <div class="block">Sets the percentage of the input data to allocate to the test split</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>testSplitPct</code> - a value between 0 and 100 inclusive.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="setKeepPct-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setKeepPct</h4> |
| <pre>public void setKeepPct(int keepPct)</pre> |
| <div class="block">Sets the percentage of the input data to keep in a map reduce split input job</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>keepPct</code> - a value between 0 and 100 inclusive.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="setUseMapRed-boolean-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setUseMapRed</h4> |
| <pre>public void setUseMapRed(boolean useMapRed)</pre> |
| <div class="block">Set to true to use map reduce to split the input</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>useMapRed</code> - a boolean to indicate whether map reduce should be used</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="setMapRedOutputDirectory-org.apache.hadoop.fs.Path-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setMapRedOutputDirectory</h4> |
| <pre>public void setMapRedOutputDirectory(org.apache.hadoop.fs.Path mapRedOutputDirectory)</pre> |
| </li> |
| </ul> |
| <a name="getSplitLocation--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getSplitLocation</h4> |
| <pre>public int getSplitLocation()</pre> |
| </li> |
| </ul> |
| <a name="setSplitLocation-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setSplitLocation</h4> |
| <pre>public void setSplitLocation(int splitLocation)</pre> |
| <div class="block">Set the location of the start of the test/training data split. Expressed as percentage of lines, for example |
| 0 indicates that the test data should be taken from the start of the file, 100 indicates that the test data |
| should be taken from the end of the input file, while 25 indicates that the test data should be taken from the |
| first quarter of the file. |
| <p> |
| This option is only relevant in cases where random selection is not employed</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>splitLocation</code> - a value between 0 and 100 inclusive.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="getCharset--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getCharset</h4> |
| <pre>public <a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a> getCharset()</pre> |
| </li> |
| </ul> |
| <a name="setCharset-java.nio.charset.Charset-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setCharset</h4> |
| <pre>public void setCharset(<a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a> charset)</pre> |
| <div class="block">Set the charset used to read and write files</div> |
| </li> |
| </ul> |
| <a name="getInputDirectory--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getInputDirectory</h4> |
| <pre>public org.apache.hadoop.fs.Path getInputDirectory()</pre> |
| </li> |
| </ul> |
| <a name="setInputDirectory-org.apache.hadoop.fs.Path-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setInputDirectory</h4> |
| <pre>public void setInputDirectory(org.apache.hadoop.fs.Path inputDir)</pre> |
| <div class="block">Set the directory from which input data will be read when the <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory--"><code>splitDirectory()</code></a> method is invoked</div> |
| </li> |
| </ul> |
| <a name="getTrainingOutputDirectory--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getTrainingOutputDirectory</h4> |
| <pre>public org.apache.hadoop.fs.Path getTrainingOutputDirectory()</pre> |
| </li> |
| </ul> |
| <a name="setTrainingOutputDirectory-org.apache.hadoop.fs.Path-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setTrainingOutputDirectory</h4> |
| <pre>public void setTrainingOutputDirectory(org.apache.hadoop.fs.Path trainingOutputDir)</pre> |
| <div class="block">Set the directory to which training data will be written.</div> |
| </li> |
| </ul> |
| <a name="getTestOutputDirectory--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getTestOutputDirectory</h4> |
| <pre>public org.apache.hadoop.fs.Path getTestOutputDirectory()</pre> |
| </li> |
| </ul> |
| <a name="setTestOutputDirectory-org.apache.hadoop.fs.Path-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setTestOutputDirectory</h4> |
| <pre>public void setTestOutputDirectory(org.apache.hadoop.fs.Path testOutputDir)</pre> |
| <div class="block">Set the directory to which test data will be written.</div> |
| </li> |
| </ul> |
| <a name="getCallback--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getCallback</h4> |
| <pre>public <a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a> getCallback()</pre> |
| </li> |
| </ul> |
| <a name="setCallback-org.apache.mahout.utils.SplitInput.SplitCallback-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setCallback</h4> |
| <pre>public void setCallback(<a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a> callback)</pre> |
| <div class="block">Sets the callback used to inform the caller that an input file has been successfully split</div> |
| </li> |
| </ul> |
| <a name="getTestRandomSelectionSize--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getTestRandomSelectionSize</h4> |
| <pre>public int getTestRandomSelectionSize()</pre> |
| </li> |
| </ul> |
| <a name="setTestRandomSelectionSize-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setTestRandomSelectionSize</h4> |
| <pre>public void setTestRandomSelectionSize(int testRandomSelectionSize)</pre> |
| <div class="block">Sets the number of random input samples that will be saved to the test set.</div> |
| </li> |
| </ul> |
| <a name="getTestRandomSelectionPct--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>getTestRandomSelectionPct</h4> |
| <pre>public int getTestRandomSelectionPct()</pre> |
| </li> |
| </ul> |
| <a name="setTestRandomSelectionPct-int-"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>setTestRandomSelectionPct</h4> |
| <pre>public void setTestRandomSelectionPct(int randomSelectionPct)</pre> |
| <div class="block">Sets the number of random input samples that will be saved to the test set as a percentage of the size of the |
| input set.</div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>randomSelectionPct</code> - a value between 0 and 100 inclusive.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="validate--"> |
| <!-- --> |
| </a> |
| <ul class="blockList"> |
| <li class="blockList"> |
| <h4>validate</h4> |
| <pre>public void validate() |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre> |
| <div class="block">Validates that the current instance is in a consistent state</div> |
| <dl> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/IllegalArgumentException.html?is-external=true" title="class or interface in java.lang">IllegalArgumentException</a></code> - if settings violate class invariants.</dd> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code> - if output directories do not exist or are not directories.</dd> |
| </dl> |
| </li> |
| </ul> |
| <a name="countLines-org.apache.hadoop.fs.FileSystem-org.apache.hadoop.fs.Path-java.nio.charset.Charset-"> |
| <!-- --> |
| </a> |
| <ul class="blockListLast"> |
| <li class="blockList"> |
| <h4>countLines</h4> |
| <pre>public static int countLines(org.apache.hadoop.fs.FileSystem fs, |
| org.apache.hadoop.fs.Path inputFile, |
| <a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a> charset) |
| throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre> |
| <div class="block">Count the lines in the specified file, as returned by <code>BufferedReader.readLine()</code></div> |
| <dl> |
| <dt><span class="paramLabel">Parameters:</span></dt> |
| <dd><code>inputFile</code> - the file whose lines will be counted</dd> |
| <dd><code>charset</code> - the charset of the file to read</dd> |
| <dt><span class="returnLabel">Returns:</span></dt> |
| <dd>the number of lines in the input file.</dd> |
| <dt><span class="throwsLabel">Throws:</span></dt> |
| <dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code> - if there is a problem opening or reading the file.</dd> |
| </dl> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| </div> |
| </div> |
| <!-- ========= END OF CLASS DATA ========= --> |
| <!-- ======= START OF BOTTOM NAVBAR ====== --> |
| <div class="bottomNav"><a name="navbar.bottom"> |
| <!-- --> |
| </a> |
| <div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div> |
| <a name="navbar.bottom.firstrow"> |
| <!-- --> |
| </a> |
| <ul class="navList" title="Navigation"> |
| <li><a href="../../../../overview-summary.html">Overview</a></li> |
| <li><a href="package-summary.html">Package</a></li> |
| <li class="navBarCell1Rev">Class</li> |
| <li><a href="class-use/SplitInput.html">Use</a></li> |
| <li><a href="package-tree.html">Tree</a></li> |
| <li><a href="../../../../deprecated-list.html">Deprecated</a></li> |
| <li><a href="../../../../index-all.html">Index</a></li> |
| <li><a href="../../../../help-doc.html">Help</a></li> |
| </ul> |
| </div> |
| <div class="subNav"> |
| <ul class="navList"> |
| <li><a href="../../../../org/apache/mahout/utils/SequenceFileDumper.html" title="class in org.apache.mahout.utils"><span class="typeNameLink">Prev Class</span></a></li> |
| <li><a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils"><span class="typeNameLink">Next Class</span></a></li> |
| </ul> |
| <ul class="navList"> |
| <li><a href="../../../../index.html?org/apache/mahout/utils/SplitInput.html" target="_top">Frames</a></li> |
| <li><a href="SplitInput.html" target="_top">No Frames</a></li> |
| </ul> |
| <ul class="navList" id="allclasses_navbar_bottom"> |
| <li><a href="../../../../allclasses-noframe.html">All Classes</a></li> |
| </ul> |
| <div> |
| <script type="text/javascript"><!-- |
| allClassesLink = document.getElementById("allclasses_navbar_bottom"); |
| if(window==top) { |
| allClassesLink.style.display = "block"; |
| } |
| else { |
| allClassesLink.style.display = "none"; |
| } |
| //--> |
| </script> |
| </div> |
| <div> |
| <ul class="subNavList"> |
| <li>Summary: </li> |
| <li><a href="#nested.class.summary">Nested</a> | </li> |
| <li><a href="#fields.inherited.from.class.org.apache.mahout.common.AbstractJob">Field</a> | </li> |
| <li><a href="#constructor.summary">Constr</a> | </li> |
| <li><a href="#method.summary">Method</a></li> |
| </ul> |
| <ul class="subNavList"> |
| <li>Detail: </li> |
| <li>Field | </li> |
| <li><a href="#constructor.detail">Constr</a> | </li> |
| <li><a href="#method.detail">Method</a></li> |
| </ul> |
| </div> |
| <a name="skip.navbar.bottom"> |
| <!-- --> |
| </a></div> |
| <!-- ======== END OF BOTTOM NAVBAR ======= --> |
| <p class="legalCopy"><small>Copyright © 2008–2017 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p> |
| </body> |
| </html> |