blob: 65df0c24961681e5fafa9f8ceb60d2dc8bc9ced7 [file] [log] [blame]
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<!-- NewPage -->
<html lang="en">
<head>
<!-- Generated by javadoc (1.8.0_121) on Fri Apr 14 22:12:45 PDT 2017 -->
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>SplitInput (Mahout Integration 0.13.0 API)</title>
<meta name="date" content="2017-04-14">
<link rel="stylesheet" type="text/css" href="../../../../stylesheet.css" title="Style">
<script type="text/javascript" src="../../../../script.js"></script>
</head>
<body>
<script type="text/javascript"><!--
// When this page is not opened with 'is-external=true' in its URL, copy the
// page title onto the parent document (the frameset, when shown in frames).
try {
if (location.href.indexOf('is-external=true') == -1) {
parent.document.title="SplitInput (Mahout Integration 0.13.0 API)";
}
}
// Any error raised while touching the parent document is deliberately ignored.
// NOTE(review): presumably guards against a parent that cannot be accessed - confirm.
catch(err) {
}
//-->
// Keys are the ids of the Method Summary table rows ("i0".."i30").
// Values look like bit flags built from the keys of `tabs` below
// (9 = 1|8, 10 = 2|8); the rows with 9 are the static methods.
// NOTE(review): presumably read by show() in script.js - confirm there.
var methods = {"i0":9,"i1":10,"i2":10,"i3":10,"i4":10,"i5":10,"i6":10,"i7":10,"i8":10,"i9":10,"i10":10,"i11":9,"i12":10,"i13":10,"i14":10,"i15":10,"i16":10,"i17":10,"i18":10,"i19":10,"i20":10,"i21":10,"i22":10,"i23":10,"i24":10,"i25":10,"i26":10,"i27":10,"i28":10,"i29":10,"i30":10};
// Flag value -> [id of the tab span in the table caption, tab label].
var tabs = {65535:["t0","All Methods"],1:["t1","Static Methods"],2:["t2","Instance Methods"],8:["t4","Concrete Methods"]};
// CSS class names used for the alternating table rows.
var altColor = "altColor";
var rowColor = "rowColor";
// CSS class names used for the inactive and the active caption tab.
var tableTab = "tableTab";
var activeTableTab = "activeTableTab";
</script>
<noscript>
<div>JavaScript is disabled on your browser.</div>
</noscript>
<!-- ========= START OF TOP NAVBAR ======= -->
<div class="topNav"><a name="navbar.top">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.top" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.top.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/SplitInput.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../org/apache/mahout/utils/SequenceFileDumper.html" title="class in org.apache.mahout.utils"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../index.html?org/apache/mahout/utils/SplitInput.html" target="_top">Frames</a></li>
<li><a href="SplitInput.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_top">
<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<script type="text/javascript"><!--
// Show the "All Classes" list only when this page is the top-level window,
// i.e. when it is not being displayed inside the frameset.
allClassesLink = document.getElementById("allclasses_navbar_top");
allClassesLink.style.display = (window==top) ? "block" : "none";
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li><a href="#fields.inherited.from.class.org.apache.mahout.common.AbstractJob">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.top">
<!-- -->
</a></div>
<!-- ========= END OF TOP NAVBAR ========= -->
<!-- ======== START OF CLASS DATA ======== -->
<div class="header">
<div class="subTitle">org.apache.mahout.utils</div>
<h2 title="Class SplitInput" class="title">Class SplitInput</h2>
</div>
<div class="contentContainer">
<ul class="inheritance">
<li><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">java.lang.Object</a></li>
<li>
<ul class="inheritance">
<li>org.apache.hadoop.conf.Configured</li>
<li>
<ul class="inheritance">
<li><a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true" title="class or interface in org.apache.mahout.common">org.apache.mahout.common.AbstractJob</a></li>
<li>
<ul class="inheritance">
<li>org.apache.mahout.utils.SplitInput</li>
</ul>
</li>
</ul>
</li>
</ul>
</li>
</ul>
<div class="description">
<ul class="blockList">
<li class="blockList">
<dl>
<dt>All Implemented Interfaces:</dt>
<dd>org.apache.hadoop.conf.Configurable, org.apache.hadoop.util.Tool</dd>
</dl>
<hr>
<br>
<pre>public class <span class="typeNameLabel">SplitInput</span>
extends <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true" title="class or interface in org.apache.mahout.common">AbstractJob</a></pre>
<div class="block">A utility for splitting files in the input format used by the Bayes
classifiers or anything else that has one item per line or SequenceFiles (key/value)
into training and test sets in order to perform cross-validation.
<p/>
<p/>
This class can be used to split directories of files or individual files into
training and test sets using a number of different methods.
<p/>
When executed via <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory-org.apache.hadoop.fs.Path-"><code>splitDirectory(Path)</code></a> or <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a>,
the lines read from one or more input files are written to files of the same
name into the directories specified by the
<a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestOutputDirectory-org.apache.hadoop.fs.Path-"><code>setTestOutputDirectory(Path)</code></a> and
<a href="../../../../org/apache/mahout/utils/SplitInput.html#setTrainingOutputDirectory-org.apache.hadoop.fs.Path-"><code>setTrainingOutputDirectory(Path)</code></a> methods.
<p/>
The composition of the test set is determined using one of the following
approaches:
<ul>
<li>A contiguous set of items can be chosen from the input file(s) using the
<a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitSize-int-"><code>setTestSplitSize(int)</code></a> or <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitPct-int-"><code>setTestSplitPct(int)</code></a> methods.
<a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitSize-int-"><code>setTestSplitSize(int)</code></a> allocates a fixed number of items, while
<a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitPct-int-"><code>setTestSplitPct(int)</code></a> allocates a percentage of the original input,
rounded up to the nearest integer. <a href="../../../../org/apache/mahout/utils/SplitInput.html#setSplitLocation-int-"><code>setSplitLocation(int)</code></a> is used to
control the position in the input from which the test data is extracted and
is described further below.</li>
<li>A random sampling of items can be chosen from the input file(s) using
the <a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestRandomSelectionSize-int-"><code>setTestRandomSelectionSize(int)</code></a> or
<a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestRandomSelectionPct-int-"><code>setTestRandomSelectionPct(int)</code></a> methods, each choosing a fixed test
set size or percentage of the input set size as described above. The
<a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/math/jet/random/sampling/RandomSampler.html?is-external=true" title="class or interface in org.apache.mahout.math.jet.random.sampling"><code>RandomSampler</code></a> class from <code>mahout-math</code> is used to create a sample
of the appropriate size.</li>
</ul>
<p/>
Any one of the methods above can be used to control the size of the test set.
If multiple methods are called, a runtime exception will be thrown at
execution time.
<p/>
The <a href="../../../../org/apache/mahout/utils/SplitInput.html#setSplitLocation-int-"><code>setSplitLocation(int)</code></a> method is passed an integer from 0 to 100
(inclusive) which is translated into the position of the start of the test
data within the input file.
<p/>
Given:
<ul>
<li>an input file of 1500 lines</li>
<li>a desired test data size of 10 percent</li>
</ul>
<p/>
<ul>
<li>A split location of 0 will cause the first 150 items appearing in the
input set to be written to the test set.</li>
<li>A split location of 25 will cause items 375-525 to be written to the test
set.</li>
<li>A split location of 100 will cause the last 150 items in the input to be
written to the test set.</li>
</ul>
The start of the split will always be adjusted forwards in order to ensure
that the desired test set size is allocated. Split location has no effect if
random sampling is employed.</div>
</li>
</ul>
</div>
<div class="summary">
<ul class="blockList">
<li class="blockList">
<!-- ======== NESTED CLASS SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="nested.class.summary">
<!-- -->
</a>
<h3>Nested Class Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Nested Class Summary table, listing nested classes, and an explanation">
<caption><span>Nested Classes</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Class and Description</th>
</tr>
<tr class="altColor">
<td class="colFirst"><code>static interface&nbsp;</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a></span></code>
<div class="block">Used to pass information back to a caller once a file has been split without the need for a data object</div>
</td>
</tr>
</table>
</li>
</ul>
<!-- =========== FIELD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="field.summary">
<!-- -->
</a>
<h3>Field Summary</h3>
<ul class="blockList">
<li class="blockList"><a name="fields.inherited.from.class.org.apache.mahout.common.AbstractJob">
<!-- -->
</a>
<h3>Fields inherited from class&nbsp;org.apache.mahout.common.<a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true" title="class or interface in org.apache.mahout.common">AbstractJob</a></h3>
<code><a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#argMap" title="class or interface in org.apache.mahout.common">argMap</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#inputFile" title="class or interface in org.apache.mahout.common">inputFile</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#inputPath" title="class or interface in org.apache.mahout.common">inputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#outputFile" title="class or interface in org.apache.mahout.common">outputFile</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#outputPath" title="class or interface in org.apache.mahout.common">outputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#tempPath" title="class or interface in org.apache.mahout.common">tempPath</a></code></li>
</ul>
</li>
</ul>
<!-- ======== CONSTRUCTOR SUMMARY ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.summary">
<!-- -->
</a>
<h3>Constructor Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Constructor Summary table, listing constructors, and an explanation">
<caption><span>Constructors</span><span class="tabEnd">&nbsp;</span></caption>
<tr>
<th class="colOne" scope="col">Constructor and Description</th>
</tr>
<tr class="altColor">
<td class="colOne"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#SplitInput--">SplitInput</a></span>()</code>&nbsp;</td>
</tr>
</table>
</li>
</ul>
<!-- ========== METHOD SUMMARY =========== -->
<ul class="blockList">
<li class="blockList"><a name="method.summary">
<!-- -->
</a>
<h3>Method Summary</h3>
<table class="memberSummary" border="0" cellpadding="3" cellspacing="0" summary="Method Summary table, listing methods, and an explanation">
<caption><span id="t0" class="activeTableTab"><span>All Methods</span><span class="tabEnd">&nbsp;</span></span><span id="t1" class="tableTab"><span><a href="javascript:show(1);">Static Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t2" class="tableTab"><span><a href="javascript:show(2);">Instance Methods</a></span><span class="tabEnd">&nbsp;</span></span><span id="t4" class="tableTab"><span><a href="javascript:show(8);">Concrete Methods</a></span><span class="tabEnd">&nbsp;</span></span></caption>
<tr>
<th class="colFirst" scope="col">Modifier and Type</th>
<th class="colLast" scope="col">Method and Description</th>
</tr>
<tr id="i0" class="altColor">
<td class="colFirst"><code>static int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#countLines-org.apache.hadoop.fs.FileSystem-org.apache.hadoop.fs.Path-java.nio.charset.Charset-">countLines</a></span>(org.apache.hadoop.fs.FileSystem&nbsp;fs,
org.apache.hadoop.fs.Path&nbsp;inputFile,
<a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a>&nbsp;charset)</code>
<div class="block">Count the lines in the file specified as returned by <code>BufferedReader.readLine()</code></div>
</td>
</tr>
<tr id="i1" class="rowColor">
<td class="colFirst"><code><a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getCallback--">getCallback</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i2" class="altColor">
<td class="colFirst"><code><a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a></code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getCharset--">getCharset</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i3" class="rowColor">
<td class="colFirst"><code>org.apache.hadoop.fs.Path</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getInputDirectory--">getInputDirectory</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i4" class="altColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getSplitLocation--">getSplitLocation</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i5" class="rowColor">
<td class="colFirst"><code>org.apache.hadoop.fs.Path</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestOutputDirectory--">getTestOutputDirectory</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i6" class="altColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestRandomSelectionPct--">getTestRandomSelectionPct</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i7" class="rowColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestRandomSelectionSize--">getTestRandomSelectionSize</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i8" class="altColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestSplitPct--">getTestSplitPct</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i9" class="rowColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTestSplitSize--">getTestSplitSize</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i10" class="altColor">
<td class="colFirst"><code>org.apache.hadoop.fs.Path</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#getTrainingOutputDirectory--">getTrainingOutputDirectory</a></span>()</code>&nbsp;</td>
</tr>
<tr id="i11" class="rowColor">
<td class="colFirst"><code>static void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#main-java.lang.String:A-">main</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[]&nbsp;args)</code>&nbsp;</td>
</tr>
<tr id="i12" class="altColor">
<td class="colFirst"><code>int</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#run-java.lang.String:A-">run</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[]&nbsp;args)</code>&nbsp;</td>
</tr>
<tr id="i13" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setCallback-org.apache.mahout.utils.SplitInput.SplitCallback-">setCallback</a></span>(<a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a>&nbsp;callback)</code>
<div class="block">Sets the callback used to inform the caller that an input file has been successfully split</div>
</td>
</tr>
<tr id="i14" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setCharset-java.nio.charset.Charset-">setCharset</a></span>(<a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a>&nbsp;charset)</code>
<div class="block">Set the charset used to read and write files</div>
</td>
</tr>
<tr id="i15" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setInputDirectory-org.apache.hadoop.fs.Path-">setInputDirectory</a></span>(org.apache.hadoop.fs.Path&nbsp;inputDir)</code>
<div class="block">Set the directory from which input data will be read when the <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory--"><code>splitDirectory()</code></a> method is invoked</div>
</td>
</tr>
<tr id="i16" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setKeepPct-int-">setKeepPct</a></span>(int&nbsp;keepPct)</code>
<div class="block">Sets the percentage of the input data to keep in a map reduce split input job</div>
</td>
</tr>
<tr id="i17" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setMapRedOutputDirectory-org.apache.hadoop.fs.Path-">setMapRedOutputDirectory</a></span>(org.apache.hadoop.fs.Path&nbsp;mapRedOutputDirectory)</code>&nbsp;</td>
</tr>
<tr id="i18" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setSplitLocation-int-">setSplitLocation</a></span>(int&nbsp;splitLocation)</code>
<div class="block">Set the location of the start of the test/training data split.</div>
</td>
</tr>
<tr id="i19" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestOutputDirectory-org.apache.hadoop.fs.Path-">setTestOutputDirectory</a></span>(org.apache.hadoop.fs.Path&nbsp;testOutputDir)</code>
<div class="block">Set the directory to which test data will be written.</div>
</td>
</tr>
<tr id="i20" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestRandomSelectionPct-int-">setTestRandomSelectionPct</a></span>(int&nbsp;randomSelectionPct)</code>
<div class="block">Sets number of random input samples that will be saved to the test set as a percentage of the size of the
input set.</div>
</td>
</tr>
<tr id="i21" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestRandomSelectionSize-int-">setTestRandomSelectionSize</a></span>(int&nbsp;testRandomSelectionSize)</code>
<div class="block">Sets number of random input samples that will be saved to the test set.</div>
</td>
</tr>
<tr id="i22" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitPct-int-">setTestSplitPct</a></span>(int&nbsp;testSplitPct)</code>
<div class="block">Sets the percentage of the input data to allocate to the test split</div>
</td>
</tr>
<tr id="i23" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTestSplitSize-int-">setTestSplitSize</a></span>(int&nbsp;testSplitSize)</code>&nbsp;</td>
</tr>
<tr id="i24" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setTrainingOutputDirectory-org.apache.hadoop.fs.Path-">setTrainingOutputDirectory</a></span>(org.apache.hadoop.fs.Path&nbsp;trainingOutputDir)</code>
<div class="block">Set the directory to which training data will be written.</div>
</td>
</tr>
<tr id="i25" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#setUseMapRed-boolean-">setUseMapRed</a></span>(boolean&nbsp;useMapRed)</code>
<div class="block">Set to true to use map reduce to split the input</div>
</td>
</tr>
<tr id="i26" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory--">splitDirectory</a></span>()</code>
<div class="block">Perform a split on directory specified by <a href="../../../../org/apache/mahout/utils/SplitInput.html#setInputDirectory-org.apache.hadoop.fs.Path-"><code>setInputDirectory(Path)</code></a> by calling <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a>
on each file found within that directory.</div>
</td>
</tr>
<tr id="i27" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory-org.apache.hadoop.conf.Configuration-org.apache.hadoop.fs.Path-">splitDirectory</a></span>(org.apache.hadoop.conf.Configuration&nbsp;conf,
org.apache.hadoop.fs.Path&nbsp;inputDir)</code>&nbsp;</td>
</tr>
<tr id="i28" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory-org.apache.hadoop.fs.Path-">splitDirectory</a></span>(org.apache.hadoop.fs.Path&nbsp;inputDir)</code>
<div class="block">Perform a split on the specified directory by calling <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a> on each file found within that
directory.</div>
</td>
</tr>
<tr id="i29" class="rowColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-">splitFile</a></span>(org.apache.hadoop.fs.Path&nbsp;inputFile)</code>
<div class="block">Perform a split on the specified input file.</div>
</td>
</tr>
<tr id="i30" class="altColor">
<td class="colFirst"><code>void</code></td>
<td class="colLast"><code><span class="memberNameLink"><a href="../../../../org/apache/mahout/utils/SplitInput.html#validate--">validate</a></span>()</code>
<div class="block">Validates that the current instance is in a consistent state</div>
</td>
</tr>
</table>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.org.apache.mahout.common.AbstractJob">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;org.apache.mahout.common.<a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true" title="class or interface in org.apache.mahout.common">AbstractJob</a></h3>
<code><a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addFlag-java.lang.String-java.lang.String-java.lang.String-" title="class or interface in org.apache.mahout.common">addFlag</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addInputOption--" title="class or interface in org.apache.mahout.common">addInputOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOption-org.apache.commons.cli2.Option-" title="class or interface in org.apache.mahout.common">addOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOption-java.lang.String-java.lang.String-java.lang.String-" title="class or interface in org.apache.mahout.common">addOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOption-java.lang.String-java.lang.String-java.lang.String-boolean-" title="class or interface in org.apache.mahout.common">addOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOption-java.lang.String-java.lang.String-java.lang.String-java.lang.String-" title="class or interface in org.apache.mahout.common">addOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#addOutputOption--" title="class or interface in org.apache.mahout.common">addOutputOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#buildOption-java.lang.String-java.lang.String-java.lang.String-boolean-boolean-java.lang.String-" title="class or interface in org.apache.mahout.common">buildOption</a>, <a 
href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#buildOption-java.lang.String-java.lang.String-java.lang.String-boolean-int-int-boolean-java.lang.String-" title="class or interface in org.apache.mahout.common">buildOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getAnalyzerClassFromOption--" title="class or interface in org.apache.mahout.common">getAnalyzerClassFromOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getCLIOption-java.lang.String-" title="class or interface in org.apache.mahout.common">getCLIOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getConf--" title="class or interface in org.apache.mahout.common">getConf</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getDimensions-org.apache.hadoop.fs.Path-" title="class or interface in org.apache.mahout.common">getDimensions</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getFloat-java.lang.String-" title="class or interface in org.apache.mahout.common">getFloat</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getFloat-java.lang.String-float-" title="class or interface in org.apache.mahout.common">getFloat</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getGroup--" title="class or interface in org.apache.mahout.common">getGroup</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getInputFile--" title="class or interface in org.apache.mahout.common">getInputFile</a>, <a 
href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getInputPath--" title="class or interface in org.apache.mahout.common">getInputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getInt-java.lang.String-" title="class or interface in org.apache.mahout.common">getInt</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getInt-java.lang.String-int-" title="class or interface in org.apache.mahout.common">getInt</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOption-java.util.Map-java.lang.String-" title="class or interface in org.apache.mahout.common">getOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOption-java.lang.String-" title="class or interface in org.apache.mahout.common">getOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOption-java.lang.String-java.lang.String-" title="class or interface in org.apache.mahout.common">getOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOptions-java.lang.String-" title="class or interface in org.apache.mahout.common">getOptions</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOutputFile--" title="class or interface in org.apache.mahout.common">getOutputFile</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOutputPath--" title="class or interface in org.apache.mahout.common">getOutputPath</a>, <a 
href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getOutputPath-java.lang.String-" title="class or interface in org.apache.mahout.common">getOutputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getTempPath--" title="class or interface in org.apache.mahout.common">getTempPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#getTempPath-java.lang.String-" title="class or interface in org.apache.mahout.common">getTempPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#hasOption-java.lang.String-" title="class or interface in org.apache.mahout.common">hasOption</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#keyFor-java.lang.String-" title="class or interface in org.apache.mahout.common">keyFor</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#maybePut-java.util.Map-org.apache.commons.cli2.CommandLine-org.apache.commons.cli2.Option...-" title="class or interface in org.apache.mahout.common">maybePut</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#parseArguments-java.lang.String:A-" title="class or interface in org.apache.mahout.common">parseArguments</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#parseArguments-java.lang.String:A-boolean-boolean-" title="class or interface in org.apache.mahout.common">parseArguments</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#parseDirectories-org.apache.commons.cli2.CommandLine-boolean-boolean-" title="class or 
interface in org.apache.mahout.common">parseDirectories</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-" title="class or interface in org.apache.mahout.common">prepareJob</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.String-" title="class or interface in org.apache.mahout.common">prepareJob</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-" title="class or interface in org.apache.mahout.common">prepareJob</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#prepareJob-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-java.lang.Class-" title="class or interface in org.apache.mahout.common">prepareJob</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#setConf-org.apache.hadoop.conf.Configuration-" title="class or interface in org.apache.mahout.common">setConf</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#setS3SafeCombinedInputPath-org.apache.hadoop.mapreduce.Job-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-org.apache.hadoop.fs.Path-" title="class or interface in 
org.apache.mahout.common">setS3SafeCombinedInputPath</a>, <a href="http://mahout.apache.org/mahout-math/apidocs/org/apache/mahout/common/AbstractJob.html?is-external=true#shouldRunNextPhase-java.util.Map-java.util.concurrent.atomic.AtomicInteger-" title="class or interface in org.apache.mahout.common">shouldRunNextPhase</a></code></li>
</ul>
<ul class="blockList">
<li class="blockList"><a name="methods.inherited.from.class.java.lang.Object">
<!-- -->
</a>
<h3>Methods inherited from class&nbsp;java.lang.<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true" title="class or interface in java.lang">Object</a></h3>
<code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#clone--" title="class or interface in java.lang">clone</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#equals-java.lang.Object-" title="class or interface in java.lang">equals</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#finalize--" title="class or interface in java.lang">finalize</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#getClass--" title="class or interface in java.lang">getClass</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#hashCode--" title="class or interface in java.lang">hashCode</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notify--" title="class or interface in java.lang">notify</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#notifyAll--" title="class or interface in java.lang">notifyAll</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#toString--" title="class or interface in java.lang">toString</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait--" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-" title="class or interface in java.lang">wait</a>, <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Object.html?is-external=true#wait-long-int-" title="class or interface in java.lang">wait</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
<div class="details">
<ul class="blockList">
<li class="blockList">
<!-- ========= CONSTRUCTOR DETAIL ======== -->
<ul class="blockList">
<li class="blockList"><a name="constructor.detail">
<!-- -->
</a>
<h3>Constructor Detail</h3>
<a name="SplitInput--">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>SplitInput</h4>
<pre>public&nbsp;SplitInput()</pre>
</li>
</ul>
</li>
</ul>
<!-- ============ METHOD DETAIL ========== -->
<ul class="blockList">
<li class="blockList"><a name="method.detail">
<!-- -->
</a>
<h3>Method Detail</h3>
<a name="run-java.lang.String:A-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>run</h4>
<pre>public&nbsp;int&nbsp;run(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[]&nbsp;args)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></pre>
<dl>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></code></dd>
</dl>
</li>
</ul>
<a name="main-java.lang.String:A-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>main</h4>
<pre>public static&nbsp;void&nbsp;main(<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html?is-external=true" title="class or interface in java.lang">String</a>[]&nbsp;args)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></pre>
<dl>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/Exception.html?is-external=true" title="class or interface in java.lang">Exception</a></code></dd>
</dl>
</li>
</ul>
<a name="splitDirectory--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>splitDirectory</h4>
<pre>public&nbsp;void&nbsp;splitDirectory()
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre>
<div class="block">Perform a split on the directory specified by <a href="../../../../org/apache/mahout/utils/SplitInput.html#setInputDirectory-org.apache.hadoop.fs.Path-"><code>setInputDirectory(Path)</code></a> by calling <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a>
on each file found within that directory.</div>
<dl>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd>
</dl>
</li>
</ul>
<a name="splitDirectory-org.apache.hadoop.fs.Path-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>splitDirectory</h4>
<pre>public&nbsp;void&nbsp;splitDirectory(org.apache.hadoop.fs.Path&nbsp;inputDir)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre>
<div class="block">Perform a split on the specified directory by calling <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitFile-org.apache.hadoop.fs.Path-"><code>splitFile(Path)</code></a> on each file found within that
directory.</div>
<dl>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd>
</dl>
</li>
</ul>
<a name="splitDirectory-org.apache.hadoop.conf.Configuration-org.apache.hadoop.fs.Path-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>splitDirectory</h4>
<pre>public&nbsp;void&nbsp;splitDirectory(org.apache.hadoop.conf.Configuration&nbsp;conf,
org.apache.hadoop.fs.Path&nbsp;inputDir)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a>,
<a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></pre>
<dl>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/ClassNotFoundException.html?is-external=true" title="class or interface in java.lang">ClassNotFoundException</a></code></dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/InterruptedException.html?is-external=true" title="class or interface in java.lang">InterruptedException</a></code></dd>
</dl>
</li>
</ul>
<a name="splitFile-org.apache.hadoop.fs.Path-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>splitFile</h4>
<pre>public&nbsp;void&nbsp;splitFile(org.apache.hadoop.fs.Path&nbsp;inputFile)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre>
<div class="block">Perform a split on the specified input file. Results will be written to files of the same name in the specified
training and test output directories. The <a href="../../../../org/apache/mahout/utils/SplitInput.html#validate--"><code>validate()</code></a> method is called prior to executing the split.</div>
<dl>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code></dd>
</dl>
</li>
</ul>
<a name="getTestSplitSize--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getTestSplitSize</h4>
<pre>public&nbsp;int&nbsp;getTestSplitSize()</pre>
</li>
</ul>
<a name="setTestSplitSize-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setTestSplitSize</h4>
<pre>public&nbsp;void&nbsp;setTestSplitSize(int&nbsp;testSplitSize)</pre>
</li>
</ul>
<a name="getTestSplitPct--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getTestSplitPct</h4>
<pre>public&nbsp;int&nbsp;getTestSplitPct()</pre>
</li>
</ul>
<a name="setTestSplitPct-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setTestSplitPct</h4>
<pre>public&nbsp;void&nbsp;setTestSplitPct(int&nbsp;testSplitPct)</pre>
<div class="block">Sets the percentage of the input data to allocate to the test split</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>testSplitPct</code> - a value between 0 and 100 inclusive.</dd>
</dl>
</li>
</ul>
<a name="setKeepPct-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setKeepPct</h4>
<pre>public&nbsp;void&nbsp;setKeepPct(int&nbsp;keepPct)</pre>
<div class="block">Sets the percentage of the input data to keep in a map reduce split input job</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>keepPct</code> - a value between 0 and 100 inclusive.</dd>
</dl>
</li>
</ul>
<a name="setUseMapRed-boolean-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setUseMapRed</h4>
<pre>public&nbsp;void&nbsp;setUseMapRed(boolean&nbsp;useMapRed)</pre>
<div class="block">Set to true to use map reduce to split the input</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>useMapRed</code> - a boolean to indicate whether map reduce should be used</dd>
</dl>
</li>
</ul>
<a name="setMapRedOutputDirectory-org.apache.hadoop.fs.Path-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setMapRedOutputDirectory</h4>
<pre>public&nbsp;void&nbsp;setMapRedOutputDirectory(org.apache.hadoop.fs.Path&nbsp;mapRedOutputDirectory)</pre>
</li>
</ul>
<a name="getSplitLocation--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getSplitLocation</h4>
<pre>public&nbsp;int&nbsp;getSplitLocation()</pre>
</li>
</ul>
<a name="setSplitLocation-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setSplitLocation</h4>
<pre>public&nbsp;void&nbsp;setSplitLocation(int&nbsp;splitLocation)</pre>
<div class="block">Set the location of the start of the test/training data split. Expressed as percentage of lines, for example
0 indicates that the test data should be taken from the start of the file, 100 indicates that the test data
should be taken from the end of the input file, while 25 indicates that the test data should be taken from the
first quarter of the file.
<p>
This option is only relevant in cases where random selection is not employed.</p></div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>splitLocation</code> - a value between 0 and 100 inclusive.</dd>
</dl>
</li>
</ul>
<a name="getCharset--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getCharset</h4>
<pre>public&nbsp;<a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a>&nbsp;getCharset()</pre>
</li>
</ul>
<a name="setCharset-java.nio.charset.Charset-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setCharset</h4>
<pre>public&nbsp;void&nbsp;setCharset(<a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a>&nbsp;charset)</pre>
<div class="block">Set the charset used to read and write files</div>
</li>
</ul>
<a name="getInputDirectory--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getInputDirectory</h4>
<pre>public&nbsp;org.apache.hadoop.fs.Path&nbsp;getInputDirectory()</pre>
</li>
</ul>
<a name="setInputDirectory-org.apache.hadoop.fs.Path-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setInputDirectory</h4>
<pre>public&nbsp;void&nbsp;setInputDirectory(org.apache.hadoop.fs.Path&nbsp;inputDir)</pre>
<div class="block">Set the directory from which input data will be read when the <a href="../../../../org/apache/mahout/utils/SplitInput.html#splitDirectory--"><code>splitDirectory()</code></a> method is invoked</div>
</li>
</ul>
<a name="getTrainingOutputDirectory--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getTrainingOutputDirectory</h4>
<pre>public&nbsp;org.apache.hadoop.fs.Path&nbsp;getTrainingOutputDirectory()</pre>
</li>
</ul>
<a name="setTrainingOutputDirectory-org.apache.hadoop.fs.Path-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setTrainingOutputDirectory</h4>
<pre>public&nbsp;void&nbsp;setTrainingOutputDirectory(org.apache.hadoop.fs.Path&nbsp;trainingOutputDir)</pre>
<div class="block">Set the directory to which training data will be written.</div>
</li>
</ul>
<a name="getTestOutputDirectory--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getTestOutputDirectory</h4>
<pre>public&nbsp;org.apache.hadoop.fs.Path&nbsp;getTestOutputDirectory()</pre>
</li>
</ul>
<a name="setTestOutputDirectory-org.apache.hadoop.fs.Path-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setTestOutputDirectory</h4>
<pre>public&nbsp;void&nbsp;setTestOutputDirectory(org.apache.hadoop.fs.Path&nbsp;testOutputDir)</pre>
<div class="block">Set the directory to which test data will be written.</div>
</li>
</ul>
<a name="getCallback--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getCallback</h4>
<pre>public&nbsp;<a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a>&nbsp;getCallback()</pre>
</li>
</ul>
<a name="setCallback-org.apache.mahout.utils.SplitInput.SplitCallback-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setCallback</h4>
<pre>public&nbsp;void&nbsp;setCallback(<a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils">SplitInput.SplitCallback</a>&nbsp;callback)</pre>
<div class="block">Sets the callback used to inform the caller that an input file has been successfully split</div>
</li>
</ul>
<a name="getTestRandomSelectionSize--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getTestRandomSelectionSize</h4>
<pre>public&nbsp;int&nbsp;getTestRandomSelectionSize()</pre>
</li>
</ul>
<a name="setTestRandomSelectionSize-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setTestRandomSelectionSize</h4>
<pre>public&nbsp;void&nbsp;setTestRandomSelectionSize(int&nbsp;testRandomSelectionSize)</pre>
<div class="block">Sets the number of random input samples that will be saved to the test set.</div>
</li>
</ul>
<a name="getTestRandomSelectionPct--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>getTestRandomSelectionPct</h4>
<pre>public&nbsp;int&nbsp;getTestRandomSelectionPct()</pre>
</li>
</ul>
<a name="setTestRandomSelectionPct-int-">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>setTestRandomSelectionPct</h4>
<pre>public&nbsp;void&nbsp;setTestRandomSelectionPct(int&nbsp;randomSelectionPct)</pre>
<div class="block">Sets the number of random input samples that will be saved to the test set as a percentage of the size of the
input set.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>randomSelectionPct</code> - a value between 0 and 100 inclusive.</dd>
</dl>
</li>
</ul>
<a name="validate--">
<!-- -->
</a>
<ul class="blockList">
<li class="blockList">
<h4>validate</h4>
<pre>public&nbsp;void&nbsp;validate()
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre>
<div class="block">Validates that the current instance is in a consistent state</div>
<dl>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/lang/IllegalArgumentException.html?is-external=true" title="class or interface in java.lang">IllegalArgumentException</a></code> - if settings violate class invariants.</dd>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code> - if output directories do not exist or are not directories.</dd>
</dl>
</li>
</ul>
<a name="countLines-org.apache.hadoop.fs.FileSystem-org.apache.hadoop.fs.Path-java.nio.charset.Charset-">
<!-- -->
</a>
<ul class="blockListLast">
<li class="blockList">
<h4>countLines</h4>
<pre>public static&nbsp;int&nbsp;countLines(org.apache.hadoop.fs.FileSystem&nbsp;fs,
org.apache.hadoop.fs.Path&nbsp;inputFile,
<a href="http://docs.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html?is-external=true" title="class or interface in java.nio.charset">Charset</a>&nbsp;charset)
throws <a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></pre>
<div class="block">Count the lines in the specified file, where each line is one returned by <code>BufferedReader.readLine()</code>.</div>
<dl>
<dt><span class="paramLabel">Parameters:</span></dt>
<dd><code>inputFile</code> - the file whose lines will be counted</dd>
<dd><code>charset</code> - the charset of the file to read</dd>
<dt><span class="returnLabel">Returns:</span></dt>
<dd>the number of lines in the input file.</dd>
<dt><span class="throwsLabel">Throws:</span></dt>
<dd><code><a href="http://docs.oracle.com/javase/7/docs/api/java/io/IOException.html?is-external=true" title="class or interface in java.io">IOException</a></code> - if there is a problem opening or reading the file.</dd>
</dl>
</li>
</ul>
</li>
</ul>
</li>
</ul>
</div>
</div>
<!-- ========= END OF CLASS DATA ========= -->
<!-- ======= START OF BOTTOM NAVBAR ====== -->
<div class="bottomNav"><a name="navbar.bottom">
<!-- -->
</a>
<div class="skipNav"><a href="#skip.navbar.bottom" title="Skip navigation links">Skip navigation links</a></div>
<a name="navbar.bottom.firstrow">
<!-- -->
</a>
<ul class="navList" title="Navigation">
<li><a href="../../../../overview-summary.html">Overview</a></li>
<li><a href="package-summary.html">Package</a></li>
<li class="navBarCell1Rev">Class</li>
<li><a href="class-use/SplitInput.html">Use</a></li>
<li><a href="package-tree.html">Tree</a></li>
<li><a href="../../../../deprecated-list.html">Deprecated</a></li>
<li><a href="../../../../index-all.html">Index</a></li>
<li><a href="../../../../help-doc.html">Help</a></li>
</ul>
</div>
<div class="subNav">
<ul class="navList">
<li><a href="../../../../org/apache/mahout/utils/SequenceFileDumper.html" title="class in org.apache.mahout.utils"><span class="typeNameLink">Prev&nbsp;Class</span></a></li>
<li><a href="../../../../org/apache/mahout/utils/SplitInput.SplitCallback.html" title="interface in org.apache.mahout.utils"><span class="typeNameLink">Next&nbsp;Class</span></a></li>
</ul>
<ul class="navList">
<li><a href="../../../../index.html?org/apache/mahout/utils/SplitInput.html" target="_top">Frames</a></li>
<li><a href="SplitInput.html" target="_top">No&nbsp;Frames</a></li>
</ul>
<ul class="navList" id="allclasses_navbar_bottom">
<li><a href="../../../../allclasses-noframe.html">All&nbsp;Classes</a></li>
</ul>
<div>
<!-- Shows the "allclasses_navbar_bottom" list when this page is the top-level window (window==top) and hides it otherwise. -->
<script type="text/javascript"><!--
allClassesLink = document.getElementById("allclasses_navbar_bottom");
if(window==top) {
allClassesLink.style.display = "block";
}
else {
allClassesLink.style.display = "none";
}
//-->
</script>
</div>
<div>
<ul class="subNavList">
<li>Summary:&nbsp;</li>
<li><a href="#nested.class.summary">Nested</a>&nbsp;|&nbsp;</li>
<li><a href="#fields.inherited.from.class.org.apache.mahout.common.AbstractJob">Field</a>&nbsp;|&nbsp;</li>
<li><a href="#constructor.summary">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.summary">Method</a></li>
</ul>
<ul class="subNavList">
<li>Detail:&nbsp;</li>
<li>Field&nbsp;|&nbsp;</li>
<li><a href="#constructor.detail">Constr</a>&nbsp;|&nbsp;</li>
<li><a href="#method.detail">Method</a></li>
</ul>
</div>
<a name="skip.navbar.bottom">
<!-- -->
</a></div>
<!-- ======== END OF BOTTOM NAVBAR ======= -->
<p class="legalCopy"><small>Copyright &#169; 2008&#x2013;2017 <a href="http://www.apache.org/">The Apache Software Foundation</a>. All rights reserved.</small></p>
</body>
</html>