<!-- blob: 7f99f7a9e8393aaaedc7c1a3a6932c1b0db9dfed [file] [log] [blame] -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.17"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>mxnet: dmlc::InputSplit Class Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">mxnet
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.17 -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
var searchBox = new SearchBox("searchBox", "search",false,'Search');
/* @license-end */
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:cf05388f2679ee054f2beb29a391d25f4e673ac3&amp;dn=gpl-2.0.txt GPL-v2 */
$(function() {
initMenu('',true,false,'search.php','Search');
$(document).ready(function() { init_search(); });
});
/* @license-end */</script>
<div id="main-nav"></div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="namespacedmlc.html">dmlc</a></li><li class="navelem"><a class="el" href="classdmlc_1_1InputSplit.html">InputSplit</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#nested-classes">Classes</a> &#124;
<a href="#pub-methods">Public Member Functions</a> &#124;
<a href="#pub-static-methods">Static Public Member Functions</a> &#124;
<a href="classdmlc_1_1InputSplit-members.html">List of all members</a> </div>
<div class="headertitle">
<div class="title">dmlc::InputSplit Class Reference<span class="mlabels"><span class="mlabel">abstract</span></span></div> </div>
</div><!--header-->
<div class="contents">
<p>input split: allows reading records from one split of the data, an independent part of a partition that covers the whole dataset
<a href="classdmlc_1_1InputSplit.html#details">More...</a></p>
<p><code>#include &lt;<a class="el" href="3rdparty_2dmlc-core_2include_2dmlc_2io_8h_source.html">io.h</a>&gt;</code></p>
<div class="dynheader">
Inheritance diagram for dmlc::InputSplit:</div>
<div class="dyncontent">
<div class="center"><img src="classdmlc_1_1InputSplit__inherit__graph.png" border="0" usemap="#dmlc_1_1InputSplit_inherit__map" alt="Inheritance graph"/></div>
<!-- MAP 0 -->
</div>
<div class="dynheader">
Collaboration diagram for dmlc::InputSplit:</div>
<div class="dyncontent">
<div class="center"><img src="classdmlc_1_1InputSplit__coll__graph.png" border="0" usemap="#dmlc_1_1InputSplit_coll__map" alt="Collaboration graph"/></div>
<!-- MAP 1 -->
</div>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
Classes</h2></td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="structdmlc_1_1InputSplit_1_1Blob.html">Blob</a></td></tr>
<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">a blob of memory region <a href="structdmlc_1_1InputSplit_1_1Blob.html#details">More...</a><br /></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
Public Member Functions</h2></td></tr>
<tr class="memitem:a58c7f48bec6bd1a880e59c4bbfdf4417"><td class="memItemLeft" align="right" valign="top">virtual void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#a58c7f48bec6bd1a880e59c4bbfdf4417">HintChunkSize</a> (size_t chunk_size)</td></tr>
<tr class="memdesc:a58c7f48bec6bd1a880e59c4bbfdf4417"><td class="mdescLeft">&#160;</td><td class="mdescRight">hint to the InputSplit how large a chunk it should return when implementing NextChunk; this is only a hint and may not be enforced, but <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> will try to adjust its internal buffer size to the hinted value <a href="classdmlc_1_1InputSplit.html#a58c7f48bec6bd1a880e59c4bbfdf4417">More...</a><br /></td></tr>
<tr class="separator:a58c7f48bec6bd1a880e59c4bbfdf4417"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:af2fb3e0f9acd6bae599a398eb0758044"><td class="memItemLeft" align="right" valign="top">virtual size_t&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#af2fb3e0f9acd6bae599a398eb0758044">GetTotalSize</a> (void)=0</td></tr>
<tr class="memdesc:af2fb3e0f9acd6bae599a398eb0758044"><td class="mdescLeft">&#160;</td><td class="mdescRight">get the total size of the <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> <a href="classdmlc_1_1InputSplit.html#af2fb3e0f9acd6bae599a398eb0758044">More...</a><br /></td></tr>
<tr class="separator:af2fb3e0f9acd6bae599a398eb0758044"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a2f111755a3434097bdd2a765982180a7"><td class="memItemLeft" align="right" valign="top">virtual void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#a2f111755a3434097bdd2a765982180a7">BeforeFirst</a> (void)=0</td></tr>
<tr class="memdesc:a2f111755a3434097bdd2a765982180a7"><td class="mdescLeft">&#160;</td><td class="mdescRight">reset the position of <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> to beginning <a href="classdmlc_1_1InputSplit.html#a2f111755a3434097bdd2a765982180a7">More...</a><br /></td></tr>
<tr class="separator:a2f111755a3434097bdd2a765982180a7"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a0f124b34e62d9319dca39fa42e5a59f7"><td class="memItemLeft" align="right" valign="top">virtual bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#a0f124b34e62d9319dca39fa42e5a59f7">NextRecord</a> (<a class="el" href="structdmlc_1_1InputSplit_1_1Blob.html">Blob</a> *out_rec)=0</td></tr>
<tr class="memdesc:a0f124b34e62d9319dca39fa42e5a59f7"><td class="mdescLeft">&#160;</td><td class="mdescRight">get the next record; the returned value is valid until the next call to NextRecord, NextChunk or NextBatch. The caller can modify the memory content of out_rec. <a href="classdmlc_1_1InputSplit.html#a0f124b34e62d9319dca39fa42e5a59f7">More...</a><br /></td></tr>
<tr class="separator:a0f124b34e62d9319dca39fa42e5a59f7"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a2e835c07ce77d8d873592e006c242156"><td class="memItemLeft" align="right" valign="top">virtual bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#a2e835c07ce77d8d873592e006c242156">NextChunk</a> (<a class="el" href="structdmlc_1_1InputSplit_1_1Blob.html">Blob</a> *out_chunk)=0</td></tr>
<tr class="memdesc:a2e835c07ce77d8d873592e006c242156"><td class="mdescLeft">&#160;</td><td class="mdescRight">get a chunk of memory that can contain multiple records; the caller needs to parse the content of the resulting chunk. For a text file, out_chunk can contain data of multiple lines; for recordio, out_chunk can contain multiple records (including headers). <a href="classdmlc_1_1InputSplit.html#a2e835c07ce77d8d873592e006c242156">More...</a><br /></td></tr>
<tr class="separator:a2e835c07ce77d8d873592e006c242156"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a938da99f6cbaf1a4667bb00c75692e4b"><td class="memItemLeft" align="right" valign="top">virtual bool&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#a938da99f6cbaf1a4667bb00c75692e4b">NextBatch</a> (<a class="el" href="structdmlc_1_1InputSplit_1_1Blob.html">Blob</a> *out_chunk, size_t n_records)</td></tr>
<tr class="memdesc:a938da99f6cbaf1a4667bb00c75692e4b"><td class="mdescLeft">&#160;</td><td class="mdescRight">get a chunk of memory that can contain multiple records, with a hint for how many records are needed; the caller needs to parse the content of the resulting chunk. For a text file, out_chunk can contain data of multiple lines; for recordio, out_chunk can contain multiple records (including headers). <a href="classdmlc_1_1InputSplit.html#a938da99f6cbaf1a4667bb00c75692e4b">More...</a><br /></td></tr>
<tr class="separator:a938da99f6cbaf1a4667bb00c75692e4b"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a00a1f234a44de0950d1011495f41b265"><td class="memItemLeft" align="right" valign="top">virtual&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#a00a1f234a44de0950d1011495f41b265">~InputSplit</a> (void) <a class="el" href="3rdparty_2dmlc-core_2include_2dmlc_2base_8h.html#a80bde16113dc79e7adb7953e006d6b60">DMLC_THROW_EXCEPTION</a></td></tr>
<tr class="memdesc:a00a1f234a44de0950d1011495f41b265"><td class="mdescLeft">&#160;</td><td class="mdescRight">destructor <a href="classdmlc_1_1InputSplit.html#a00a1f234a44de0950d1011495f41b265">More...</a><br /></td></tr>
<tr class="separator:a00a1f234a44de0950d1011495f41b265"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aaa17b8be6b3f1da35ced68379e3e8dbe"><td class="memItemLeft" align="right" valign="top">virtual void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#aaa17b8be6b3f1da35ced68379e3e8dbe">ResetPartition</a> (unsigned part_index, unsigned num_parts)=0</td></tr>
<tr class="memdesc:aaa17b8be6b3f1da35ced68379e3e8dbe"><td class="mdescLeft">&#160;</td><td class="mdescRight">reset the input split to a certain part id. The <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> will then point to the head of the newly specified segment. This feature may not be supported by every implementation of <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a>. <a href="classdmlc_1_1InputSplit.html#aaa17b8be6b3f1da35ced68379e3e8dbe">More...</a><br /></td></tr>
<tr class="separator:aaa17b8be6b3f1da35ced68379e3e8dbe"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-static-methods"></a>
Static Public Member Functions</h2></td></tr>
<tr class="memitem:a118dc34dbd121d5ff80cd904adee4c24"><td class="memItemLeft" align="right" valign="top">static <a class="el" href="classdmlc_1_1InputSplit.html">InputSplit</a> *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#a118dc34dbd121d5ff80cd904adee4c24">Create</a> (const char *uri, unsigned part_index, unsigned num_parts, const char *type)</td></tr>
<tr class="memdesc:a118dc34dbd121d5ff80cd904adee4c24"><td class="mdescLeft">&#160;</td><td class="mdescRight">factory function: create input split given a uri <a href="classdmlc_1_1InputSplit.html#a118dc34dbd121d5ff80cd904adee4c24">More...</a><br /></td></tr>
<tr class="separator:a118dc34dbd121d5ff80cd904adee4c24"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a9b04228dac84268abf46719074700824"><td class="memItemLeft" align="right" valign="top">static <a class="el" href="classdmlc_1_1InputSplit.html">InputSplit</a> *&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="classdmlc_1_1InputSplit.html#a9b04228dac84268abf46719074700824">Create</a> (const char *uri, const char *index_uri, unsigned part_index, unsigned num_parts, const char *type, const bool shuffle=false, const int seed=0, const size_t batch_size=256, const bool recurse_directories=false)</td></tr>
<tr class="memdesc:a9b04228dac84268abf46719074700824"><td class="mdescLeft">&#160;</td><td class="mdescRight">factory function: create input split given a uri for input and index <a href="classdmlc_1_1InputSplit.html#a9b04228dac84268abf46719074700824">More...</a><br /></td></tr>
<tr class="separator:a9b04228dac84268abf46719074700824"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><p>input split: allows reading records from one split of the data, an independent part of a partition that covers the whole dataset </p>
<p>see <a class="el" href="classdmlc_1_1InputSplit.html#a118dc34dbd121d5ff80cd904adee4c24" title="factory function: create input split given a uri">InputSplit::Create</a> for definition of record </p>
</div><h2 class="groupheader">Constructor &amp; Destructor Documentation</h2>
<a id="a00a1f234a44de0950d1011495f41b265"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a00a1f234a44de0950d1011495f41b265">&#9670;&nbsp;</a></span>~InputSplit()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">virtual dmlc::InputSplit::~InputSplit </td>
<td>(</td>
<td class="paramtype">void&#160;</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">virtual</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>destructor </p>
</div>
</div>
<h2 class="groupheader">Member Function Documentation</h2>
<a id="a2f111755a3434097bdd2a765982180a7"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a2f111755a3434097bdd2a765982180a7">&#9670;&nbsp;</a></span>BeforeFirst()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">virtual void dmlc::InputSplit::BeforeFirst </td>
<td>(</td>
<td class="paramtype">void&#160;</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">pure virtual</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>reset the position of <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> to beginning </p>
<p>Implemented in <a class="el" href="classdmlc_1_1InputSplitShuffle.html#af3f6ae9b61f4d18494d4bfbda0fab9c8">dmlc::InputSplitShuffle</a>.</p>
</div>
</div>
<a id="a9b04228dac84268abf46719074700824"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a9b04228dac84268abf46719074700824">&#9670;&nbsp;</a></span>Create() <span class="overload">[1/2]</span></h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">static <a class="el" href="classdmlc_1_1InputSplit.html">InputSplit</a>* dmlc::InputSplit::Create </td>
<td>(</td>
<td class="paramtype">const char *&#160;</td>
<td class="paramname"><em>uri</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const char *&#160;</td>
<td class="paramname"><em>index_uri</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">unsigned&#160;</td>
<td class="paramname"><em>part_index</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">unsigned&#160;</td>
<td class="paramname"><em>num_parts</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const char *&#160;</td>
<td class="paramname"><em>type</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const bool&#160;</td>
<td class="paramname"><em>shuffle</em> = <code>false</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const int&#160;</td>
<td class="paramname"><em>seed</em> = <code>0</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const size_t&#160;</td>
<td class="paramname"><em>batch_size</em> = <code>256</code>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const bool&#160;</td>
<td class="paramname"><em>recurse_directories</em> = <code>false</code>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">static</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>factory function: create input split given a uri for input and index </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">uri</td><td>the uri of the input, can contain hdfs prefix </td></tr>
<tr><td class="paramname">index_uri</td><td>the uri of the index, can contain hdfs prefix </td></tr>
<tr><td class="paramname">part_index</td><td>the part id of current input </td></tr>
<tr><td class="paramname">num_parts</td><td>total number of splits </td></tr>
<tr><td class="paramname">type</td><td>type of record. List of possible types: "text", "recordio", "indexed_recordio"<ul>
<li>"text": text file, each line is treated as a record; the input split will split on '\n' or '\r'</li>
<li>"recordio": binary recordio file, see <a class="el" href="recordio_8h.html" title="recordio that is able to pack binary data into a splittable format, useful to exchange data in binary...">recordio.h</a></li>
<li>"indexed_recordio": binary recordio file with index, see <a class="el" href="recordio_8h.html" title="recordio that is able to pack binary data into a splittable format, useful to exchange data in binary...">recordio.h</a> </li>
</ul>
</td></tr>
<tr><td class="paramname">shuffle</td><td>whether to shuffle the output from the <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a>, supported only by "indexed_recordio" type. Defaults to "false" </td></tr>
<tr><td class="paramname">seed</td><td>random seed to use in conjunction with the "shuffle" option. Defaults to 0 </td></tr>
<tr><td class="paramname">batch_size</td><td>a hint to <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> about the intended number of examples to return per batch. Used only by the "indexed_recordio" type </td></tr>
<tr><td class="paramname">recurse_directories</td><td>whether to recursively traverse directories </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>a new input split </dd></dl>
<dl class="section see"><dt>See also</dt><dd>InputSplit::Type </dd></dl>
</div>
</div>
<a id="a118dc34dbd121d5ff80cd904adee4c24"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a118dc34dbd121d5ff80cd904adee4c24">&#9670;&nbsp;</a></span>Create() <span class="overload">[2/2]</span></h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">static <a class="el" href="classdmlc_1_1InputSplit.html">InputSplit</a>* dmlc::InputSplit::Create </td>
<td>(</td>
<td class="paramtype">const char *&#160;</td>
<td class="paramname"><em>uri</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">unsigned&#160;</td>
<td class="paramname"><em>part_index</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">unsigned&#160;</td>
<td class="paramname"><em>num_parts</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const char *&#160;</td>
<td class="paramname"><em>type</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">static</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>factory function: create input split given a uri </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">uri</td><td>the uri of the input, can contain hdfs prefix </td></tr>
<tr><td class="paramname">part_index</td><td>the part id of current input </td></tr>
<tr><td class="paramname">num_parts</td><td>total number of splits </td></tr>
<tr><td class="paramname">type</td><td>type of record. List of possible types: "text", "recordio", "indexed_recordio"<ul>
<li>"text": text file, each line is treated as a record; the input split will split on '\n' or '\r'</li>
<li>"recordio": binary recordio file, see <a class="el" href="recordio_8h.html" title="recordio that is able to pack binary data into a splittable format, useful to exchange data in binary...">recordio.h</a></li>
<li>"indexed_recordio": binary recordio file with index, see <a class="el" href="recordio_8h.html" title="recordio that is able to pack binary data into a splittable format, useful to exchange data in binary...">recordio.h</a> </li>
</ul>
</td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>a new input split </dd></dl>
<dl class="section see"><dt>See also</dt><dd>InputSplit::Type </dd></dl>
</div>
</div>
<a id="af2fb3e0f9acd6bae599a398eb0758044"></a>
<h2 class="memtitle"><span class="permalink"><a href="#af2fb3e0f9acd6bae599a398eb0758044">&#9670;&nbsp;</a></span>GetTotalSize()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">virtual size_t dmlc::InputSplit::GetTotalSize </td>
<td>(</td>
<td class="paramtype">void&#160;</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">pure virtual</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>get the total size of the <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> </p>
<p>Implemented in <a class="el" href="classdmlc_1_1InputSplitShuffle.html#a768b224c5dad3d7cef5af3c5cbd08263">dmlc::InputSplitShuffle</a>.</p>
</div>
</div>
<a id="a58c7f48bec6bd1a880e59c4bbfdf4417"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a58c7f48bec6bd1a880e59c4bbfdf4417">&#9670;&nbsp;</a></span>HintChunkSize()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">virtual void dmlc::InputSplit::HintChunkSize </td>
<td>(</td>
<td class="paramtype">size_t&#160;</td>
<td class="paramname"><em>chunk_size</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">virtual</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>hint to the InputSplit how large a chunk it should return when implementing NextChunk; this is only a hint and may not be enforced, but <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> will try to adjust its internal buffer size to the hinted value </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">chunk_size</td><td>the chunk size </td></tr>
</table>
</dd>
</dl>
<p>Reimplemented in <a class="el" href="classdmlc_1_1InputSplitShuffle.html#a491c4e5c44919e24d32403caeecdf468">dmlc::InputSplitShuffle</a>.</p>
</div>
</div>
<a id="a938da99f6cbaf1a4667bb00c75692e4b"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a938da99f6cbaf1a4667bb00c75692e4b">&#9670;&nbsp;</a></span>NextBatch()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">virtual bool dmlc::InputSplit::NextBatch </td>
<td>(</td>
<td class="paramtype"><a class="el" href="structdmlc_1_1InputSplit_1_1Blob.html">Blob</a> *&#160;</td>
<td class="paramname"><em>out_chunk</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">size_t&#160;</td>
<td class="paramname"><em>n_records</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span><span class="mlabel">virtual</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>get a chunk of memory that can contain multiple records, with a hint for how many records are needed; the caller needs to parse the content of the resulting chunk. For a text file, out_chunk can contain data of multiple lines; for recordio, out_chunk can contain multiple records (including headers). </p>
<p>This function ensures there won't be a partial record in the chunk. The caller can modify the memory content of out_chunk; the memory is valid until the next call to NextRecord, NextChunk or NextBatch.</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">out_chunk</td><td>used to store the result </td></tr>
<tr><td class="paramname">n_records</td><td>used as a hint for how many records should be returned, may be ignored </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>true if we can successfully get the next record, false if we reached the end of the split </dd></dl>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="classdmlc_1_1InputSplit.html#a118dc34dbd121d5ff80cd904adee4c24" title="factory function: create input split given a uri">InputSplit::Create</a> for definition of record </dd>
<dd>
<a class="el" href="classdmlc_1_1RecordIOChunkReader.html" title="reader of binary recordio from Blob returned by InputSplit This class divides the blob into several i...">RecordIOChunkReader</a> to parse recordio content from out_chunk </dd></dl>
</div>
</div>
<a id="a2e835c07ce77d8d873592e006c242156"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a2e835c07ce77d8d873592e006c242156">&#9670;&nbsp;</a></span>NextChunk()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">virtual bool dmlc::InputSplit::NextChunk </td>
<td>(</td>
<td class="paramtype"><a class="el" href="structdmlc_1_1InputSplit_1_1Blob.html">Blob</a> *&#160;</td>
<td class="paramname"><em>out_chunk</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">pure virtual</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>get a chunk of memory that can contain multiple records; the caller needs to parse the content of the resulting chunk. For a text file, out_chunk can contain data of multiple lines; for recordio, out_chunk can contain multiple records (including headers). </p>
<p>This function ensures there won't be a partial record in the chunk. The caller can modify the memory content of out_chunk; the memory is valid until the next call to NextRecord, NextChunk or NextBatch.</p>
<p>Usually NextRecord is sufficient; NextChunk can be used by some multi-threaded parsers to parse the input content.</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">out_chunk</td><td>used to store the result </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>true if we can successfully get the next record, false if we reached the end of the split </dd></dl>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="classdmlc_1_1InputSplit.html#a118dc34dbd121d5ff80cd904adee4c24" title="factory function: create input split given a uri">InputSplit::Create</a> for definition of record </dd>
<dd>
<a class="el" href="classdmlc_1_1RecordIOChunkReader.html" title="reader of binary recordio from Blob returned by InputSplit This class divides the blob into several i...">RecordIOChunkReader</a> to parse recordio content from out_chunk </dd></dl>
<p>Implemented in <a class="el" href="classdmlc_1_1InputSplitShuffle.html#a88433138b96bc4c00d3e3bc8f2856025">dmlc::InputSplitShuffle</a>.</p>
</div>
</div>
<a id="a0f124b34e62d9319dca39fa42e5a59f7"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a0f124b34e62d9319dca39fa42e5a59f7">&#9670;&nbsp;</a></span>NextRecord()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">virtual bool dmlc::InputSplit::NextRecord </td>
<td>(</td>
<td class="paramtype"><a class="el" href="structdmlc_1_1InputSplit_1_1Blob.html">Blob</a> *&#160;</td>
<td class="paramname"><em>out_rec</em></td><td>)</td>
<td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">pure virtual</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>get the next record; the returned value is valid until the next call to NextRecord, NextChunk or NextBatch. The caller can modify the memory content of out_rec. </p>
<p>For text, out_rec contains a single line. For recordio, out_rec contains the content of one record (with the header stripped).</p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">out_rec</td><td>used to store the result </td></tr>
</table>
</dd>
</dl>
<dl class="section return"><dt>Returns</dt><dd>true if we can successfully get the next record, false if we reached the end of the split </dd></dl>
<dl class="section see"><dt>See also</dt><dd><a class="el" href="classdmlc_1_1InputSplit.html#a118dc34dbd121d5ff80cd904adee4c24" title="factory function: create input split given a uri">InputSplit::Create</a> for definition of record </dd></dl>
<p>Implemented in <a class="el" href="classdmlc_1_1InputSplitShuffle.html#ab8812c94aa95744a475a1549b2b6f6a6">dmlc::InputSplitShuffle</a>.</p>
</div>
</div>
<a id="aaa17b8be6b3f1da35ced68379e3e8dbe"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aaa17b8be6b3f1da35ced68379e3e8dbe">&#9670;&nbsp;</a></span>ResetPartition()</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">virtual void dmlc::InputSplit::ResetPartition </td>
<td>(</td>
<td class="paramtype">unsigned&#160;</td>
<td class="paramname"><em>part_index</em>, </td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">unsigned&#160;</td>
<td class="paramname"><em>num_parts</em>&#160;</td>
</tr>
<tr>
<td></td>
<td>)</td>
<td></td><td></td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">pure virtual</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>reset the input split to a certain part id. The <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a> will then point to the head of the newly specified segment. This feature may not be supported by every implementation of <a class="el" href="classdmlc_1_1InputSplit.html" title="input split creates that allows reading of records from split of data, independent part that covers a...">InputSplit</a>. </p>
<dl class="params"><dt>Parameters</dt><dd>
<table class="params">
<tr><td class="paramname">part_index</td><td>The part id of the new input. </td></tr>
<tr><td class="paramname">num_parts</td><td>The total number of parts. </td></tr>
</table>
</dd>
</dl>
<p>Implemented in <a class="el" href="classdmlc_1_1InputSplitShuffle.html#a2e3056434b2605d42f935b242d0e4a3d">dmlc::InputSplitShuffle</a>.</p>
</div>
</div>
<hr/>The documentation for this class was generated from the following file:<ul>
<li>/work/mxnet/3rdparty/dmlc-core/include/dmlc/<a class="el" href="3rdparty_2dmlc-core_2include_2dmlc_2io_8h_source.html">io.h</a></li>
</ul>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated on Thu Jan 5 2023 03:47:40 for mxnet by &#160;<a href="https://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.17
</small></address>
</body>
</html>