blob: 211c862f3cbf2d951981e694856a41b09de9bba2 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.3"/>
<title>Lucene.Net: Lucene.Net.Index.DocumentsWriter Class Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
$(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<td id="projectlogo"><img alt="Logo" src="lucene-net-icon-128x128.png"/></td>
<td style="padding-left: 0.5em;">
<div id="projectname">Lucene.Net
&#160;<span id="projectnumber">3.0.3</span>
</div>
<div id="projectbrief">Lucene.Net is a .NET port of the Java Lucene Indexing Library</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.3 -->
<script type="text/javascript">
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
<div id="navrow1" class="tabs">
<ul class="tablist">
<li><a href="Index.html"><span>Main&#160;Page</span></a></li>
<li><a href="namespaces.html"><span>Packages</span></a></li>
<li class="current"><a href="annotated.html"><span>Classes</span></a></li>
<li><a href="files.html"><span>Files</span></a></li>
<li>
<div id="MSearchBox" class="MSearchBoxInactive">
<span class="left">
<img id="MSearchSelect" src="search/mag_sel.png"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
alt=""/>
<input type="text" id="MSearchField" value="Search" accesskey="S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
</span><span class="right">
<a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
</span>
</div>
</li>
</ul>
</div>
<div id="navrow2" class="tabs2">
<ul class="tablist">
<li><a href="annotated.html"><span>Class&#160;List</span></a></li>
<li><a href="classes.html"><span>Class&#160;Index</span></a></li>
<li><a href="hierarchy.html"><span>Class&#160;Hierarchy</span></a></li>
<li><a href="functions.html"><span>Class&#160;Members</span></a></li>
</ul>
</div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark">&#160;</span>Properties</a></div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<iframe src="javascript:void(0)" frameborder="0"
name="MSearchResults" id="MSearchResults">
</iframe>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="namespace_lucene.html">Lucene</a></li><li class="navelem"><a class="el" href="namespace_lucene_1_1_net.html">Net</a></li><li class="navelem"><a class="el" href="namespace_lucene_1_1_net_1_1_index.html">Index</a></li><li class="navelem"><a class="el" href="class_lucene_1_1_net_1_1_index_1_1_documents_writer.html">DocumentsWriter</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#nested-classes">Classes</a> &#124;
<a href="#pub-methods">Public Member Functions</a> &#124;
<a href="#properties">Properties</a> &#124;
<a href="class_lucene_1_1_net_1_1_index_1_1_documents_writer-members.html">List of all members</a> </div>
<div class="headertitle">
<div class="title">Lucene.Net.Index.DocumentsWriter Class Reference</div> </div>
</div><!--header-->
<div class="contents">
<p>This class accepts multiple added documents and directly writes a single segment file. It does this more efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on those segments.
<a href="class_lucene_1_1_net_1_1_index_1_1_documents_writer.html#details">More...</a></p>
<p>Inherits IDisposable.</p>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="nested-classes"></a>
Classes</h2></td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><b>AnonymousClassIndexingChain</b></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><b>ByteBlockAllocator</b></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><b>DocState</b></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><b>DocWriter</b></td></tr>
<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">Consumer returns this on each doc. This holds any state that must be flushed synchronized "in docID
order". We gather these and flush them in order. <br/></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><b>IndexingChain</b></td></tr>
<tr class="memdesc:"><td class="mdescLeft">&#160;</td><td class="mdescRight">The IndexingChain must define the GetChain(DocumentsWriter) method which returns the <a class="el" href="class_lucene_1_1_net_1_1_index_1_1_doc_consumer.html">DocConsumer</a> that the <a class="el" href="class_lucene_1_1_net_1_1_index_1_1_documents_writer.html" title="This class accepts multiple added documents and directly writes a single segment file. It does this more efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on those segments.">DocumentsWriter</a> calls to process the documents. <br/></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><b>PerDocBuffer</b></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><b>SkipDocWriter</b></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">class &#160;</td><td class="memItemRight" valign="bottom"><b>WaitQueue</b></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="pub-methods"></a>
Public Member Functions</h2></td></tr>
<tr class="memitem:a39091cbfa691faa6dc58b71d389053ee"><td class="memItemLeft" align="right" valign="top">void&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="class_lucene_1_1_net_1_1_index_1_1_documents_writer.html#a39091cbfa691faa6dc58b71d389053ee">Dispose</a> ()</td></tr>
<tr class="separator:a39091cbfa691faa6dc58b71d389053ee"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a name="properties"></a>
Properties</h2></td></tr>
<tr class="memitem:ad5d9c9b5f1725a2ddfdff02137989b58"><td class="memItemLeft" align="right" valign="top">static int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="class_lucene_1_1_net_1_1_index_1_1_documents_writer.html#ad5d9c9b5f1725a2ddfdff02137989b58">BYTE_BLOCK_SIZE_ForNUnit</a><code> [get]</code></td></tr>
<tr class="separator:ad5d9c9b5f1725a2ddfdff02137989b58"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a349f0c0d45725034b48ba5cab9876ede"><td class="memItemLeft" align="right" valign="top">static int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="class_lucene_1_1_net_1_1_index_1_1_documents_writer.html#a349f0c0d45725034b48ba5cab9876ede">CHAR_BLOCK_SIZE_ForNUnit</a><code> [get]</code></td></tr>
<tr class="separator:a349f0c0d45725034b48ba5cab9876ede"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
<a name="details" id="details"></a><h2 class="groupheader">Detailed Description</h2>
<div class="textblock"><p>This class accepts multiple added documents and directly writes a single segment file. It does this more efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on those segments. </p>
<p>Each added document is passed to the <a class="el" href="class_lucene_1_1_net_1_1_index_1_1_doc_consumer.html">DocConsumer</a>, which in turn processes the document and interacts with other consumers in the indexing chain. Certain consumers, like <a class="el" href="class_lucene_1_1_net_1_1_index_1_1_stored_fields_writer.html" title="This is a DocFieldConsumer that writes stored fields. ">StoredFieldsWriter</a> and <a class="el" href="class_lucene_1_1_net_1_1_index_1_1_term_vectors_terms_writer.html">TermVectorsTermsWriter</a> , digest a document and immediately write bytes to the "doc store" files (ie, they do not consume RAM per document, except while they are processing the document).</p>
<p>Other consumers, eg <a class="el" href="class_lucene_1_1_net_1_1_index_1_1_freq_prox_terms_writer.html">FreqProxTermsWriter</a> and <a class="el" href="class_lucene_1_1_net_1_1_index_1_1_norms_writer.html" title="Writes norms. Each thread X field accumulates the norms for the doc/fields it saw, then the flush method below merges all of these together into a single _X.nrm file. ">NormsWriter</a>, buffer bytes in RAM and flush only when a new segment is produced. Once we have used our allowed RAM buffer, or the number of added docs is large enough (in the case we are flushing by doc count instead of RAM usage), we create a real segment and flush it to the Directory.</p>
<p>Threads:</p>
<p>Multiple threads are allowed into addDocument at once. There is an initial synchronized call to getThreadState which allocates a ThreadState for this thread. The same thread will get the same ThreadState over time (thread affinity) so that if there are consistent patterns (for example each thread is indexing a different content source) then we make better use of RAM. Then processDocument is called on that ThreadState without synchronization (most of the "heavy lifting" is in this call). Finally the synchronized "finishDocument" is called to flush changes to the directory.</p>
<p>When flush is called by <a class="el" href="class_lucene_1_1_net_1_1_index_1_1_index_writer.html" title="An IndexWriter creates and maintains an index. The create argument to the constructor determines whet...">IndexWriter</a> we forcefully idle all threads and flush only once they are all idle. This means you can call flush with a given thread even while other threads are actively adding/deleting documents.</p>
<p>Exceptions:</p>
<p>Because this class directly updates in-memory posting lists, and flushes stored fields and term vectors directly to files in the directory, there are certain limited times when an exception can corrupt this state. For example, a disk full while flushing stored fields leaves this file in a corrupt state. Or, an OOM exception while appending to the in-memory posting lists can corrupt that posting list. We call such exceptions "aborting exceptions". In these cases we must call abort() to discard all docs added since the last flush.</p>
<p>All other exceptions ("non-aborting exceptions") can still partially update the index structures. These updates are consistent, but, they represent only a part of the document seen up until the exception was hit. When this happens, we immediately mark the document as deleted so that the document is always atomically ("all
or none") added to the index. </p>
<p>Definition at line <a class="el" href="_documents_writer_8cs_source.html#l00103">103</a> of file <a class="el" href="_documents_writer_8cs_source.html">DocumentsWriter.cs</a>.</p>
</div><h2 class="groupheader">Member Function Documentation</h2>
<a class="anchor" id="a39091cbfa691faa6dc58b71d389053ee"></a>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">void Lucene.Net.Index.DocumentsWriter.Dispose </td>
<td>(</td>
<td class="paramname"></td><td>)</td>
<td></td>
</tr>
</table>
</div><div class="memdoc">
<p>Definition at line <a class="el" href="_documents_writer_8cs_source.html#l00868">868</a> of file <a class="el" href="_documents_writer_8cs_source.html">DocumentsWriter.cs</a>.</p>
</div>
</div>
<h2 class="groupheader">Property Documentation</h2>
<a class="anchor" id="ad5d9c9b5f1725a2ddfdff02137989b58"></a>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int Lucene.Net.Index.DocumentsWriter.BYTE_BLOCK_SIZE_ForNUnit</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">static</span><span class="mlabel">get</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Definition at line <a class="el" href="_documents_writer_8cs_source.html#l02066">2066</a> of file <a class="el" href="_documents_writer_8cs_source.html">DocumentsWriter.cs</a>.</p>
</div>
</div>
<a class="anchor" id="a349f0c0d45725034b48ba5cab9876ede"></a>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">int Lucene.Net.Index.DocumentsWriter.CHAR_BLOCK_SIZE_ForNUnit</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">static</span><span class="mlabel">get</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
<p>Definition at line <a class="el" href="_documents_writer_8cs_source.html#l02071">2071</a> of file <a class="el" href="_documents_writer_8cs_source.html">DocumentsWriter.cs</a>.</p>
</div>
</div>
<hr/>The documentation for this class was generated from the following file:<ul>
<li>core/Index/<a class="el" href="_documents_writer_8cs_source.html">DocumentsWriter.cs</a></li>
</ul>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated on Thu Jan 3 2013 02:12:50 for Lucene.Net by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.3
</small></address>
</body>
</html>