blob: 6b8fec97e4e37ced11edc3a0489f90f5d1e90b05 [file] [log] [blame]
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <!-- Document metadata. The media type is "text/html" (the registered type); the charset declaration is unchanged. -->
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
  <meta http-equiv="X-UA-Compatible" content="IE=9"/>
  <meta name="generator" content="Doxygen 1.8.3"/>
  <title>Lucene.Net: contrib/Queries/DuplicateFilter.cs Source File</title>
  <!-- Stylesheets and scripts stay in their original order so the cascade and script load order are unaffected. -->
  <link href="tabs.css" rel="stylesheet" type="text/css"/>
  <script type="text/javascript" src="jquery.js"></script>
  <script type="text/javascript" src="dynsections.js"></script>
  <link href="search/search.css" rel="stylesheet" type="text/css"/>
  <script type="text/javascript" src="search/search.js"></script>
  <script type="text/javascript">
    // Runs once the DOM is ready, i.e. after the body script has created the global searchBox;
    // selects item 0 of the search filter menu (the "All" entry).
    $(document).ready(function() { searchBox.OnSelectItem(0); });
  </script>
  <link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
  <!-- Project banner: a logo cell followed by a cell holding the project name, version and brief description. -->
  <table cellspacing="0" cellpadding="0">
    <tbody>
      <tr style="height: 56px;">
        <td id="projectlogo"><img src="lucene-net-icon-128x128.png" alt="Logo"/></td>
        <td style="padding-left: 0.5em;">
          <div id="projectname">Lucene.Net
            &#160;<span id="projectnumber">3.0.3</span>
          </div>
          <div id="projectbrief">Lucene.Net is a .NET port of the Java Lucene Indexing Library</div>
        </td>
      </tr>
    </tbody>
  </table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.3 -->
<script type="text/javascript">
  // Global search controller. The inline event handlers on this page call its methods
  // through the name "searchBox", so the variable must remain global and keep this name.
  var searchBox = new SearchBox('searchBox', 'search', false, 'Search');
</script>
<div id="navrow1" class="tabs">
  <!--
    Top-level navigation tabs; the last list item hosts the search box.
    Accessibility: the search field carries a title because it has no label element,
    and the close image has alt text so its enclosing link has an accessible name.
  -->
  <ul class="tablist">
    <li><a href="Index.html"><span>Main&#160;Page</span></a></li>
    <li><a href="namespaces.html"><span>Packages</span></a></li>
    <li><a href="annotated.html"><span>Classes</span></a></li>
    <li class="current"><a href="files.html"><span>Files</span></a></li>
    <li>
      <div id="MSearchBox" class="MSearchBoxInactive">
        <span class="left">
          <img id="MSearchSelect" src="search/mag_sel.png"
               onmouseover="return searchBox.OnSearchSelectShow()"
               onmouseout="return searchBox.OnSearchSelectHide()"
               alt=""/>
          <input type="text" id="MSearchField" value="Search" accesskey="S" title="Search"
                 onfocus="searchBox.OnSearchFieldFocus(true)"
                 onblur="searchBox.OnSearchFieldFocus(false)"
                 onkeyup="searchBox.OnSearchFieldChange(event)"/>
        </span><span class="right">
          <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt="Close search results"/></a>
        </span>
      </div>
    </li>
  </ul>
</div>
<div id="navrow2" class="tabs2">
  <!-- Second-level tab row: links to the file list and the file members index. -->
  <ul class="tablist">
    <li><a href="files.html"><span>File&#160;List</span></a></li>
    <li><a href="globals.html"><span>File&#160;Members</span></a></li>
  </ul>
</div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
  <!--
    One entry per search category; each entry passes its index to searchBox.OnSelectItem.
    NOTE(review): every SelectionMark span is kept immediately after its opening anchor tag;
    search.js presumably reaches the mark through the anchor's first child. Confirm before reformatting further.
  -->
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a>
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Classes</a>
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Namespaces</a>
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Files</a>
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Functions</a>
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Variables</a>
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Typedefs</a>
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Enumerations</a>
  <a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark">&#160;</span>Properties</a>
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
  <!-- Container for the search results frame. The title attribute names the frame for assistive technology. -->
  <iframe src="javascript:void(0)" frameborder="0"
          name="MSearchResults" id="MSearchResults" title="Search results">
  </iframe>
</div>
<div id="nav-path" class="navpath">
  <!-- Breadcrumb trail of the directories containing this file: contrib, then Queries. -->
  <ul>
    <li class="navelem"><a class="el" href="dir_3a8d697be1e2feab9f01acc78e9570fb.html">contrib</a></li>
    <li class="navelem"><a class="el" href="dir_5b88ab0c8cbeaf4a220fc00c661714a3.html">Queries</a></li>
  </ul>
</div>
</div><!-- top -->
<div class="header">
  <!-- Page heading: the name of the source file listed on this page. -->
  <div class="headertitle">
    <div class="title">DuplicateFilter.cs</div>
  </div>
</div><!--header-->
<div class="contents">
<a href="_duplicate_filter_8cs.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a name="l00001"></a><span class="lineno"> 1</span>&#160;<span class="comment">/*</span></div>
<div class="line"><a name="l00002"></a><span class="lineno"> 2</span>&#160;<span class="comment"> * Licensed to the Apache Software Foundation (ASF) under one or more</span></div>
<div class="line"><a name="l00003"></a><span class="lineno"> 3</span>&#160;<span class="comment"> * contributor license agreements. See the NOTICE file distributed with</span></div>
<div class="line"><a name="l00004"></a><span class="lineno"> 4</span>&#160;<span class="comment"> * this work for additional information regarding copyright ownership.</span></div>
<div class="line"><a name="l00005"></a><span class="lineno"> 5</span>&#160;<span class="comment"> * The ASF licenses this file to You under the Apache License, Version 2.0</span></div>
<div class="line"><a name="l00006"></a><span class="lineno"> 6</span>&#160;<span class="comment"> * (the &quot;License&quot;); you may not use this file except in compliance with</span></div>
<div class="line"><a name="l00007"></a><span class="lineno"> 7</span>&#160;<span class="comment"> * the License. You may obtain a copy of the License at</span></div>
<div class="line"><a name="l00008"></a><span class="lineno"> 8</span>&#160;<span class="comment"> *</span></div>
<div class="line"><a name="l00009"></a><span class="lineno"> 9</span>&#160;<span class="comment"> * http://www.apache.org/licenses/LICENSE-2.0</span></div>
<div class="line"><a name="l00010"></a><span class="lineno"> 10</span>&#160;<span class="comment"> *</span></div>
<div class="line"><a name="l00011"></a><span class="lineno"> 11</span>&#160;<span class="comment"> * Unless required by applicable law or agreed to in writing, software</span></div>
<div class="line"><a name="l00012"></a><span class="lineno"> 12</span>&#160;<span class="comment"> * distributed under the License is distributed on an &quot;AS IS&quot; BASIS,</span></div>
<div class="line"><a name="l00013"></a><span class="lineno"> 13</span>&#160;<span class="comment"> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.</span></div>
<div class="line"><a name="l00014"></a><span class="lineno"> 14</span>&#160;<span class="comment"> * See the License for the specific language governing permissions and</span></div>
<div class="line"><a name="l00015"></a><span class="lineno"> 15</span>&#160;<span class="comment"> * limitations under the License.</span></div>
<div class="line"><a name="l00016"></a><span class="lineno"> 16</span>&#160;<span class="comment"> */</span></div>
<div class="line"><a name="l00017"></a><span class="lineno"> 17</span>&#160;</div>
<div class="line"><a name="l00018"></a><span class="lineno"> 18</span>&#160;<span class="keyword">using</span> System;</div>
<div class="line"><a name="l00019"></a><span class="lineno"> 19</span>&#160;<span class="keyword">using</span> System.Collections.Generic;</div>
<div class="line"><a name="l00020"></a><span class="lineno"> 20</span>&#160;<span class="keyword">using</span> System.Linq;</div>
<div class="line"><a name="l00021"></a><span class="lineno"> 21</span>&#160;<span class="keyword">using</span> System.Text;</div>
<div class="line"><a name="l00022"></a><span class="lineno"> 22</span>&#160;</div>
<div class="line"><a name="l00023"></a><span class="lineno"> 23</span>&#160;<span class="keyword">using</span> Lucene.Net.Search;</div>
<div class="line"><a name="l00024"></a><span class="lineno"> 24</span>&#160;<span class="keyword">using</span> Lucene.Net.Index;</div>
<div class="line"><a name="l00025"></a><span class="lineno"> 25</span>&#160;<span class="keyword">using</span> Lucene.Net.Util;</div>
<div class="line"><a name="l00026"></a><span class="lineno"> 26</span>&#160;</div>
<div class="line"><a name="l00027"></a><span class="lineno"> 27</span>&#160;<span class="keyword">namespace </span>Lucene.Net.Search</div>
<div class="line"><a name="l00028"></a><span class="lineno"> 28</span>&#160;{</div>
<div class="line"><a name="l00029"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html"> 29</a></span>&#160; <span class="keyword">public</span> <span class="keyword">class </span><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html">DuplicateFilter</a> : <a class="code" href="class_lucene_1_1_net_1_1_search_1_1_filter.html" title="Abstract base class for restricting which documents may be returned during searching.">Filter</a></div>
<div class="line"><a name="l00030"></a><span class="lineno"> 30</span>&#160; {</div>
<div class="line"><a name="l00031"></a><span class="lineno"> 31</span>&#160; String fieldName;</div>
<div class="line"><a name="l00032"></a><span class="lineno"> 32</span>&#160;</div>
<div class="line"><a name="l00033"></a><span class="lineno"> 33</span>&#160; <span class="comment">/*</span></div>
<div class="line"><a name="l00034"></a><span class="lineno"> 34</span>&#160;<span class="comment"> * KeepMode determines which document id to consider as the master, all others being </span></div>
<div class="line"><a name="l00035"></a><span class="lineno"> 35</span>&#160;<span class="comment"> * identified as duplicates. Selecting the &quot;first occurrence&quot; can potentially save on IO.</span></div>
<div class="line"><a name="l00036"></a><span class="lineno"> 36</span>&#160;<span class="comment"> */</span></div>
<div class="line"><a name="l00037"></a><span class="lineno"> 37</span>&#160; <span class="keywordtype">int</span> keepMode = KM_USE_FIRST_OCCURRENCE;</div>
<div class="line"><a name="l00038"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a9f3e5f8696a70f0e07992b2aa75cf2fd"> 38</a></span>&#160; <span class="keyword">public</span> <span class="keyword">static</span> <span class="keywordtype">int</span> KM_USE_FIRST_OCCURRENCE = 1;</div>
<div class="line"><a name="l00039"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a8f2af9aa9abcecae2d8d9e7ec2d703b9"> 39</a></span>&#160; <span class="keyword">public</span> <span class="keyword">static</span> <span class="keywordtype">int</span> KM_USE_LAST_OCCURRENCE = 2;</div>
<div class="line"><a name="l00040"></a><span class="lineno"> 40</span>&#160;</div>
<div class="line"><a name="l00041"></a><span class="lineno"> 41</span>&#160; <span class="comment">/*</span></div>
<div class="line"><a name="l00042"></a><span class="lineno"> 42</span>&#160;<span class="comment"> * &quot;Full&quot; processing mode starts by setting all bits to false and only setting bits</span></div>
<div class="line"><a name="l00043"></a><span class="lineno"> 43</span>&#160;<span class="comment"> * for documents that contain the given field and are identified as none-duplicates. </span></div>
<div class="line"><a name="l00044"></a><span class="lineno"> 44</span>&#160;<span class="comment"></span></div>
<div class="line"><a name="l00045"></a><span class="lineno"> 45</span>&#160;<span class="comment"> * &quot;Fast&quot; processing sets all bits to true then unsets all duplicate docs found for the</span></div>
<div class="line"><a name="l00046"></a><span class="lineno"> 46</span>&#160;<span class="comment"> * given field. This approach avoids the need to read TermDocs for terms that are seen </span></div>
<div class="line"><a name="l00047"></a><span class="lineno"> 47</span>&#160;<span class="comment"> * to have a document frequency of exactly &quot;1&quot; (i.e. no duplicates). While a potentially </span></div>
<div class="line"><a name="l00048"></a><span class="lineno"> 48</span>&#160;<span class="comment"> * faster approach , the downside is that bitsets produced will include bits set for </span></div>
<div class="line"><a name="l00049"></a><span class="lineno"> 49</span>&#160;<span class="comment"> * documents that do not actually contain the field given.</span></div>
<div class="line"><a name="l00050"></a><span class="lineno"> 50</span>&#160;<span class="comment"> * </span></div>
<div class="line"><a name="l00051"></a><span class="lineno"> 51</span>&#160;<span class="comment"> */</span></div>
<div class="line"><a name="l00052"></a><span class="lineno"> 52</span>&#160; <span class="keywordtype">int</span> processingMode = PM_FULL_VALIDATION;</div>
<div class="line"><a name="l00053"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a26bf4af2f95fe0fc0d203752b5d553f5"> 53</a></span>&#160; <span class="keyword">public</span> <span class="keyword">static</span> <span class="keywordtype">int</span> PM_FULL_VALIDATION = 1;</div>
<div class="line"><a name="l00054"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a711b63f630fb4985749edfffbdd687c7"> 54</a></span>&#160; <span class="keyword">public</span> <span class="keyword">static</span> <span class="keywordtype">int</span> PM_FAST_INVALIDATION = 2;</div>
<div class="line"><a name="l00055"></a><span class="lineno"> 55</span>&#160;</div>
<div class="line"><a name="l00056"></a><span class="lineno"> 56</span>&#160;</div>
<div class="line"><a name="l00057"></a><span class="lineno"> 57</span>&#160;</div>
<div class="line"><a name="l00058"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a33ab23e44601bd1ed7dc74a768da6795"> 58</a></span>&#160; <span class="keyword">public</span> <a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html">DuplicateFilter</a>(String fieldName) : this(fieldName, KM_USE_LAST_OCCURRENCE, PM_FULL_VALIDATION)</div>
<div class="line"><a name="l00059"></a><span class="lineno"> 59</span>&#160; {</div>
<div class="line"><a name="l00060"></a><span class="lineno"> 60</span>&#160; }</div>
<div class="line"><a name="l00061"></a><span class="lineno"> 61</span>&#160;</div>
<div class="line"><a name="l00062"></a><span class="lineno"> 62</span>&#160;</div>
<div class="line"><a name="l00063"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a7ef0d3a3d80e4c38d487e3f0d0267392"> 63</a></span>&#160; <span class="keyword">public</span> <a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html">DuplicateFilter</a>(String fieldName, <span class="keywordtype">int</span> keepMode, <span class="keywordtype">int</span> processingMode)</div>
<div class="line"><a name="l00064"></a><span class="lineno"> 64</span>&#160; {</div>
<div class="line"><a name="l00065"></a><span class="lineno"> 65</span>&#160; this.fieldName = fieldName;</div>
<div class="line"><a name="l00066"></a><span class="lineno"> 66</span>&#160; this.keepMode = keepMode;</div>
<div class="line"><a name="l00067"></a><span class="lineno"> 67</span>&#160; this.processingMode = processingMode;</div>
<div class="line"><a name="l00068"></a><span class="lineno"> 68</span>&#160; }</div>
<div class="line"><a name="l00069"></a><span class="lineno"> 69</span>&#160;</div>
<div class="line"><a name="l00070"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a78f54e70ad16b1fcf38c0fc4428a6175"> 70</a></span>&#160; <span class="keyword">public</span> <span class="keyword">override</span> <a class="code" href="class_lucene_1_1_net_1_1_search_1_1_doc_id_set.html" title="A DocIdSet contains a set of doc ids. Implementing classes must only implement Iterator to provide ac...">DocIdSet</a> GetDocIdSet(<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html" title="IndexReader is an abstract class, providing an interface for accessing an index. Search of an index i...">IndexReader</a> reader)</div>
<div class="line"><a name="l00071"></a><span class="lineno"> 71</span>&#160; {</div>
<div class="line"><a name="l00072"></a><span class="lineno"> 72</span>&#160; <span class="keywordflow">if</span> (processingMode == PM_FAST_INVALIDATION)</div>
<div class="line"><a name="l00073"></a><span class="lineno"> 73</span>&#160; {</div>
<div class="line"><a name="l00074"></a><span class="lineno"> 74</span>&#160; <span class="keywordflow">return</span> FastBits(reader);</div>
<div class="line"><a name="l00075"></a><span class="lineno"> 75</span>&#160; }</div>
<div class="line"><a name="l00076"></a><span class="lineno"> 76</span>&#160; <span class="keywordflow">else</span></div>
<div class="line"><a name="l00077"></a><span class="lineno"> 77</span>&#160; {</div>
<div class="line"><a name="l00078"></a><span class="lineno"> 78</span>&#160; <span class="keywordflow">return</span> CorrectBits(reader);</div>
<div class="line"><a name="l00079"></a><span class="lineno"> 79</span>&#160; }</div>
<div class="line"><a name="l00080"></a><span class="lineno"> 80</span>&#160; }</div>
<div class="line"><a name="l00081"></a><span class="lineno"> 81</span>&#160;</div>
<div class="line"><a name="l00082"></a><span class="lineno"> 82</span>&#160; <span class="keyword">private</span> <a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html" title="An &quot;open&quot; BitSet implementation that allows direct access to the array of words storing the bits...">OpenBitSet</a> CorrectBits(<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html" title="IndexReader is an abstract class, providing an interface for accessing an index. Search of an index i...">IndexReader</a> reader)</div>
<div class="line"><a name="l00083"></a><span class="lineno"> 83</span>&#160; {</div>
<div class="line"><a name="l00084"></a><span class="lineno"> 84</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html" title="An &quot;open&quot; BitSet implementation that allows direct access to the array of words storing the bits...">OpenBitSet</a> bits = <span class="keyword">new</span> <a class="code" href="_field_cache_terms_filter_8cs.html#a39e4630d91e4f7fc118eb51aa4d221a0">OpenBitSet</a>(reader.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html#ac7694e685dde51863b8974da924fa439" title="Returns one greater than the largest possible document number. This may be used to, e.g., determine how big to allocate an array which will have an element for every document number in an index. ">MaxDoc</a>); <span class="comment">//assume all are INvalid</span></div>
<div class="line"><a name="l00085"></a><span class="lineno"> 85</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term.html" title="A Term represents a word from text. This is the unit of search. It is composed of two elements...">Term</a> startTerm = <span class="keyword">new</span> <a class="code" href="_more_like_this_8cs.html#a8797eb30ce21b31829d3e3a43a20e3b9">Term</a>(fieldName);</div>
<div class="line"><a name="l00086"></a><span class="lineno"> 86</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html" title="Abstract class for enumerating terms. Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than all that precede it. ">TermEnum</a> te = reader.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html#a9881fb10c3dbc407157b0c7e9cca7d11" title="Returns an enumeration of all the terms in the index. The enumeration is ordered by Term...">Terms</a>(startTerm);</div>
<div class="line"><a name="l00087"></a><span class="lineno"> 87</span>&#160; <span class="keywordflow">if</span> (te != null)</div>
<div class="line"><a name="l00088"></a><span class="lineno"> 88</span>&#160; {</div>
<div class="line"><a name="l00089"></a><span class="lineno"> 89</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term.html" title="A Term represents a word from text. This is the unit of search. It is composed of two elements...">Term</a> currTerm = te.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html#acfd949e542b23691974b3f867bcae47f" title="Returns the current Term in the enumeration.">Term</a>;</div>
<div class="line"><a name="l00090"></a><span class="lineno"> 90</span>&#160; <span class="keywordflow">while</span> ((currTerm != null) &amp;&amp; (currTerm.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term.html#a2ecfa7c0860798920d089654f34aa222" title="Returns the field of this term, an interned string. The field indicates the part of a document which ...">Field</a> == startTerm.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term.html#a2ecfa7c0860798920d089654f34aa222" title="Returns the field of this term, an interned string. The field indicates the part of a document which ...">Field</a>)) <span class="comment">//term fieldnames are interned</span></div>
<div class="line"><a name="l00091"></a><span class="lineno"> 91</span>&#160; {</div>
<div class="line"><a name="l00092"></a><span class="lineno"> 92</span>&#160; <span class="keywordtype">int</span> lastDoc = -1;</div>
<div class="line"><a name="l00093"></a><span class="lineno"> 93</span>&#160; <span class="comment">//set non duplicates</span></div>
<div class="line"><a name="l00094"></a><span class="lineno"> 94</span>&#160; <a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html" title="TermDocs provides an interface for enumerating &lt;document, frequency&gt; pairs for a term. The document portion names each document containing the term. Documents are indicated by number. The frequency portion gives the number of times the term occurred in each document. The pairs are ordered by document number. ">TermDocs</a> td = reader.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html#acbbce30c73e871a7cf5c151ee0c775b9" title="Returns an enumeration of all the documents which contain term. For each document, the document number, the frequency of the term in that document is also provided, for use in search scoring. If term is null, then all non-deleted docs are returned with freq=1. Thus, this method implements the mapping: &amp;#160;&amp;#160; =&gt; &amp;#160;&amp;#160; &lt;docNum, freq&gt;* The enumeration is ordered by document number. Each document number is greater than all that precede it in the enumeration. ">TermDocs</a>(currTerm);</div>
<div class="line"><a name="l00095"></a><span class="lineno"> 95</span>&#160; <span class="keywordflow">if</span> (td.<a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html#a5a07b58ff92edf95aff4fbc264eb12e7" title="Moves to the next pair in the enumeration. Returns true iff there is such a next pair in the enumerat...">Next</a>())</div>
<div class="line"><a name="l00096"></a><span class="lineno"> 96</span>&#160; {</div>
<div class="line"><a name="l00097"></a><span class="lineno"> 97</span>&#160; <span class="keywordflow">if</span> (keepMode == KM_USE_FIRST_OCCURRENCE)</div>
<div class="line"><a name="l00098"></a><span class="lineno"> 98</span>&#160; {</div>
<div class="line"><a name="l00099"></a><span class="lineno"> 99</span>&#160; bits.<a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html#acfb26a60d06cb5c1231ab2e441acf049" title="sets a bit, expanding the set size if necessary ">Set</a>(td.<a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html#af4d11271571bff528c0639eb69cd8b4d" title="Returns the current document number. This is invalid until Next() is called for the first time...">Doc</a>);</div>
<div class="line"><a name="l00100"></a><span class="lineno"> 100</span>&#160; }</div>
<div class="line"><a name="l00101"></a><span class="lineno"> 101</span>&#160; <span class="keywordflow">else</span></div>
<div class="line"><a name="l00102"></a><span class="lineno"> 102</span>&#160; {</div>
<div class="line"><a name="l00103"></a><span class="lineno"> 103</span>&#160; <span class="keywordflow">do</span></div>
<div class="line"><a name="l00104"></a><span class="lineno"> 104</span>&#160; {</div>
<div class="line"><a name="l00105"></a><span class="lineno"> 105</span>&#160; lastDoc = td.<a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html#af4d11271571bff528c0639eb69cd8b4d" title="Returns the current document number. This is invalid until Next() is called for the first time...">Doc</a>;</div>
<div class="line"><a name="l00106"></a><span class="lineno"> 106</span>&#160; } <span class="keywordflow">while</span> (td.<a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html#a5a07b58ff92edf95aff4fbc264eb12e7" title="Moves to the next pair in the enumeration. Returns true iff there is such a next pair in the enumerat...">Next</a>());</div>
<div class="line"><a name="l00107"></a><span class="lineno"> 107</span>&#160; bits.<a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html#acfb26a60d06cb5c1231ab2e441acf049" title="sets a bit, expanding the set size if necessary ">Set</a>(lastDoc);</div>
<div class="line"><a name="l00108"></a><span class="lineno"> 108</span>&#160; }</div>
<div class="line"><a name="l00109"></a><span class="lineno"> 109</span>&#160; }</div>
<div class="line"><a name="l00110"></a><span class="lineno"> 110</span>&#160; <span class="keywordflow">if</span> (!te.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html#a3821b36f91dd29129649dc64f5ee1738" title="Increments the enumeration to the next element. True if one exists.">Next</a>())</div>
<div class="line"><a name="l00111"></a><span class="lineno"> 111</span>&#160; {</div>
<div class="line"><a name="l00112"></a><span class="lineno"> 112</span>&#160; <span class="keywordflow">break</span>;</div>
<div class="line"><a name="l00113"></a><span class="lineno"> 113</span>&#160; }</div>
<div class="line"><a name="l00114"></a><span class="lineno"> 114</span>&#160; currTerm = te.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html#acfd949e542b23691974b3f867bcae47f" title="Returns the current Term in the enumeration.">Term</a>;</div>
<div class="line"><a name="l00115"></a><span class="lineno"> 115</span>&#160; }</div>
<div class="line"><a name="l00116"></a><span class="lineno"> 116</span>&#160; }</div>
<div class="line"><a name="l00117"></a><span class="lineno"> 117</span>&#160; <span class="keywordflow">return</span> bits;</div>
<div class="line"><a name="l00118"></a><span class="lineno"> 118</span>&#160; }</div>
<div class="line"><a name="l00119"></a><span class="lineno"> 119</span>&#160;</div>
<div class="line"><a name="l00120"></a><span class="lineno"> 120</span>&#160; <span class="keyword">private</span> <a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html" title="An &quot;open&quot; BitSet implementation that allows direct access to the array of words storing the bits...">OpenBitSet</a> FastBits(<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html" title="IndexReader is an abstract class, providing an interface for accessing an index. Search of an index i...">IndexReader</a> reader)</div>
<div class="line"><a name="l00121"></a><span class="lineno"> 121</span>&#160; {</div>
<div class="line"><a name="l00122"></a><span class="lineno"> 122</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html" title="An &quot;open&quot; BitSet implementation that allows direct access to the array of words storing the bits...">OpenBitSet</a> bits = <span class="keyword">new</span> <a class="code" href="_field_cache_terms_filter_8cs.html#a39e4630d91e4f7fc118eb51aa4d221a0">OpenBitSet</a>(reader.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html#ac7694e685dde51863b8974da924fa439" title="Returns one greater than the largest possible document number. This may be used to, e.g., determine how big to allocate an array which will have an element for every document number in an index. ">MaxDoc</a>);</div>
<div class="line"><a name="l00123"></a><span class="lineno"> 123</span>&#160; bits.<a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html#acfb26a60d06cb5c1231ab2e441acf049" title="sets a bit, expanding the set size if necessary ">Set</a>(0, reader.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html#ac7694e685dde51863b8974da924fa439" title="Returns one greater than the largest possible document number. This may be used to, e.g., determine how big to allocate an array which will have an element for every document number in an index. ">MaxDoc</a>); <span class="comment">//assume all are valid</span></div>
<div class="line"><a name="l00124"></a><span class="lineno"> 124</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term.html" title="A Term represents a word from text. This is the unit of search. It is composed of two elements...">Term</a> startTerm = <span class="keyword">new</span> <a class="code" href="_more_like_this_8cs.html#a8797eb30ce21b31829d3e3a43a20e3b9">Term</a>(fieldName);</div>
<div class="line"><a name="l00125"></a><span class="lineno"> 125</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html" title="Abstract class for enumerating terms. Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than all that precede it. ">TermEnum</a> te = reader.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html#a9881fb10c3dbc407157b0c7e9cca7d11" title="Returns an enumeration of all the terms in the index. The enumeration is ordered by Term...">Terms</a>(startTerm);</div>
<div class="line"><a name="l00126"></a><span class="lineno"> 126</span>&#160; <span class="keywordflow">if</span> (te != null)</div>
<div class="line"><a name="l00127"></a><span class="lineno"> 127</span>&#160; {</div>
<div class="line"><a name="l00128"></a><span class="lineno"> 128</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term.html" title="A Term represents a word from text. This is the unit of search. It is composed of two elements...">Term</a> currTerm = te.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html#acfd949e542b23691974b3f867bcae47f" title="Returns the current Term in the enumeration.">Term</a>;</div>
<div class="line"><a name="l00129"></a><span class="lineno"> 129</span>&#160;</div>
<div class="line"><a name="l00130"></a><span class="lineno"> 130</span>&#160; <span class="keywordflow">while</span> ((currTerm != null) &amp;&amp; (currTerm.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term.html#a2ecfa7c0860798920d089654f34aa222" title="Returns the field of this term, an interned string. The field indicates the part of a document which ...">Field</a> == startTerm.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term.html#a2ecfa7c0860798920d089654f34aa222" title="Returns the field of this term, an interned string. The field indicates the part of a document which ...">Field</a>)) <span class="comment">//term fieldnames are interned</span></div>
<div class="line"><a name="l00131"></a><span class="lineno"> 131</span>&#160; {</div>
<div class="line"><a name="l00132"></a><span class="lineno"> 132</span>&#160; <span class="keywordflow">if</span> (te.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html#a02510654c0b4d40c3c9aab1067541749" title="Returns the docFreq of the current Term in the enumeration.">DocFreq</a>() &gt; 1)</div>
<div class="line"><a name="l00133"></a><span class="lineno"> 133</span>&#160; {</div>
<div class="line"><a name="l00134"></a><span class="lineno"> 134</span>&#160; <span class="keywordtype">int</span> lastDoc = -1;</div>
<div class="line"><a name="l00135"></a><span class="lineno"> 135</span>&#160; <span class="comment">//unset potential duplicates</span></div>
<div class="line"><a name="l00136"></a><span class="lineno"> 136</span>&#160; <a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html" title="TermDocs provides an interface for enumerating &lt;document, frequency&gt; pairs for a term. The document portion names each document containing the term. Documents are indicated by number. The frequency portion gives the number of times the term occurred in each document. The pairs are ordered by document number. ">TermDocs</a> td = reader.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_index_reader.html#acbbce30c73e871a7cf5c151ee0c775b9" title="Returns an enumeration of all the documents which contain term. For each document, the document number, the frequency of the term in that document is also provided, for use in search scoring. If term is null, then all non-deleted docs are returned with freq=1. Thus, this method implements the mapping: &amp;#160;&amp;#160; =&gt; &amp;#160;&amp;#160; &lt;docNum, freq&gt;* The enumeration is ordered by document number. Each document number is greater than all that precede it in the enumeration. ">TermDocs</a>(currTerm);</div>
<div class="line"><a name="l00137"></a><span class="lineno"> 137</span>&#160; td.<a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html#a5a07b58ff92edf95aff4fbc264eb12e7" title="Moves to the next pair in the enumeration. Returns true iff there is such a next pair in the enumerat...">Next</a>();</div>
<div class="line"><a name="l00138"></a><span class="lineno"> 138</span>&#160; <span class="keywordflow">if</span> (keepMode == KM_USE_FIRST_OCCURRENCE)</div>
<div class="line"><a name="l00139"></a><span class="lineno"> 139</span>&#160; {</div>
<div class="line"><a name="l00140"></a><span class="lineno"> 140</span>&#160; td.<a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html#a5a07b58ff92edf95aff4fbc264eb12e7" title="Moves to the next pair in the enumeration. Returns true iff there is such a next pair in the enumerat...">Next</a>();</div>
<div class="line"><a name="l00141"></a><span class="lineno"> 141</span>&#160; }</div>
<div class="line"><a name="l00142"></a><span class="lineno"> 142</span>&#160; <span class="keywordflow">do</span></div>
<div class="line"><a name="l00143"></a><span class="lineno"> 143</span>&#160; {</div>
<div class="line"><a name="l00144"></a><span class="lineno"> 144</span>&#160; lastDoc = td.<a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html#af4d11271571bff528c0639eb69cd8b4d" title="Returns the current document number. This is invalid until Next() is called for the first time...">Doc</a>;</div>
<div class="line"><a name="l00145"></a><span class="lineno"> 145</span>&#160; bits.<a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html#a725e13e033210eb75baaf493b3498c9f" title="clears a bit, allowing access beyond the current set size without changing the size.">Clear</a>(lastDoc);</div>
<div class="line"><a name="l00146"></a><span class="lineno"> 146</span>&#160; } <span class="keywordflow">while</span> (td.<a class="code" href="interface_lucene_1_1_net_1_1_index_1_1_term_docs.html#a5a07b58ff92edf95aff4fbc264eb12e7" title="Moves to the next pair in the enumeration. Returns true iff there is such a next pair in the enumerat...">Next</a>());</div>
<div class="line"><a name="l00147"></a><span class="lineno"> 147</span>&#160; <span class="keywordflow">if</span> (keepMode == KM_USE_LAST_OCCURRENCE)</div>
<div class="line"><a name="l00148"></a><span class="lineno"> 148</span>&#160; {</div>
<div class="line"><a name="l00149"></a><span class="lineno"> 149</span>&#160; <span class="comment">//restore the last bit</span></div>
<div class="line"><a name="l00150"></a><span class="lineno"> 150</span>&#160; bits.<a class="code" href="class_lucene_1_1_net_1_1_util_1_1_open_bit_set.html#acfb26a60d06cb5c1231ab2e441acf049" title="sets a bit, expanding the set size if necessary ">Set</a>(lastDoc);</div>
<div class="line"><a name="l00151"></a><span class="lineno"> 151</span>&#160; }</div>
<div class="line"><a name="l00152"></a><span class="lineno"> 152</span>&#160; }</div>
<div class="line"><a name="l00153"></a><span class="lineno"> 153</span>&#160; <span class="keywordflow">if</span> (!te.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html#a3821b36f91dd29129649dc64f5ee1738" title="Increments the enumeration to the next element. True if one exists.">Next</a>())</div>
<div class="line"><a name="l00154"></a><span class="lineno"> 154</span>&#160; {</div>
<div class="line"><a name="l00155"></a><span class="lineno"> 155</span>&#160; <span class="keywordflow">break</span>;</div>
<div class="line"><a name="l00156"></a><span class="lineno"> 156</span>&#160; }</div>
<div class="line"><a name="l00157"></a><span class="lineno"> 157</span>&#160; currTerm = te.<a class="code" href="class_lucene_1_1_net_1_1_index_1_1_term_enum.html#acfd949e542b23691974b3f867bcae47f" title="Returns the current Term in the enumeration.">Term</a>;</div>
<div class="line"><a name="l00158"></a><span class="lineno"> 158</span>&#160; }</div>
<div class="line"><a name="l00159"></a><span class="lineno"> 159</span>&#160; }</div>
<div class="line"><a name="l00160"></a><span class="lineno"> 160</span>&#160; <span class="keywordflow">return</span> bits;</div>
<div class="line"><a name="l00161"></a><span class="lineno"> 161</span>&#160; }</div>
<div class="line"><a name="l00162"></a><span class="lineno"> 162</span>&#160;</div>
<!-- Listing lines 163-167: public string property FieldName. The getter returns the fieldName field; the setter assigns the incoming value to it without any validation. -->
<div class="line"><a name="l00163"></a><span class="lineno"> 163</span>&#160; <span class="keyword">public</span> <span class="keywordtype">string</span> FieldName</div>
<div class="line"><a name="l00164"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a4dd898e026213f16e462795c448adbb2"> 164</a></span>&#160; {</div>
<div class="line"><a name="l00165"></a><span class="lineno"> 165</span>&#160; <span class="keyword">get</span> { <span class="keywordflow">return</span> fieldName; }</div>
<div class="line"><a name="l00166"></a><span class="lineno"> 166</span>&#160; <span class="keyword">set</span> { this.fieldName = value; }</div>
<div class="line"><a name="l00167"></a><span class="lineno"> 167</span>&#160; }</div>
<div class="line"><a name="l00168"></a><span class="lineno"> 168</span>&#160;</div>
<!-- Listing lines 169-173: public int property KeepMode. The getter returns the keepMode field; the setter stores the incoming value as-is. Elsewhere in this listing keepMode is compared against KM_USE_FIRST_OCCURRENCE and KM_USE_LAST_OCCURRENCE. -->
<div class="line"><a name="l00169"></a><span class="lineno"> 169</span>&#160; <span class="keyword">public</span> <span class="keywordtype">int</span> KeepMode</div>
<div class="line"><a name="l00170"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a715517d2e145c442512fe9bd8108d3e9"> 170</a></span>&#160; {</div>
<div class="line"><a name="l00171"></a><span class="lineno"> 171</span>&#160; <span class="keyword">get</span> { <span class="keywordflow">return</span> keepMode; }</div>
<div class="line"><a name="l00172"></a><span class="lineno"> 172</span>&#160; <span class="keyword">set</span> { this.keepMode = value; }</div>
<div class="line"><a name="l00173"></a><span class="lineno"> 173</span>&#160; }</div>
<div class="line"><a name="l00174"></a><span class="lineno"> 174</span>&#160;</div>
<!-- Listing lines 175-185: Equals override. Returns true when obj is this same reference; returns false when obj is null or its runtime type differs from this instance's type; otherwise casts obj to DuplicateFilter and requires keepMode, processingMode and fieldName to match. Two null fieldName values compare as equal through the first fieldName comparison. -->
<div class="line"><a name="l00175"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a77e5fc9af23acff079dea18020c45387"> 175</a></span>&#160; <span class="keyword">public</span> <span class="keyword">override</span> <span class="keywordtype">bool</span> Equals(Object obj)</div>
<div class="line"><a name="l00176"></a><span class="lineno"> 176</span>&#160; {</div>
<div class="line"><a name="l00177"></a><span class="lineno"> 177</span>&#160; <span class="keywordflow">if</span> (<span class="keyword">this</span> == obj)</div>
<div class="line"><a name="l00178"></a><span class="lineno"> 178</span>&#160; <span class="keywordflow">return</span> <span class="keyword">true</span>;</div>
<div class="line"><a name="l00179"></a><span class="lineno"> 179</span>&#160; <span class="keywordflow">if</span> ((obj == null) || (obj.GetType()!= this.GetType()))</div>
<div class="line"><a name="l00180"></a><span class="lineno"> 180</span>&#160; <span class="keywordflow">return</span> <span class="keyword">false</span>;</div>
<div class="line"><a name="l00181"></a><span class="lineno"> 181</span>&#160; <a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html">DuplicateFilter</a> other = (<a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html">DuplicateFilter</a>)obj;</div>
<div class="line"><a name="l00182"></a><span class="lineno"> 182</span>&#160; <span class="keywordflow">return</span> keepMode == other.keepMode &amp;&amp;</div>
<div class="line"><a name="l00183"></a><span class="lineno"> 183</span>&#160; processingMode == other.processingMode &amp;&amp;</div>
<div class="line"><a name="l00184"></a><span class="lineno"> 184</span>&#160; (fieldName == other.fieldName || (fieldName != null &amp;&amp; fieldName.Equals(other.fieldName)));</div>
<div class="line"><a name="l00185"></a><span class="lineno"> 185</span>&#160; }</div>
<div class="line"><a name="l00186"></a><span class="lineno"> 186</span>&#160;</div>
<!-- Listing lines 187-194: GetHashCode override. Folds keepMode, processingMode and fieldName into a running hash seeded with 217 and multiplied by 31 at each step. A null fieldName now contributes 0 instead of throwing NullReferenceException, which keeps the hash consistent with Equals(), where two null field names compare as equal. Hash values for non-null field names are unchanged. This page is generated from contrib/Queries/DuplicateFilter.cs; apply the same change there so the listing and the source stay in agreement. -->
<div class="line"><a name="l00187"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a2e2b0e022dcc08b2169e657828499825"> 187</a></span>&#160; <span class="keyword">public</span> <span class="keyword">override</span> <span class="keywordtype">int</span> GetHashCode()</div>
<div class="line"><a name="l00188"></a><span class="lineno"> 188</span>&#160; {</div>
<div class="line"><a name="l00189"></a><span class="lineno"> 189</span>&#160; <span class="keywordtype">int</span> hash = 217;</div>
<div class="line"><a name="l00190"></a><span class="lineno"> 190</span>&#160; hash = 31 * hash + keepMode;</div>
<div class="line"><a name="l00191"></a><span class="lineno"> 191</span>&#160; hash = 31 * hash + processingMode;</div>
<div class="line"><a name="l00192"></a><span class="lineno"> 192</span>&#160; hash = 31 * hash + (fieldName != null ? fieldName.GetHashCode() : 0); <span class="comment">// null-safe: Equals() accepts a null fieldName</span></div>
<div class="line"><a name="l00193"></a><span class="lineno"> 193</span>&#160; <span class="keywordflow">return</span> hash;</div>
<div class="line"><a name="l00194"></a><span class="lineno"> 194</span>&#160; }</div>
<div class="line"><a name="l00195"></a><span class="lineno"> 195</span>&#160;</div>
<!-- Listing lines 196-200: public int property ProcessingMode. The getter returns the processingMode field; the setter stores the incoming value without validation. -->
<div class="line"><a name="l00196"></a><span class="lineno"> 196</span>&#160; <span class="keyword">public</span> <span class="keywordtype">int</span> ProcessingMode</div>
<div class="line"><a name="l00197"></a><span class="lineno"><a class="code" href="class_lucene_1_1_net_1_1_search_1_1_duplicate_filter.html#a31b3f6a034b411e35e718877d0a9510e"> 197</a></span>&#160; {</div>
<div class="line"><a name="l00198"></a><span class="lineno"> 198</span>&#160; <span class="keyword">get</span> { <span class="keywordflow">return</span> processingMode; }</div>
<div class="line"><a name="l00199"></a><span class="lineno"> 199</span>&#160; <span class="keyword">set</span> { this.processingMode = value; }</div>
<div class="line"><a name="l00200"></a><span class="lineno"> 200</span>&#160; }</div>
<div class="line"><a name="l00201"></a><span class="lineno"> 201</span>&#160; }</div>
<div class="line"><a name="l00202"></a><span class="lineno"> 202</span>&#160;}</div>
</div><!-- fragment --></div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated on Thu Jan 3 2013 02:12:43 for Lucene.Net by &#160;<a href="http://www.doxygen.org/index.html">
<img class="footer" src="doxygen.png" alt="doxygen"/>
</a> 1.8.3
</small></address>
</body>
</html>