| using Lucene.Net.Diagnostics; |
| using Lucene.Net.Index; |
| using Lucene.Net.Util; |
| using System; |
| using System.Collections.Generic; |
| using System.Diagnostics; |
| using System.IO; |
| using System.Text; |
| |
| namespace Lucene.Net.Search.VectorHighlight |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// <see cref="FieldTermStack"/> is a stack that keeps query terms in the specified field |
| /// of the document to be highlighted. |
| /// </summary> |
| public class FieldTermStack |
| { |
| private readonly string fieldName; |
| internal List<TermInfo> termList = new List<TermInfo>(); |
| |
| //public static void main( string[] args ) throws Exception { |
| // Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); |
| // QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer ); |
| // Query query = parser.parse( "a x:b" ); |
| // FieldQuery fieldQuery = new FieldQuery( query, true, false ); |
| |
| // Directory dir = new RAMDirectory(); |
| // IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)); |
| // Document doc = new Document(); |
| // IndexableFieldType ft = new IndexableFieldType(TextField.TYPE_STORED); |
| // ft.setStoreTermVectors(true); |
| // ft.setStoreTermVectorOffsets(true); |
| // ft.setStoreTermVectorPositions(true); |
| // doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) ); |
| // doc.add( new Field( "f", ft, "b a b a f" ) ); |
| // writer.addDocument( doc ); |
| // writer.close(); |
| |
| // IndexReader reader = IndexReader.open(dir); |
| // new FieldTermStack( reader, 0, "f", fieldQuery ); |
| // reader.close(); |
| //} |
| |
/// <summary>
/// Builds the term stack for one field of one document. Every occurrence of a query term
/// found in the field's term vector is collected, the occurrences are sorted by position,
/// and occurrences that share a position are chained into a circular list reachable through
/// <see cref="TermInfo.Next"/>; only the first occurrence per position stays in the stack.
/// </summary>
/// <remarks>
/// The stack stays empty (yielding a null snippet) when the query has no term set for
/// <paramref name="fieldName"/>, when the document has no term vectors, or when the field
/// has no term vector. Collection also stops early, without sorting or linking, as soon as
/// a term provides no positions enumerator or reports a negative start offset.
/// </remarks>
/// <param name="reader"><see cref="IndexReader"/> of the index</param>
/// <param name="docId">document id to be highlighted</param>
/// <param name="fieldName">field of the document to be highlighted</param>
/// <param name="fieldQuery"><see cref="FieldQuery"/> object</param>
/// <exception cref="IOException">If there is a low-level I/O error</exception>
public FieldTermStack(IndexReader reader, int docId, string fieldName, FieldQuery fieldQuery)
{
    this.fieldName = fieldName;

    ISet<string> termSet = fieldQuery.GetTermSet(fieldName);
    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if (termSet == null)
    {
        return;
    }

    Fields vectors = reader.GetTermVectors(docId);
    if (vectors == null)
    {
        // null snippet
        return;
    }

    Terms vector = vectors.GetTerms(fieldName);
    if (vector == null)
    {
        // null snippet
        return;
    }

    CharsRef spare = new CharsRef();
    TermsEnum termsEnum = vector.GetEnumerator();
    DocsAndPositionsEnum dpEnum = null; // handed back to DocsAndPositions() on each iteration

    int numDocs = reader.MaxDoc;

    while (termsEnum.MoveNext())
    {
        BytesRef text = termsEnum.Term;
        UnicodeUtil.UTF8toUTF16(text, spare);
        string term = spare.ToString();
        if (!termSet.Contains(term))
        {
            // not a query term
            continue;
        }

        dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
        if (dpEnum == null)
        {
            // null snippet
            return;
        }

        dpEnum.NextDoc();

        // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
        // weight = ln(maxDoc / (docFreq + 1)) + 1
        float weight = (float)(Math.Log(numDocs / (double)(reader.DocFreq(new Term(fieldName, text)) + 1)) + 1.0);

        int freq = dpEnum.Freq;

        for (int i = 0; i < freq; i++)
        {
            int pos = dpEnum.NextPosition();
            if (dpEnum.StartOffset < 0)
            {
                return; // no offsets, null snippet
            }
            termList.Add(new TermInfo(term, dpEnum.StartOffset, dpEnum.EndOffset, pos, weight));
        }
    }

    // sort by position
    CollectionUtil.TimSort(termList);

    // Now look for dups at the same position, linking them together.
    // The list is compacted in place: the first term seen at each position is copied down
    // to index 'kept', later terms at that position are only linked into the ring, and the
    // unused tail is trimmed once at the end. This avoids a RemoveAt() per duplicate, each
    // of which would shift the remainder of the list.
    int kept = 0;
    int currentPos = -1;
    TermInfo previous = null;
    TermInfo first = null;
    for (int i = 0; i < termList.Count; i++)
    {
        TermInfo current = termList[i];
        if (current.Position == currentPos)
        {
            // another term at the position of 'first': append it to the ring being built
            if (Debugging.AssertsEnabled) Debugging.Assert(previous != null);
            previous.SetNext(current);
            previous = current;
        }
        else
        {
            // a new position starts: close the ring of the previous position
            if (previous != null)
            {
                previous.SetNext(first);
            }
            previous = first = current;
            currentPos = current.Position;
            termList[kept++] = current;
        }
    }

    // close the ring of the last position
    if (previous != null)
    {
        previous.SetNext(first);
    }

    // drop the slots left over from terms that were linked instead of kept
    termList.RemoveRange(kept, termList.Count - kept);
}
| |
/// <summary>
/// Gets the name of the field this stack was created for.
/// </summary>
public virtual string FieldName
{
    get { return fieldName; }
}
| |
/// <summary>
/// Removes and returns the top <see cref="TermInfo"/> object of the stack.
/// </summary>
/// <returns>
/// the top <see cref="TermInfo"/> object of the stack, or <c>null</c> if the stack is empty
/// </returns>
public virtual TermInfo Pop()
{
    if (termList.Count == 0)
    {
        return null;
    }

    TermInfo top = termList[0];
    // Remove by index rather than by value: a by-value removal searches with
    // TermInfo.Equals, which compares positions only.
    termList.RemoveAt(0);
    return top;
}
| |
/// <summary>
/// Places a <see cref="TermInfo"/> on the top of the stack, i.e. at the front of the
/// underlying list.
/// </summary>
/// <param name="termInfo">the <see cref="TermInfo"/> object to be put on the top of the stack</param>
public virtual void Push(TermInfo termInfo)
{
    // index 0 is the top of the stack
    const int top = 0;
    termList.Insert(top, termInfo);
}
| |
/// <summary>
/// Gets a value indicating whether the stack is empty:
/// <c>true</c> if there is no term list or it holds no elements, <c>false</c> otherwise.
/// </summary>
public virtual bool IsEmpty
{
    get { return !(termList != null && termList.Count != 0); }
}
| |
/// <summary>
/// Single term with its position/offsets in the document and IDF weight.
/// It is <see cref="IComparable{TermInfo}"/> but considers only position;
/// <see cref="Equals(object)"/> and <see cref="GetHashCode()"/> likewise depend on position alone.
/// </summary>
public class TermInfo : IComparable<TermInfo>
{
    private readonly string text;
    private readonly int startOffset;
    private readonly int endOffset;
    private readonly int position;

    // IDF-weight of this term
    private readonly float weight;

    // pointer to other TermInfo's at the same position.
    // this is a circular list, so with no syns, just points to itself
    private TermInfo next;

    /// <summary>
    /// Creates a term occurrence that initially forms a one-element circular list
    /// (<see cref="Next"/> returns the instance itself).
    /// </summary>
    /// <param name="text">the term text</param>
    /// <param name="startOffset">start offset of the occurrence</param>
    /// <param name="endOffset">end offset of the occurrence</param>
    /// <param name="position">position of the occurrence</param>
    /// <param name="weight">weight of the term</param>
    public TermInfo(string text, int startOffset, int endOffset, int position, float weight)
    {
        this.text = text;
        this.startOffset = startOffset;
        this.endOffset = endOffset;
        this.position = position;
        this.weight = weight;
        this.next = this;
    }

    /// <summary>
    /// Sets the successor of this instance in the circular same-position list.
    /// </summary>
    internal void SetNext(TermInfo next) { this.next = next; }

    /// <summary>
    /// Returns the next TermInfo at this same position. This is a circular list!
    /// </summary>
    public virtual TermInfo Next => next;

    /// <summary>The term text.</summary>
    public virtual string Text => text;

    /// <summary>Start offset of this occurrence.</summary>
    public virtual int StartOffset => startOffset;

    /// <summary>End offset of this occurrence.</summary>
    public virtual int EndOffset => endOffset;

    /// <summary>Position of this occurrence.</summary>
    public virtual int Position => position;

    /// <summary>Weight of this term.</summary>
    public virtual float Weight => weight;

    /// <summary>
    /// Formats the instance as <c>text(startOffset,endOffset,position)</c>.
    /// </summary>
    public override string ToString()
    {
        StringBuilder sb = new StringBuilder();
        sb.Append(text);
        sb.Append('(').Append(startOffset);
        sb.Append(',').Append(endOffset);
        sb.Append(',').Append(position);
        sb.Append(')');
        return sb.ToString();
    }

    /// <summary>
    /// Orders instances by position only.
    /// </summary>
    /// <param name="o">the instance to compare with; may be <c>null</c></param>
    /// <returns>
    /// a negative value, zero, or a positive value when this position is less than, equal to,
    /// or greater than the position of <paramref name="o"/>; a positive value when
    /// <paramref name="o"/> is <c>null</c>
    /// </returns>
    public virtual int CompareTo(TermInfo o)
    {
        // Any instance sorts after null.
        if (o == null)
        {
            return 1;
        }

        // int.CompareTo cannot overflow, unlike subtracting the two positions.
        return position.CompareTo(o.position);
    }

    /// <summary>
    /// Hash code derived from the position only, consistent with <see cref="Equals(object)"/>.
    /// </summary>
    public override int GetHashCode()
    {
        // Wrap-around is intended; never throw, even when compiled with overflow checking.
        unchecked
        {
            int prime = 31;
            int result = 1;
            result = prime * result + position;
            return result;
        }
    }

    /// <summary>
    /// Two instances are equal when they have the same runtime type and the same position.
    /// </summary>
    public override bool Equals(object obj)
    {
        if (ReferenceEquals(this, obj))
        {
            return true;
        }
        if (obj == null || GetType() != obj.GetType())
        {
            return false;
        }

        TermInfo other = (TermInfo)obj;
        return position == other.position;
    }
}
| } |
| } |