| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using System; |
| using Lucene.Net.Analysis.Tokenattributes; |
| using NUnit.Framework; |
| |
| using Analyzer = Lucene.Net.Analysis.Analyzer; |
| using TokenStream = Lucene.Net.Analysis.TokenStream; |
| using Document = Lucene.Net.Documents.Document; |
| using Field = Lucene.Net.Documents.Field; |
| using MockRAMDirectory = Lucene.Net.Store.MockRAMDirectory; |
| using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; |
| |
| namespace Lucene.Net.Index |
| { |
| |
| [TestFixture] |
| public class TestTermVectorsReader:LuceneTestCase |
| { |
| private void InitBlock() |
| { |
| positions = new int[testTerms.Length][]; |
| offsets = new TermVectorOffsetInfo[testTerms.Length][]; |
| tokens = new TestToken[testTerms.Length * TERM_FREQ]; |
| } |
| //Must be lexicographically sorted, will do in setup, versus trying to maintain here |
| private System.String[] testFields = new System.String[]{"f1", "f2", "f3", "f4"}; |
| private bool[] testFieldsStorePos = new bool[]{true, false, true, false}; |
| private bool[] testFieldsStoreOff = new bool[]{true, false, false, true}; |
| private System.String[] testTerms = new System.String[]{"this", "is", "a", "test"}; |
| private int[][] positions; |
| private TermVectorOffsetInfo[][] offsets; |
| private MockRAMDirectory dir = new MockRAMDirectory(); |
| private System.String seg; |
| private FieldInfos fieldInfos = new FieldInfos(); |
| private static int TERM_FREQ = 3; |
| |
| public TestTermVectorsReader(System.String s):base(s) |
| { |
| InitBlock(); |
| } |
| public TestTermVectorsReader() |
| : base() |
| { |
| InitBlock(); |
| } |
| |
| internal class TestToken : System.IComparable<TestToken> |
| { |
| public TestToken(TestTermVectorsReader enclosingInstance) |
| { |
| InitBlock(enclosingInstance); |
| } |
| private void InitBlock(TestTermVectorsReader enclosingInstance) |
| { |
| this.enclosingInstance = enclosingInstance; |
| } |
| private TestTermVectorsReader enclosingInstance; |
| public TestTermVectorsReader Enclosing_Instance |
| { |
| get |
| { |
| return enclosingInstance; |
| } |
| |
| } |
| internal System.String text; |
| internal int pos; |
| internal int startOffset; |
| internal int endOffset; |
| public virtual int CompareTo(TestToken other) |
| { |
| return pos - other.pos; |
| } |
| } |
| |
| internal TestToken[] tokens; |
| |
| [SetUp] |
| public override void SetUp() |
| { |
| base.SetUp(); |
| /* |
| for (int i = 0; i < testFields.length; i++) { |
| fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]); |
| } |
| */ |
| |
| System.Array.Sort(testTerms); |
| int tokenUpto = 0; |
| for (int i = 0; i < testTerms.Length; i++) |
| { |
| positions[i] = new int[TERM_FREQ]; |
| offsets[i] = new TermVectorOffsetInfo[TERM_FREQ]; |
| // first position must be 0 |
| for (int j = 0; j < TERM_FREQ; j++) |
| { |
| // positions are always sorted in increasing order |
| positions[i][j] = (int) (j * 10 + (new System.Random().NextDouble()) * 10); |
| // offsets are always sorted in increasing order |
| offsets[i][j] = new TermVectorOffsetInfo(j * 10, j * 10 + testTerms[i].Length); |
| TestToken token = tokens[tokenUpto++] = new TestToken(this); |
| token.text = testTerms[i]; |
| token.pos = positions[i][j]; |
| token.startOffset = offsets[i][j].StartOffset; |
| token.endOffset = offsets[i][j].EndOffset; |
| } |
| } |
| System.Array.Sort(tokens); |
| |
| IndexWriter writer = new IndexWriter(dir, new MyAnalyzer(this), true, IndexWriter.MaxFieldLength.LIMITED); |
| writer.UseCompoundFile = false; |
| Document doc = new Document(); |
| for (int i = 0; i < testFields.Length; i++) |
| { |
| Field.TermVector tv; |
| if (testFieldsStorePos[i] && testFieldsStoreOff[i]) |
| tv = Field.TermVector.WITH_POSITIONS_OFFSETS; |
| else if (testFieldsStorePos[i] && !testFieldsStoreOff[i]) |
| tv = Field.TermVector.WITH_POSITIONS; |
| else if (!testFieldsStorePos[i] && testFieldsStoreOff[i]) |
| tv = Field.TermVector.WITH_OFFSETS; |
| else |
| tv = Field.TermVector.YES; |
| doc.Add(new Field(testFields[i], "", Field.Store.NO, Field.Index.ANALYZED, tv)); |
| } |
| |
| //Create 5 documents for testing, they all have the same |
| //terms |
| for (int j = 0; j < 5; j++) |
| writer.AddDocument(doc); |
| writer.Commit(); |
| seg = writer.NewestSegment().name; |
| writer.Close(); |
| |
| fieldInfos = new FieldInfos(dir, seg + "." + IndexFileNames.FIELD_INFOS_EXTENSION); |
| } |
| |
| private class MyTokenStream:TokenStream |
| { |
| private void InitBlock(TestTermVectorsReader enclosingInstance) |
| { |
| this.enclosingInstance = enclosingInstance; |
| } |
| private TestTermVectorsReader enclosingInstance; |
| public TestTermVectorsReader Enclosing_Instance |
| { |
| get |
| { |
| return enclosingInstance; |
| } |
| |
| } |
| internal int tokenUpto; |
| |
| internal ITermAttribute termAtt; |
| internal IPositionIncrementAttribute posIncrAtt; |
| internal IOffsetAttribute offsetAtt; |
| |
| public MyTokenStream(TestTermVectorsReader enclosingInstance) |
| { |
| InitBlock(enclosingInstance); |
| termAtt = AddAttribute<ITermAttribute>(); |
| posIncrAtt = AddAttribute<IPositionIncrementAttribute>(); |
| offsetAtt = AddAttribute<IOffsetAttribute>(); |
| } |
| |
| public override bool IncrementToken() |
| { |
| if (tokenUpto >= Enclosing_Instance.tokens.Length) |
| return false; |
| else |
| { |
| TestToken testToken = Enclosing_Instance.tokens[tokenUpto++]; |
| ClearAttributes(); |
| termAtt.SetTermBuffer(testToken.text); |
| offsetAtt.SetOffset(testToken.startOffset, testToken.endOffset); |
| if (tokenUpto > 1) |
| { |
| posIncrAtt.PositionIncrement = testToken.pos - Enclosing_Instance.tokens[tokenUpto - 2].pos; |
| } |
| else |
| { |
| posIncrAtt.PositionIncrement = testToken.pos + 1; |
| } |
| return true; |
| } |
| } |
| |
| protected override void Dispose(bool disposing) |
| { |
| // do nothing |
| } |
| } |
| |
| private class MyAnalyzer:Analyzer |
| { |
| public MyAnalyzer(TestTermVectorsReader enclosingInstance) |
| { |
| InitBlock(enclosingInstance); |
| } |
| private void InitBlock(TestTermVectorsReader enclosingInstance) |
| { |
| this.enclosingInstance = enclosingInstance; |
| } |
| private TestTermVectorsReader enclosingInstance; |
| public TestTermVectorsReader Enclosing_Instance |
| { |
| get |
| { |
| return enclosingInstance; |
| } |
| |
| } |
| public override TokenStream TokenStream(System.String fieldName, System.IO.TextReader reader) |
| { |
| return new MyTokenStream(enclosingInstance); |
| } |
| } |
| |
| [Test] |
| public virtual void Test() |
| { |
| //Check to see the files were created properly in setup |
| Assert.IsTrue(dir.FileExists(seg + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION)); |
| Assert.IsTrue(dir.FileExists(seg + "." + IndexFileNames.VECTORS_INDEX_EXTENSION)); |
| } |
| |
| [Test] |
| public virtual void TestReader() |
| { |
| TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); |
| Assert.IsTrue(reader != null); |
| for (int j = 0; j < 5; j++) |
| { |
| ITermFreqVector vector = reader.Get(j, testFields[0]); |
| Assert.IsTrue(vector != null); |
| System.String[] terms = vector.GetTerms(); |
| Assert.IsTrue(terms != null); |
| Assert.IsTrue(terms.Length == testTerms.Length); |
| for (int i = 0; i < terms.Length; i++) |
| { |
| System.String term = terms[i]; |
| //System.out.println("Term: " + term); |
| Assert.IsTrue(term.Equals(testTerms[i])); |
| } |
| } |
| } |
| |
| [Test] |
| public virtual void TestPositionReader() |
| { |
| TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); |
| Assert.IsTrue(reader != null); |
| TermPositionVector vector; |
| System.String[] terms; |
| vector = (TermPositionVector) reader.Get(0, testFields[0]); |
| Assert.IsTrue(vector != null); |
| terms = vector.GetTerms(); |
| Assert.IsTrue(terms != null); |
| Assert.IsTrue(terms.Length == testTerms.Length); |
| for (int i = 0; i < terms.Length; i++) |
| { |
| System.String term = terms[i]; |
| //System.out.println("Term: " + term); |
| Assert.IsTrue(term.Equals(testTerms[i])); |
| int[] positions = vector.GetTermPositions(i); |
| Assert.IsTrue(positions != null); |
| Assert.IsTrue(positions.Length == this.positions[i].Length); |
| for (int j = 0; j < positions.Length; j++) |
| { |
| int position = positions[j]; |
| Assert.IsTrue(position == this.positions[i][j]); |
| } |
| TermVectorOffsetInfo[] offset = vector.GetOffsets(i); |
| Assert.IsTrue(offset != null); |
| Assert.IsTrue(offset.Length == this.offsets[i].Length); |
| for (int j = 0; j < offset.Length; j++) |
| { |
| TermVectorOffsetInfo termVectorOffsetInfo = offset[j]; |
| Assert.IsTrue(termVectorOffsetInfo.Equals(offsets[i][j])); |
| } |
| } |
| |
| ITermFreqVector freqVector = reader.Get(0, testFields[1]); //no pos, no offset |
| Assert.IsTrue(freqVector != null); |
| Assert.IsTrue(freqVector is TermPositionVector == false); |
| terms = freqVector.GetTerms(); |
| Assert.IsTrue(terms != null); |
| Assert.IsTrue(terms.Length == testTerms.Length); |
| for (int i = 0; i < terms.Length; i++) |
| { |
| System.String term = terms[i]; |
| //System.out.println("Term: " + term); |
| Assert.IsTrue(term.Equals(testTerms[i])); |
| } |
| } |
| |
| [Test] |
| public virtual void TestOffsetReader() |
| { |
| TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); |
| Assert.IsTrue(reader != null); |
| TermPositionVector vector = (TermPositionVector) reader.Get(0, testFields[0]); |
| Assert.IsTrue(vector != null); |
| System.String[] terms = vector.GetTerms(); |
| Assert.IsTrue(terms != null); |
| Assert.IsTrue(terms.Length == testTerms.Length); |
| for (int i = 0; i < terms.Length; i++) |
| { |
| System.String term = terms[i]; |
| //System.out.println("Term: " + term); |
| Assert.IsTrue(term.Equals(testTerms[i])); |
| int[] positions = vector.GetTermPositions(i); |
| Assert.IsTrue(positions != null); |
| Assert.IsTrue(positions.Length == this.positions[i].Length); |
| for (int j = 0; j < positions.Length; j++) |
| { |
| int position = positions[j]; |
| Assert.IsTrue(position == this.positions[i][j]); |
| } |
| TermVectorOffsetInfo[] offset = vector.GetOffsets(i); |
| Assert.IsTrue(offset != null); |
| Assert.IsTrue(offset.Length == this.offsets[i].Length); |
| for (int j = 0; j < offset.Length; j++) |
| { |
| TermVectorOffsetInfo termVectorOffsetInfo = offset[j]; |
| Assert.IsTrue(termVectorOffsetInfo.Equals(offsets[i][j])); |
| } |
| } |
| } |
| |
| [Test] |
| public virtual void TestMapper() |
| { |
| TermVectorsReader reader = new TermVectorsReader(dir, seg, fieldInfos); |
| Assert.IsTrue(reader != null); |
| SortedTermVectorMapper mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); |
| reader.Get(0, mapper); |
| var set_Renamed = mapper.TermVectorEntrySet; |
| Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be"); |
| //three fields, 4 terms, all terms are the same |
| Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4); |
| //Check offsets and positions |
| for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext(); ) |
| { |
| TermVectorEntry tve = (TermVectorEntry) iterator.Current; |
| Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); |
| Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); |
| Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); |
| } |
| |
| mapper = new SortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); |
| reader.Get(1, mapper); |
| set_Renamed = mapper.TermVectorEntrySet; |
| Assert.IsTrue(set_Renamed != null, "set is null and it shouldn't be"); |
| //three fields, 4 terms, all terms are the same |
| Assert.IsTrue(set_Renamed.Count == 4, "set Size: " + set_Renamed.Count + " is not: " + 4); |
| //Should have offsets and positions b/c we are munging all the fields together |
| for (System.Collections.IEnumerator iterator = set_Renamed.GetEnumerator(); iterator.MoveNext(); ) |
| { |
| TermVectorEntry tve = (TermVectorEntry) iterator.Current; |
| Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); |
| Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); |
| Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); |
| } |
| |
| |
| FieldSortedTermVectorMapper fsMapper = new FieldSortedTermVectorMapper(new TermVectorEntryFreqSortedComparator()); |
| reader.Get(0, fsMapper); |
| var map = fsMapper.FieldToTerms; |
| Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length); |
| for (var iterator = map.GetEnumerator(); iterator.MoveNext(); ) |
| { |
| var entry = iterator.Current; |
| var sortedSet = entry.Value; |
| Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4); |
| for (var inner = sortedSet.GetEnumerator(); inner.MoveNext(); ) |
| { |
| TermVectorEntry tve = inner.Current; |
| Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); |
| //Check offsets and positions. |
| Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); |
| System.String field = tve.Field; |
| if (field.Equals(testFields[0])) |
| { |
| //should have offsets |
| |
| Assert.IsTrue(tve.GetOffsets() != null, "tve.getOffsets() is null and it shouldn't be"); |
| Assert.IsTrue(tve.GetPositions() != null, "tve.getPositions() is null and it shouldn't be"); |
| } |
| else if (field.Equals(testFields[1])) |
| { |
| //should not have offsets |
| |
| Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); |
| Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); |
| } |
| } |
| } |
| //Try mapper that ignores offs and positions |
| fsMapper = new FieldSortedTermVectorMapper(true, true, new TermVectorEntryFreqSortedComparator()); |
| reader.Get(0, fsMapper); |
| map = fsMapper.FieldToTerms; |
| Assert.IsTrue(map.Count == testFields.Length, "map Size: " + map.Count + " is not: " + testFields.Length); |
| for (var iterator = map.GetEnumerator(); iterator.MoveNext(); ) |
| { |
| var entry = iterator.Current; |
| var sortedSet = entry.Value; |
| Assert.IsTrue(sortedSet.Count == 4, "sortedSet Size: " + sortedSet.Count + " is not: " + 4); |
| for (var inner = sortedSet.GetEnumerator(); inner.MoveNext(); ) |
| { |
| TermVectorEntry tve = inner.Current; |
| Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); |
| //Check offsets and positions. |
| Assert.IsTrue(tve != null, "tve is null and it shouldn't be"); |
| System.String field = tve.Field; |
| if (field.Equals(testFields[0])) |
| { |
| //should have offsets |
| |
| Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is null and it shouldn't be"); |
| Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is null and it shouldn't be"); |
| } |
| else if (field.Equals(testFields[1])) |
| { |
| //should not have offsets |
| |
| Assert.IsTrue(tve.GetOffsets() == null, "tve.getOffsets() is not null and it shouldn't be"); |
| Assert.IsTrue(tve.GetPositions() == null, "tve.getPositions() is not null and it shouldn't be"); |
| } |
| } |
| } |
| |
| // test setDocumentNumber() |
| IndexReader ir = IndexReader.Open(dir, true); |
| DocNumAwareMapper docNumAwareMapper = new DocNumAwareMapper(); |
| Assert.AreEqual(- 1, docNumAwareMapper.GetDocumentNumber()); |
| |
| ir.GetTermFreqVector(0, docNumAwareMapper); |
| Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); |
| docNumAwareMapper.SetDocumentNumber(-1); |
| |
| ir.GetTermFreqVector(1, docNumAwareMapper); |
| Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber()); |
| docNumAwareMapper.SetDocumentNumber(-1); |
| |
| ir.GetTermFreqVector(0, "f1", docNumAwareMapper); |
| Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); |
| docNumAwareMapper.SetDocumentNumber(-1); |
| |
| ir.GetTermFreqVector(1, "f2", docNumAwareMapper); |
| Assert.AreEqual(1, docNumAwareMapper.GetDocumentNumber()); |
| docNumAwareMapper.SetDocumentNumber(-1); |
| |
| ir.GetTermFreqVector(0, "f1", docNumAwareMapper); |
| Assert.AreEqual(0, docNumAwareMapper.GetDocumentNumber()); |
| |
| ir.Close(); |
| } |
| |
| |
| /// <summary> Make sure exceptions and bad params are handled appropriately</summary> |
| [Test] |
| public virtual void TestBadParams() |
| { |
| var reader = new TermVectorsReader(dir, seg, fieldInfos); |
| Assert.IsTrue(reader != null); |
| //Bad document number, good field number |
| Assert.Throws<System.IO.IOException>(() => reader.Get(50, testFields[0])); |
| |
| reader = new TermVectorsReader(dir, seg, fieldInfos); |
| Assert.IsTrue(reader != null); |
| //Bad document number, no field |
| Assert.Throws<System.IO.IOException>(() => reader.Get(50)); |
| |
| reader = new TermVectorsReader(dir, seg, fieldInfos); |
| Assert.IsTrue(reader != null); |
| Assert.DoesNotThrow(() => |
| { |
| //good document number, bad field number |
| ITermFreqVector vector = reader.Get(0, "f50"); |
| Assert.IsTrue(vector == null); |
| }); |
| } |
| |
| |
| public class DocNumAwareMapper:TermVectorMapper |
| { |
| |
| public DocNumAwareMapper() |
| { |
| } |
| |
| private int documentNumber = - 1; |
| |
| public override void SetExpectations(System.String field, int numTerms, bool storeOffsets, bool storePositions) |
| { |
| if (documentNumber == - 1) |
| { |
| throw new System.SystemException("Documentnumber should be set at this point!"); |
| } |
| } |
| |
| public override void Map(System.String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) |
| { |
| if (documentNumber == - 1) |
| { |
| throw new System.SystemException("Documentnumber should be set at this point!"); |
| } |
| } |
| |
| public virtual int GetDocumentNumber() |
| { |
| return documentNumber; |
| } |
| |
| public override void SetDocumentNumber(int documentNumber) |
| { |
| this.documentNumber = documentNumber; |
| } |
| } |
| } |
| } |