/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Lucene.Net.Documents;
using Analyzer = Lucene.Net.Analysis.Analyzer;
using WhitespaceAnalyzer = Lucene.Net.Analysis.WhitespaceAnalyzer;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using Directory = Lucene.Net.Store.Directory;
using Similarity = Lucene.Net.Search.Similarity;
namespace Lucene.Net.Index
{
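/// <summary> Test helper that builds a representative set of fields, categorizes
/// them into lookup maps, and writes single-document segments for index tests.
/// </summary>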
class DocHelper
{
public const System.String FIELD_1_TEXT = "field one text";
public const System.String TEXT_FIELD_1_KEY = "textField1";
public static Field textField1;
public const System.String FIELD_2_TEXT = "field field field two text";
//Fields will be lexicographically sorted. So, the order is: field, text, two
public static readonly int[] FIELD_2_FREQS = new int[]{3, 1, 1};
public const System.String TEXT_FIELD_2_KEY = "textField2";
public static Field textField2;
public const System.String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
public const System.String TEXT_FIELD_3_KEY = "textField3";
public static Field textField3;
public const System.String KEYWORD_TEXT = "Keyword";
public const System.String KEYWORD_FIELD_KEY = "keyField";
public static Field keyField;
public const System.String NO_NORMS_TEXT = "omitNormsText";
public const System.String NO_NORMS_KEY = "omitNorms";
public static Field noNormsField;
public const System.String NO_TF_TEXT = "analyzed with no tf and positions";
public const System.String NO_TF_KEY = "omitTermFreqAndPositions";
public static Field noTFField;
public const System.String UNINDEXED_FIELD_TEXT = "unindexed field text";
public const System.String UNINDEXED_FIELD_KEY = "unIndField";
public static Field unIndField;
public const System.String UNSTORED_1_FIELD_TEXT = "unstored field text";
public const System.String UNSTORED_FIELD_1_KEY = "unStoredField1";
public static Field unStoredField1;
public const System.String UNSTORED_2_FIELD_TEXT = "unstored field text";
public const System.String UNSTORED_FIELD_2_KEY = "unStoredField2";
public static Field unStoredField2;
public const System.String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
public static byte[] LAZY_FIELD_BINARY_BYTES;
public static Field lazyFieldBinary;
public const System.String LAZY_FIELD_KEY = "lazyField";
public const System.String LAZY_FIELD_TEXT = "These are some field bytes";
public static Field lazyField;
public const System.String LARGE_LAZY_FIELD_KEY = "largeLazyField";
public static System.String LARGE_LAZY_FIELD_TEXT;
public static Field largeLazyField;
//From Issue 509
public const System.String FIELD_UTF1_TEXT = "field one \u4e00text";
public const System.String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
public static Field textUtfField1;
public const System.String FIELD_UTF2_TEXT = "field field field \u4e00two text";
//Fields will be lexicographically sorted. So, the order is: field, text, two
public static readonly int[] FIELD_UTF2_FREQS = new int[]{3, 1, 1};
public const System.String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
public static Field textUtfField2;
public static System.Collections.IDictionary nameValues = null;
// ordered list of all the fields...
// (the Java original suggests LinkedHashMap; the array itself preserves insertion order)
public static Field[] fields = null;
// Each map below is keyed by field name (String) and holds the IFieldable itself
public static System.Collections.IDictionary all = new System.Collections.Hashtable();
public static System.Collections.IDictionary indexed = new System.Collections.Hashtable();
public static System.Collections.IDictionary stored = new System.Collections.Hashtable();
public static System.Collections.IDictionary unstored = new System.Collections.Hashtable();
public static System.Collections.IDictionary unindexed = new System.Collections.Hashtable();
public static System.Collections.IDictionary termvector = new System.Collections.Hashtable();
public static System.Collections.IDictionary notermvector = new System.Collections.Hashtable();
public static System.Collections.IDictionary lazy = new System.Collections.Hashtable();
public static System.Collections.IDictionary noNorms = new System.Collections.Hashtable();
public static System.Collections.IDictionary noTf = new System.Collections.Hashtable();
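// Stores the given field in the map, keyed by its name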
private static void Add(System.Collections.IDictionary map, IFieldable field)
{
map[field.Name] = field;
}
/// <summary> Adds all the fields above to a document </summary>
/// <param name="doc">The document to populate
/// </param>
public static void SetupDoc(Document doc)
{
for (int i = 0; i < fields.Length; i++)
{
doc.Add(fields[i]);
}
}
/// <summary> Writes the document to the directory using a segment
/// named "test"; returns the SegmentInfo describing the new
/// segment
/// </summary>
/// <param name="dir">The directory to write to
/// </param>
/// <param name="doc">The document to write
/// </param>
/// <throws> IOException </throws>
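/// <example>
/// A minimal usage sketch (RAMDirectory is just one possible Directory):
/// <code>
/// Directory dir = new Lucene.Net.Store.RAMDirectory();
/// Document doc = new Document();
/// DocHelper.SetupDoc(doc);
/// SegmentInfo info = DocHelper.WriteDoc(dir, doc);
/// </code>
/// </example>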
public static SegmentInfo WriteDoc(Directory dir, Document doc)
{
return WriteDoc(dir, new WhitespaceAnalyzer(), Similarity.Default, doc);
}
/// <summary> Writes the document to the directory using the given analyzer
/// and Similarity implementation; returns the SegmentInfo
/// describing the new segment
/// </summary>
/// <param name="dir">The directory to write to
/// </param>
/// <param name="analyzer">The analyzer to tokenize the document with
/// </param>
/// <param name="similarity">The Similarity implementation for the writer to use
/// </param>
/// <param name="doc">The document to write
/// </param>
/// <throws> IOException </throws>
public static SegmentInfo WriteDoc(Directory dir, Analyzer analyzer, Similarity similarity, Document doc)
{
IndexWriter writer = new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.LIMITED);
writer.SetSimilarity(similarity);
//writer.setUseCompoundFile(false);
writer.AddDocument(doc);
writer.Commit();
SegmentInfo info = writer.NewestSegment();
writer.Close();
return info;
}
public static int NumFields(Document doc)
{
return doc.GetFields().Count;
}
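// Static initializer: creates every field, finishes the lazy/binary fields, and categorizes them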
static DocHelper()
{
textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED);
textField3.OmitNorms = true;
keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED);
noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, Field.Store.YES, Field.Index.ANALYZED);
noTFField.OmitTermFreqAndPositions = true;
unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, Field.Store.YES, Field.Index.NO);
unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
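// The last two slots are placeholders; lazyFieldBinary and largeLazyField are built and patched in below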
fields = new Field[] { textField1, textField2, textField3, keyField, noNormsField, noTFField, unIndField, unStoredField1, unStoredField2, textUtfField1, textUtfField2, lazyField, lazyFieldBinary, largeLazyField };
//Initialize the large Lazy Field
System.Text.StringBuilder buffer = new System.Text.StringBuilder();
for (int i = 0; i < 10000; i++)
{
buffer.Append("Lazily loading lengths of language in lieu of laughing ");
}
// Encoding.GetBytes does not throw IOException; the try/catch from the Java port is unnecessary
LAZY_FIELD_BINARY_BYTES = System.Text.Encoding.UTF8.GetBytes("These are some binary field bytes");
lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
fields[fields.Length - 2] = lazyFieldBinary;
LARGE_LAZY_FIELD_TEXT = buffer.ToString();
largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED);
fields[fields.Length - 1] = largeLazyField;
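// Sort each field into the categorization maps according to its flags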
for (int i = 0; i < fields.Length; i++)
{
IFieldable f = fields[i];
Add(all, f);
if (f.IsIndexed)
Add(indexed, f);
else
Add(unindexed, f);
if (f.IsTermVectorStored)
Add(termvector, f);
if (f.IsIndexed && !f.IsTermVectorStored)
Add(notermvector, f);
if (f.IsStored)
Add(stored, f);
else
Add(unstored, f);
if (f.OmitNorms)
Add(noNorms, f);
if (f.OmitTermFreqAndPositions)
Add(noTf, f);
if (f.IsLazy)
Add(lazy, f);
}
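// Map each field name to its expected value so tests can verify stored/retrieved content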
nameValues = new System.Collections.Hashtable();
nameValues[TEXT_FIELD_1_KEY] = FIELD_1_TEXT;
nameValues[TEXT_FIELD_2_KEY] = FIELD_2_TEXT;
nameValues[TEXT_FIELD_3_KEY] = FIELD_3_TEXT;
nameValues[KEYWORD_FIELD_KEY] = KEYWORD_TEXT;
nameValues[NO_NORMS_KEY] = NO_NORMS_TEXT;
nameValues[NO_TF_KEY] = NO_TF_TEXT;
nameValues[UNINDEXED_FIELD_KEY] = UNINDEXED_FIELD_TEXT;
nameValues[UNSTORED_FIELD_1_KEY] = UNSTORED_1_FIELD_TEXT;
nameValues[UNSTORED_FIELD_2_KEY] = UNSTORED_2_FIELD_TEXT;
nameValues[LAZY_FIELD_KEY] = LAZY_FIELD_TEXT;
nameValues[LAZY_FIELD_BINARY_KEY] = LAZY_FIELD_BINARY_BYTES;
nameValues[LARGE_LAZY_FIELD_KEY] = LARGE_LAZY_FIELD_TEXT;
nameValues[TEXT_FIELD_UTF1_KEY] = FIELD_UTF1_TEXT;
nameValues[TEXT_FIELD_UTF2_KEY] = FIELD_UTF2_TEXT;
}
}
}