| using System; |
| |
| namespace Lucene.Net.Codecs.Lucene40 |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// Lucene 4.0 Field Infos format. |
| /// <para/> |
| /// <para>Field names are stored in the field info file, with suffix <tt>.fnm</tt>.</para> |
| /// <para>FieldInfos (.fnm) --> Header,FieldsCount, &lt;FieldName,FieldNumber, |
| /// FieldBits,DocValuesBits,Attributes&gt; <sup>FieldsCount</sup></para> |
| /// <para>Data types: |
| /// <list type="bullet"> |
| /// <item><description>Header --> CodecHeader (<see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/>) </description></item> |
| /// <item><description>FieldsCount --> VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> |
| /// <item><description>FieldName --> String (<see cref="Store.DataOutput.WriteString(string)"/>) </description></item> |
| /// <item><description>FieldBits, DocValuesBits --> Byte (<see cref="Store.DataOutput.WriteByte(byte)"/>) </description></item> |
| /// <item><description>FieldNumber --> VInt (<see cref="Store.DataOutput.WriteVInt32(int)"/>) </description></item> |
| /// <item><description>Attributes --> IDictionary&lt;String,String&gt; (<see cref="Store.DataOutput.WriteStringStringMap(System.Collections.Generic.IDictionary{string, string})"/>) </description></item> |
| /// </list> |
| /// </para> |
| /// Field Descriptions: |
| /// <list type="bullet"> |
| /// <item><description>FieldsCount: the number of fields in this file.</description></item> |
| /// <item><description>FieldName: name of the field as a UTF-8 String.</description></item> |
| /// <item><description>FieldNumber: the field's number. Note that unlike previous versions of |
| /// Lucene, the fields are not numbered implicitly by their order in the |
| /// file, instead explicitly.</description></item> |
| /// <item><description>FieldBits: a byte containing field options. |
| /// <list type="bullet"> |
| /// <item><description>The low-order bit is one for indexed fields, and zero for non-indexed |
| /// fields.</description></item> |
| /// <item><description>The second lowest-order bit is one for fields that have term vectors |
| /// stored, and zero for fields without term vectors.</description></item> |
| /// <item><description>If the third lowest-order bit is set (0x4), offsets are stored into |
| /// the postings list in addition to positions.</description></item> |
| /// <item><description>Fourth bit is unused.</description></item> |
| /// <item><description>If the fifth lowest-order bit is set (0x10), norms are omitted for the |
| /// indexed field.</description></item> |
| /// <item><description>If the sixth lowest-order bit is set (0x20), payloads are stored for the |
| /// indexed field.</description></item> |
| /// <item><description>If the seventh lowest-order bit is set (0x40), term frequencies and |
| /// positions are omitted for the indexed field.</description></item> |
| /// <item><description>If the eighth lowest-order bit is set (0x80), positions are omitted for the |
| /// indexed field.</description></item> |
| /// </list> |
| /// </description></item> |
| /// <item><description>DocValuesBits: a byte containing per-document value types. The type is |
| /// recorded as two four-bit integers, with the high-order bits representing |
| /// <c>norms</c> options, and the low-order bits representing |
| /// <see cref="Index.DocValues"/> options. Each four-bit integer can be decoded as such: |
| /// <list type="bullet"> |
| /// <item><description>0: no DocValues for this field.</description></item> |
| /// <item><description>1: variable-width signed integers. (<see cref="LegacyDocValuesType.VAR_INTS"/>)</description></item> |
| /// <item><description>2: 32-bit floating point values. (<see cref="LegacyDocValuesType.FLOAT_32"/>)</description></item> |
| /// <item><description>3: 64-bit floating point values. (<see cref="LegacyDocValuesType.FLOAT_64"/>)</description></item> |
| /// <item><description>4: fixed-length byte array values. (<see cref="LegacyDocValuesType.BYTES_FIXED_STRAIGHT"/>)</description></item> |
| /// <item><description>5: fixed-length dereferenced byte array values. (<see cref="LegacyDocValuesType.BYTES_FIXED_DEREF"/>)</description></item> |
| /// <item><description>6: variable-length byte array values. (<see cref="LegacyDocValuesType.BYTES_VAR_STRAIGHT"/>)</description></item> |
| /// <item><description>7: variable-length dereferenced byte array values. (<see cref="LegacyDocValuesType.BYTES_VAR_DEREF"/>)</description></item> |
| /// <item><description>8: 16-bit signed integers. (<see cref="LegacyDocValuesType.FIXED_INTS_16"/>)</description></item> |
| /// <item><description>9: 32-bit signed integers. (<see cref="LegacyDocValuesType.FIXED_INTS_32"/>)</description></item> |
| /// <item><description>10: 64-bit signed integers. (<see cref="LegacyDocValuesType.FIXED_INTS_64"/>)</description></item> |
| /// <item><description>11: 8-bit signed integers. (<see cref="LegacyDocValuesType.FIXED_INTS_8"/>)</description></item> |
| /// <item><description>12: fixed-length sorted byte array values. (<see cref="LegacyDocValuesType.BYTES_FIXED_SORTED"/>)</description></item> |
| /// <item><description>13: variable-length sorted byte array values. (<see cref="LegacyDocValuesType.BYTES_VAR_SORTED"/>)</description></item> |
| /// </list> |
| /// </description></item> |
| /// <item><description>Attributes: a key-value map of codec-private attributes.</description></item> |
| /// </list> |
| /// |
| /// @lucene.experimental |
| /// </summary> |
| [Obsolete("Only for reading old 4.0 and 4.1 segments")] |
| public class Lucene40FieldInfosFormat : FieldInfosFormat |
| { |
| /// <summary> |
| /// File extension (<c>fnm</c>) of the field infos file. </summary> |
| internal const string FIELD_INFOS_EXTENSION = "fnm"; |
| |
| /// <summary> |
| /// Codec name constant for this format. </summary> |
| internal const string CODEC_NAME = "Lucene40FieldInfos"; |
| |
| /// <summary> |
| /// First format version. </summary> |
| internal const int FORMAT_START = 0; |
| |
| /// <summary> |
| /// Current format version; identical to <see cref="FORMAT_START"/>. </summary> |
| internal const int FORMAT_CURRENT = FORMAT_START; |
| |
| // FieldBits flags. Each constant is a single bit of the FieldBits byte; |
| // bit 0x8 has no constant because the fourth bit is unused. |
| |
| /// <summary> |
| /// FieldBits flag (0x1): the field is indexed. </summary> |
| internal const sbyte IS_INDEXED = 0x1; |
| |
| /// <summary> |
| /// FieldBits flag (0x2): term vectors are stored for the field. </summary> |
| internal const sbyte STORE_TERMVECTOR = 0x2; |
| |
| /// <summary> |
| /// FieldBits flag (0x4): offsets are stored in the postings list. </summary> |
| internal const sbyte STORE_OFFSETS_IN_POSTINGS = 0x4; |
| |
| /// <summary> |
| /// FieldBits flag (0x10): norms are omitted for the indexed field. </summary> |
| internal const sbyte OMIT_NORMS = 0x10; |
| |
| /// <summary> |
| /// FieldBits flag (0x20): payloads are stored for the indexed field. </summary> |
| internal const sbyte STORE_PAYLOADS = 0x20; |
| |
| /// <summary> |
| /// FieldBits flag (0x40): term frequencies and positions are omitted. </summary> |
| internal const sbyte OMIT_TERM_FREQ_AND_POSITIONS = 0x40; |
| |
| /// <summary> |
| /// FieldBits flag (0x80): positions are omitted. </summary> |
| // 0x80 is outside the positive range of sbyte, so the bit pattern is |
| // reinterpreted as a signed byte; the resulting constant value is -128. |
| internal const sbyte OMIT_POSITIONS = unchecked((sbyte)0x80); |
| |
| // Single reader instance, created with the format and returned on every access. |
| private readonly FieldInfosReader infosReader = new Lucene40FieldInfosReader(); |
| |
| /// <summary> |
| /// Sole constructor. </summary> |
| public Lucene40FieldInfosFormat() |
| { |
| } |
| |
| /// <summary> |
| /// Gets the <see cref="Lucene40FieldInfosReader"/> held by this format. </summary> |
| public override FieldInfosReader FieldInfosReader |
| { |
| get { return infosReader; } |
| } |
| |
| /// <summary> |
| /// Always throws: this format is read-only and provides no writer. </summary> |
| public override FieldInfosWriter FieldInfosWriter |
| { |
| get { throw UnsupportedOperationException.Create("this codec can only be used for reading"); } |
| } |
| } |
| } |