src/Lucene.Net.Codecs/SimpleText/SimpleTextDocValuesFormat.cs - lucenenet - Git at Google

 namespace Lucene.Net.Codecs.SimpleText
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     using SegmentReadState = Index.SegmentReadState;
     using SegmentWriteState = Index.SegmentWriteState;

     /// <summary>
     /// Plain text doc values format.
     /// <para>
     /// <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
     /// </para>
     /// <para>
     /// The .dat file contains the data.
     /// For numbers this is a "fixed-width" file, for example a single byte range:
     /// <code>
     ///  field myField
     ///    type NUMERIC
     ///    minvalue 0
     ///    pattern 000
     ///  005
     ///  T
     ///  234
     ///  T
     ///  123
     ///  T
     ///  ...
     /// </code>
     /// So a document's value (delta encoded from minvalue) can be retrieved by
     /// seeking to startOffset + (1+pattern.length()+2)*docid. The extra 1 is the newline.
     /// The extra 2 is another newline and 'T' or 'F': true if the value is real, false if missing.
     ///
     /// for bytes this is also a "fixed-width" file, for example:
     /// <code>
     ///  field myField
     ///    type BINARY
     ///    maxlength 6
     ///    pattern 0
     ///  length 6
     ///  foobar[space][space]
     ///  T
     ///  length 3
     ///  baz[space][space][space][space][space]
     ///  T
     ///  ...
     /// </code>
     /// So a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength+2)*doc
     /// the extra 9 is 2 newlines, plus "length " itself.
     /// The extra 2 is another newline and 'T' or 'F': true if the value is real, false if missing.
     ///
     /// For sorted bytes this is a fixed-width file, for example:
     /// <code>
     ///  field myField
     ///    type SORTED
     ///    numvalues 10
     ///    maxLength 8
     ///    pattern 0
     ///    ordpattern 00
     ///  length 6
     ///  foobar[space][space]
     ///  length 3
     ///  baz[space][space][space][space][space]
     ///  ...
     ///  03
     ///  06
     ///  01
     ///  10
     ///  ...
     /// </code>
     /// So the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
     /// A document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
     /// an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
     ///
     /// For sorted set this is a fixed-width file very similar to the SORTED case, for example:
     /// <code>
     ///  field myField
     ///    type SORTED_SET
     ///    numvalues 10
     ///    maxLength 8
     ///    pattern 0
     ///    ordpattern XXXXX
     ///  length 6
     ///  foobar[space][space]
     ///  length 3
     ///  baz[space][space][space][space][space]
     ///  ...
     ///  0,3,5
     ///  1,2
     ///
     ///  10
     ///  ...
     /// </code>
     /// So the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
     /// A document's ord list can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
     /// this is a comma-separated list, and its padded with spaces to be fixed width. so trim() and split() it.
     /// and beware the empty string!
     /// An ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
     /// <para/>
     /// The reader can just scan this file when it opens, skipping over the data blocks
     /// and saving the offset/etc for each field.
     /// <para/>
     /// @lucene.experimental
     /// </para>
     /// </summary>
     [DocValuesFormatName("SimpleText")] // LUCENENET specific - using DocValuesFormatName attribute to ensure the default name passed from subclasses is the same as this class name
     public class SimpleTextDocValuesFormat : DocValuesFormat
     {
         public SimpleTextDocValuesFormat()
             : base()
         {
         }

         public override DocValuesConsumer FieldsConsumer(SegmentWriteState state)
         {
             return new SimpleTextDocValuesWriter(state, "dat");
         }

         public override DocValuesProducer FieldsProducer(SegmentReadState state)
         {
             return new SimpleTextDocValuesReader(state, "dat");
         }
     }
 }
	namespace Lucene.Net.Codecs.SimpleText
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	using SegmentReadState = Index.SegmentReadState;
	using SegmentWriteState = Index.SegmentWriteState;

	/// <summary>
	/// Plain text doc values format.
	/// <para>
	/// <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
	/// </para>
	/// <para>
	/// The .dat file contains the data.
	/// For numbers this is a "fixed-width" file, for example a single byte range:
	/// <code>
	/// field myField
	/// type NUMERIC
	/// minvalue 0
	/// pattern 000
	/// 005
	/// T
	/// 234
	/// T
	/// 123
	/// T
	/// ...
	/// </code>
	/// So a document's value (delta encoded from minvalue) can be retrieved by
	/// seeking to startOffset + (1+pattern.length()+2)*docid. The extra 1 is the newline.
	/// The extra 2 is another newline and 'T' or 'F': true if the value is real, false if missing.
	///
	/// for bytes this is also a "fixed-width" file, for example:
	/// <code>
	/// field myField
	/// type BINARY
	/// maxlength 6
	/// pattern 0
	/// length 6
	/// foobar[space][space]
	/// T
	/// length 3
	/// baz[space][space][space][space][space]
	/// T
	/// ...
	/// </code>
	/// So a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength+2)*doc
	/// the extra 9 is 2 newlines, plus "length " itself.
	/// The extra 2 is another newline and 'T' or 'F': true if the value is real, false if missing.
	///
	/// For sorted bytes this is a fixed-width file, for example:
	/// <code>
	/// field myField
	/// type SORTED
	/// numvalues 10
	/// maxLength 8
	/// pattern 0
	/// ordpattern 00
	/// length 6
	/// foobar[space][space]
	/// length 3
	/// baz[space][space][space][space][space]
	/// ...
	/// 03
	/// 06
	/// 01
	/// 10
	/// ...
	/// </code>
	/// So the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
	/// A document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
	/// an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
	///
	/// For sorted set this is a fixed-width file very similar to the SORTED case, for example:
	/// <code>
	/// field myField
	/// type SORTED_SET
	/// numvalues 10
	/// maxLength 8
	/// pattern 0
	/// ordpattern XXXXX
	/// length 6
	/// foobar[space][space]
	/// length 3
	/// baz[space][space][space][space][space]
	/// ...
	/// 0,3,5
	/// 1,2
	///
	/// 10
	/// ...
	/// </code>
	/// So the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
	/// A document's ord list can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
	/// this is a comma-separated list, and its padded with spaces to be fixed width. so trim() and split() it.
	/// and beware the empty string!
	/// An ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord
	/// <para/>
	/// The reader can just scan this file when it opens, skipping over the data blocks
	/// and saving the offset/etc for each field.
	/// <para/>
	/// @lucene.experimental
	/// </para>
	/// </summary>
	[DocValuesFormatName("SimpleText")] // LUCENENET specific - using DocValuesFormatName attribute to ensure the default name passed from subclasses is the same as this class name
	public class SimpleTextDocValuesFormat : DocValuesFormat
	{
	public SimpleTextDocValuesFormat()
	: base()
	{
	}

	public override DocValuesConsumer FieldsConsumer(SegmentWriteState state)
	{
	return new SimpleTextDocValuesWriter(state, "dat");
	}

	public override DocValuesProducer FieldsProducer(SegmentReadState state)
	{
	return new SimpleTextDocValuesReader(state, "dat");
	}
	}
	}