lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/FSTDictionary.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.lucene.codecs.uniformsplit;

 import java.io.IOException;

 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteBuffersDataOutput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.fst.BytesRefFSTEnum;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.OffHeapFSTStore;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;

 /**
  * Immutable stateless {@link FST}-based index dictionary kept in memory.
  * <p>
  * Use {@link IndexDictionary.Builder} to build the {@link IndexDictionary}.
  * <p>
  * Create a stateful {@link IndexDictionary.Browser} to seek a term in this
  * {@link IndexDictionary} and get its corresponding block file pointer to
  * the terms block file.
  * <p>
  * Its greatest advantage is to be very compact in memory thanks to both
  * the compaction of the {@link FST} as a byte array, and the incremental
  * encoding of the leaves block pointer values, which are long integers in
  * increasing order, with {@link PositiveIntOutputs}.<br>
  * With a compact dictionary in memory we can increase the number of blocks.
  * This allows us to reduce the average block size, which means faster scan
  * inside a block.
  *
  * @lucene.experimental
  */
 public class FSTDictionary implements IndexDictionary {

   private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(FSTDictionary.class);

   protected final FST<Long> fst;

   protected FSTDictionary(FST<Long> fst) {
     this.fst = fst;
   }

   @Override
   public long ramBytesUsed() {
     return BASE_RAM_USAGE + fst.ramBytesUsed();
   }

   @Override
   public void write(DataOutput output, BlockEncoder blockEncoder) throws IOException {
     if (blockEncoder == null) {
       fst.save(output, output);
     } else {
       ByteBuffersDataOutput bytesDataOutput = ByteBuffersDataOutput.newResettableInstance();
       fst.save(bytesDataOutput, bytesDataOutput);
       BlockEncoder.WritableBytes encodedBytes = blockEncoder.encode(bytesDataOutput.toDataInput(), bytesDataOutput.size());
       output.writeVLong(encodedBytes.size());
       encodedBytes.writeTo(output);
     }
   }

   /**
    * Reads a {@link FSTDictionary} from the provided input.
    * @param blockDecoder The {@link BlockDecoder} to use for specific decoding; or null if none.
    */
   protected static FSTDictionary read(DataInput input, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
     DataInput fstDataInput;
     if (blockDecoder == null) {
       fstDataInput = input;
     } else {
       long numBytes = input.readVLong();
       BytesRef decodedBytes = blockDecoder.decode(input, numBytes);
       fstDataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
       // OffHeapFSTStore.init() requires a DataInput which is an instance of IndexInput.
       // When the block is decoded we must load the FST on heap.
       isFSTOnHeap = true;
     }
     PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
     FST<Long> fst = isFSTOnHeap ? new FST<>(fstDataInput, fstDataInput, fstOutputs)
         : new FST<>(fstDataInput, fstDataInput, fstOutputs, new OffHeapFSTStore());
     return new FSTDictionary(fst);
   }

   @Override
   public Browser browser() {
     return new Browser();
   }

   /**
    * Stateful {@link Browser} to seek a term in this {@link FSTDictionary}
    * and get its corresponding block file pointer in the block file.
    */
   protected class Browser implements IndexDictionary.Browser {

     protected final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);

     @Override
     public long seekBlock(BytesRef term) throws IOException {
       BytesRefFSTEnum.InputOutput<Long> seekFloor = fstEnum.seekFloor(term);
       return seekFloor == null ? -1 : seekFloor.output;
     }
   }

   /**
    * Provides stateful {@link Browser} to seek in the {@link FSTDictionary}.
    *
    * @lucene.experimental
    */
   public static class BrowserSupplier implements IndexDictionary.BrowserSupplier {

     protected final IndexInput dictionaryInput;
     protected final BlockDecoder blockDecoder;
     protected final boolean isFSTOnHeap;

     /**
      * Lazy loaded immutable index dictionary FST.
      * The FST is either kept off-heap, or hold in RAM on-heap.
      */
     protected IndexDictionary dictionary;

     public BrowserSupplier(IndexInput dictionaryInput, long dictionaryStartFP, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
       this.dictionaryInput = dictionaryInput.clone();
       this.dictionaryInput.seek(dictionaryStartFP);
       this.blockDecoder = blockDecoder;
       this.isFSTOnHeap = isFSTOnHeap;
     }

     @Override
     public IndexDictionary.Browser get() throws IOException {
       // This double-check idiom does not require the dictionary to be volatile
       // because it is immutable. See section "Double-Checked Locking Immutable Objects"
       // of https://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html.
       if (dictionary == null) {
         synchronized (this) {
           if (dictionary == null) {
             dictionary = read(dictionaryInput, blockDecoder, isFSTOnHeap);
           }
         }
       }
       return dictionary.browser();
     }

     @Override
     public long ramBytesUsed() {
       return dictionary == null ? 0 : dictionary.ramBytesUsed();
     }
   }

   /**
    * Builds an immutable {@link FSTDictionary}.
    *
    * @lucene.experimental
    */
   public static class Builder implements IndexDictionary.Builder {

     protected final org.apache.lucene.util.fst.Builder<Long> fstBuilder;
     protected final IntsRefBuilder scratchInts;

     public Builder() {
       PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
       fstBuilder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
       scratchInts = new IntsRefBuilder();
     }

     @Override
     public void add(BytesRef blockKey, long blockFilePointer) throws IOException {
       fstBuilder.add(Util.toIntsRef(blockKey, scratchInts), blockFilePointer);
     }

     @Override
     public FSTDictionary build() throws IOException {
       return new FSTDictionary(fstBuilder.finish());
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.lucene.codecs.uniformsplit;

	import java.io.IOException;

	import org.apache.lucene.store.ByteArrayDataInput;
	import org.apache.lucene.store.ByteBuffersDataOutput;
	import org.apache.lucene.store.DataInput;
	import org.apache.lucene.store.DataOutput;
	import org.apache.lucene.store.IndexInput;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.IntsRefBuilder;
	import org.apache.lucene.util.RamUsageEstimator;
	import org.apache.lucene.util.fst.BytesRefFSTEnum;
	import org.apache.lucene.util.fst.FST;
	import org.apache.lucene.util.fst.OffHeapFSTStore;
	import org.apache.lucene.util.fst.PositiveIntOutputs;
	import org.apache.lucene.util.fst.Util;

	/**
	* Immutable stateless {@link FST}-based index dictionary kept in memory.
	* <p>
	* Use {@link IndexDictionary.Builder} to build the {@link IndexDictionary}.
	* <p>
	* Create a stateful {@link IndexDictionary.Browser} to seek a term in this
	* {@link IndexDictionary} and get its corresponding block file pointer to
	* the terms block file.
	* <p>
	* Its greatest advantage is to be very compact in memory thanks to both
	* the compaction of the {@link FST} as a byte array, and the incremental
	* encoding of the leaves block pointer values, which are long integers in
	* increasing order, with {@link PositiveIntOutputs}.<br>
	* With a compact dictionary in memory we can increase the number of blocks.
	* This allows us to reduce the average block size, which means faster scan
	* inside a block.
	*
	* @lucene.experimental
	*/
	public class FSTDictionary implements IndexDictionary {

	private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(FSTDictionary.class);

	protected final FST<Long> fst;

	protected FSTDictionary(FST<Long> fst) {
	this.fst = fst;
	}

	@Override
	public long ramBytesUsed() {
	return BASE_RAM_USAGE + fst.ramBytesUsed();
	}

	@Override
	public void write(DataOutput output, BlockEncoder blockEncoder) throws IOException {
	if (blockEncoder == null) {
	fst.save(output, output);
	} else {
	ByteBuffersDataOutput bytesDataOutput = ByteBuffersDataOutput.newResettableInstance();
	fst.save(bytesDataOutput, bytesDataOutput);
	BlockEncoder.WritableBytes encodedBytes = blockEncoder.encode(bytesDataOutput.toDataInput(), bytesDataOutput.size());
	output.writeVLong(encodedBytes.size());
	encodedBytes.writeTo(output);
	}
	}

	/**
	* Reads a {@link FSTDictionary} from the provided input.
	* @param blockDecoder The {@link BlockDecoder} to use for specific decoding; or null if none.
	*/
	protected static FSTDictionary read(DataInput input, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
	DataInput fstDataInput;
	if (blockDecoder == null) {
	fstDataInput = input;
	} else {
	long numBytes = input.readVLong();
	BytesRef decodedBytes = blockDecoder.decode(input, numBytes);
	fstDataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
	// OffHeapFSTStore.init() requires a DataInput which is an instance of IndexInput.
	// When the block is decoded we must load the FST on heap.
	isFSTOnHeap = true;
	}
	PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
	FST<Long> fst = isFSTOnHeap ? new FST<>(fstDataInput, fstDataInput, fstOutputs)
	: new FST<>(fstDataInput, fstDataInput, fstOutputs, new OffHeapFSTStore());
	return new FSTDictionary(fst);
	}

	@Override
	public Browser browser() {
	return new Browser();
	}

	/**
	* Stateful {@link Browser} to seek a term in this {@link FSTDictionary}
	* and get its corresponding block file pointer in the block file.
	*/
	protected class Browser implements IndexDictionary.Browser {

	protected final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);

	@Override
	public long seekBlock(BytesRef term) throws IOException {
	BytesRefFSTEnum.InputOutput<Long> seekFloor = fstEnum.seekFloor(term);
	return seekFloor == null ? -1 : seekFloor.output;
	}
	}

	/**
	* Provides stateful {@link Browser} to seek in the {@link FSTDictionary}.
	*
	* @lucene.experimental
	*/
	public static class BrowserSupplier implements IndexDictionary.BrowserSupplier {

	protected final IndexInput dictionaryInput;
	protected final BlockDecoder blockDecoder;
	protected final boolean isFSTOnHeap;

	/**
	* Lazy loaded immutable index dictionary FST.
	* The FST is either kept off-heap, or hold in RAM on-heap.
	*/
	protected IndexDictionary dictionary;

	public BrowserSupplier(IndexInput dictionaryInput, long dictionaryStartFP, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
	this.dictionaryInput = dictionaryInput.clone();
	this.dictionaryInput.seek(dictionaryStartFP);
	this.blockDecoder = blockDecoder;
	this.isFSTOnHeap = isFSTOnHeap;
	}

	@Override
	public IndexDictionary.Browser get() throws IOException {
	// This double-check idiom does not require the dictionary to be volatile
	// because it is immutable. See section "Double-Checked Locking Immutable Objects"
	// of https://www.cs.umd.edu/~pugh/java/memoryModel/DoubleCheckedLocking.html.
	if (dictionary == null) {
	synchronized (this) {
	if (dictionary == null) {
	dictionary = read(dictionaryInput, blockDecoder, isFSTOnHeap);
	}
	}
	}
	return dictionary.browser();
	}

	@Override
	public long ramBytesUsed() {
	return dictionary == null ? 0 : dictionary.ramBytesUsed();
	}
	}

	/**
	* Builds an immutable {@link FSTDictionary}.
	*
	* @lucene.experimental
	*/
	public static class Builder implements IndexDictionary.Builder {

	protected final org.apache.lucene.util.fst.Builder<Long> fstBuilder;
	protected final IntsRefBuilder scratchInts;

	public Builder() {
	PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
	fstBuilder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
	scratchInts = new IntsRefBuilder();
	}

	@Override
	public void add(BytesRef blockKey, long blockFilePointer) throws IOException {
	fstBuilder.add(Util.toIntsRef(blockKey, scratchInts), blockFilePointer);
	}

	@Override
	public FSTDictionary build() throws IOException {
	return new FSTDictionary(fstBuilder.finish());
	}
	}
	}