| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.codecs.blocktreeords; |
| |
| import java.io.IOException; |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.automaton.Transition; |
| import org.apache.lucene.util.fst.FST; |
| |
// TODO: can we share this with the frame in STE?

/**
 * One stack frame used by {@link OrdsIntersectTermsEnum} while intersecting a compiled automaton
 * with the ords block-tree terms dictionary. Each frame corresponds to a single on-disk block of
 * entries (terms and/or sub-blocks) and holds both the decode state for that block (suffixes,
 * stats, metadata readers) and the automaton state reached at this prefix depth.
 */
final class OrdsIntersectTermsEnumFrame {
  // Depth of this frame in the enum's frame stack (0 = root frame).
  final int ord;

  // File pointer where this block's data starts (advanced across floor sub-blocks).
  long fp;
  // Original file pointer for this frame; floor handling moves fp forward, rewind restores this.
  long fpOrig;
  // File pointer just past this block's data; only set when this is a non-last floor sub-block.
  long fpEnd;
  // Absolute file pointer of the most recently decoded sub-block entry (see nextNonLeaf).
  long lastSubFP;

  // State in automaton
  int state;

  // How many entries' metadata (stats + postings metadata) we have decoded so far in this block.
  int metaDataUpto;

  // Raw suffix bytes for the current block, grown on demand, read via suffixesReader.
  byte[] suffixBytes = new byte[128];
  final ByteArrayDataInput suffixesReader = new ByteArrayDataInput();

  // Raw docFreq/totalTermFreq stats for the current block, read via statsReader.
  byte[] statBytes = new byte[64];
  final ByteArrayDataInput statsReader = new ByteArrayDataInput();

  // Floor (index) data for this block: floor-block count, labels, fp deltas and term-ord deltas.
  byte[] floorData = new byte[32];
  final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();

  // Length of prefix shared by all terms in this block
  int prefix;

  // Number of entries (term or sub-block) in this block
  int entCount;

  // Which term we will next read
  int nextEnt;

  // Starting termOrd for this frame, used to reset termOrd in rewind()
  long termOrdOrig;

  // 1 + ordinal of the current term
  long termOrd;

  // True if this block is either not a floor block,
  // or, it's the last sub-block of a floor block
  boolean isLastInFloor;

  // True if all entries are terms
  boolean isLeafBlock;

  // Remaining floor sub-blocks after the current one, and the label where the next one starts
  // (256 means there is no next floor block).
  int numFollowFloorBlocks;
  int nextFloorLabel;

  // Current transition of the automaton state, plus cached iteration bounds over its transitions.
  Transition transition = new Transition();
  int curTransitionMax;
  int transitionIndex;
  int transitionCount;

  // FST index arc that led to this block (may carry part of the cumulative output).
  FST.Arc<Output> arc;

  // Per-term state handed to the postings reader; reused across terms of this block.
  final BlockTermState termState;

  // metadata buffer, grown on demand, read via bytesReader
  public byte[] bytes;
  ByteArrayDataInput bytesReader;

  // Cumulative output so far
  Output outputPrefix;

  // Position and length of the current entry's suffix within suffixBytes.
  int startBytePos;
  int suffix;

  private final OrdsIntersectTermsEnum ite;

  /**
   * Creates a frame at the given stack depth, allocating a reusable {@link BlockTermState} from
   * the postings reader. totalTermFreq is initialized to -1 (i.e. not yet decoded).
   */
  public OrdsIntersectTermsEnumFrame(OrdsIntersectTermsEnum ite, int ord) throws IOException {
    this.ite = ite;
    this.ord = ord;
    this.termState = ite.fr.parent.postingsReader.newTermState();
    this.termState.totalTermFreq = -1;
  }

  /**
   * Advances fp past floor sub-blocks that cannot contain a matching label (those whose starting
   * label is below the current transition's min), then loads the resulting block.
   */
  void loadNextFloorBlock() throws IOException {
    assert numFollowFloorBlocks > 0;
    // if (DEBUG) System.out.println("  loadNextFloorBlock trans=" + transitions[transitionIndex]);

    do {
      // Floor fp delta is left-shifted by the writer; low bit carries a flag we don't need here.
      fp = fpOrig + (floorDataReader.readVLong() >>> 1);
      numFollowFloorBlocks--;
      // if (DEBUG) System.out.println("    skip floor block2!  nextFloorLabel=" + (char)
      // nextFloorLabel + " vs target=" + (char) transitions[transitionIndex].getMin() + " newFP=" +
      // fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
      if (numFollowFloorBlocks != 0) {
        nextFloorLabel = floorDataReader.readByte() & 0xff;
        // Each floor entry also records how many terms the skipped sub-block contained:
        termOrd += floorDataReader.readVLong();
      } else {
        // No more floor blocks; 256 is above any byte label so no further skipping occurs.
        nextFloorLabel = 256;
      }
      // if (DEBUG) System.out.println("    nextFloorLabel=" + (char) nextFloorLabel);
    } while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min);

    // Floor data was already consumed above, so load with no output:
    load(null);
  }

  /**
   * Sets the automaton state for this frame, resets transition iteration, and caches the first
   * transition's max label (or -1 when the state has no outgoing transitions).
   */
  public void setState(int state) {
    this.state = state;
    transitionIndex = 0;
    transitionCount = ite.compiledAutomaton.automaton.getNumTransitions(state);
    if (transitionCount != 0) {
      ite.compiledAutomaton.automaton.initTransition(state, transition);
      ite.compiledAutomaton.automaton.getNextTransition(transition);
      curTransitionMax = transition.max;
    } else {
      curTransitionMax = -1;
    }
  }

  /**
   * Reads this block's header, suffixes, stats and metadata from the terms dict at fp. If {@code
   * output} carries floor data (and the automaton state has transitions), first decodes the floor
   * structure and possibly skips ahead to the first floor sub-block that can match.
   */
  void load(Output output) throws IOException {

    // if (DEBUG) System.out.println("    load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData="
    // + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state="
    // + state));

    if (output != null && output.bytes != null && transitionCount != 0) {
      BytesRef frameIndexData = output.bytes;

      // Floor frame
      if (floorData.length < frameIndexData.length) {
        this.floorData = new byte[ArrayUtil.oversize(frameIndexData.length, 1)];
      }
      System.arraycopy(
          frameIndexData.bytes, frameIndexData.offset, floorData, 0, frameIndexData.length);
      floorDataReader.reset(floorData, 0, frameIndexData.length);
      final long code = floorDataReader.readVLong();
      if ((code & OrdsBlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0) {
        numFollowFloorBlocks = floorDataReader.readVInt();
        nextFloorLabel = floorDataReader.readByte() & 0xff;

        termOrd = termOrdOrig + floorDataReader.readVLong();

        // if (DEBUG) System.out.println("    numFollowFloorBlocks=" + numFollowFloorBlocks + "
        // nextFloorLabel=" + nextFloorLabel);

        // If current state is accept, we must process
        // first block in case it has empty suffix:
        if (!ite.runAutomaton.isAccept(state)) {
          // Maybe skip floor blocks:
          assert transitionIndex == 0 : "transitionIndex=" + transitionIndex;
          while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
            fp = fpOrig + (floorDataReader.readVLong() >>> 1);
            numFollowFloorBlocks--;
            // if (DEBUG) System.out.println("    skip floor block!  nextFloorLabel=" + (char)
            // nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + "
            // numFollowFloorBlocks=" + numFollowFloorBlocks);
            if (numFollowFloorBlocks != 0) {
              nextFloorLabel = floorDataReader.readByte() & 0xff;
              termOrd += floorDataReader.readVLong();
            } else {
              nextFloorLabel = 256;
            }
          }
        }
      }
    }

    ite.in.seek(fp);
    // Block header: entry count in the high bits, isLastInFloor flag in the low bit:
    int code = ite.in.readVInt();
    entCount = code >>> 1;
    assert entCount > 0;
    isLastInFloor = (code & 1) != 0;

    // term suffixes:
    code = ite.in.readVInt();
    isLeafBlock = (code & 1) != 0;
    int numBytes = code >>> 1;
    // if (DEBUG) System.out.println("    entCount=" + entCount + " lastInFloor?=" + isLastInFloor
    // + " leafBlock?=" + isLeafBlock + " numSuffixBytes=" + numBytes);
    if (suffixBytes.length < numBytes) {
      suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(suffixBytes, 0, numBytes);
    suffixesReader.reset(suffixBytes, 0, numBytes);

    // stats
    numBytes = ite.in.readVInt();
    if (statBytes.length < numBytes) {
      statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(statBytes, 0, numBytes);
    statsReader.reset(statBytes, 0, numBytes);
    metaDataUpto = 0;

    termState.termBlockOrd = 0;
    nextEnt = 0;

    // metadata
    numBytes = ite.in.readVInt();
    if (bytes == null) {
      bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
      bytesReader = new ByteArrayDataInput();
    } else if (bytes.length < numBytes) {
      bytes = new byte[ArrayUtil.oversize(numBytes, 1)];
    }
    ite.in.readBytes(bytes, 0, numBytes);
    bytesReader.reset(bytes, 0, numBytes);

    if (!isLastInFloor) {
      // Sub-blocks of a single floor block are always
      // written one after another -- tail recurse:
      fpEnd = ite.in.getFilePointer();
    }
  }

  // TODO: maybe add scanToLabel; should give perf boost

  // Decodes the next entry; returns true if it is a sub-block, false if it is a term.
  public boolean next() {
    return isLeafBlock ? nextLeaf() : nextNonLeaf();
  }

  // Next entry in a leaf block: every entry is a term (never a sub-block), so this always
  // returns false.
  public boolean nextLeaf() {
    // if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + "
    // entCount=" + entCount);
    assert nextEnt != -1 && nextEnt < entCount
        : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
    nextEnt++;
    // Leaf entries are just <suffixLength, suffixBytes>:
    suffix = suffixesReader.readVInt();
    startBytePos = suffixesReader.getPosition();
    suffixesReader.skipBytes(suffix);
    return false;
  }

  // Next entry in a mixed block: the low bit of the entry code distinguishes a term (0) from a
  // sub-block (1); returns true for a sub-block.
  public boolean nextNonLeaf() {
    // if (DEBUG) System.out.println("  frame.next ord=" + ord + " nextEnt=" + nextEnt + "
    // entCount=" + entCount);
    assert nextEnt != -1 && nextEnt < entCount
        : "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
    nextEnt++;
    final int code = suffixesReader.readVInt();
    suffix = code >>> 1;
    startBytePos = suffixesReader.getPosition();
    suffixesReader.skipBytes(suffix);
    if ((code & 1) == 0) {
      // A normal term
      termState.termBlockOrd++;
      return false;
    } else {
      // A sub-block; make sub-FP absolute:
      lastSubFP = fp - suffixesReader.readVLong();
      // Skip term ord
      suffixesReader.readVLong();
      return true;
    }
  }

  // Number of terms decoded so far in this block; for leaf blocks every entry is a term, for
  // mixed blocks only term entries count.
  public int getTermBlockOrd() {
    return isLeafBlock ? nextEnt : termState.termBlockOrd;
  }

  /**
   * Lazily decodes stats (docFreq/totalTermFreq) and postings metadata for all terms up to and
   * including the current one, leaving the result in {@link #termState}. Safe to call repeatedly;
   * already-decoded terms are not re-decoded.
   */
  public void decodeMetaData() throws IOException {

    // lazily catch up on metadata decode:
    final int limit = getTermBlockOrd();
    boolean absolute = metaDataUpto == 0;
    assert limit > 0;

    // TODO: better API would be "jump straight to term=N"???
    while (metaDataUpto < limit) {

      // TODO: we could make "tiers" of metadata, ie,
      // decode docFreq/totalTF but don't decode postings
      // metadata; this way caller could get
      // docFreq/totalTF w/o paying decode cost for
      // postings

      // TODO: if docFreq were bulk decoded we could
      // just skipN here:

      // stats
      termState.docFreq = statsReader.readVInt();
      // if (DEBUG) System.out.println("    dF=" + state.docFreq);
      if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
        termState.totalTermFreq = termState.docFreq; // all tf values are 1
      } else {
        // totalTermFreq is delta-encoded on top of docFreq (it is always >= docFreq):
        termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
        // if (DEBUG) System.out.println("    totTF=" + state.totalTermFreq);
      }
      // metadata
      ite.fr.parent.postingsReader.decodeTerm(bytesReader, ite.fr.fieldInfo, termState, absolute);

      metaDataUpto++;
      absolute = false;
    }
    termState.termBlockOrd = metaDataUpto;
  }
}