| package org.apache.lucene.index.codecs.standard; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.Collection; |
| |
| import org.apache.lucene.index.DocsAndPositionsEnum; |
| import org.apache.lucene.index.DocsEnum; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.codecs.BlockTermState; |
| import org.apache.lucene.index.codecs.PostingsReaderBase; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CodecUtil; |
| |
| /** Concrete class that reads the current doc/freq/skip |
| * postings format. |
| * @lucene.experimental */ |
| |
| public class StandardPostingsReader extends PostingsReaderBase { |
| |
/** Input over the segment's frequency file; the per-enum inputs are clones of it. */
private final IndexInput freqIn;

/** Input over the segment's positions file, or null when the segment has no prox data. */
private final IndexInput proxIn;

// Skip-list settings; assigned by init(IndexInput) from the terms input.
int skipInterval;
int maxSkipLevels;
int skipMinimum;

/**
 * Opens the frequency file of the segment and, when the segment reports
 * prox data, the positions file as well.
 *
 * <p>If anything fails after the frequency file has been opened -- the
 * {@code getHasProx()} query or opening the positions file -- the
 * frequency input is closed before the failure propagates, so no file
 * handle is leaked by a half-constructed reader.
 *
 * @param dir directory the segment files are opened from
 * @param segmentInfo segment whose name is used to build the file names
 * @param readBufferSize buffer size handed to {@code dir.openInput}
 * @param codecId codec id used when building the file names
 * @throws IOException if a file cannot be opened
 */
public StandardPostingsReader(Directory dir, SegmentInfo segmentInfo, int readBufferSize, int codecId) throws IOException {
  freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.FREQ_EXTENSION),
                         readBufferSize);
  boolean success = false;
  try {
    // Everything after the first open is guarded, including getHasProx(),
    // so freqIn is released on any failure.
    if (segmentInfo.getHasProx()) {
      proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, codecId, StandardCodec.PROX_EXTENSION),
                             readBufferSize);
    } else {
      proxIn = null;
    }
    success = true;
  } finally {
    if (!success) {
      freqIn.close();
    }
  }
}
| |
| public static void files(Directory dir, SegmentInfo segmentInfo, int id, Collection<String> files) throws IOException { |
| files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, StandardCodec.FREQ_EXTENSION)); |
| if (segmentInfo.getHasProx()) { |
| files.add(IndexFileNames.segmentFileName(segmentInfo.name, id, StandardCodec.PROX_EXTENSION)); |
| } |
| } |
| |
| @Override |
| public void init(IndexInput termsIn) throws IOException { |
| |
| // Make sure we are talking to the matching past writer |
| CodecUtil.checkHeader(termsIn, StandardPostingsWriter.CODEC, |
| StandardPostingsWriter.VERSION_START, StandardPostingsWriter.VERSION_START); |
| |
| skipInterval = termsIn.readInt(); |
| maxSkipLevels = termsIn.readInt(); |
| skipMinimum = termsIn.readInt(); |
| } |
| |
| // Must keep final because we do non-standard clone |
| private final static class StandardTermState extends BlockTermState { |
| long freqOffset; |
| long proxOffset; |
| int skipOffset; |
| |
| // Only used by the "primary" TermState -- clones don't |
| // copy this (basically they are "transient"): |
| ByteArrayDataInput bytesReader; |
| byte[] bytes; |
| |
| @Override |
| public Object clone() { |
| StandardTermState other = new StandardTermState(); |
| other.copyFrom(this); |
| return other; |
| } |
| |
| @Override |
| public void copyFrom(TermState _other) { |
| super.copyFrom(_other); |
| StandardTermState other = (StandardTermState) _other; |
| freqOffset = other.freqOffset; |
| proxOffset = other.proxOffset; |
| skipOffset = other.skipOffset; |
| |
| // Do not copy bytes, bytesReader (else TermState is |
| // very heavy, ie drags around the entire block's |
| // byte[]). On seek back, if next() is in fact used |
| // (rare!), they will be re-read from disk. |
| } |
| |
| @Override |
| public String toString() { |
| return super.toString() + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " skipOffset=" + skipOffset; |
| } |
| } |
| |
| @Override |
| public BlockTermState newTermState() { |
| return new StandardTermState(); |
| } |
| |
| @Override |
| public void close() throws IOException { |
| try { |
| if (freqIn != null) { |
| freqIn.close(); |
| } |
| } finally { |
| if (proxIn != null) { |
| proxIn.close(); |
| } |
| } |
| } |
| |
| /* Reads but does not decode the byte[] blob holding |
| metadata for the current terms block */ |
| @Override |
| public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException { |
| final StandardTermState termState = (StandardTermState) _termState; |
| |
| final int len = termsIn.readVInt(); |
| //System.out.println("SPR.readTermsBlock termsIn.fp=" + termsIn.getFilePointer()); |
| if (termState.bytes == null) { |
| termState.bytes = new byte[ArrayUtil.oversize(len, 1)]; |
| termState.bytesReader = new ByteArrayDataInput(null); |
| } else if (termState.bytes.length < len) { |
| termState.bytes = new byte[ArrayUtil.oversize(len, 1)]; |
| } |
| |
| termsIn.readBytes(termState.bytes, 0, len); |
| termState.bytesReader.reset(termState.bytes, 0, len); |
| } |
| |
| @Override |
| public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) |
| throws IOException { |
| final StandardTermState termState = (StandardTermState) _termState; |
| //System.out.println("StandardR.nextTerm seg=" + segment); |
| final boolean isFirstTerm = termState.termCount == 0; |
| |
| if (isFirstTerm) { |
| termState.freqOffset = termState.bytesReader.readVLong(); |
| } else { |
| termState.freqOffset += termState.bytesReader.readVLong(); |
| } |
| //System.out.println(" dF=" + termState.docFreq); |
| //System.out.println(" freqFP=" + termState.freqOffset); |
| assert termState.freqOffset < freqIn.length(); |
| |
| if (termState.docFreq >= skipMinimum) { |
| termState.skipOffset = termState.bytesReader.readVInt(); |
| //System.out.println(" skipOffset=" + termState.skipOffset + " vs freqIn.length=" + freqIn.length()); |
| assert termState.freqOffset + termState.skipOffset < freqIn.length(); |
| } else { |
| // undefined |
| } |
| |
| if (!fieldInfo.omitTermFreqAndPositions) { |
| if (isFirstTerm) { |
| termState.proxOffset = termState.bytesReader.readVLong(); |
| } else { |
| termState.proxOffset += termState.bytesReader.readVLong(); |
| } |
| //System.out.println(" proxFP=" + termState.proxOffset); |
| } |
| } |
| |
| @Override |
| public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsEnum reuse) throws IOException { |
| SegmentDocsEnum docsEnum; |
| if (reuse == null || !(reuse instanceof SegmentDocsEnum)) { |
| docsEnum = new SegmentDocsEnum(freqIn); |
| } else { |
| docsEnum = (SegmentDocsEnum) reuse; |
| if (docsEnum.startFreqIn != freqIn) { |
| // If you are using ParellelReader, and pass in a |
| // reused DocsEnum, it could have come from another |
| // reader also using standard codec |
| docsEnum = new SegmentDocsEnum(freqIn); |
| } |
| } |
| return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); |
| } |
| |
| @Override |
| public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException { |
| if (fieldInfo.omitTermFreqAndPositions) { |
| return null; |
| } |
| |
| // TODO: refactor |
| if (fieldInfo.storePayloads) { |
| SegmentDocsAndPositionsAndPayloadsEnum docsEnum; |
| if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsAndPayloadsEnum)) { |
| docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn); |
| } else { |
| docsEnum = (SegmentDocsAndPositionsAndPayloadsEnum) reuse; |
| if (docsEnum.startFreqIn != freqIn) { |
| // If you are using ParellelReader, and pass in a |
| // reused DocsEnum, it could have come from another |
| // reader also using standard codec |
| docsEnum = new SegmentDocsAndPositionsAndPayloadsEnum(freqIn, proxIn); |
| } |
| } |
| return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); |
| } else { |
| SegmentDocsAndPositionsEnum docsEnum; |
| if (reuse == null || !(reuse instanceof SegmentDocsAndPositionsEnum)) { |
| docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn); |
| } else { |
| docsEnum = (SegmentDocsAndPositionsEnum) reuse; |
| if (docsEnum.startFreqIn != freqIn) { |
| // If you are using ParellelReader, and pass in a |
| // reused DocsEnum, it could have come from another |
| // reader also using standard codec |
| docsEnum = new SegmentDocsAndPositionsEnum(freqIn, proxIn); |
| } |
| } |
| return docsEnum.reset(fieldInfo, (StandardTermState) termState, skipDocs); |
| } |
| } |
| |
| // Decodes only docs |
| private class SegmentDocsEnum extends DocsEnum { |
| final IndexInput freqIn; |
| final IndexInput startFreqIn; |
| |
| boolean omitTF; // does current field omit term freq? |
| boolean storePayloads; // does current field store payloads? |
| |
| int limit; // number of docs in this posting |
| int ord; // how many docs we've read |
| int doc; // doc we last read |
| int freq; // freq we last read |
| |
| Bits skipDocs; |
| |
| long freqOffset; |
| int skipOffset; |
| |
| boolean skipped; |
| DefaultSkipListReader skipper; |
| |
| public SegmentDocsEnum(IndexInput freqIn) throws IOException { |
| startFreqIn = freqIn; |
| this.freqIn = (IndexInput) freqIn.clone(); |
| } |
| |
| public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { |
| omitTF = fieldInfo.omitTermFreqAndPositions; |
| if (omitTF) { |
| freq = 1; |
| } |
| storePayloads = fieldInfo.storePayloads; |
| this.skipDocs = skipDocs; |
| freqOffset = termState.freqOffset; |
| skipOffset = termState.skipOffset; |
| |
| // TODO: for full enum case (eg segment merging) this |
| // seek is unnecessary; maybe we can avoid in such |
| // cases |
| freqIn.seek(termState.freqOffset); |
| limit = termState.docFreq; |
| assert limit > 0; |
| ord = 0; |
| doc = 0; |
| //System.out.println(" sde limit=" + limit + " freqFP=" + freqOffset); |
| |
| skipped = false; |
| |
| return this; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| while(true) { |
| if (ord == limit) { |
| return doc = NO_MORE_DOCS; |
| } |
| |
| ord++; |
| |
| // Decode next doc/freq pair |
| final int code = freqIn.readVInt(); |
| if (omitTF) { |
| doc += code; |
| } else { |
| doc += code >>> 1; // shift off low bit |
| if ((code & 1) != 0) { // if low bit is set |
| freq = 1; // freq is one |
| } else { |
| freq = freqIn.readVInt(); // else read freq |
| } |
| } |
| |
| if (skipDocs == null || !skipDocs.get(doc)) { |
| break; |
| } |
| } |
| |
| return doc; |
| } |
| |
| @Override |
| public int read() throws IOException { |
| |
| final int[] docs = bulkResult.docs.ints; |
| final int[] freqs = bulkResult.freqs.ints; |
| int i = 0; |
| final int length = docs.length; |
| while (i < length && ord < limit) { |
| ord++; |
| // manually inlined call to next() for speed |
| final int code = freqIn.readVInt(); |
| if (omitTF) { |
| doc += code; |
| } else { |
| doc += code >>> 1; // shift off low bit |
| if ((code & 1) != 0) { // if low bit is set |
| freq = 1; // freq is one |
| } else { |
| freq = freqIn.readVInt(); // else read freq |
| } |
| } |
| |
| if (skipDocs == null || !skipDocs.get(doc)) { |
| docs[i] = doc; |
| freqs[i] = freq; |
| ++i; |
| } |
| } |
| |
| return i; |
| } |
| |
| @Override |
| public int docID() { |
| return doc; |
| } |
| |
| @Override |
| public int freq() { |
| return freq; |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| |
| if ((target - skipInterval) >= doc && limit >= skipMinimum) { |
| |
| // There are enough docs in the posting to have |
| // skip data, and it isn't too close. |
| |
| if (skipper == null) { |
| // This is the first time this enum has ever been used for skipping -- do lazy init |
| skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval); |
| } |
| |
| if (!skipped) { |
| |
| // This is the first time this posting has |
| // skipped since reset() was called, so now we |
| // load the skip data for this posting |
| |
| skipper.init(freqOffset + skipOffset, |
| freqOffset, 0, |
| limit, storePayloads); |
| |
| skipped = true; |
| } |
| |
| final int newOrd = skipper.skipTo(target); |
| |
| if (newOrd > ord) { |
| // Skipper moved |
| |
| ord = newOrd; |
| doc = skipper.getDoc(); |
| freqIn.seek(skipper.getFreqPointer()); |
| } |
| } |
| |
| // scan for the rest: |
| do { |
| nextDoc(); |
| } while (target > doc); |
| |
| return doc; |
| } |
| } |
| |
| // Decodes docs & positions. payloads are not present. |
| private class SegmentDocsAndPositionsEnum extends DocsAndPositionsEnum { |
| final IndexInput startFreqIn; |
| private final IndexInput freqIn; |
| private final IndexInput proxIn; |
| |
| int limit; // number of docs in this posting |
| int ord; // how many docs we've read |
| int doc; // doc we last read |
| int freq; // freq we last read |
| int position; |
| |
| Bits skipDocs; |
| |
| long freqOffset; |
| int skipOffset; |
| long proxOffset; |
| |
| int posPendingCount; |
| |
| boolean skipped; |
| DefaultSkipListReader skipper; |
| private long lazyProxPointer; |
| |
| public SegmentDocsAndPositionsEnum(IndexInput freqIn, IndexInput proxIn) throws IOException { |
| startFreqIn = freqIn; |
| this.freqIn = (IndexInput) freqIn.clone(); |
| this.proxIn = (IndexInput) proxIn.clone(); |
| } |
| |
| public SegmentDocsAndPositionsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { |
| assert !fieldInfo.omitTermFreqAndPositions; |
| assert !fieldInfo.storePayloads; |
| |
| this.skipDocs = skipDocs; |
| |
| // TODO: for full enum case (eg segment merging) this |
| // seek is unnecessary; maybe we can avoid in such |
| // cases |
| freqIn.seek(termState.freqOffset); |
| lazyProxPointer = termState.proxOffset; |
| |
| limit = termState.docFreq; |
| assert limit > 0; |
| |
| ord = 0; |
| doc = 0; |
| position = 0; |
| |
| skipped = false; |
| posPendingCount = 0; |
| |
| freqOffset = termState.freqOffset; |
| proxOffset = termState.proxOffset; |
| skipOffset = termState.skipOffset; |
| //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset); |
| |
| return this; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| while(true) { |
| if (ord == limit) { |
| //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END"); |
| return doc = NO_MORE_DOCS; |
| } |
| |
| ord++; |
| |
| // Decode next doc/freq pair |
| final int code = freqIn.readVInt(); |
| |
| doc += code >>> 1; // shift off low bit |
| if ((code & 1) != 0) { // if low bit is set |
| freq = 1; // freq is one |
| } else { |
| freq = freqIn.readVInt(); // else read freq |
| } |
| posPendingCount += freq; |
| |
| if (skipDocs == null || !skipDocs.get(doc)) { |
| break; |
| } |
| } |
| |
| position = 0; |
| |
| //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc); |
| return doc; |
| } |
| |
| @Override |
| public int docID() { |
| return doc; |
| } |
| |
| @Override |
| public int freq() { |
| return freq; |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| |
| //System.out.println("StandardR.D&PE advance target=" + target); |
| |
| if ((target - skipInterval) >= doc && limit >= skipMinimum) { |
| |
| // There are enough docs in the posting to have |
| // skip data, and it isn't too close |
| |
| if (skipper == null) { |
| // This is the first time this enum has ever been used for skipping -- do lazy init |
| skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval); |
| } |
| |
| if (!skipped) { |
| |
| // This is the first time this posting has |
| // skipped, since reset() was called, so now we |
| // load the skip data for this posting |
| |
| skipper.init(freqOffset+skipOffset, |
| freqOffset, proxOffset, |
| limit, false); |
| |
| skipped = true; |
| } |
| |
| final int newOrd = skipper.skipTo(target); |
| |
| if (newOrd > ord) { |
| // Skipper moved |
| ord = newOrd; |
| doc = skipper.getDoc(); |
| freqIn.seek(skipper.getFreqPointer()); |
| lazyProxPointer = skipper.getProxPointer(); |
| posPendingCount = 0; |
| position = 0; |
| } |
| } |
| |
| // Now, linear scan for the rest: |
| do { |
| nextDoc(); |
| } while (target > doc); |
| |
| return doc; |
| } |
| |
| @Override |
| public int nextPosition() throws IOException { |
| |
| if (lazyProxPointer != -1) { |
| proxIn.seek(lazyProxPointer); |
| lazyProxPointer = -1; |
| } |
| |
| // scan over any docs that were iterated without their positions |
| if (posPendingCount > freq) { |
| position = 0; |
| while(posPendingCount != freq) { |
| if ((proxIn.readByte() & 0x80) == 0) { |
| posPendingCount--; |
| } |
| } |
| } |
| |
| position += proxIn.readVInt(); |
| |
| posPendingCount--; |
| |
| assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount; |
| |
| return position; |
| } |
| |
| /** Returns the payload at this position, or null if no |
| * payload was indexed. */ |
| @Override |
| public BytesRef getPayload() throws IOException { |
| throw new IOException("No payloads exist for this field!"); |
| } |
| |
| @Override |
| public boolean hasPayload() { |
| return false; |
| } |
| } |
| |
| // Decodes docs & positions & payloads |
| private class SegmentDocsAndPositionsAndPayloadsEnum extends DocsAndPositionsEnum { |
| final IndexInput startFreqIn; |
| private final IndexInput freqIn; |
| private final IndexInput proxIn; |
| |
| int limit; // number of docs in this posting |
| int ord; // how many docs we've read |
| int doc; // doc we last read |
| int freq; // freq we last read |
| int position; |
| |
| Bits skipDocs; |
| |
| long freqOffset; |
| int skipOffset; |
| long proxOffset; |
| |
| int posPendingCount; |
| int payloadLength; |
| boolean payloadPending; |
| |
| boolean skipped; |
| DefaultSkipListReader skipper; |
| private BytesRef payload; |
| private long lazyProxPointer; |
| |
| public SegmentDocsAndPositionsAndPayloadsEnum(IndexInput freqIn, IndexInput proxIn) throws IOException { |
| startFreqIn = freqIn; |
| this.freqIn = (IndexInput) freqIn.clone(); |
| this.proxIn = (IndexInput) proxIn.clone(); |
| } |
| |
| public SegmentDocsAndPositionsAndPayloadsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits skipDocs) throws IOException { |
| assert !fieldInfo.omitTermFreqAndPositions; |
| assert fieldInfo.storePayloads; |
| if (payload == null) { |
| payload = new BytesRef(); |
| payload.bytes = new byte[1]; |
| } |
| |
| this.skipDocs = skipDocs; |
| |
| // TODO: for full enum case (eg segment merging) this |
| // seek is unnecessary; maybe we can avoid in such |
| // cases |
| freqIn.seek(termState.freqOffset); |
| lazyProxPointer = termState.proxOffset; |
| |
| limit = termState.docFreq; |
| ord = 0; |
| doc = 0; |
| position = 0; |
| |
| skipped = false; |
| posPendingCount = 0; |
| payloadPending = false; |
| |
| freqOffset = termState.freqOffset; |
| proxOffset = termState.proxOffset; |
| skipOffset = termState.skipOffset; |
| //System.out.println("StandardR.D&PE reset seg=" + segment + " limit=" + limit + " freqFP=" + freqOffset + " proxFP=" + proxOffset + " this=" + this); |
| |
| return this; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| while(true) { |
| if (ord == limit) { |
| //System.out.println("StandardR.D&PE seg=" + segment + " nextDoc return doc=END"); |
| return doc = NO_MORE_DOCS; |
| } |
| |
| ord++; |
| |
| // Decode next doc/freq pair |
| final int code = freqIn.readVInt(); |
| |
| doc += code >>> 1; // shift off low bit |
| if ((code & 1) != 0) { // if low bit is set |
| freq = 1; // freq is one |
| } else { |
| freq = freqIn.readVInt(); // else read freq |
| } |
| posPendingCount += freq; |
| |
| if (skipDocs == null || !skipDocs.get(doc)) { |
| break; |
| } |
| } |
| |
| position = 0; |
| |
| //System.out.println("StandardR.D&PE nextDoc seg=" + segment + " return doc=" + doc); |
| return doc; |
| } |
| |
| @Override |
| public int docID() { |
| return doc; |
| } |
| |
| @Override |
| public int freq() { |
| return freq; |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| |
| //System.out.println("StandardR.D&PE advance seg=" + segment + " target=" + target + " this=" + this); |
| |
| if ((target - skipInterval) >= doc && limit >= skipMinimum) { |
| |
| // There are enough docs in the posting to have |
| // skip data, and it isn't too close |
| |
| if (skipper == null) { |
| // This is the first time this enum has ever been used for skipping -- do lazy init |
| skipper = new DefaultSkipListReader((IndexInput) freqIn.clone(), maxSkipLevels, skipInterval); |
| } |
| |
| if (!skipped) { |
| |
| // This is the first time this posting has |
| // skipped, since reset() was called, so now we |
| // load the skip data for this posting |
| //System.out.println(" init skipper freqOffset=" + freqOffset + " skipOffset=" + skipOffset + " vs len=" + freqIn.length()); |
| skipper.init(freqOffset+skipOffset, |
| freqOffset, proxOffset, |
| limit, true); |
| |
| skipped = true; |
| } |
| |
| final int newOrd = skipper.skipTo(target); |
| |
| if (newOrd > ord) { |
| // Skipper moved |
| ord = newOrd; |
| doc = skipper.getDoc(); |
| freqIn.seek(skipper.getFreqPointer()); |
| lazyProxPointer = skipper.getProxPointer(); |
| posPendingCount = 0; |
| position = 0; |
| payloadPending = false; |
| payloadLength = skipper.getPayloadLength(); |
| } |
| } |
| |
| // Now, linear scan for the rest: |
| do { |
| nextDoc(); |
| } while (target > doc); |
| |
| return doc; |
| } |
| |
| @Override |
| public int nextPosition() throws IOException { |
| |
| if (lazyProxPointer != -1) { |
| proxIn.seek(lazyProxPointer); |
| lazyProxPointer = -1; |
| } |
| |
| if (payloadPending && payloadLength > 0) { |
| // payload of last position as never retrieved -- skip it |
| proxIn.seek(proxIn.getFilePointer() + payloadLength); |
| payloadPending = false; |
| } |
| |
| // scan over any docs that were iterated without their positions |
| while(posPendingCount > freq) { |
| |
| final int code = proxIn.readVInt(); |
| |
| if ((code & 1) != 0) { |
| // new payload length |
| payloadLength = proxIn.readVInt(); |
| assert payloadLength >= 0; |
| } |
| |
| assert payloadLength != -1; |
| proxIn.seek(proxIn.getFilePointer() + payloadLength); |
| |
| posPendingCount--; |
| position = 0; |
| payloadPending = false; |
| //System.out.println("StandardR.D&PE skipPos"); |
| } |
| |
| // read next position |
| if (payloadPending && payloadLength > 0) { |
| // payload wasn't retrieved for last position |
| proxIn.seek(proxIn.getFilePointer()+payloadLength); |
| } |
| |
| final int code = proxIn.readVInt(); |
| if ((code & 1) != 0) { |
| // new payload length |
| payloadLength = proxIn.readVInt(); |
| assert payloadLength >= 0; |
| } |
| assert payloadLength != -1; |
| |
| payloadPending = true; |
| position += code >>> 1; |
| |
| posPendingCount--; |
| |
| assert posPendingCount >= 0: "nextPosition() was called too many times (more than freq() times) posPendingCount=" + posPendingCount; |
| |
| //System.out.println("StandardR.D&PE nextPos return pos=" + position); |
| return position; |
| } |
| |
| /** Returns the payload at this position, or null if no |
| * payload was indexed. */ |
| @Override |
| public BytesRef getPayload() throws IOException { |
| assert lazyProxPointer == -1; |
| assert posPendingCount < freq; |
| if (!payloadPending) { |
| throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once."); |
| } |
| if (payloadLength > payload.bytes.length) { |
| payload.grow(payloadLength); |
| } |
| |
| proxIn.readBytes(payload.bytes, 0, payloadLength); |
| payload.length = payloadLength; |
| payloadPending = false; |
| |
| return payload; |
| } |
| |
| @Override |
| public boolean hasPayload() { |
| return payloadPending && payloadLength > 0; |
| } |
| } |
| } |