| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| |
| import java.io.IOException; |
| import java.util.Iterator; |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| |
| /** Implements limited (iterators only, no stats) {@link |
| * Fields} interface over the in-RAM buffered |
| * fields/terms/postings, to flush postings through the |
| * PostingsFormat. */ |
| |
| class FreqProxFields extends Fields { |
| final Map<String,FreqProxTermsWriterPerField> fields = new LinkedHashMap<>(); |
| |
| public FreqProxFields(List<FreqProxTermsWriterPerField> fieldList) { |
| // NOTE: fields are already sorted by field name |
| for(FreqProxTermsWriterPerField field : fieldList) { |
| fields.put(field.getFieldName(), field); |
| } |
| } |
| |
| public Iterator<String> iterator() { |
| return fields.keySet().iterator(); |
| } |
| |
| @Override |
| public Terms terms(String field) throws IOException { |
| FreqProxTermsWriterPerField perField = fields.get(field); |
| return perField == null ? null : new FreqProxTerms(perField); |
| } |
| |
| @Override |
| public int size() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| private static class FreqProxTerms extends Terms { |
| final FreqProxTermsWriterPerField terms; |
| |
| public FreqProxTerms(FreqProxTermsWriterPerField terms) { |
| this.terms = terms; |
| } |
| |
| @Override |
| public TermsEnum iterator() { |
| FreqProxTermsEnum termsEnum = new FreqProxTermsEnum(terms); |
| termsEnum.reset(); |
| return termsEnum; |
| } |
| |
| @Override |
| public long size() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long getSumTotalTermFreq() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long getSumDocFreq() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int getDocCount() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public boolean hasFreqs() { |
| return terms.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; |
| } |
| |
| @Override |
| public boolean hasOffsets() { |
| // NOTE: the in-memory buffer may have indexed offsets |
| // because that's what FieldInfo said when we started, |
| // but during indexing this may have been downgraded: |
| return terms.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; |
| } |
| |
| @Override |
| public boolean hasPositions() { |
| // NOTE: the in-memory buffer may have indexed positions |
| // because that's what FieldInfo said when we started, |
| // but during indexing this may have been downgraded: |
| return terms.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; |
| } |
| |
| @Override |
| public boolean hasPayloads() { |
| return terms.sawPayloads; |
| } |
| } |
| |
| private static class FreqProxTermsEnum extends BaseTermsEnum { |
| final FreqProxTermsWriterPerField terms; |
| final int[] sortedTermIDs; |
| final FreqProxPostingsArray postingsArray; |
| final BytesRef scratch = new BytesRef(); |
| final int numTerms; |
| int ord; |
| |
| FreqProxTermsEnum(FreqProxTermsWriterPerField terms) { |
| this.terms = terms; |
| this.numTerms = terms.getNumTerms(); |
| sortedTermIDs = terms.getSortedTermIDs(); |
| assert sortedTermIDs != null; |
| postingsArray = (FreqProxPostingsArray) terms.postingsArray; |
| } |
| |
| public void reset() { |
| ord = -1; |
| } |
| |
| public SeekStatus seekCeil(BytesRef text) { |
| // TODO: we could instead keep the BytesRefHash |
| // intact so this is a hash lookup |
| |
| // binary search: |
| int lo = 0; |
| int hi = numTerms - 1; |
| while (hi >= lo) { |
| int mid = (lo + hi) >>> 1; |
| int textStart = postingsArray.textStarts[sortedTermIDs[mid]]; |
| terms.bytePool.setBytesRef(scratch, textStart); |
| int cmp = scratch.compareTo(text); |
| if (cmp < 0) { |
| lo = mid + 1; |
| } else if (cmp > 0) { |
| hi = mid - 1; |
| } else { |
| // found: |
| ord = mid; |
| assert term().compareTo(text) == 0; |
| return SeekStatus.FOUND; |
| } |
| } |
| |
| // not found: |
| ord = lo; |
| if (ord >= numTerms) { |
| return SeekStatus.END; |
| } else { |
| int textStart = postingsArray.textStarts[sortedTermIDs[ord]]; |
| terms.bytePool.setBytesRef(scratch, textStart); |
| assert term().compareTo(text) > 0; |
| return SeekStatus.NOT_FOUND; |
| } |
| } |
| |
| public void seekExact(long ord) { |
| this.ord = (int) ord; |
| int textStart = postingsArray.textStarts[sortedTermIDs[this.ord]]; |
| terms.bytePool.setBytesRef(scratch, textStart); |
| } |
| |
| @Override |
| public BytesRef next() { |
| ord++; |
| if (ord >= numTerms) { |
| return null; |
| } else { |
| int textStart = postingsArray.textStarts[sortedTermIDs[ord]]; |
| terms.bytePool.setBytesRef(scratch, textStart); |
| return scratch; |
| } |
| } |
| |
| @Override |
| public BytesRef term() { |
| return scratch; |
| } |
| |
| @Override |
| public long ord() { |
| return ord; |
| } |
| |
| @Override |
| public int docFreq() { |
| // We do not store this per-term, and we cannot |
| // implement this at merge time w/o an added pass |
| // through the postings: |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long totalTermFreq() { |
| // We do not store this per-term, and we cannot |
| // implement this at merge time w/o an added pass |
| // through the postings: |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public PostingsEnum postings(PostingsEnum reuse, int flags) { |
| if (PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) { |
| FreqProxPostingsEnum posEnum; |
| |
| if (!terms.hasProx) { |
| // Caller wants positions but we didn't index them; |
| // don't lie: |
| throw new IllegalArgumentException("did not index positions"); |
| } |
| |
| if (!terms.hasOffsets && PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS)) { |
| // Caller wants offsets but we didn't index them; |
| // don't lie: |
| throw new IllegalArgumentException("did not index offsets"); |
| } |
| |
| if (reuse instanceof FreqProxPostingsEnum) { |
| posEnum = (FreqProxPostingsEnum) reuse; |
| if (posEnum.postingsArray != postingsArray) { |
| posEnum = new FreqProxPostingsEnum(terms, postingsArray); |
| } |
| } else { |
| posEnum = new FreqProxPostingsEnum(terms, postingsArray); |
| } |
| posEnum.reset(sortedTermIDs[ord]); |
| return posEnum; |
| } |
| |
| FreqProxDocsEnum docsEnum; |
| |
| if (!terms.hasFreq && PostingsEnum.featureRequested(flags, PostingsEnum.FREQS)) { |
| // Caller wants freqs but we didn't index them; |
| // don't lie: |
| throw new IllegalArgumentException("did not index freq"); |
| } |
| |
| if (reuse instanceof FreqProxDocsEnum) { |
| docsEnum = (FreqProxDocsEnum) reuse; |
| if (docsEnum.postingsArray != postingsArray) { |
| docsEnum = new FreqProxDocsEnum(terms, postingsArray); |
| } |
| } else { |
| docsEnum = new FreqProxDocsEnum(terms, postingsArray); |
| } |
| docsEnum.reset(sortedTermIDs[ord]); |
| return docsEnum; |
| } |
| |
| @Override |
| public ImpactsEnum impacts(int flags) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| /** |
| * Expert: Returns the TermsEnums internal state to position the TermsEnum |
| * without re-seeking the term dictionary. |
| * <p> |
| * NOTE: A seek by {@link TermState} might not capture the |
| * {@link AttributeSource}'s state. Callers must maintain the |
| * {@link AttributeSource} states separately |
| * |
| * @see TermState |
| * @see #seekExact(BytesRef, TermState) |
| */ |
| public TermState termState() throws IOException { |
| return new TermState() { |
| @Override |
| public void copyFrom(TermState other) { |
| throw new UnsupportedOperationException(); |
| } |
| }; |
| } |
| } |
| |
| private static class FreqProxDocsEnum extends PostingsEnum { |
| |
| final FreqProxTermsWriterPerField terms; |
| final FreqProxPostingsArray postingsArray; |
| final ByteSliceReader reader = new ByteSliceReader(); |
| final boolean readTermFreq; |
| int docID = -1; |
| int freq; |
| boolean ended; |
| int termID; |
| |
| public FreqProxDocsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) { |
| this.terms = terms; |
| this.postingsArray = postingsArray; |
| this.readTermFreq = terms.hasFreq; |
| } |
| |
| public void reset(int termID) { |
| this.termID = termID; |
| terms.initReader(reader, termID, 0); |
| ended = false; |
| docID = -1; |
| } |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int freq() { |
| // Don't lie here ... don't want codecs writings lots |
| // of wasted 1s into the index: |
| if (!readTermFreq) { |
| throw new IllegalStateException("freq was not indexed"); |
| } else { |
| return freq; |
| } |
| } |
| |
| @Override |
| public int nextPosition() throws IOException { |
| return -1; |
| } |
| |
| @Override |
| public int startOffset() throws IOException { |
| return -1; |
| } |
| |
| @Override |
| public int endOffset() throws IOException { |
| return -1; |
| } |
| |
| @Override |
| public BytesRef getPayload() throws IOException { |
| return null; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| if (docID == -1) { |
| docID = 0; |
| } |
| if (reader.eof()) { |
| if (ended) { |
| return NO_MORE_DOCS; |
| } else { |
| ended = true; |
| docID = postingsArray.lastDocIDs[termID]; |
| if (readTermFreq) { |
| freq = postingsArray.termFreqs[termID]; |
| } |
| } |
| } else { |
| int code = reader.readVInt(); |
| if (!readTermFreq) { |
| docID += code; |
| } else { |
| docID += code >>> 1; |
| if ((code & 1) != 0) { |
| freq = 1; |
| } else { |
| freq = reader.readVInt(); |
| } |
| } |
| |
| assert docID != postingsArray.lastDocIDs[termID]; |
| } |
| |
| return docID; |
| } |
| |
| @Override |
| public int advance(int target) { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long cost() { |
| throw new UnsupportedOperationException(); |
| } |
| } |
| |
  /**
   * Full-positions {@link PostingsEnum} decoded from the in-RAM byte slices:
   * stream 0 carries doc deltas + freqs, stream 1 carries position deltas
   * plus optional payloads and offsets. Requires that freqs and positions
   * were indexed (asserted in the constructor).
   */
  private static class FreqProxPostingsEnum extends PostingsEnum {

    final FreqProxTermsWriterPerField terms;
    final FreqProxPostingsArray postingsArray;
    final ByteSliceReader reader = new ByteSliceReader();      // stream 0: docs/freqs
    final ByteSliceReader posReader = new ByteSliceReader();   // stream 1: positions/payloads/offsets
    final boolean readOffsets;
    int docID = -1;
    int freq;
    int pos;
    int startOffset;
    int endOffset;
    int posLeft;       // positions not yet consumed for the current doc
    int termID;
    boolean ended;     // true once the final buffered doc has been returned
    boolean hasPayload;
    BytesRefBuilder payload = new BytesRefBuilder();

    public FreqProxPostingsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
      this.terms = terms;
      this.postingsArray = postingsArray;
      this.readOffsets = terms.hasOffsets;
      assert terms.hasProx;
      assert terms.hasFreq;
    }

    /** Positions this enum at the start of the given term's postings. */
    public void reset(int termID) {
      this.termID = termID;
      terms.initReader(reader, termID, 0);
      terms.initReader(posReader, termID, 1);
      ended = false;
      docID = -1;
      posLeft = 0;
    }

    @Override
    public int docID() {
      return docID;
    }

    @Override
    public int freq() {
      return freq;
    }

    @Override
    public int nextDoc() throws IOException {
      if (docID == -1) {
        docID = 0;
      }
      // Drain any unread positions so posReader stays aligned with the
      // next document's position data:
      while (posLeft != 0) {
        nextPosition();
      }

      if (reader.eof()) {
        if (ended) {
          return NO_MORE_DOCS;
        } else {
          // The last doc for this term is still buffered in the postings
          // array, not yet flushed into the byte slices:
          ended = true;
          docID = postingsArray.lastDocIDs[termID];
          freq = postingsArray.termFreqs[termID];
        }
      } else {
        // Delta-coded doc: low bit set means freq == 1, else freq follows:
        int code = reader.readVInt();
        docID += code >>> 1;
        if ((code & 1) != 0) {
          freq = 1;
        } else {
          freq = reader.readVInt();
        }

        assert docID != postingsArray.lastDocIDs[termID];
      }

      posLeft = freq;
      pos = 0;
      startOffset = 0;
      return docID;
    }

    @Override
    public int advance(int target) {
      throw new UnsupportedOperationException();
    }

    @Override
    public long cost() {
      throw new UnsupportedOperationException();
    }

    @Override
    public int nextPosition() throws IOException {
      assert posLeft > 0;
      posLeft--;
      // Delta-coded position: low bit set means a payload follows:
      int code = posReader.readVInt();
      pos += code >>> 1;
      if ((code & 1) != 0) {
        hasPayload = true;
        // has a payload
        payload.setLength(posReader.readVInt());
        payload.grow(payload.length());
        posReader.readBytes(payload.bytes(), 0, payload.length());
      } else {
        hasPayload = false;
      }

      if (readOffsets) {
        // Offsets are delta-coded after the (optional) payload:
        startOffset += posReader.readVInt();
        endOffset = startOffset + posReader.readVInt();
      }

      return pos;
    }

    @Override
    public int startOffset() {
      if (!readOffsets) {
        throw new IllegalStateException("offsets were not indexed");
      }
      return startOffset;
    }

    @Override
    public int endOffset() {
      if (!readOffsets) {
        throw new IllegalStateException("offsets were not indexed");
      }
      return endOffset;
    }

    @Override
    public BytesRef getPayload() {
      if (hasPayload) {
        return payload.get();
      } else {
        return null;
      }
    }
  }
| } |