blob: bd9bd608af814324ef5ec5cc1cec65f2d4cbc42d [file] [log] [blame]
/*
* Copyright 2009-2010 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
import java.nio.ByteBuffer;
import java.util.List;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedListCursor;
public class TOccurrenceSearcherSuffixScanOnly extends TOccurrenceSearcher {
protected final MultiComparator invListCmp;
public TOccurrenceSearcherSuffixScanOnly(IHyracksTaskContext ctx, InvertedIndex invIndex) {
super(ctx, invIndex);
this.invListCmp = MultiComparator.create(invIndex.getInvListElementCmpFactories());
}
protected int mergeSuffixLists(int numPrefixTokens, int numQueryTokens, int maxPrevBufIdx) throws HyracksDataException {
for (int i = numPrefixTokens; i < numQueryTokens; i++) {
swap = prevResultBuffers;
prevResultBuffers = newResultBuffers;
newResultBuffers = swap;
currentNumResults = 0;
invListCursors.get(i).pinPagesSync();
maxPrevBufIdx = mergeSuffixListScan(invListCursors.get(i), prevResultBuffers, maxPrevBufIdx,
newResultBuffers, i, numQueryTokens);
invListCursors.get(i).unpinPages();
}
return maxPrevBufIdx;
}
protected int mergeSuffixListScan(IInvertedListCursor invListCursor, List<ByteBuffer> prevResultBuffers,
int maxPrevBufIdx, List<ByteBuffer> newResultBuffers, int invListIx, int numQueryTokens) {
int newBufIdx = 0;
ByteBuffer newCurrentBuffer = newResultBuffers.get(0);
int prevBufIdx = 0;
ByteBuffer prevCurrentBuffer = prevResultBuffers.get(0);
boolean advanceCursor = true;
boolean advancePrevResult = false;
int resultTidx = 0;
resultFrameTupleAcc.reset(prevCurrentBuffer);
resultFrameTupleApp.reset(newCurrentBuffer, true);
while (invListCursor.hasNext() && resultTidx < resultFrameTupleAcc.getTupleCount()) {
if (advanceCursor)
invListCursor.next();
ITupleReference invListTuple = invListCursor.getTuple();
resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx));
int cmp = invListCmp.compare(invListTuple, resultTuple);
if (cmp == 0) {
int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)) + 1;
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
advanceCursor = true;
advancePrevResult = true;
} else {
if (cmp < 0) {
advanceCursor = true;
advancePrevResult = false;
} else {
int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
if (count + numQueryTokens - invListIx > occurrenceThreshold) {
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
}
advanceCursor = false;
advancePrevResult = true;
}
}
if (advancePrevResult) {
resultTidx++;
if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
prevBufIdx++;
if (prevBufIdx <= maxPrevBufIdx) {
prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
resultFrameTupleAcc.reset(prevCurrentBuffer);
resultTidx = 0;
}
}
}
}
// append remaining elements from previous result set
while (resultTidx < resultFrameTupleAcc.getTupleCount()) {
int count = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
resultTuple.getFieldStart(resultTuple.getFieldCount() - 1));
newBufIdx = appendTupleToNewResults(resultTuple, count, newBufIdx);
resultTidx++;
if (resultTidx >= resultFrameTupleAcc.getTupleCount()) {
prevBufIdx++;
if (prevBufIdx <= maxPrevBufIdx) {
prevCurrentBuffer = prevResultBuffers.get(prevBufIdx);
resultFrameTupleAcc.reset(prevCurrentBuffer);
resultTidx = 0;
}
}
}
return newBufIdx;
}
}