fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java - asterixdb - Git at Google

 /*
  * Copyright 2009-2012 by The Regents of the University of California
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * you may obtain a copy of the License from
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search;

 import java.io.IOException;
 import java.util.ArrayList;

 import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.ShortSerializerDeserializer;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
 import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
 import edu.uci.ics.hyracks.storage.am.common.tuples.ConcatenatingTupleReference;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
 import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexSearchCursor;

 public class PartitionedTOccurrenceSearcher extends AbstractTOccurrenceSearcher {

     protected final ArrayTupleBuilder lowerBoundTupleBuilder = new ArrayTupleBuilder(1);
     protected final ArrayTupleReference lowerBoundTuple = new ArrayTupleReference();
     protected final ArrayTupleBuilder upperBoundTupleBuilder = new ArrayTupleBuilder(1);
     protected final ArrayTupleReference upperBoundTuple = new ArrayTupleReference();
     protected final ConcatenatingTupleReference fullLowSearchKey = new ConcatenatingTupleReference(2);
     protected final ConcatenatingTupleReference fullHighSearchKey = new ConcatenatingTupleReference(2);

     // Inverted list cursors ordered by token. Used to read relevant inverted-list partitions of one token one after
     // the other for better I/O performance (because the partitions of one inverted list are stored contiguously in a file).
     // The above implies that we currently require holding all inverted list for a query in memory.
     protected final ArrayList<IInvertedListCursor> cursorsOrderedByTokens = new ArrayList<IInvertedListCursor>();
     protected final InvertedListPartitions partitions = new InvertedListPartitions();

     public PartitionedTOccurrenceSearcher(IHyracksCommonContext ctx, IInvertedIndex invIndex) {
         super(ctx, invIndex);
         initHelperTuples();
     }

     private void initHelperTuples() {
         try {
             lowerBoundTupleBuilder.reset();
             // Write dummy value.
             lowerBoundTupleBuilder.getDataOutput().writeShort(Short.MIN_VALUE);
             lowerBoundTupleBuilder.addFieldEndOffset();
             lowerBoundTuple.reset(lowerBoundTupleBuilder.getFieldEndOffsets(), lowerBoundTupleBuilder.getByteArray());
             // Only needed for setting the number of fields in searchKey.
             searchKey.reset(queryTokenAccessor, 0);
             fullLowSearchKey.reset();
             fullLowSearchKey.addTuple(searchKey);
             fullLowSearchKey.addTuple(lowerBoundTuple);

             upperBoundTupleBuilder.reset();
             // Write dummy value.
             upperBoundTupleBuilder.getDataOutput().writeShort(Short.MAX_VALUE);
             upperBoundTupleBuilder.addFieldEndOffset();
             upperBoundTuple.reset(upperBoundTupleBuilder.getFieldEndOffsets(), upperBoundTupleBuilder.getByteArray());
             // Only needed for setting the number of fields in searchKey.
             searchKey.reset(queryTokenAccessor, 0);
             fullHighSearchKey.reset();
             fullHighSearchKey.addTuple(searchKey);
             fullHighSearchKey.addTuple(upperBoundTuple);
         } catch (IOException e) {
             throw new IllegalStateException(e);
         }
     }

     public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred,
             IIndexOperationContext ictx) throws HyracksDataException, IndexException {
         IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex;
         searchResult.reset();
         if (partInvIndex.isEmpty()) {
             return;
         }

         tokenizeQuery(searchPred);
         short numQueryTokens = (short) queryTokenAccessor.getTupleCount();

         IInvertedIndexSearchModifier searchModifier = searchPred.getSearchModifier();
         short numTokensLowerBound = searchModifier.getNumTokensLowerBound(numQueryTokens);
         short numTokensUpperBound = searchModifier.getNumTokensUpperBound(numQueryTokens);

         occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
         if (occurrenceThreshold <= 0) {
             throw new OccurrenceThresholdPanicException("Merge Threshold is <= 0. Failing Search.");
         }

         short maxCountPossible = numQueryTokens;
         invListCursorCache.reset();
         partitions.reset(numTokensLowerBound, numTokensUpperBound);
         cursorsOrderedByTokens.clear();
         for (int i = 0; i < numQueryTokens; i++) {
             searchKey.reset(queryTokenAccessor, i);
             if (!partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound,
                     partitions, cursorsOrderedByTokens)) {
                 maxCountPossible--;
                 // No results possible.
                 if (maxCountPossible < occurrenceThreshold) {
                     return;
                 }
             }
         }

         ArrayList<IInvertedListCursor>[] partitionCursors = partitions.getPartitions();
         short start = partitions.getMinValidPartitionIndex();
         short end = partitions.getMaxValidPartitionIndex();

         // Typically, we only enter this case for disk-based inverted indexes.
         // TODO: This behavior could potentially lead to a deadlock if we cannot pin
         // all inverted lists in memory, and are forced to wait for a page to get evicted
         // (other concurrent searchers may be in the same situation).
         // We should detect such cases, then unpin all pages, and then keep retrying to pin until we succeed.
         // This will require a different "tryPin()" mechanism in the BufferCache that will return false
         // if we'd have to wait for a page to get evicted.
         if (!cursorsOrderedByTokens.isEmpty()) {
             for (int i = start; i <= end; i++) {
                 if (partitionCursors[i] == null) {
                     continue;
                 }
                 // Prune partition because no element in it can satisfy the occurrence threshold.
                 if (partitionCursors[i].size() < occurrenceThreshold) {
                     cursorsOrderedByTokens.removeAll(partitionCursors[i]);
                 }
             }
             // Pin all the cursors in the order of tokens.
             int numCursors = cursorsOrderedByTokens.size();
             for (int i = 0; i < numCursors; i++) {
                 cursorsOrderedByTokens.get(i).pinPages();
             }
         }

         // Process the partitions one-by-one.
         for (int i = start; i <= end; i++) {
             if (partitionCursors[i] == null) {
                 continue;
             }
             // Prune partition because no element in it can satisfy the occurrence threshold.
             if (partitionCursors[i].size() < occurrenceThreshold) {
                 continue;
             }
             // Merge inverted lists of current partition.
             int numPrefixLists = searchModifier.getNumPrefixLists(occurrenceThreshold, partitionCursors[i].size());
             invListMerger.reset();
             invListMerger.merge(partitionCursors[i], occurrenceThreshold, numPrefixLists, searchResult);
         }

         resultCursor.open(null, searchPred);
     }

     public void setNumTokensBoundsInSearchKeys(short numTokensLowerBound, short numTokensUpperBound) {
         ShortSerializerDeserializer.putShort(numTokensLowerBound, lowerBoundTuple.getFieldData(0),
                 lowerBoundTuple.getFieldStart(0));
         ShortSerializerDeserializer.putShort(numTokensUpperBound, upperBoundTuple.getFieldData(0),
                 upperBoundTuple.getFieldStart(0));
     }

     public ITupleReference getPrefixSearchKey() {
         return searchKey;
     }

     public ITupleReference getFullLowSearchKey() {
         return fullLowSearchKey;
     }

     public ITupleReference getFullHighSearchKey() {
         return fullHighSearchKey;
     }

     public IInvertedListCursor getCachedInvertedListCursor() {
         return invListCursorCache.getNext();
     }
 }
	/*
	* Copyright 2009-2012 by The Regents of the University of California
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* you may obtain a copy of the License from
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search;

	import java.io.IOException;
	import java.util.ArrayList;

	import edu.uci.ics.hyracks.api.context.IHyracksCommonContext;
	import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
	import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
	import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
	import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
	import edu.uci.ics.hyracks.dataflow.common.data.marshalling.ShortSerializerDeserializer;
	import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
	import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
	import edu.uci.ics.hyracks.storage.am.common.tuples.ConcatenatingTupleReference;
	import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
	import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
	import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
	import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex;
	import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
	import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexSearchCursor;

	public class PartitionedTOccurrenceSearcher extends AbstractTOccurrenceSearcher {

	protected final ArrayTupleBuilder lowerBoundTupleBuilder = new ArrayTupleBuilder(1);
	protected final ArrayTupleReference lowerBoundTuple = new ArrayTupleReference();
	protected final ArrayTupleBuilder upperBoundTupleBuilder = new ArrayTupleBuilder(1);
	protected final ArrayTupleReference upperBoundTuple = new ArrayTupleReference();
	protected final ConcatenatingTupleReference fullLowSearchKey = new ConcatenatingTupleReference(2);
	protected final ConcatenatingTupleReference fullHighSearchKey = new ConcatenatingTupleReference(2);

	// Inverted list cursors ordered by token. Used to read relevant inverted-list partitions of one token one after
	// the other for better I/O performance (because the partitions of one inverted list are stored contiguously in a file).
	// The above implies that we currently require holding all inverted list for a query in memory.
	protected final ArrayList<IInvertedListCursor> cursorsOrderedByTokens = new ArrayList<IInvertedListCursor>();
	protected final InvertedListPartitions partitions = new InvertedListPartitions();

	public PartitionedTOccurrenceSearcher(IHyracksCommonContext ctx, IInvertedIndex invIndex) {
	super(ctx, invIndex);
	initHelperTuples();
	}

	private void initHelperTuples() {
	try {
	lowerBoundTupleBuilder.reset();
	// Write dummy value.
	lowerBoundTupleBuilder.getDataOutput().writeShort(Short.MIN_VALUE);
	lowerBoundTupleBuilder.addFieldEndOffset();
	lowerBoundTuple.reset(lowerBoundTupleBuilder.getFieldEndOffsets(), lowerBoundTupleBuilder.getByteArray());
	// Only needed for setting the number of fields in searchKey.
	searchKey.reset(queryTokenAccessor, 0);
	fullLowSearchKey.reset();
	fullLowSearchKey.addTuple(searchKey);
	fullLowSearchKey.addTuple(lowerBoundTuple);

	upperBoundTupleBuilder.reset();
	// Write dummy value.
	upperBoundTupleBuilder.getDataOutput().writeShort(Short.MAX_VALUE);
	upperBoundTupleBuilder.addFieldEndOffset();
	upperBoundTuple.reset(upperBoundTupleBuilder.getFieldEndOffsets(), upperBoundTupleBuilder.getByteArray());
	// Only needed for setting the number of fields in searchKey.
	searchKey.reset(queryTokenAccessor, 0);
	fullHighSearchKey.reset();
	fullHighSearchKey.addTuple(searchKey);
	fullHighSearchKey.addTuple(upperBoundTuple);
	} catch (IOException e) {
	throw new IllegalStateException(e);
	}
	}

	public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred,
	IIndexOperationContext ictx) throws HyracksDataException, IndexException {
	IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex;
	searchResult.reset();
	if (partInvIndex.isEmpty()) {
	return;
	}

	tokenizeQuery(searchPred);
	short numQueryTokens = (short) queryTokenAccessor.getTupleCount();

	IInvertedIndexSearchModifier searchModifier = searchPred.getSearchModifier();
	short numTokensLowerBound = searchModifier.getNumTokensLowerBound(numQueryTokens);
	short numTokensUpperBound = searchModifier.getNumTokensUpperBound(numQueryTokens);

	occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
	if (occurrenceThreshold <= 0) {
	throw new OccurrenceThresholdPanicException("Merge Threshold is <= 0. Failing Search.");
	}

	short maxCountPossible = numQueryTokens;
	invListCursorCache.reset();
	partitions.reset(numTokensLowerBound, numTokensUpperBound);
	cursorsOrderedByTokens.clear();
	for (int i = 0; i < numQueryTokens; i++) {
	searchKey.reset(queryTokenAccessor, i);
	if (!partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound,
	partitions, cursorsOrderedByTokens)) {
	maxCountPossible--;
	// No results possible.
	if (maxCountPossible < occurrenceThreshold) {
	return;
	}
	}
	}

	ArrayList<IInvertedListCursor>[] partitionCursors = partitions.getPartitions();
	short start = partitions.getMinValidPartitionIndex();
	short end = partitions.getMaxValidPartitionIndex();

	// Typically, we only enter this case for disk-based inverted indexes.
	// TODO: This behavior could potentially lead to a deadlock if we cannot pin
	// all inverted lists in memory, and are forced to wait for a page to get evicted
	// (other concurrent searchers may be in the same situation).
	// We should detect such cases, then unpin all pages, and then keep retrying to pin until we succeed.
	// This will require a different "tryPin()" mechanism in the BufferCache that will return false
	// if we'd have to wait for a page to get evicted.
	if (!cursorsOrderedByTokens.isEmpty()) {
	for (int i = start; i <= end; i++) {
	if (partitionCursors[i] == null) {
	continue;
	}
	// Prune partition because no element in it can satisfy the occurrence threshold.
	if (partitionCursors[i].size() < occurrenceThreshold) {
	cursorsOrderedByTokens.removeAll(partitionCursors[i]);
	}
	}
	// Pin all the cursors in the order of tokens.
	int numCursors = cursorsOrderedByTokens.size();
	for (int i = 0; i < numCursors; i++) {
	cursorsOrderedByTokens.get(i).pinPages();
	}
	}

	// Process the partitions one-by-one.
	for (int i = start; i <= end; i++) {
	if (partitionCursors[i] == null) {
	continue;
	}
	// Prune partition because no element in it can satisfy the occurrence threshold.
	if (partitionCursors[i].size() < occurrenceThreshold) {
	continue;
	}
	// Merge inverted lists of current partition.
	int numPrefixLists = searchModifier.getNumPrefixLists(occurrenceThreshold, partitionCursors[i].size());
	invListMerger.reset();
	invListMerger.merge(partitionCursors[i], occurrenceThreshold, numPrefixLists, searchResult);
	}

	resultCursor.open(null, searchPred);
	}

	public void setNumTokensBoundsInSearchKeys(short numTokensLowerBound, short numTokensUpperBound) {
	ShortSerializerDeserializer.putShort(numTokensLowerBound, lowerBoundTuple.getFieldData(0),
	lowerBoundTuple.getFieldStart(0));
	ShortSerializerDeserializer.putShort(numTokensUpperBound, upperBoundTuple.getFieldData(0),
	upperBoundTuple.getFieldStart(0));
	}

	public ITupleReference getPrefixSearchKey() {
	return searchKey;
	}

	public ITupleReference getFullLowSearchKey() {
	return fullLowSearchKey;
	}

	public ITupleReference getFullHighSearchKey() {
	return fullHighSearchKey;
	}

	public IInvertedListCursor getCachedInvertedListCursor() {
	return invListCursorCache.getNext();
	}
	}