blob: 870e6d953eca7f8c796bf41ff4c99e428d85e77e [file] [log] [blame]
/*
* Copyright 2009-2010 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.dataflow.common.util.SerdeUtils;
import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestContext;
import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.InvertedIndexException;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
/**
 * Test context for inverted-index tests.
 * <p>
 * In addition to what {@link OrderedIndexTestContext} provides, this context keeps the tokenizer factory and
 * index type it was created with, a tokenizing tuple iterator used to turn a document tuple into the
 * (token, inverted-list element) tuples that are tracked as {@link CheckTuple}s, the set of all tokens seen by
 * {@link #insertCheckTuples(ITupleReference, Collection)}, and a copy of every document tuple passed to it.
 */
@SuppressWarnings("rawtypes")
public class LSMInvertedIndexTestContext extends OrderedIndexTestContext {

    /** The inverted-index variants that {@code create()} can instantiate. */
    public enum InvertedIndexType {
        INMEMORY,
        ONDISK,
        LSM,
        PARTITIONED_INMEMORY,
        PARTITIONED_ONDISK,
        PARTITIONED_LSM
    }

    /** The index under test, viewed as an inverted index. */
    protected IInvertedIndex invIndex;
    /** Lazily built concatenation of token and inverted-list comparator factories. */
    protected IBinaryComparatorFactory[] allCmpFactories;
    protected IBinaryTokenizerFactory tokenizerFactory;
    protected InvertedIndexType invIndexType;
    /** Iterator that expands a document tuple into the tuples that are checked against the index. */
    protected InvertedIndexTokenizingTupleIterator indexTupleIter;
    /** Field 0 of every check tuple produced by {@link #insertCheckTuples(ITupleReference, Collection)}. */
    protected HashSet<Comparable> allTokens = new HashSet<Comparable>();
    /** Copies of all document tuples passed to {@link #insertCheckTuples(ITupleReference, Collection)}. */
    protected List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();

    /**
     * Creates a test context around an already constructed index.
     *
     * @param fieldSerdes
     *            serializer/deserializers for all fields of the tuples tracked by this context
     * @param index
     *            the index under test; must implement {@link IInvertedIndex}, otherwise the cast performed here
     *            fails with a {@link ClassCastException}
     * @param tokenizerFactory
     *            the tokenizer factory associated with this context
     * @param invIndexType
     *            the kind of inverted index that {@code index} is
     * @param indexTupleIter
     *            the iterator used to tokenize document tuples
     */
    public LSMInvertedIndexTestContext(ISerializerDeserializer[] fieldSerdes, IIndex index,
            IBinaryTokenizerFactory tokenizerFactory, InvertedIndexType invIndexType,
            InvertedIndexTokenizingTupleIterator indexTupleIter) {
        super(fieldSerdes, index);
        invIndex = (IInvertedIndex) index;
        this.tokenizerFactory = tokenizerFactory;
        this.invIndexType = invIndexType;
        this.indexTupleIter = indexTupleIter;
    }

    /**
     * @return the number of field serdes; every field is counted as a key field
     */
    @Override
    public int getKeyFieldCount() {
        return fieldSerdes.length;
    }

    /**
     * Returns the token comparator factories followed by the inverted-list comparator factories of the index.
     * The combined array is built on first use and cached in {@link #allCmpFactories}.
     *
     * @return the concatenated comparator factories
     */
    @Override
    public IBinaryComparatorFactory[] getComparatorFactories() {
        if (allCmpFactories == null) {
            // The local is deliberately not called 'invIndex' so that it does not shadow the field.
            IInvertedIndex invertedIndex = (IInvertedIndex) index;
            IBinaryComparatorFactory[] tokenCmpFactories = invertedIndex.getTokenCmpFactories();
            IBinaryComparatorFactory[] invListCmpFactories = invertedIndex.getInvListCmpFactories();
            IBinaryComparatorFactory[] combined = new IBinaryComparatorFactory[tokenCmpFactories.length
                    + invListCmpFactories.length];
            System.arraycopy(tokenCmpFactories, 0, combined, 0, tokenCmpFactories.length);
            System.arraycopy(invListCmpFactories, 0, combined, tokenCmpFactories.length,
                    invListCmpFactories.length);
            // Publish the array only once it is fully populated.
            allCmpFactories = combined;
        }
        return allCmpFactories;
    }

    /**
     * Builds an inverted index of the requested type from the resources of the given harness and wraps it in a
     * test context.
     * <p>
     * Type traits and comparator factories are derived from {@code fieldSerdes}. The first
     * {@code tokenFieldCount} fields are treated as token fields and the remaining fields as inverted-list
     * element fields.
     *
     * @param harness
     *            supplies buffer caches, page managers, file providers and the other resources needed by the
     *            index factories
     * @param fieldSerdes
     *            serializer/deserializers for all fields (token fields first, then inverted-list fields)
     * @param tokenFieldCount
     *            number of leading fields that are token fields
     * @param tokenizerFactory
     *            tokenizer factory handed to the index (where applicable) and used to create the tokenizer of
     *            the context's tuple iterator
     * @param invIndexType
     *            which kind of inverted index to create
     * @return a new test context wrapping the created index
     * @throws IndexException
     *             declared for the index factory calls; an {@link InvertedIndexException} is thrown for an
     *             index type that is not handled here
     */
    public static LSMInvertedIndexTestContext create(LSMInvertedIndexTestHarness harness,
            ISerializerDeserializer[] fieldSerdes, int tokenFieldCount, IBinaryTokenizerFactory tokenizerFactory,
            InvertedIndexType invIndexType) throws IndexException {
        ITypeTraits[] allTypeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
        IBinaryComparatorFactory[] allCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes,
                fieldSerdes.length);

        // Token type traits and comparators: the leading tokenFieldCount entries.
        ITypeTraits[] tokenTypeTraits = new ITypeTraits[tokenFieldCount];
        IBinaryComparatorFactory[] tokenCmpFactories = new IBinaryComparatorFactory[tokenFieldCount];
        System.arraycopy(allTypeTraits, 0, tokenTypeTraits, 0, tokenFieldCount);
        System.arraycopy(allCmpFactories, 0, tokenCmpFactories, 0, tokenFieldCount);

        // Inverted-list element type traits and comparators: everything after the token fields.
        int invListFieldCount = fieldSerdes.length - tokenFieldCount;
        ITypeTraits[] invListTypeTraits = new ITypeTraits[invListFieldCount];
        IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[invListFieldCount];
        System.arraycopy(allTypeTraits, tokenFieldCount, invListTypeTraits, 0, invListFieldCount);
        System.arraycopy(allCmpFactories, tokenFieldCount, invListCmpFactories, 0, invListFieldCount);

        // Create the index.
        IInvertedIndex invIndex;
        switch (invIndexType) {
            case INMEMORY: {
                invIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
                        harness.getMemFreePageManager(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
                        tokenCmpFactories, tokenizerFactory);
                break;
            }
            case PARTITIONED_INMEMORY: {
                invIndex = InvertedIndexUtils.createPartitionedInMemoryBTreeInvertedindex(
                        harness.getMemBufferCache(), harness.getMemFreePageManager(), invListTypeTraits,
                        invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory);
                break;
            }
            case ONDISK: {
                invIndex = InvertedIndexUtils.createOnDiskInvertedIndex(harness.getDiskBufferCache(),
                        harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
                        tokenCmpFactories, harness.getInvListsFileRef());
                break;
            }
            case PARTITIONED_ONDISK: {
                invIndex = InvertedIndexUtils.createPartitionedOnDiskInvertedIndex(harness.getDiskBufferCache(),
                        harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
                        tokenCmpFactories, harness.getInvListsFileRef());
                break;
            }
            case LSM: {
                invIndex = InvertedIndexUtils.createLSMInvertedIndex(harness.getMemBufferCache(),
                        harness.getMemFreePageManager(), harness.getDiskFileMapProvider(), invListTypeTraits,
                        invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
                        harness.getDiskBufferCache(), harness.getIOManager(), harness.getOnDiskDir(),
                        harness.getMergePolicy(), harness.getOperationTrackerFactory(), harness.getIOScheduler(),
                        harness.getIOOperationCallbackProvider());
                break;
            }
            case PARTITIONED_LSM: {
                invIndex = InvertedIndexUtils.createPartitionedLSMInvertedIndex(harness.getMemBufferCache(),
                        harness.getMemFreePageManager(), harness.getDiskFileMapProvider(), invListTypeTraits,
                        invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
                        harness.getDiskBufferCache(), harness.getIOManager(), harness.getOnDiskDir(),
                        harness.getMergePolicy(), harness.getOperationTrackerFactory(), harness.getIOScheduler(),
                        harness.getIOOperationCallbackProvider());
                break;
            }
            default: {
                throw new InvertedIndexException("Unknown inverted-index type '" + invIndexType + "'.");
            }
        }

        InvertedIndexTokenizingTupleIterator indexTupleIter = createTupleIterator(invIndex, tokenizerFactory,
                invIndexType);
        return new LSMInvertedIndexTestContext(fieldSerdes, invIndex, tokenizerFactory, invIndexType,
                indexTupleIter);
    }

    /**
     * Creates the tokenizing tuple iterator matching the index type: the partitioned iterator for the
     * {@code PARTITIONED_*} types and the plain one otherwise.
     */
    private static InvertedIndexTokenizingTupleIterator createTupleIterator(IInvertedIndex invIndex,
            IBinaryTokenizerFactory tokenizerFactory, InvertedIndexType invIndexType) throws IndexException {
        switch (invIndexType) {
            case INMEMORY:
            case ONDISK:
            case LSM: {
                return new InvertedIndexTokenizingTupleIterator(invIndex.getTokenTypeTraits().length,
                        invIndex.getInvListTypeTraits().length, tokenizerFactory.createTokenizer());
            }
            case PARTITIONED_INMEMORY:
            case PARTITIONED_ONDISK:
            case PARTITIONED_LSM: {
                return new PartitionedInvertedIndexTokenizingTupleIterator(invIndex.getTokenTypeTraits().length,
                        invIndex.getInvListTypeTraits().length, tokenizerFactory.createTokenizer());
            }
            default: {
                throw new InvertedIndexException("Unknown inverted-index type '" + invIndexType + "'.");
            }
        }
    }

    /**
     * Records a document tuple and registers the check tuples it expands to.
     * <p>
     * A copy of {@code tuple} is appended to the document corpus. The tuple is then run through the tokenizing
     * iterator; each tuple the iterator yields is converted to a {@link CheckTuple}, passed to
     * {@code insertCheckTuple}, and its field 0 is added to the set of all tokens.
     *
     * @param tuple
     *            the document tuple to tokenize
     * @param checkTuples
     *            the collection of expected tuples to update
     * @throws HyracksDataException
     *             if copying, tokenizing or deserializing the tuple fails
     */
    public void insertCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples)
            throws HyracksDataException {
        documentCorpus.add(TupleUtils.copyTuple(tuple));
        indexTupleIter.reset(tuple);
        while (indexTupleIter.hasNext()) {
            indexTupleIter.next();
            ITupleReference insertTuple = indexTupleIter.getTuple();
            CheckTuple checkTuple = createCheckTuple(insertTuple);
            insertCheckTuple(checkTuple, checkTuples);
            allTokens.add(checkTuple.getField(0));
        }
    }

    /**
     * Tokenizes a document tuple and passes the check tuple of each resulting tuple to
     * {@code deleteCheckTuple}. The document corpus and the token set are not modified.
     *
     * @param tuple
     *            the document tuple to tokenize
     * @param checkTuples
     *            the collection of expected tuples to update
     * @throws HyracksDataException
     *             if tokenizing or deserializing the tuple fails
     */
    public void deleteCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples)
            throws HyracksDataException {
        indexTupleIter.reset(tuple);
        while (indexTupleIter.hasNext()) {
            indexTupleIter.next();
            ITupleReference deleteTuple = indexTupleIter.getTuple();
            CheckTuple checkTuple = createCheckTuple(deleteTuple);
            deleteCheckTuple(checkTuple, checkTuples);
        }
    }

    /**
     * @return the live (not copied) set of tokens collected by
     *         {@link #insertCheckTuples(ITupleReference, Collection)}
     */
    public HashSet<Comparable> getAllTokens() {
        return allTokens;
    }

    /**
     * Deserializes every field of {@code tuple} with the corresponding field serde and collects the values in
     * a new {@link CheckTuple}.
     *
     * @param tuple
     *            the tuple to convert; read for field indexes {@code 0 .. fieldSerdes.length - 1}
     * @return a check tuple holding one deserialized value per field serde
     * @throws HyracksDataException
     *             if a field cannot be deserialized
     */
    @SuppressWarnings("unchecked")
    public CheckTuple createCheckTuple(ITupleReference tuple) throws HyracksDataException {
        CheckTuple checkTuple = new CheckTuple(fieldSerdes.length, fieldSerdes.length);
        for (int i = 0; i < fieldSerdes.length; i++) {
            ByteArrayInputStream bains = new ByteArrayInputStream(tuple.getFieldData(i), tuple.getFieldStart(i),
                    tuple.getFieldLength(i));
            DataInput in = new DataInputStream(bains);
            Comparable field = (Comparable) fieldSerdes[i].deserialize(in);
            checkTuple.appendField(field);
        }
        return checkTuple;
    }

    /**
     * Always fails: this context does not support upserts.
     *
     * @throws UnsupportedOperationException
     *             always
     */
    @Override
    public void upsertCheckTuple(CheckTuple checkTuple, Collection<CheckTuple> checkTuples) {
        throw new UnsupportedOperationException("Upsert not supported by inverted index.");
    }

    /** @return the tokenizer factory this context was created with */
    public IBinaryTokenizerFactory getTokenizerFactory() {
        return tokenizerFactory;
    }

    /**
     * @return the live (not copied) list of document tuples recorded by
     *         {@link #insertCheckTuples(ITupleReference, Collection)}
     */
    public List<ITupleReference> getDocumentCorpus() {
        return documentCorpus;
    }

    /** @return the inverted-index type this context was created with */
    public InvertedIndexType getInvertedIndexType() {
        return invIndexType;
    }
}