blob: d5a074e8827ebb4f53cfa883f2eb3b23b4ecb9b6 [file] [log] [blame]
/*
* Copyright 2009-2012 by The Regents of the University of California
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* you may obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
import java.io.File;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeException;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
import edu.uci.ics.hyracks.storage.am.btree.util.BTreeUtils;
import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.InMemoryBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
public class InMemoryInvertedIndex implements IInvertedIndex {
protected final BTree btree;
protected final FileReference memBTreeFile = new FileReference(new File("memBTree"));
protected final ITypeTraits[] tokenTypeTraits;
protected final IBinaryComparatorFactory[] tokenCmpFactories;
protected final ITypeTraits[] invListTypeTraits;
protected final IBinaryComparatorFactory[] invListCmpFactories;
protected final IBinaryTokenizerFactory tokenizerFactory;
protected final ITypeTraits[] btreeTypeTraits;
protected final IBinaryComparatorFactory[] btreeCmpFactories;
public InMemoryInvertedIndex(IBufferCache memBufferCache, IFreePageManager memFreePageManager,
ITypeTraits[] invListTypeTraits, IBinaryComparatorFactory[] invListCmpFactories,
ITypeTraits[] tokenTypeTraits, IBinaryComparatorFactory[] tokenCmpFactories,
IBinaryTokenizerFactory tokenizerFactory) throws BTreeException {
this.tokenTypeTraits = tokenTypeTraits;
this.tokenCmpFactories = tokenCmpFactories;
this.invListTypeTraits = invListTypeTraits;
this.invListCmpFactories = invListCmpFactories;
this.tokenizerFactory = tokenizerFactory;
// BTree tuples: <tokens, inverted-list elements>.
int numBTreeFields = tokenTypeTraits.length + invListTypeTraits.length;
btreeTypeTraits = new ITypeTraits[numBTreeFields];
btreeCmpFactories = new IBinaryComparatorFactory[numBTreeFields];
for (int i = 0; i < tokenTypeTraits.length; i++) {
btreeTypeTraits[i] = tokenTypeTraits[i];
btreeCmpFactories[i] = tokenCmpFactories[i];
}
for (int i = 0; i < invListTypeTraits.length; i++) {
btreeTypeTraits[tokenTypeTraits.length + i] = invListTypeTraits[i];
btreeCmpFactories[tokenTypeTraits.length + i] = invListCmpFactories[i];
}
this.btree = BTreeUtils.createBTree(memBufferCache, memFreePageManager,
((InMemoryBufferCache) memBufferCache).getFileMapProvider(), btreeTypeTraits, btreeCmpFactories,
BTreeLeafFrameType.REGULAR_NSM, memBTreeFile);
}
@Override
public void create() throws HyracksDataException {
btree.create();
}
@Override
public void activate() throws HyracksDataException {
btree.activate();
}
@Override
public void clear() throws HyracksDataException {
btree.clear();
}
@Override
public void deactivate() throws HyracksDataException {
btree.deactivate();
}
@Override
public void destroy() throws HyracksDataException {
btree.destroy();
}
@Override
public void validate() throws HyracksDataException {
btree.validate();
}
public void insert(ITupleReference tuple, BTreeAccessor btreeAccessor, IIndexOperationContext ictx)
throws HyracksDataException, IndexException {
InMemoryInvertedIndexOpContext ctx = (InMemoryInvertedIndexOpContext) ictx;
ctx.tupleIter.reset(tuple);
while (ctx.tupleIter.hasNext()) {
ctx.tupleIter.next();
ITupleReference insertTuple = ctx.tupleIter.getTuple();
try {
btreeAccessor.insert(insertTuple);
} catch (BTreeDuplicateKeyException e) {
// This exception may be caused by duplicate tokens in the same insert "document".
// We ignore such duplicate tokens in all inverted-index implementations, hence
// we can safely ignore this exception.
}
}
}
public void delete(ITupleReference tuple, BTreeAccessor btreeAccessor, IIndexOperationContext ictx)
throws HyracksDataException, IndexException {
InMemoryInvertedIndexOpContext ctx = (InMemoryInvertedIndexOpContext) ictx;
ctx.tupleIter.reset(tuple);
while (ctx.tupleIter.hasNext()) {
ctx.tupleIter.next();
ITupleReference deleteTuple = ctx.tupleIter.getTuple();
try {
btreeAccessor.delete(deleteTuple);
} catch (BTreeNonExistentKeyException e) {
// Ignore this exception, since a document may have duplicate tokens.
}
}
}
@Override
public long getMemoryAllocationSize() {
InMemoryBufferCache memBufferCache = (InMemoryBufferCache) btree.getBufferCache();
return memBufferCache.getNumPages() * memBufferCache.getPageSize();
}
@Override
public IInvertedListCursor createInvertedListCursor() {
return new InMemoryInvertedListCursor(invListTypeTraits.length, tokenTypeTraits.length);
}
@Override
public void openInvertedListCursor(IInvertedListCursor listCursor, ITupleReference searchKey,
IIndexOperationContext ictx) throws HyracksDataException, IndexException {
InMemoryInvertedIndexOpContext ctx = (InMemoryInvertedIndexOpContext) ictx;
ctx.setOperation(IndexOperation.SEARCH);
InMemoryInvertedListCursor inMemListCursor = (InMemoryInvertedListCursor) listCursor;
inMemListCursor.prepare(ctx.btreeAccessor, ctx.btreePred, ctx.tokenFieldsCmp, ctx.btreeCmp);
inMemListCursor.reset(searchKey);
}
@Override
public IIndexAccessor createAccessor(IModificationOperationCallback modificationCallback,
ISearchOperationCallback searchCallback) {
return new InMemoryInvertedIndexAccessor(this, new InMemoryInvertedIndexOpContext(btree, tokenCmpFactories,
tokenizerFactory));
}
@Override
public IBufferCache getBufferCache() {
return btree.getBufferCache();
}
public BTree getBTree() {
return btree;
}
@Override
public IBinaryComparatorFactory[] getInvListCmpFactories() {
return invListCmpFactories;
}
@Override
public ITypeTraits[] getInvListTypeTraits() {
return invListTypeTraits;
}
@Override
public IIndexBulkLoader createBulkLoader(float fillFactor, boolean verifyInput, long numElementsHint)
throws IndexException {
throw new UnsupportedOperationException("Bulk load not supported by in-memory inverted index.");
}
@Override
public ITypeTraits[] getTokenTypeTraits() {
return tokenTypeTraits;
}
@Override
public IBinaryComparatorFactory[] getTokenCmpFactories() {
return tokenCmpFactories;
}
}