| /* |
| * Copyright 2009-2010 by The Regents of the University of California |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * you may obtain a copy of the License from |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package edu.uci.ics.hyracks.storage.am.invertedindex.searchers; |
| |
| import java.io.ByteArrayInputStream; |
| import java.io.DataInput; |
| import java.io.DataInputStream; |
| import java.io.DataOutput; |
| import java.io.File; |
| import java.nio.ByteBuffer; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.Random; |
| import java.util.UUID; |
| |
| import org.junit.Test; |
| |
| import edu.uci.ics.hyracks.api.application.INCApplicationContext; |
| import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor; |
| import edu.uci.ics.hyracks.api.context.IHyracksJobletContext; |
| import edu.uci.ics.hyracks.api.context.IHyracksRootContext; |
| import edu.uci.ics.hyracks.api.context.IHyracksStageletContext; |
| import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator; |
| import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer; |
| import edu.uci.ics.hyracks.api.dataflow.value.ITypeTrait; |
| import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor; |
| import edu.uci.ics.hyracks.api.dataflow.value.TypeTrait; |
| import edu.uci.ics.hyracks.api.io.FileReference; |
| import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder; |
| import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor; |
| import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender; |
| import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference; |
| import edu.uci.ics.hyracks.dataflow.common.data.comparators.IntegerBinaryComparatorFactory; |
| import edu.uci.ics.hyracks.dataflow.common.data.comparators.UTF8StringBinaryComparatorFactory; |
| import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer; |
| import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer; |
| import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrame; |
| import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeInteriorFrameFactory; |
| import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame; |
| import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrameFactory; |
| import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeMetaDataFrame; |
| import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeMetaDataFrameFactory; |
| import edu.uci.ics.hyracks.storage.am.btree.frames.MetaDataFrameFactory; |
| import edu.uci.ics.hyracks.storage.am.btree.frames.NSMInteriorFrameFactory; |
| import edu.uci.ics.hyracks.storage.am.btree.frames.NSMLeafFrameFactory; |
| import edu.uci.ics.hyracks.storage.am.btree.impls.BTree; |
| import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOp; |
| import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext; |
| import edu.uci.ics.hyracks.storage.am.btree.impls.MultiComparator; |
| import edu.uci.ics.hyracks.storage.am.btree.tuples.TypeAwareTupleWriterFactory; |
| import edu.uci.ics.hyracks.storage.am.invertedindex.api.IBinaryTokenizer; |
| import edu.uci.ics.hyracks.storage.am.invertedindex.api.IInvertedIndexResultCursor; |
| import edu.uci.ics.hyracks.storage.am.invertedindex.impls.SimpleConjunctiveSearcher; |
| import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizer; |
| import edu.uci.ics.hyracks.storage.common.IStorageManagerInterface; |
| import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache; |
| import edu.uci.ics.hyracks.storage.common.buffercache.ICacheMemoryAllocator; |
| import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider; |
| import edu.uci.ics.hyracks.test.support.TestJobletContext; |
| import edu.uci.ics.hyracks.test.support.TestNCApplicationContext; |
| import edu.uci.ics.hyracks.test.support.TestRootContext; |
| import edu.uci.ics.hyracks.test.support.TestStageletContext; |
| import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder; |
| import edu.uci.ics.hyracks.test.support.TestStorageManagerInterface; |
| |
| public class SimpleConjunctiveSearcherTest { |
| // testing params |
| // private static final int PAGE_SIZE = 256; |
| // private static final int NUM_PAGES = 10; |
| // private static final int HYRACKS_FRAME_SIZE = 256; |
| |
| // realistic params |
| // private static final int PAGE_SIZE = 65536; |
| private static final int PAGE_SIZE = 32768; |
| private static final int NUM_PAGES = 10; |
| private static final int HYRACKS_FRAME_SIZE = 32768; |
| |
| static { |
| TestStorageManagerComponentHolder.init(PAGE_SIZE, NUM_PAGES); |
| } |
| |
| private String tmpDir = System.getProperty("java.io.tmpdir"); |
| |
| public class BufferAllocator implements ICacheMemoryAllocator { |
| @Override |
| public ByteBuffer[] allocate(int pageSize, int numPages) { |
| ByteBuffer[] buffers = new ByteBuffer[numPages]; |
| for (int i = 0; i < numPages; ++i) { |
| buffers[i] = ByteBuffer.allocate(pageSize); |
| } |
| return buffers; |
| } |
| } |
| |
| @Test |
| public void test01() throws Exception { |
| IHyracksRootContext rootCtx = new TestRootContext(HYRACKS_FRAME_SIZE); |
| INCApplicationContext appCtx = new TestNCApplicationContext(rootCtx); |
| IHyracksJobletContext jobletCtx = new TestJobletContext(appCtx, UUID.randomUUID(), 0); |
| IHyracksStageletContext stageletCtx = new TestStageletContext(jobletCtx, UUID.randomUUID()); |
| |
| IStorageManagerInterface smi = new TestStorageManagerInterface(); |
| |
| IBufferCache bufferCache = smi.getBufferCache(stageletCtx); |
| IFileMapProvider fmp = smi.getFileMapProvider(stageletCtx); |
| FileReference file = new FileReference(new File(tmpDir + "/" + "btreetest.bin")); |
| bufferCache.createFile(file); |
| int fileId = fmp.lookupFileId(file); |
| bufferCache.openFile(fileId); |
| |
| // declare fields |
| int fieldCount = 2; |
| ITypeTrait[] typeTraits = new ITypeTrait[fieldCount]; |
| typeTraits[0] = new TypeTrait(ITypeTrait.VARIABLE_LENGTH); |
| typeTraits[1] = new TypeTrait(4); |
| |
| // declare keys |
| int keyFieldCount = 2; |
| IBinaryComparator[] cmps = new IBinaryComparator[keyFieldCount]; |
| cmps[0] = UTF8StringBinaryComparatorFactory.INSTANCE.createBinaryComparator(); |
| cmps[1] = IntegerBinaryComparatorFactory.INSTANCE.createBinaryComparator(); |
| |
| MultiComparator cmp = new MultiComparator(typeTraits, cmps); |
| |
| TypeAwareTupleWriterFactory tupleWriterFactory = new TypeAwareTupleWriterFactory(typeTraits); |
| // SimpleTupleWriterFactory tupleWriterFactory = new |
| // SimpleTupleWriterFactory(); |
| IBTreeLeafFrameFactory leafFrameFactory = new NSMLeafFrameFactory(tupleWriterFactory); |
| // IBTreeLeafFrameFactory leafFrameFactory = new |
| // FieldPrefixNSMLeafFrameFactory(tupleWriterFactory); |
| IBTreeInteriorFrameFactory interiorFrameFactory = new NSMInteriorFrameFactory(tupleWriterFactory); |
| IBTreeMetaDataFrameFactory metaFrameFactory = new MetaDataFrameFactory(); |
| |
| IBTreeLeafFrame leafFrame = leafFrameFactory.getFrame(); |
| IBTreeInteriorFrame interiorFrame = interiorFrameFactory.getFrame(); |
| IBTreeMetaDataFrame metaFrame = metaFrameFactory.getFrame(); |
| |
| BTree btree = new BTree(bufferCache, interiorFrameFactory, leafFrameFactory, cmp); |
| btree.create(fileId, leafFrame, metaFrame); |
| btree.open(fileId); |
| |
| Random rnd = new Random(); |
| rnd.setSeed(50); |
| |
| ByteBuffer frame = stageletCtx.allocateFrame(); |
| FrameTupleAppender appender = new FrameTupleAppender(stageletCtx.getFrameSize()); |
| ArrayTupleBuilder tb = new ArrayTupleBuilder(cmp.getFieldCount()); |
| DataOutput dos = tb.getDataOutput(); |
| |
| ISerializerDeserializer[] btreeSerde = { UTF8StringSerializerDeserializer.INSTANCE, |
| IntegerSerializerDeserializer.INSTANCE }; |
| RecordDescriptor btreeRecDesc = new RecordDescriptor(btreeSerde); |
| IFrameTupleAccessor accessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), btreeRecDesc); |
| accessor.reset(frame); |
| FrameTupleReference tuple = new FrameTupleReference(); |
| |
| List<String> tokens = new ArrayList<String>(); |
| tokens.add("computer"); |
| tokens.add("hyracks"); |
| tokens.add("fast"); |
| tokens.add("university"); |
| tokens.add("science"); |
| tokens.add("major"); |
| |
| int maxId = 10000; |
| int addProb = 0; |
| int addProbStep = 2; |
| |
| BTreeOpContext opCtx = btree.createOpContext(BTreeOp.BTO_INSERT, leafFrame, interiorFrame, metaFrame); |
| |
| for (int i = 0; i < tokens.size(); i++) { |
| |
| addProb += addProbStep; |
| for (int j = 0; j < maxId; j++) { |
| if ((Math.abs(rnd.nextInt()) % addProb) == 0) { |
| tb.reset(); |
| UTF8StringSerializerDeserializer.INSTANCE.serialize(tokens.get(i), dos); |
| tb.addFieldEndOffset(); |
| IntegerSerializerDeserializer.INSTANCE.serialize(j, dos); |
| tb.addFieldEndOffset(); |
| |
| appender.reset(frame, true); |
| appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize()); |
| |
| tuple.reset(accessor, 0); |
| |
| try { |
| btree.insert(tuple, opCtx); |
| } catch (Exception e) { |
| e.printStackTrace(); |
| } |
| } |
| } |
| } |
| |
| int numPages = btree.getMaxPage(metaFrame); |
| System.out.println("NUMPAGES: " + numPages); |
| |
| // build query as tuple reference |
| ISerializerDeserializer[] querySerde = { UTF8StringSerializerDeserializer.INSTANCE }; |
| RecordDescriptor queryRecDesc = new RecordDescriptor(querySerde); |
| |
| FrameTupleAppender queryAppender = new FrameTupleAppender(stageletCtx.getFrameSize()); |
| ArrayTupleBuilder queryTb = new ArrayTupleBuilder(querySerde.length); |
| DataOutput queryDos = queryTb.getDataOutput(); |
| |
| IFrameTupleAccessor queryAccessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), queryRecDesc); |
| queryAccessor.reset(frame); |
| FrameTupleReference queryTuple = new FrameTupleReference(); |
| |
| String query = "computer hyracks fast"; |
| char queryDelimiter = ' '; |
| IBinaryTokenizer queryTokenizer = new DelimitedUTF8StringBinaryTokenizer(queryDelimiter); |
| |
| queryTb.reset(); |
| UTF8StringSerializerDeserializer.INSTANCE.serialize(query, queryDos); |
| queryTb.addFieldEndOffset(); |
| |
| queryAppender.reset(frame, true); |
| queryAppender.append(queryTb.getFieldEndOffsets(), queryTb.getByteArray(), 0, queryTb.getSize()); |
| queryTuple.reset(queryAccessor, 0); |
| |
| int numKeyFields = 1; |
| int numValueFields = 1; |
| ISerializerDeserializer[] resultSerde = new ISerializerDeserializer[numValueFields]; |
| for (int i = 0; i < numValueFields; i++) { |
| resultSerde[i] = btreeSerde[numKeyFields + i]; |
| } |
| RecordDescriptor resultRecDesc = new RecordDescriptor(resultSerde); |
| FrameTupleAccessor resultAccessor = new FrameTupleAccessor(stageletCtx.getFrameSize(), resultRecDesc); |
| FrameTupleReference resultTuple = new FrameTupleReference(); |
| |
| SimpleConjunctiveSearcher searcher = new SimpleConjunctiveSearcher(stageletCtx, btree, btreeRecDesc, |
| queryTokenizer, numKeyFields, numValueFields); |
| |
| long timeStart = System.currentTimeMillis(); |
| searcher.search(queryTuple, 0); |
| long timeEnd = System.currentTimeMillis(); |
| System.out.println("SEARCH TIME: " + (timeEnd - timeStart) + "ms"); |
| |
| // System.out.println("INTERSECTION RESULTS"); |
| IInvertedIndexResultCursor resultCursor = searcher.getResultCursor(); |
| while (resultCursor.hasNext()) { |
| resultCursor.next(); |
| resultAccessor.reset(resultCursor.getBuffer()); |
| for (int i = 0; i < resultAccessor.getTupleCount(); i++) { |
| resultTuple.reset(resultAccessor, i); |
| for (int j = 0; j < resultTuple.getFieldCount(); j++) { |
| ByteArrayInputStream inStream = new ByteArrayInputStream(resultTuple.getFieldData(j), |
| resultTuple.getFieldStart(j), resultTuple.getFieldLength(j)); |
| DataInput dataIn = new DataInputStream(inStream); |
| Object o = resultSerde[j].deserialize(dataIn); |
| System.out.print(o + " "); |
| } |
| System.out.println(); |
| } |
| } |
| |
| /* |
| * IBinaryComparator[] searchCmps = new IBinaryComparator[1]; |
| * searchCmps[0] = |
| * UTF8StringBinaryComparatorFactory.INSTANCE.createBinaryComparator(); |
| * MultiComparator searchCmp = new MultiComparator(typeTraits, |
| * searchCmps); |
| * |
| * // ordered scan IBTreeCursor scanCursor = new |
| * RangeSearchCursor(leafFrame); RangePredicate nullPred = new |
| * RangePredicate(true, null, null, true, true, null); BTreeOpContext |
| * searchOpCtx = btree.createOpContext(BTreeOp.BTO_SEARCH, leafFrame, |
| * interiorFrame, metaFrame); btree.search(scanCursor, nullPred, |
| * searchOpCtx); |
| * |
| * try { while (scanCursor.hasNext()) { scanCursor.next(); |
| * ITupleReference frameTuple = scanCursor.getTuple(); String rec = |
| * cmp.printTuple(frameTuple, btreeSerde); System.out.println(rec); } } |
| * catch (Exception e) { e.printStackTrace(); } finally { |
| * scanCursor.close(); } |
| */ |
| |
| btree.close(); |
| bufferCache.closeFile(fileId); |
| bufferCache.close(); |
| } |
| } |