| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.tajo.storage; |
| |
| import org.apache.hadoop.fs.FileStatus; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.tajo.QueryId; |
| import org.apache.tajo.TajoIdProtos; |
| import org.apache.tajo.catalog.*; |
| import org.apache.tajo.catalog.proto.CatalogProtos.StoreType; |
| import org.apache.tajo.catalog.statistics.TableStats; |
| import org.apache.tajo.common.TajoDataTypes.Type; |
| import org.apache.tajo.conf.TajoConf; |
| import org.apache.tajo.datum.Datum; |
| import org.apache.tajo.datum.DatumFactory; |
| import org.apache.tajo.datum.NullDatum; |
| import org.apache.tajo.datum.ProtobufDatumFactory; |
| import org.apache.tajo.storage.fragment.FileFragment; |
| import org.apache.tajo.storage.rcfile.RCFile; |
| import org.apache.tajo.util.CommonTestingUtil; |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.Parameterized; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.Collection; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertTrue; |
| |
| @RunWith(Parameterized.class) |
| public class TestStorages { |
| private TajoConf conf; |
| private static String TEST_PATH = "target/test-data/TestStorages"; |
| |
| private StoreType storeType; |
| private boolean splitable; |
| private boolean statsable; |
| private Path testDir; |
| private FileSystem fs; |
| |
/**
 * Builds one parameterized fixture: stores the storage type and the two capability
 * flags, creates the configuration, and obtains the test directory together with the
 * file system that directory belongs to.
 *
 * @param type      storage format exercised by this instance
 * @param splitable flag consulted by {@link #testSplitable()} to decide whether to run
 * @param statsable flag stored in the {@code statsable} field
 * @throws IOException declared for the test-directory and file-system setup calls
 */
public TestStorages(StoreType type, boolean splitable, boolean statsable) throws IOException {
  this.conf = new TajoConf();
  this.storeType = type;
  this.splitable = splitable;
  this.statsable = statsable;

  // Only RCFile receives an explicit record interval (100) in the configuration.
  if (StoreType.RCFILE == type) {
    this.conf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, 100);
  }

  this.testDir = CommonTestingUtil.getTestDir(TEST_PATH);
  this.fs = this.testDir.getFileSystem(this.conf);
}
| |
| @Parameterized.Parameters |
| public static Collection<Object[]> generateParameters() { |
| return Arrays.asList(new Object[][] { |
| {StoreType.CSV, true, true}, |
| {StoreType.RAW, false, false}, |
| {StoreType.RCFILE, true, true}, |
| {StoreType.TREVNI, false, true}, |
| }); |
| } |
| |
| @Test |
| public void testSplitable() throws IOException { |
| if (splitable) { |
| Schema schema = new Schema(); |
| schema.addColumn("id", Type.INT4); |
| schema.addColumn("age", Type.INT8); |
| |
| TableMeta meta = CatalogUtil.newTableMeta(storeType); |
| Path tablePath = new Path(testDir, "Splitable.data"); |
| Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); |
| appender.enableStats(); |
| appender.init(); |
| int tupleNum = 10000; |
| VTuple vTuple; |
| |
| for(int i = 0; i < tupleNum; i++) { |
| vTuple = new VTuple(2); |
| vTuple.put(0, DatumFactory.createInt4(i + 1)); |
| vTuple.put(1, DatumFactory.createInt8(25l)); |
| appender.addTuple(vTuple); |
| } |
| appender.close(); |
| TableStats stat = appender.getStats(); |
| assertEquals(tupleNum, stat.getNumRows().longValue()); |
| |
| FileStatus status = fs.getFileStatus(tablePath); |
| long fileLen = status.getLen(); |
| long randomNum = (long) (Math.random() * fileLen) + 1; |
| |
| FileFragment[] tablets = new FileFragment[2]; |
| tablets[0] = new FileFragment("Splitable", tablePath, 0, randomNum); |
| tablets[1] = new FileFragment("Splitable", tablePath, randomNum, (fileLen - randomNum)); |
| |
| Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[0], schema); |
| assertTrue(scanner.isSplittable()); |
| scanner.init(); |
| int tupleCnt = 0; |
| while (scanner.next() != null) { |
| tupleCnt++; |
| } |
| scanner.close(); |
| |
| scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, tablets[1], schema); |
| assertTrue(scanner.isSplittable()); |
| scanner.init(); |
| while (scanner.next() != null) { |
| tupleCnt++; |
| } |
| scanner.close(); |
| |
| assertEquals(tupleNum, tupleCnt); |
| } |
| } |
| |
| @Test |
| public void testProjection() throws IOException { |
| Schema schema = new Schema(); |
| schema.addColumn("id", Type.INT4); |
| schema.addColumn("age", Type.INT8); |
| schema.addColumn("score", Type.FLOAT4); |
| |
| TableMeta meta = CatalogUtil.newTableMeta(storeType); |
| |
| Path tablePath = new Path(testDir, "testProjection.data"); |
| Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); |
| appender.init(); |
| int tupleNum = 10000; |
| VTuple vTuple; |
| |
| for(int i = 0; i < tupleNum; i++) { |
| vTuple = new VTuple(3); |
| vTuple.put(0, DatumFactory.createInt4(i + 1)); |
| vTuple.put(1, DatumFactory.createInt8(i + 2)); |
| vTuple.put(2, DatumFactory.createFloat4(i + 3)); |
| appender.addTuple(vTuple); |
| } |
| appender.close(); |
| |
| FileStatus status = fs.getFileStatus(tablePath); |
| FileFragment fragment = new FileFragment("testReadAndWrite", tablePath, 0, status.getLen()); |
| |
| Schema target = new Schema(); |
| target.addColumn("age", Type.INT8); |
| target.addColumn("score", Type.FLOAT4); |
| Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment, target); |
| scanner.init(); |
| int tupleCnt = 0; |
| Tuple tuple; |
| while ((tuple = scanner.next()) != null) { |
| if (storeType == StoreType.RCFILE || storeType == StoreType.TREVNI || storeType == StoreType.CSV) { |
| assertTrue(tuple.get(0) == null); |
| } |
| assertTrue(tupleCnt + 2 == tuple.get(1).asInt8()); |
| assertTrue(tupleCnt + 3 == tuple.get(2).asFloat4()); |
| tupleCnt++; |
| } |
| scanner.close(); |
| |
| assertEquals(tupleNum, tupleCnt); |
| } |
| |
| @Test |
| public void testVariousTypes() throws IOException { |
| Schema schema = new Schema(); |
| schema.addColumn("col1", Type.BOOLEAN); |
| schema.addColumn("col2", Type.BIT); |
| schema.addColumn("col3", Type.CHAR, 7); |
| schema.addColumn("col4", Type.INT2); |
| schema.addColumn("col5", Type.INT4); |
| schema.addColumn("col6", Type.INT8); |
| schema.addColumn("col7", Type.FLOAT4); |
| schema.addColumn("col8", Type.FLOAT8); |
| schema.addColumn("col9", Type.TEXT); |
| schema.addColumn("col10", Type.BLOB); |
| schema.addColumn("col11", Type.INET4); |
| schema.addColumn("col12", Type.NULL_TYPE); |
| schema.addColumn("col13", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); |
| |
| Options options = new Options(); |
| TableMeta meta = CatalogUtil.newTableMeta(storeType, options); |
| |
| Path tablePath = new Path(testDir, "testVariousTypes.data"); |
| Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); |
| appender.init(); |
| |
| QueryId queryid = new QueryId("12345", 5); |
| ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); |
| |
| Tuple tuple = new VTuple(13); |
| tuple.put(new Datum[] { |
| DatumFactory.createBool(true), |
| DatumFactory.createBit((byte) 0x99), |
| DatumFactory.createChar("hyunsik"), |
| DatumFactory.createInt2((short) 17), |
| DatumFactory.createInt4(59), |
| DatumFactory.createInt8(23l), |
| DatumFactory.createFloat4(77.9f), |
| DatumFactory.createFloat8(271.9f), |
| DatumFactory.createText("hyunsik"), |
| DatumFactory.createBlob("hyunsik".getBytes()), |
| DatumFactory.createInet4("192.168.0.1"), |
| NullDatum.get(), |
| factory.createDatum(queryid.getProto()) |
| }); |
| appender.addTuple(tuple); |
| appender.flush(); |
| appender.close(); |
| |
| FileStatus status = fs.getFileStatus(tablePath); |
| FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); |
| Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); |
| scanner.init(); |
| |
| Tuple retrieved; |
| while ((retrieved=scanner.next()) != null) { |
| for (int i = 0; i < tuple.size(); i++) { |
| assertEquals(tuple.get(i), retrieved.get(i)); |
| } |
| } |
| scanner.close(); |
| } |
| |
| @Test |
| public void testRCFileTextSerializeDeserialize() throws IOException { |
| if(storeType != StoreType.RCFILE) return; |
| |
| Schema schema = new Schema(); |
| schema.addColumn("col1", Type.BOOLEAN); |
| schema.addColumn("col2", Type.BIT); |
| schema.addColumn("col3", Type.CHAR, 7); |
| schema.addColumn("col4", Type.INT2); |
| schema.addColumn("col5", Type.INT4); |
| schema.addColumn("col6", Type.INT8); |
| schema.addColumn("col7", Type.FLOAT4); |
| schema.addColumn("col8", Type.FLOAT8); |
| schema.addColumn("col9", Type.TEXT); |
| schema.addColumn("col10", Type.BLOB); |
| schema.addColumn("col11", Type.INET4); |
| schema.addColumn("col12", Type.NULL_TYPE); |
| schema.addColumn("col13", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); |
| |
| Options options = new Options(); |
| TableMeta meta = CatalogUtil.newTableMeta(storeType, options); |
| meta.putOption(CatalogConstants.CSVFILE_SERDE, TextSerializerDeserializer.class.getName()); |
| |
| Path tablePath = new Path(testDir, "testVariousTypes.data"); |
| Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); |
| appender.enableStats(); |
| appender.init(); |
| |
| QueryId queryid = new QueryId("12345", 5); |
| ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); |
| |
| Tuple tuple = new VTuple(13); |
| tuple.put(new Datum[] { |
| DatumFactory.createBool(true), |
| DatumFactory.createBit((byte) 0x99), |
| DatumFactory.createChar("jinho"), |
| DatumFactory.createInt2((short) 17), |
| DatumFactory.createInt4(59), |
| DatumFactory.createInt8(23l), |
| DatumFactory.createFloat4(77.9f), |
| DatumFactory.createFloat8(271.9f), |
| DatumFactory.createText("jinho"), |
| DatumFactory.createBlob("hyunsik babo".getBytes()), |
| DatumFactory.createInet4("192.168.0.1"), |
| NullDatum.get(), |
| factory.createDatum(queryid.getProto()) |
| }); |
| appender.addTuple(tuple); |
| appender.flush(); |
| appender.close(); |
| |
| FileStatus status = fs.getFileStatus(tablePath); |
| assertEquals(appender.getStats().getNumBytes().longValue(), status.getLen()); |
| |
| FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); |
| Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); |
| scanner.init(); |
| |
| Tuple retrieved; |
| while ((retrieved=scanner.next()) != null) { |
| for (int i = 0; i < tuple.size(); i++) { |
| assertEquals(tuple.get(i), retrieved.get(i)); |
| } |
| } |
| scanner.close(); |
| assertEquals(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue()); |
| assertEquals(appender.getStats().getNumRows().longValue(), scanner.getInputStats().getNumRows().longValue()); |
| } |
| |
| @Test |
| public void testRCFileBinarySerializeDeserialize() throws IOException { |
| if(storeType != StoreType.RCFILE) return; |
| |
| Schema schema = new Schema(); |
| schema.addColumn("col1", Type.BOOLEAN); |
| schema.addColumn("col2", Type.BIT); |
| schema.addColumn("col3", Type.CHAR, 7); |
| schema.addColumn("col4", Type.INT2); |
| schema.addColumn("col5", Type.INT4); |
| schema.addColumn("col6", Type.INT8); |
| schema.addColumn("col7", Type.FLOAT4); |
| schema.addColumn("col8", Type.FLOAT8); |
| schema.addColumn("col9", Type.TEXT); |
| schema.addColumn("col10", Type.BLOB); |
| schema.addColumn("col11", Type.INET4); |
| schema.addColumn("col12", Type.NULL_TYPE); |
| schema.addColumn("col13", CatalogUtil.newDataType(Type.PROTOBUF, TajoIdProtos.QueryIdProto.class.getName())); |
| |
| Options options = new Options(); |
| TableMeta meta = CatalogUtil.newTableMeta(storeType, options); |
| meta.putOption(CatalogConstants.RCFILE_SERDE, BinarySerializerDeserializer.class.getName()); |
| |
| Path tablePath = new Path(testDir, "testVariousTypes.data"); |
| Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); |
| appender.enableStats(); |
| appender.init(); |
| |
| QueryId queryid = new QueryId("12345", 5); |
| ProtobufDatumFactory factory = ProtobufDatumFactory.get(TajoIdProtos.QueryIdProto.class.getName()); |
| |
| Tuple tuple = new VTuple(13); |
| tuple.put(new Datum[] { |
| DatumFactory.createBool(true), |
| DatumFactory.createBit((byte) 0x99), |
| DatumFactory.createChar("jinho"), |
| DatumFactory.createInt2((short) 17), |
| DatumFactory.createInt4(59), |
| DatumFactory.createInt8(23l), |
| DatumFactory.createFloat4(77.9f), |
| DatumFactory.createFloat8(271.9f), |
| DatumFactory.createText("jinho"), |
| DatumFactory.createBlob("hyunsik babo".getBytes()), |
| DatumFactory.createInet4("192.168.0.1"), |
| NullDatum.get(), |
| factory.createDatum(queryid.getProto()) |
| }); |
| appender.addTuple(tuple); |
| appender.flush(); |
| appender.close(); |
| |
| FileStatus status = fs.getFileStatus(tablePath); |
| assertEquals(appender.getStats().getNumBytes().longValue(), status.getLen()); |
| |
| FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); |
| Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); |
| scanner.init(); |
| |
| Tuple retrieved; |
| while ((retrieved=scanner.next()) != null) { |
| for (int i = 0; i < tuple.size(); i++) { |
| assertEquals(tuple.get(i), retrieved.get(i)); |
| } |
| } |
| scanner.close(); |
| assertEquals(appender.getStats().getNumBytes().longValue(), scanner.getInputStats().getNumBytes().longValue()); |
| assertEquals(appender.getStats().getNumRows().longValue(), scanner.getInputStats().getNumRows().longValue()); |
| } |
| |
| @Test |
| public void testTime() throws IOException { |
| if (storeType == StoreType.CSV || storeType == StoreType.RAW) { |
| Schema schema = new Schema(); |
| schema.addColumn("col1", Type.DATE); |
| schema.addColumn("col2", Type.TIME); |
| schema.addColumn("col3", Type.TIMESTAMP); |
| |
| Options options = new Options(); |
| TableMeta meta = CatalogUtil.newTableMeta(storeType, options); |
| |
| Path tablePath = new Path(testDir, "testTime.data"); |
| Appender appender = StorageManagerFactory.getStorageManager(conf).getAppender(meta, schema, tablePath); |
| appender.init(); |
| |
| Tuple tuple = new VTuple(3); |
| tuple.put(new Datum[]{ |
| DatumFactory.createDate("1980-04-01"), |
| DatumFactory.createTime("12:34:56"), |
| DatumFactory.createTimeStamp((int) System.currentTimeMillis() / 1000) |
| }); |
| appender.addTuple(tuple); |
| appender.flush(); |
| appender.close(); |
| |
| FileStatus status = fs.getFileStatus(tablePath); |
| FileFragment fragment = new FileFragment("table", tablePath, 0, status.getLen()); |
| Scanner scanner = StorageManagerFactory.getStorageManager(conf).getScanner(meta, schema, fragment); |
| scanner.init(); |
| |
| Tuple retrieved; |
| while ((retrieved = scanner.next()) != null) { |
| for (int i = 0; i < tuple.size(); i++) { |
| assertEquals(tuple.get(i), retrieved.get(i)); |
| } |
| } |
| scanner.close(); |
| } |
| } |
| } |