| /*** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.tajo.storage.raw; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.FileStatus; |
| import org.apache.hadoop.fs.FileSystem; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hdfs.MiniDFSCluster; |
| import org.apache.tajo.BuiltinStorages; |
| import org.apache.tajo.catalog.*; |
| import org.apache.tajo.common.TajoDataTypes; |
| import org.apache.tajo.conf.TajoConf; |
| import org.apache.tajo.datum.DatumFactory; |
| import org.apache.tajo.datum.ProtobufDatum; |
| import org.apache.tajo.rpc.protocolrecords.PrimitiveProtos; |
| import org.apache.tajo.storage.TestFileTablespace; |
| import org.apache.tajo.storage.Tuple; |
| import org.apache.tajo.storage.fragment.FileFragment; |
| import org.apache.tajo.storage.rawfile.DirectRawFileScanner; |
| import org.apache.tajo.storage.rawfile.DirectRawFileWriter; |
| import org.apache.tajo.tuple.memory.MemoryRowBlock; |
| import org.apache.tajo.tuple.memory.RowWriter; |
| import org.apache.tajo.unit.StorageUnit; |
| import org.apache.tajo.util.FileUtil; |
| import org.apache.tajo.util.ProtoUtil; |
| import org.junit.After; |
| import org.junit.Before; |
| import org.junit.Rule; |
| import org.junit.Test; |
| import org.junit.rules.Timeout; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.Parameterized; |
| import org.junit.runners.Parameterized.Parameters; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.UUID; |
| import java.util.concurrent.TimeUnit; |
| |
| import static org.junit.Assert.*; |
| |
| @RunWith(Parameterized.class) |
| public class TestDirectRawFile { |
| private static final Log LOG = LogFactory.getLog(TestDirectRawFile.class); |
| public static String UNICODE_FIELD_PREFIX = "abc_가나다_"; |
| public static Schema schema; |
| |
| private static String TEST_PATH = "target/test-data/TestDirectRawFile"; |
| private MiniDFSCluster cluster; |
| private FileSystem fs; |
| private boolean isLocal; |
| |
| private TajoConf tajoConf; |
| private Path testDir; |
| |
| @Rule |
| public Timeout timeout = new Timeout(120, TimeUnit.SECONDS); |
| |
| @Parameters(name = "{index}: isLocal: {0}") |
| public static Collection<Object[]> generateParameters() throws IOException { |
| return Arrays.asList(new Object[][]{ |
| {false}, |
| {true} |
| }); |
| } |
| |
| |
| public TestDirectRawFile(boolean isLocal) throws IOException { |
| this.isLocal = isLocal; |
| } |
| |
| @Before |
| public void setup() throws IOException { |
| if (isLocal) { |
| fs = FileSystem.getLocal(new TajoConf()); |
| } else { |
| final Configuration conf = TestFileTablespace.getTestHdfsConfiguration(); |
| |
| cluster = new MiniDFSCluster.Builder(conf) |
| .numDataNodes(1) |
| .format(true) |
| .build(); |
| |
| fs = cluster.getFileSystem(); |
| } |
| |
| this.tajoConf = new TajoConf(fs.getConf()); |
| this.testDir = getTestDir(fs, TEST_PATH); |
| } |
| |
| @After |
| public void tearDown() throws IOException { |
| if (isLocal) { |
| fs.delete(testDir, true); |
| } else { |
| cluster.shutdown(); |
| } |
| } |
| |
| public Path getTestDir(FileSystem fs, String dir) throws IOException { |
| Path path = new Path(dir); |
| if(fs.exists(path)) |
| fs.delete(path, true); |
| |
| fs.mkdirs(path); |
| |
| return fs.makeQualified(path); |
| } |
| |
| static { |
| schema = SchemaBuilder.builder() |
| .add("col0", TajoDataTypes.Type.BOOLEAN) |
| .add("col1", TajoDataTypes.Type.INT2) |
| .add("col2", TajoDataTypes.Type.INT4) |
| .add("col3", TajoDataTypes.Type.INT8) |
| .add("col4", TajoDataTypes.Type.FLOAT4) |
| .add("col5", TajoDataTypes.Type.FLOAT8) |
| .add("col6", TajoDataTypes.Type.TEXT) |
| .add("col7", TajoDataTypes.Type.TIMESTAMP) |
| .add("col8", TajoDataTypes.Type.DATE) |
| .add("col9", TajoDataTypes.Type.TIME) |
| .add("col10", TajoDataTypes.Type.INTERVAL) |
| .add("col11", TajoDataTypes.Type.INET4) |
| .add("col12", |
| CatalogUtil.newDataType(TajoDataTypes.Type.PROTOBUF, PrimitiveProtos.StringProto.class.getName())) |
| .build(); |
| } |
| |
| public FileStatus writeRowBlock(TajoConf conf, TableMeta meta, MemoryRowBlock rowBlock, Path outputFile) |
| throws IOException { |
| DirectRawFileWriter writer = new DirectRawFileWriter(conf, null, schema, meta, outputFile); |
| writer.init(); |
| writer.writeRowBlock(rowBlock); |
| writer.close(); |
| |
| FileStatus status = outputFile.getFileSystem(conf).getFileStatus(outputFile); |
| assertTrue(status.getLen() > 0); |
| LOG.info("Written file size: " + FileUtil.humanReadableByteCount(status.getLen(), false)); |
| return status; |
| } |
| |
| public FileStatus writeRowBlock(TajoConf conf, TableMeta meta, MemoryRowBlock rowBlock) throws IOException { |
| Path outputDir = new Path(testDir, UUID.randomUUID() + ""); |
| outputDir.getFileSystem(conf).mkdirs(outputDir); |
| Path outputFile = new Path(outputDir, "output.draw"); |
| return writeRowBlock(conf, meta, rowBlock, outputFile); |
| } |
| |
| @Test |
| public void testRWForAllTypesWithNextTuple() throws IOException { |
| int rowNum = 10000; |
| |
| MemoryRowBlock rowBlock = createRowBlock(rowNum); |
| |
| TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.DRAW); |
| FileStatus outputFile = writeRowBlock(tajoConf, meta, rowBlock); |
| rowBlock.release(); |
| |
| FileFragment fragment = |
| new FileFragment("testRWForAllTypesWithNextTuple", outputFile.getPath(), 0, outputFile.getLen()); |
| DirectRawFileScanner reader = new DirectRawFileScanner(tajoConf, schema, meta, fragment); |
| reader.init(); |
| |
| long readStart = System.currentTimeMillis(); |
| int j = 0; |
| Tuple tuple; |
| while ((tuple = reader.next()) != null) { |
| validateTupleResult(j, tuple); |
| j++; |
| } |
| |
| LOG.info("Total read rows: " + j); |
| long readEnd = System.currentTimeMillis(); |
| LOG.info("reading takes " + (readEnd - readStart) + " msec"); |
| reader.close(); |
| assertEquals(rowNum, j); |
| } |
| |
| @Test |
| public void testRepeatedScan() throws IOException { |
| int rowNum = 2; |
| |
| MemoryRowBlock rowBlock = createRowBlock(rowNum); |
| TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.DRAW); |
| FileStatus outputFile = writeRowBlock(tajoConf, meta, rowBlock); |
| |
| rowBlock.release(); |
| |
| FileFragment fragment = |
| new FileFragment("testRepeatedScan", outputFile.getPath(), 0, outputFile.getLen()); |
| DirectRawFileScanner reader = new DirectRawFileScanner(tajoConf, schema, meta, fragment); |
| reader.init(); |
| |
| int j = 0; |
| while (reader.next() != null) { |
| j++; |
| } |
| assertEquals(rowNum, j); |
| |
| for (int i = 0; i < 5; i++) { |
| assertNull(reader.next()); |
| } |
| |
| reader.close(); |
| } |
| |
| @Test |
| public void testReset() throws IOException { |
| int rowNum = 2; |
| |
| MemoryRowBlock rowBlock = createRowBlock(rowNum); |
| |
| TableMeta meta = CatalogUtil.newTableMeta(BuiltinStorages.DRAW); |
| FileStatus outputFile = writeRowBlock(tajoConf, meta, rowBlock); |
| rowBlock.release(); |
| |
| FileFragment fragment = |
| new FileFragment("testReset", outputFile.getPath(), 0, outputFile.getLen()); |
| DirectRawFileScanner reader = new DirectRawFileScanner(tajoConf, schema, meta, fragment); |
| reader.init(); |
| |
| int j = 0; |
| while (reader.next() != null) { |
| j++; |
| } |
| assertEquals(rowNum, j); |
| |
| for (int i = 0; i < 5; i++) { |
| assertNull(reader.next()); |
| } |
| |
| reader.reset(); |
| |
| j = 0; |
| while (reader.next() != null) { |
| j++; |
| } |
| assertEquals(rowNum, j); |
| |
| for (int i = 0; i < 5; i++) { |
| assertNull(reader.next()); |
| } |
| reader.close(); |
| } |
| |
| public static MemoryRowBlock createRowBlock(int rowNum) { |
| long allocateStart = System.currentTimeMillis(); |
| MemoryRowBlock rowBlock = new MemoryRowBlock(SchemaUtil.toDataTypes(schema), StorageUnit.KB * 128); |
| long allocatedEnd = System.currentTimeMillis(); |
| LOG.info(FileUtil.humanReadableByteCount(rowBlock.capacity(), true) + " bytes allocated " |
| + (allocatedEnd - allocateStart) + " msec"); |
| |
| long writeStart = System.currentTimeMillis(); |
| for (int i = 0; i < rowNum; i++) { |
| fillRow(i, rowBlock.getWriter()); |
| } |
| long writeEnd = System.currentTimeMillis(); |
| LOG.info("writing takes " + (writeEnd - writeStart) + " msec"); |
| |
| return rowBlock; |
| } |
| |
| public static void fillRow(int i, RowWriter builder) { |
| builder.startRow(); |
| builder.putBool(i % 1 == 0 ? true : false); // 0 |
| builder.putInt2((short) 1); // 1 |
| builder.putInt4(i); // 2 |
| builder.putInt8(i); // 3 |
| builder.putFloat4(i); // 4 |
| builder.putFloat8(i); // 5 |
| builder.putText((UNICODE_FIELD_PREFIX + i).getBytes()); // 6 |
| builder.putTimestamp(DatumFactory.createTimestamp("2014-04-16 08:48:00").asInt8() + i); // 7 |
| builder.putDate(DatumFactory.createDate("2014-04-16").asInt4() + i); // 8 |
| builder.putTime(DatumFactory.createTime("08:48:00").asInt8() + i); // 9 |
| builder.putInterval(DatumFactory.createInterval((i + 1) + " hours")); // 10 |
| builder.putInet4(DatumFactory.createInet4("192.168.0.1").asInt4() + i); // 11 |
| builder.putProtoDatum(new ProtobufDatum(ProtoUtil.convertString(i + ""))); // 12 |
| builder.endRow(); |
| } |
| |
| public static void validateTupleResult(int j, Tuple t) { |
| assertTrue((j % 1 == 0) == t.getBool(0)); |
| assertTrue(1 == t.getInt2(1)); |
| assertEquals(j, t.getInt4(2)); |
| assertEquals(j, t.getInt8(3)); |
| assertTrue(j == t.getFloat4(4)); |
| assertTrue(j == t.getFloat8(5)); |
| assertEquals(new String(UNICODE_FIELD_PREFIX + j), t.getText(6)); |
| assertEquals(DatumFactory.createTimestamp("2014-04-16 08:48:00").asInt8() + (long) j, t.getInt8(7)); |
| assertEquals(DatumFactory.createDate("2014-04-16").asInt4() + j, t.getInt4(8)); |
| assertEquals(DatumFactory.createTime("08:48:00").asInt8() + j, t.getInt8(9)); |
| assertEquals(DatumFactory.createInterval((j + 1) + " hours"), t.getInterval(10)); |
| assertEquals(DatumFactory.createInet4("192.168.0.1").asInt4() + j, t.getInt4(11)); |
| assertEquals(new ProtobufDatum(ProtoUtil.convertString(j + "")), t.getProtobufDatum(12)); |
| } |
| |
| public static void fillRowBlockWithNull(int i, RowWriter writer) { |
| writer.startRow(); |
| |
| if (i == 0) { |
| writer.skipField(); |
| } else { |
| writer.putBool(i % 1 == 0 ? true : false); // 0 |
| } |
| if (i % 1 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putInt2((short) 1); // 1 |
| } |
| |
| if (i % 2 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putInt4(i); // 2 |
| } |
| |
| if (i % 3 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putInt8(i); // 3 |
| } |
| |
| if (i % 4 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putFloat4(i); // 4 |
| } |
| |
| if (i % 5 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putFloat8(i); // 5 |
| } |
| |
| if (i % 6 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putText((UNICODE_FIELD_PREFIX + i).getBytes()); // 6 |
| } |
| |
| if (i % 7 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putTimestamp(DatumFactory.createTimestamp("2014-04-16 08:48:00").asInt8() + i); // 7 |
| } |
| |
| if (i % 8 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putDate(DatumFactory.createDate("2014-04-16").asInt4() + i); // 8 |
| } |
| |
| if (i % 9 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putTime(DatumFactory.createTime("08:48:00").asInt8() + i); // 9 |
| } |
| |
| if (i % 10 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putInterval(DatumFactory.createInterval((i + 1) + " hours")); // 10 |
| } |
| |
| if (i % 11 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putInet4(DatumFactory.createInet4("192.168.0.1").asInt4() + i); // 11 |
| } |
| |
| if (i % 12 == 0) { |
| writer.skipField(); |
| } else { |
| writer.putProtoDatum(new ProtobufDatum(ProtoUtil.convertString(i + ""))); // 12 |
| } |
| |
| writer.endRow(); |
| } |
| |
| public static void validateNullity(int j, Tuple tuple) { |
| if (j == 0) { |
| tuple.isBlankOrNull(0); |
| } else { |
| assertTrue((j % 1 == 0) == tuple.getBool(0)); |
| } |
| |
| if (j % 1 == 0) { |
| tuple.isBlankOrNull(1); |
| } else { |
| assertTrue(1 == tuple.getInt2(1)); |
| } |
| |
| if (j % 2 == 0) { |
| tuple.isBlankOrNull(2); |
| } else { |
| assertEquals(j, tuple.getInt4(2)); |
| } |
| |
| if (j % 3 == 0) { |
| tuple.isBlankOrNull(3); |
| } else { |
| assertEquals(j, tuple.getInt8(3)); |
| } |
| |
| if (j % 4 == 0) { |
| tuple.isBlankOrNull(4); |
| } else { |
| assertTrue(j == tuple.getFloat4(4)); |
| } |
| |
| if (j % 5 == 0) { |
| tuple.isBlankOrNull(5); |
| } else { |
| assertTrue(j == tuple.getFloat8(5)); |
| } |
| |
| if (j % 6 == 0) { |
| tuple.isBlankOrNull(6); |
| } else { |
| assertEquals(new String(UNICODE_FIELD_PREFIX + j), tuple.getText(6)); |
| } |
| |
| if (j % 7 == 0) { |
| tuple.isBlankOrNull(7); |
| } else { |
| assertEquals(DatumFactory.createTimestamp("2014-04-16 08:48:00").asInt8() + (long) j, tuple.getInt8(7)); |
| } |
| |
| if (j % 8 == 0) { |
| tuple.isBlankOrNull(8); |
| } else { |
| assertEquals(DatumFactory.createDate("2014-04-16").asInt4() + j, tuple.getInt4(8)); |
| } |
| |
| if (j % 9 == 0) { |
| tuple.isBlankOrNull(9); |
| } else { |
| assertEquals(DatumFactory.createTime("08:48:00").asInt8() + j, tuple.getInt8(9)); |
| } |
| |
| if (j % 10 == 0) { |
| tuple.isBlankOrNull(10); |
| } else { |
| assertEquals(DatumFactory.createInterval((j + 1) + " hours"), tuple.getInterval(10)); |
| } |
| |
| if (j % 11 == 0) { |
| tuple.isBlankOrNull(11); |
| } else { |
| assertEquals(DatumFactory.createInet4("192.168.0.1").asInt4() + j, tuple.getInt4(11)); |
| } |
| |
| if (j % 12 == 0) { |
| tuple.isBlankOrNull(12); |
| } else { |
| assertEquals(new ProtobufDatum(ProtoUtil.convertString(j + "")), tuple.getProtobufDatum(12)); |
| } |
| } |
| } |