TAJO-1644: When inserting empty data into a partitioned table, existing data would be removed. (jaehwa)
diff --git a/CHANGES b/CHANGES
index 0046ad4..32e26ce 100644
--- a/CHANGES
+++ b/CHANGES
@@ -11,6 +11,9 @@
BUG FIXES
+ TAJO-1644: When inserting empty data into a partitioned table,
+ existing data would be removed. (jaehwa)
+
TAJO-1642: CatalogServer need to check meta table first. (jaehwa)
TASKS
diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
index 4882b27..a74775a 100644
--- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
+++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
@@ -123,6 +123,10 @@
NULL_CHAR(ConfVars.$TEXT_NULL, "null char of text file output", DEFAULT),
CODEGEN(ConfVars.$CODEGEN, "Runtime code generation enabled (experiment)", DEFAULT),
+ PARTITION_NO_RESULT_OVERWRITE_ENABLED(ConfVars.$PARTITION_NO_RESULT_OVERWRITE_ENABLED,
+ "If True, a partitioned table is overwritten even if a sub query leads to no result. "
+ + "Otherwise, the table data will be kept if there is no result", DEFAULT),
+
// Behavior Control ---------------------------------------------------------
ARITHABORT(ConfVars.$BEHAVIOR_ARITHMETIC_ABORT,
"If true, a running query will be terminated when an overflow or divide-by-zero occurs.", DEFAULT),
diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
index 95ef4bc..84f2872 100644
--- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
+++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
@@ -359,6 +359,10 @@
// Behavior Control ---------------------------------------------------------
$BEHAVIOR_ARITHMETIC_ABORT("tajo.behavior.arithmetic-abort", false),
+ // If True, a partitioned table is overwritten even if a sub query leads to no result.
+ // Otherwise, the table data will be kept if there is no result
+ $PARTITION_NO_RESULT_OVERWRITE_ENABLED("tajo.partition.overwrite.even-if-no-result", false),
+
// ResultSet ---------------------------------------------------------
$RESULT_SET_FETCH_ROWNUM("tajo.resultset.fetch.rownum", 200),
$RESULT_SET_BLOCK_WAIT("tajo.resultset.block.wait", true),
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
index b48720a..58e0b78 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
@@ -360,34 +360,26 @@
@Test
public final void testInsertIntoColumnPartitionedTableByThreeColumns() throws Exception {
+ ResultSet res = null;
String tableName = CatalogUtil.normalizeIdentifier("testInsertIntoColumnPartitionedTableByThreeColumns");
- ResultSet res = testBase.execute(
- "create table " + tableName + " (col4 text) partition by column(col1 int4, col2 int4, col3 float8) ");
+
+ res = testBase.execute(
+ "create table " + tableName + " (col4 text) partition by column(col1 int4, col2 int4, col3 float8) ");
res.close();
TajoTestingCluster cluster = testBase.getTestingCluster();
CatalogService catalog = cluster.getMaster().getCatalog();
assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
res = executeString("insert into " + tableName
- + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
+ + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
+
res.close();
TableDesc desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
Path path = new Path(desc.getPath());
FileSystem fs = FileSystem.get(conf);
- assertTrue(fs.isDirectory(path));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
+ verifyDirectoriesForThreeColumns(fs, path, 1);
if (!testingCluster.isHCatalogStoreRunning()) {
assertEquals(5, desc.getStats().getNumRows().intValue());
}
@@ -405,7 +397,6 @@
}
res.close();
-
Map<Double, int []> resultRows2 = Maps.newHashMap();
resultRows2.put(49.0d, new int[]{3, 3});
resultRows2.put(45.0d, new int[]{3, 2});
@@ -422,38 +413,27 @@
// insert into already exists partitioned table
res = executeString("insert into " + tableName
- + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
+ + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem");
res.close();
desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
path = new Path(desc.getPath());
- assertTrue(fs.isDirectory(path));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
-
+ verifyDirectoriesForThreeColumns(fs, path, 2);
if (!testingCluster.isHCatalogStoreRunning()) {
assertEquals(5, desc.getStats().getNumRows().intValue());
}
+
String expected = "N\n" +
- "N\n" +
- "N\n" +
- "N\n" +
- "N\n" +
- "N\n" +
- "R\n" +
- "R\n" +
- "R\n" +
- "R\n";
+ "N\n" +
+ "N\n" +
+ "N\n" +
+ "N\n" +
+ "N\n" +
+ "R\n" +
+ "R\n" +
+ "R\n" +
+ "R\n";
String tableData = getTableFileContents(new Path(desc.getPath()));
assertEquals(expected, tableData);
@@ -462,38 +442,79 @@
String resultSetData = resultSetToString(res);
res.close();
expected = "col4,col1,col2,col3\n" +
- "-------------------------------\n" +
- "N,2,2,38.0\n" +
- "N,2,2,38.0\n" +
- "R,3,2,45.0\n" +
- "R,3,2,45.0\n";
+ "-------------------------------\n" +
+ "N,2,2,38.0\n" +
+ "N,2,2,38.0\n" +
+ "R,3,2,45.0\n" +
+ "R,3,2,45.0\n";
assertEquals(expected, resultSetData);
res = executeString("select * from " + tableName + " where (col1 = 2 or col1 = 3) and col2 >= 2");
resultSetData = resultSetToString(res);
res.close();
expected = "col4,col1,col2,col3\n" +
- "-------------------------------\n" +
- "N,2,2,38.0\n" +
- "N,2,2,38.0\n" +
- "R,3,2,45.0\n" +
- "R,3,2,45.0\n" +
- "R,3,3,49.0\n" +
- "R,3,3,49.0\n";
+ "-------------------------------\n" +
+ "N,2,2,38.0\n" +
+ "N,2,2,38.0\n" +
+ "R,3,2,45.0\n" +
+ "R,3,2,45.0\n" +
+ "R,3,3,49.0\n" +
+ "R,3,3,49.0\n";
assertEquals(expected, resultSetData);
// Check not to remove existing partition directories.
res = executeString("insert overwrite into " + tableName
- + " select l_returnflag, l_orderkey, l_partkey, 30.0 as l_quantity from lineitem "
- + " where l_orderkey = 1 and l_partkey = 1 and l_linenumber = 1");
+ + " select l_returnflag, l_orderkey, l_partkey, 30.0 as l_quantity from lineitem "
+ + " where l_orderkey = 1 and l_partkey = 1 and l_linenumber = 1");
res.close();
- desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
+ verifyDirectoriesForThreeColumns(fs, path, 3);
+ if (!testingCluster.isHCatalogStoreRunning()) {
+ // TODO: If another partition directory already exists, we must add its row count to the resulting row count.
+ // desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
+ // assertEquals(6, desc.getStats().getNumRows().intValue());
+ }
+
+ verifyKeptExistingData(res, tableName);
+
+ // insert overwrite empty result to partitioned table
+ res = executeString("insert overwrite into " + tableName
+ + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey > 100");
+ res.close();
+
+ verifyDirectoriesForThreeColumns(fs, path, 4);
+ verifyKeptExistingData(res, tableName);
+
+ executeString("DROP TABLE " + tableName + " PURGE").close();
+ }
+
+ private final void verifyKeptExistingData(ResultSet res, String tableName) throws Exception {
+ res = executeString("select * from " + tableName + " where col2 = 1");
+ String resultSetData = resultSetToString(res);
+ res.close();
+ String expected = "col4,col1,col2,col3\n" +
+ "-------------------------------\n" +
+ "N,1,1,17.0\n" +
+ "N,1,1,17.0\n" +
+ "N,1,1,30.0\n" +
+ "N,1,1,36.0\n" +
+ "N,1,1,36.0\n";
+
+ assertEquals(expected, resultSetData);
+ }
+
+ private final void verifyDirectoriesForThreeColumns(FileSystem fs, Path path, int step) throws Exception {
assertTrue(fs.isDirectory(path));
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=30.0")));
+
+ if (step == 1 || step == 2) {
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
+ } else {
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=30.0")));
+ }
+
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
@@ -502,38 +523,6 @@
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
-
- if (!testingCluster.isHCatalogStoreRunning()) {
- // TODO: If there is existing another partition directory, we must add its rows number to result row numbers.
- // assertEquals(6, desc.getStats().getNumRows().intValue());
- }
-
- res = executeString("select * from " + tableName + " where col2 = 1");
- resultSetData = resultSetToString(res);
- res.close();
- expected = "col4,col1,col2,col3\n" +
- "-------------------------------\n" +
- "N,1,1,17.0\n" +
- "N,1,1,17.0\n" +
- "N,1,1,30.0\n" +
- "N,1,1,36.0\n" +
- "N,1,1,36.0\n";
-
- assertEquals(expected, resultSetData);
-
- // insert overwrite empty result to partitioned table
- res = executeString("insert overwrite into " + tableName
- + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey" +
- " > 100");
- res.close();
-
- desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
-
- ContentSummary summary = fs.getContentSummary(new Path(desc.getPath()));
-
- assertEquals(summary.getDirectoryCount(), 1L);
- assertEquals(summary.getFileCount(), 0L);
- assertEquals(summary.getLength(), 0L);
}
@Test
diff --git a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
index 19b2ee1..0a81e15 100644
--- a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
+++ b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
@@ -34,7 +34,8 @@
\set MAX_OUTPUT_FILE_SIZE [int value] - Maximum per-output file size (mb). 0 means infinite.
\set NULL_CHAR [text value] - null char of text file output
\set CODEGEN [true or false] - Runtime code generation enabled (experiment)
+\set PARTITION_NO_RESULT_OVERWRITE_ENABLED [true or false] - If True, a partitioned table is overwritten even if a sub query leads to no result. Otherwise, the table data will be kept if there is no result
\set ARITHABORT [true or false] - If true, a running query will be terminated when an overflow or divide-by-zero occurs.
\set FETCH_ROWNUM [int value] - Sets the number of rows at a time from Master
\set BLOCK_ON_RESULT [true or false] - Whether to block result set on query execution
-\set DEBUG_ENABLED [true or false] - (debug only) debug mode enabled
+\set DEBUG_ENABLED [true or false] - (debug only) debug mode enabled
\ No newline at end of file
diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageManager.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageManager.java
index ce963c8..231694a 100644
--- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageManager.java
+++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/StorageManager.java
@@ -642,7 +642,12 @@
Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME);
ContentSummary summary = fs.getContentSummary(stagingResultDir);
- if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty() && summary.getFileCount() > 0L) {
+ // When inserting empty data into a partitioned table, check whether the existing data should be removed or kept.
+ boolean overwriteEnabled = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED);
+
+ // If the existing data does not need to be kept, also check that the result contains some files.
+ if ( (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty())
+ && (!overwriteEnabled || (overwriteEnabled && summary.getFileCount() > 0L))) {
// This is a map for existing non-leaf directory to rename. A key is current directory and a value is
// renaming directory.
Map<Path, Path> renameDirs = TUtil.newHashMap();