ATLAS-4492: Atlas to skip external temporary table created in hive
Signed-off-by: Sidharth Mishra <sidmishra@apache.org>
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
index 6ea4848..4d74d0c 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -73,6 +73,7 @@
public static final String HOOK_HIVE_IGNORE_DDL_OPERATIONS = CONF_PREFIX + "hs2.ignore.ddl.operations";
public static final String HOOK_HIVE_FILTER_ENTITY_ADDITIONAL_TYPES_TO_RETAIN = CONF_PREFIX + "hs2.filter.entity.additional.types.to.retain";
public static final String HOOK_HIVE_SKIP_TEMP_TABLES = CONF_PREFIX + "skip.temp.tables";
+ public static final String HOOK_HIVE_SKIP_ALL_TEMP_TABLES = CONF_PREFIX + "skip.all.temp.tables";
public static final String DEFAULT_HOST_NAME = "localhost";
private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
@@ -95,6 +96,7 @@
private static final boolean hiveProcessPopulateDeprecatedAttributes;
private static HiveHookObjectNamesCache knownObjects = null;
private static String hostName;
+ private static boolean skipAllTempTablesIncludingExternal;
private static boolean skipTempTables = true;
static {
@@ -154,10 +156,11 @@
defaultDummyDatabase.add(SemanticAnalyzer.DUMMY_DATABASE);
defaultDummyTable.add(SemanticAnalyzer.DUMMY_TABLE);
- ignoreDummyDatabaseName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name", defaultDummyDatabase);
- ignoreDummyTableName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name", defaultDummyTable);
- ignoreValuesTmpTableNamePrefix = atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix", "Values__Tmp__Table__");
- skipTempTables = atlasProperties.getBoolean(HOOK_HIVE_SKIP_TEMP_TABLES, true);
+ ignoreDummyDatabaseName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name", defaultDummyDatabase);
+ ignoreDummyTableName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name", defaultDummyTable);
+ ignoreValuesTmpTableNamePrefix = atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix", "Values__Tmp__Table__");
+ skipAllTempTablesIncludingExternal = atlasProperties.getBoolean(HOOK_HIVE_SKIP_ALL_TEMP_TABLES, false);
+ skipTempTables = skipAllTempTablesIncludingExternal || atlasProperties.getBoolean(HOOK_HIVE_SKIP_TEMP_TABLES, true);
try {
hostName = InetAddress.getLocalHost().getHostName();
@@ -308,6 +311,10 @@
return skipTempTables;
}
+ public static boolean isSkipAllTempTablesIncludingExternal() {
+ return skipAllTempTablesIncludingExternal;
+ }
+
public PreprocessAction getPreprocessActionForHiveTable(String qualifiedName) {
PreprocessAction ret = PreprocessAction.NONE;
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
index 91611de..7d39cb6 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
@@ -19,6 +19,7 @@
package org.apache.atlas.hive.hook.events;
import org.apache.atlas.hive.hook.AtlasHiveHookContext;
+import org.apache.atlas.hive.hook.HiveHook;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.notification.HookNotification;
@@ -189,8 +190,11 @@
}
private boolean skipTemporaryTable(Table table) {
- // If its an external table, even though the temp table skip flag is on, we create the table since we need the HDFS path to temp table lineage.
- return table != null && skipTempTables && table.isTemporary() && !EXTERNAL_TABLE.equals(table.getTableType());
+ /**
+ * If its an external table, even though the temp table skip flag(skip.temp.tables) is on, we create the table since we need the HDFS path to temp table lineage.
+ * We skip external temp table only on enabling flag for skip all temp tables including external tables(skip.all.temp.tables)
+ **/
+ return table != null && skipTempTables && table.isTemporary() && (!EXTERNAL_TABLE.equals(table.getTableType()) || HiveHook.isSkipAllTempTablesIncludingExternal());
}
private boolean isCreateExtTableOperation(Table table) {
diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
index 1db73e5..c257551 100755
--- a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
+++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
@@ -22,6 +22,7 @@
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.base.Joiner;
import com.sun.jersey.api.client.ClientResponse;
+import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasServiceException;
import org.apache.atlas.hive.HiveITBase;
@@ -36,6 +37,7 @@
import org.apache.atlas.model.typedef.AtlasTypesDef;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.commons.collections.CollectionUtils;
+import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -279,6 +281,32 @@
validateHDFSPaths(processsEntity, INPUTS, pFile);
}
+ //Disabling for now. Need to revisit and check
+ @Test(enabled = false)
+ public void testCreateTemporaryExternalTable() throws Exception {
+ String tableName = tableName();
+ String colName = columnName();
+ String query = String.format("create TEMPORARY EXTERNAL table %s.%s(%s, %s)", DEFAULT_DB , tableName , colName + " int", "name string");
+ String tableQualifiedName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName, true);
+
+ Configuration configuration = ApplicationProperties.get();
+ configuration.setProperty(HiveHook.HOOK_HIVE_SKIP_ALL_TEMP_TABLES, true);
+ runCommand(query);
+
+ Thread.sleep(10000);
+
+ try {
+ atlasClientV2.getEntityByAttribute(HiveDataTypes.HIVE_TABLE.getName(), Collections.singletonMap(ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName));
+ } catch (AtlasServiceException e) {
+ if (e.getStatus() == ClientResponse.Status.NOT_FOUND) {
+ return;
+ }
+ }
+
+ fail(String.format("Entity was not supposed to exist for typeName = %s, attributeName = %s, attributeValue = %s", HiveDataTypes.HIVE_TABLE.getName(), ATTRIBUTE_QUALIFIED_NAME, tableQualifiedName));
+ }
+
+
private Set<ReadEntity> getInputs(String inputName, Entity.Type entityType) throws HiveException {
final ReadEntity entity;