blob: 47d7fb3fedc972d0b2903d0334cdf1b9c7d7dea7 [file] [log] [blame]
{"version":3,"sources":["/home/madhan/Apache/git/atlas/docs/target/src/documents/Hook/HookHive.md","/home/madhan/Apache/git/atlas/docs/target/theme/components/shared/Img/index.js","/home/madhan/Apache/git/atlas/docs/target/theme/styles/styled-colors.js"],"names":["layoutProps","MDXContent","components","props","mdxType","parentName","wrapLines","language","style","theme","src","height","width","href","isMDXComponent","Img","baseUrl","useConfig","boxShadow","WebkitBoxShadow","MozBoxShadow","dark","hljs","color"],"mappings":"snBAYMA,EAAc,GAIL,SAASC,EAAW,GAG/B,IAFFC,EAAU,EAAVA,WACGC,EAAK,iBAER,OAAO,cALS,UAKC,iBAAKH,EAAiBG,EAAK,CAAED,WAAYA,EAAYE,QAAQ,cAE5E,oBACE,GAAM,6CAA2C,8CAEnD,oBACE,GAAM,cAAY,cAEpB,mEACA,wBACE,oBAAIC,WAAW,MAAI,iBACnB,oBAAIA,WAAW,MAAI,UACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,sBACnB,oBAAIA,WAAW,MAAI,uGAGvB,oBAAIA,WAAW,MAAI,aACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,wBACnB,oBAAIA,WAAW,MAAI,sNACnB,oBAAIA,WAAW,MAAI,cACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,wBACnB,oBAAIA,WAAW,MAAI,+EAGvB,oBAAIA,WAAW,MAAI,mBACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,8BACnB,oBAAIA,WAAW,MAAI,wKAGvB,oBAAIA,WAAW,MAAI,eACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,wBACnB,oBAAIA,WAAW,MAAI,mKAGvB,oBAAIA,WAAW,MAAI,sBACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,wBACnB,oBAAIA,WAAW,MAAI,kHAM7B,wBACE,oBAAIA,WAAW,MAAI,eACnB,oBAAIA,WAAW,MAAI,sBACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,gCAIzB,wBACE,oBAAIA,WAAW,MAAI,gBACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,aACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,4BAGvB,oBAAIA,WAAW,MAAI,aACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,uDAM7B,uOACA,cAAC,IAAiB,CAACC,WAAW,EAAMC,SAAS,QAAQC,MAAOC,IAAYL,QAAQ,qBAAmB,+PAMnG,oBACE,GAAM,aAAW,aAEnB,6PAEA,wBACE,oBAAIC,WAAW,MAAI,gEAErB,cAAC,IAAiB,CAACC,WAAW,EAAMC,SAAS,MAAMC,MAAOC,IAAYL,QAAQ,qBAAmB,8HAMjG,wBACE,oBAAIC,WAAW,MAAI,0DACnB,oBAAIA,WAAW,MAAI,gDACnB,oBAAIA,WAAW,MAAI,yFAA4F,4BAAYA,WAAW,MAAI,mBAAiC,cAC3K,oBAAIA,WAAW,MAAI,kCAAoC,4BAAYA,WAAW,MAAI,mBAAiC,yDACnH,oBAAIA,WAAW,MAAI,QAAU,4BAAYA,WAAW,MAAI,gBAA8B,8DAExF,qIACA,cAAC,IAAiB,CAACC,WAAW,EAAMC,SAAS,QAAQC,MAAOC,IAAYL,QAAQ,qBAAmB,gzBAUnG,gOAA6M,mBAAGC,WAAW,IACvN,KAAQ,0DAAwD,2BAEpE,oBACE,GAAM,wBAAsB,wBAE9B,0IACA,oBACE,GAAM,SAAO,SAEf,wBACE,oBAAIA,WAAW,MAAI,qDACnB,oBAAIA,WAAW,MAAI,8EACnB,oBAAIA,WAAW,MAAI,qEACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,6DACnB,oBAAIA,WAAW,MAAI,iIACnB,oBAAIA,WAAW,MAAI,yEAGvB,oBAAIA,WAAW,MAAI,oGACnB,oBAAIA,WAAW,MAAI,kFAErB,oBACE,GAAM,YAAU,YAElB,mDACA,cAAC,IAAiB,CAACC,WAAW,EAAMC,SAAS,MAAMC,MAAOC,IAAYL,QAAQ,qBAAmB,8CAGjG,qDACA,cAAC,IAAG,CAACM,IAAG,iCAAoCC,OAAO,MAAMC,MAAM,MAAMR,QAAQ,QAC7E,oBACE,GAAM,yCAAuC,yCAE/C,wBACE,oBAAIC,WAAW,MAAI,oFACnB,oBAAIA,WAAW,MAAI,8IAErB,oBACE,GAAM,SAAO,SAEf,wBACE,oBAAIA,WAAW,MAAI,0EAA4E,mBAAGQ,KAAK,oDAAkD,cAAmB,8BAC5K,oBAAIR,WAAW,MAAI,iNACnB,oBAAIA,WAAW,MAAI,oEACjB,oBAAIA,WAAW,MACb,oBAAIA,WAAW,MAAI,mBACnB,oBAAIA,WAAW,MAAI,6CACnB,oBAAIA,WAAW,MAAI,wBACnB,oBAAIA,WAAW,MAAI,iBACnB,oBAAIA,WAAW,MAAI,kBACnB,oBAAIA,WAAW,MAAI,kFACnB,oBAAIA,WAAW,MAAI,iBAIzB,oBACE,GAAM,2BAAyB,2BAEjC,+XAGA,cAAC,IAAiB,CAACC,WAAW,EAAMC,SAAS,QAAQC,MAAOC,IAAYL,QAAQ,qBAAmB,0XAUtG,oLAEDH,EAAWa,gBAAiB,G,sEC9M5B,+EAuCeC,IAnBHZ,IACX,MAAM,IAAEO,EAAG,MAAEE,EAAK,OAAED,GAAWR,GACzB,QAAEa,GAAYC,sBAMpB,OACC,2BACC,uBAC6BT,MART,CACtBU,UAAW,6FACIC,gBAAiB,4FACjBC,aAAc,6FAM1BV,IAAM,GAAEM,IAAUN,IAClBC,OAAS,IAAEA,GAAU,QACrBC,MAAQ,IAAEA,GAAS,a,+DClCvB,iFAqBAS,IAAKC,KAAKC,MAAQ,UACHF,MAAI","file":"static/js/documents-hook-hook-hive.a0073868.js","sourcesContent":["\nimport React from 'react'\nimport { mdx } from '@mdx-js/react'\n\n/* @jsxRuntime classic */\n/* @jsx mdx */\nimport themen from 'theme/styles/styled-colors';\nimport * as theme from 'react-syntax-highlighter/dist/esm/styles/hljs';\nimport SyntaxHighlighter from 'react-syntax-highlighter';\nimport Img from 'theme/components/shared/Img'\n\n\nconst layoutProps = {\n \n};\nconst MDXLayout = \"wrapper\"\nexport default function MDXContent({\n components,\n ...props\n}) {\n return <MDXLayout {...layoutProps} {...props} components={components} mdxType=\"MDXLayout\">\n\n <h1 {...{\n \"id\": \"apache-atlas-hook--bridge-for-apache-hive\"\n }}>{`Apache Atlas Hook & Bridge for Apache Hive`}</h1>\n <h2 {...{\n \"id\": \"hive-model\"\n }}>{`Hive Model`}</h2>\n <p>{`Hive model includes the following types:`}</p>\n <ul>\n <li parentName=\"ul\">{`Entity types:`}</li>\n <li parentName=\"ul\">{`hive_db`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`super-types: Asset`}</li>\n <li parentName=\"ul\">{`attributes: qualifiedName, name, description, owner, clusterName, location, parameters, ownerName`}</li>\n </ul>\n </li>\n <li parentName=\"ul\">{`hive_table`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`super-types: DataSet`}</li>\n <li parentName=\"ul\">{`attributes: qualifiedName, name, description, owner, db, createTime, lastAccessTime, comment, retention, sd, partitionKeys, columns, aliases, parameters, viewOriginalText, viewExpandedText, tableType, temporary`}</li>\n <li parentName=\"ul\">{`hive_column`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`super-types: DataSet`}</li>\n <li parentName=\"ul\">{`attributes: qualifiedName, name, description, owner, type, comment, table`}</li>\n </ul>\n </li>\n <li parentName=\"ul\">{`hive_storagedesc`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`super-types: Referenceable`}</li>\n <li parentName=\"ul\">{`attributes: qualifiedName, table, location, inputFormat, outputFormat, compressed, numBuckets, serdeInfo, bucketCols, sortCols, parameters, storedAsSubDirectories`}</li>\n </ul>\n </li>\n <li parentName=\"ul\">{`hive_process`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`super-types: Process`}</li>\n <li parentName=\"ul\">{`attributes: qualifiedName, name, description, owner, inputs, outputs, startTime, endTime, userName, operationType, queryText, queryPlan, queryId, clusterName`}</li>\n </ul>\n </li>\n <li parentName=\"ul\">{`hive_column_lineage`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`super-types: Process`}</li>\n <li parentName=\"ul\">{`attributes: qualifiedName, name, description, owner, inputs, outputs, query, depenendencyType, expression`}</li>\n </ul>\n </li>\n </ul>\n </li>\n </ul>\n <ul>\n <li parentName=\"ul\">{`Enum types:`}</li>\n <li parentName=\"ul\">{`hive_principal_type`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`values: USER, ROLE, GROUP`}</li>\n </ul>\n </li>\n </ul>\n <ul>\n <li parentName=\"ul\">{`Struct types:`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`hive_order`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`attributes: col, order`}</li>\n </ul>\n </li>\n <li parentName=\"ul\">{`hive_serde`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`attributes: name, serializationLib, parameters`}</li>\n </ul>\n </li>\n </ul>\n </li>\n </ul>\n <p>{`Hive entities are created and de-duped in Atlas using unique attribute qualifiedName, whose value should be formatted as detailed below. Note that dbName, tableName and columnName should be in lower case.`}</p>\n <SyntaxHighlighter wrapLines={true} language=\"shell\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\n {`hive_db.qualifiedName: <dbName>@<clusterName>\nhive_table.qualifiedName: <dbName>.<tableName>@<clusterName>\nhive_column.qualifiedName: <dbName>.<tableName>.<columnName>@<clusterName>\nhive_process.queryString: trimmed query string in lower case`}\n </SyntaxHighlighter>\n <h2 {...{\n \"id\": \"hive-hook\"\n }}>{`Hive Hook`}</h2>\n <p>{`Atlas Hive hook registers with Hive to listen for create/update/delete operations and updates the metadata in Atlas, via Kafka notifications, for the changes in Hive.\nFollow the instructions below to setup Atlas hook in Hive:`}</p>\n <ul>\n <li parentName=\"ul\">{`Set-up Atlas hook in hive-site.xml by adding the following:`}</li>\n </ul>\n <SyntaxHighlighter wrapLines={true} language=\"xml\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\n {`<property>\n <name>hive.exec.post.hooks</name>\n <value>org.apache.atlas.hive.hook.HiveHook</value>\n </property>`}\n </SyntaxHighlighter>\n <ul>\n <li parentName=\"ul\">{`untar apache-atlas-\\${project.version}-hive-hook.tar.gz`}</li>\n <li parentName=\"ul\">{`cd apache-atlas-hive-hook-\\${project.version}`}</li>\n <li parentName=\"ul\">{`Copy entire contents of folder apache-atlas-hive-hook-\\${project.version}/hook/hive to `}<inlineCode parentName=\"li\">{`<atlas package>`}</inlineCode>{`/hook/hive`}</li>\n <li parentName=\"ul\">{`Add 'export HIVE_AUX_JARS_PATH=`}<inlineCode parentName=\"li\">{`<atlas package>`}</inlineCode>{`/hook/hive' in hive-env.sh of your hive configuration`}</li>\n <li parentName=\"ul\">{`Copy `}<inlineCode parentName=\"li\">{`<atlas-conf>`}</inlineCode>{`/atlas-application.properties to the hive conf directory.`}</li>\n </ul>\n <p>{`The following properties in atlas-application.properties control the thread pool and notification details:`}</p>\n <SyntaxHighlighter wrapLines={true} language=\"shell\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\n {`atlas.hook.hive.synchronous=false # whether to run the hook synchronously. false is recommended to avoid delays in Hive query completion. Default: false\natlas.hook.hive.numRetries=3 # number of retries for notification failure. Default: 3\natlas.hook.hive.queueSize=10000 # queue size for the threadpool. Default: 10000\natlas.cluster.name=primary # clusterName to use in qualifiedName of entities. Default: primary\natlas.kafka.zookeeper.connect= # Zookeeper connect URL for Kafka. Example: localhost:2181\natlas.kafka.zookeeper.connection.timeout.ms=30000 # Zookeeper connection timeout. Default: 30000\natlas.kafka.zookeeper.session.timeout.ms=60000 # Zookeeper session timeout. Default: 60000\natlas.kafka.zookeeper.sync.time.ms=20 # Zookeeper sync time. Default: 20`}\n </SyntaxHighlighter>\n <p>{`Other configurations for Kafka notification producer can be specified by prefixing the configuration name with \"atlas.kafka.\". For list of configuration supported by Kafka producer, please refer to `}<a parentName=\"p\" {...{\n \"href\": \"http://kafka.apache.org/documentation/#producerconfigs\"\n }}>{`Kafka Producer Configs`}</a></p>\n <h2 {...{\n \"id\": \"column-level-lineage\"\n }}>{`Column Level Lineage`}</h2>\n <p>{`Starting from 0.8-incubating version of Atlas, Column level lineage is captured in Atlas. Below are the details`}</p>\n <h3 {...{\n \"id\": \"model\"\n }}>{`Model`}</h3>\n <ul>\n <li parentName=\"ul\">{`ColumnLineageProcess type is a subtype of Process`}</li>\n <li parentName=\"ul\">{`This relates an output Column to a set of input Columns or the Input Table`}</li>\n <li parentName=\"ul\">{`The lineage also captures the kind of dependency, as listed below:`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`SIMPLE: output column has the same value as the input`}</li>\n <li parentName=\"ul\">{`EXPRESSION: output column is transformed by some expression at runtime (for e.g. a Hive SQL expression) on the Input Columns.`}</li>\n <li parentName=\"ul\">{`SCRIPT: output column is transformed by a user provided script.`}</li>\n </ul>\n </li>\n <li parentName=\"ul\">{`In case of EXPRESSION dependency the expression attribute contains the expression in string form`}</li>\n <li parentName=\"ul\">{`Since Process links input and output DataSets, Column is a subtype of DataSet`}</li>\n </ul>\n <h3 {...{\n \"id\": \"examples\"\n }}>{`Examples`}</h3>\n <p>{`For a simple CTAS below:`}</p>\n <SyntaxHighlighter wrapLines={true} language=\"sql\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\ncreate table t2 as select id, name from T1\n </SyntaxHighlighter>\n <p>{`The lineage is captured as`}</p>\n <Img src={`/images/column_lineage_ex1.png`} height=\"200\" width=\"400\" mdxType=\"Img\" />\n <h3 {...{\n \"id\": \"extracting-lineage-from-hive-commands\"\n }}>{`Extracting Lineage from Hive commands`}</h3>\n <ul>\n <li parentName=\"ul\">{`The HiveHook maps the LineageInfo in the HookContext to Column lineage instances`}</li>\n <li parentName=\"ul\">{`The LineageInfo in Hive provides column-level lineage for the final FileSinkOperator, linking them to the input columns in the Hive Query`}</li>\n </ul>\n <h2 {...{\n \"id\": \"notes\"\n }}>{`NOTES`}</h2>\n <ul>\n <li parentName=\"ul\">{`Column level lineage works with Hive version 1.2.1 after the patch for `}<a href=\"https://issues.apache.org/jira/browse/HIVE-13112\">{`HIVE-13112`}</a>{` is applied to Hive source`}</li>\n <li parentName=\"ul\">{`Since database name, table name and column names are case-insensitive in hive, the corresponding names in entities are lowercase. So, any search APIs should use lowercase while querying on the entity names`}</li>\n <li parentName=\"ul\">{`The following hive operations are captured by hive hook currently`}\n <ul parentName=\"li\">\n <li parentName=\"ul\">{`create database`}</li>\n <li parentName=\"ul\">{`create table/view, create table as select`}</li>\n <li parentName=\"ul\">{`load, import, export`}</li>\n <li parentName=\"ul\">{`DMLs (insert)`}</li>\n <li parentName=\"ul\">{`alter database`}</li>\n <li parentName=\"ul\">{`alter table (skewed table information, stored as, protection is not supported)`}</li>\n <li parentName=\"ul\">{`alter view`}</li>\n </ul>\n </li>\n </ul>\n <h2 {...{\n \"id\": \"importing-hive-metadata\"\n }}>{`Importing Hive Metadata`}</h2>\n <p>{`Apache Atlas provides a command-line utility, import-hive.sh, to import metadata of Apache Hive databases and tables into Apache Atlas.\nThis utility can be used to initialize Apache Atlas with databases/tables present in Apache Hive.\nThis utility supports importing metadata of a specific table, tables in a specific database or all databases and tables.`}</p>\n <SyntaxHighlighter wrapLines={true} language=\"shell\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\n {`Usage 1: <atlas package>/hook-bin/import-hive.sh\nUsage 2: <atlas package>/hook-bin/import-hive.sh [-d <database regex> OR --database <database regex>] [-t <table regex> OR --table <table regex>]\nUsage 3: <atlas package>/hook-bin/import-hive.sh [-f <filename>]\n File Format:\n database1:tbl1\n database1:tbl2\n database2:tbl1`}\n </SyntaxHighlighter>\n </MDXLayout>;\n}\n;\nMDXContent.isMDXComponent = true;","/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport * as React from \"react\";\nimport { useConfig } from \"../../../../docz-lib/docz/dist\";\nconst Img = props => {\n\tconst { src, width, height } = props;\n\tconst { baseUrl } = useConfig();\n const styles = {\n\tboxShadow: \"0 2px 2px 0 rgba(0,0,0,0.14), 0 3px 1px -2px rgba(0,0,0,0.12), 0 1px 5px 0 rgba(0,0,0,0.2)\",\n WebkitBoxShadow: \"0 2px 2px 0 rgba(0,0,0,0.14) 0 3px 1px -2px rgba(0,0,0,0.12), 0 1px 5px 0 rgba(0,0,0,0.2)\",\n MozBoxShadow: \"0 2px 2px 0 rgba(0,0,0,0.14) 0 3px 1px -2px rgba(0,0,0,0.12), 0 1px 5px 0 rgba(0,0,0,0.2)\"\n }\n\treturn (\n\t\t<div>\n\t\t\t<img\n style={styles}\n\t\t\t\tsrc={`${baseUrl}${src}`}\n\t\t\t\theight={`${height || \"auto\"}`}\n\t\t\t\twidth={`${width || \"100%\"}`}\n\t\t\t/>\n\t\t</div>\n\t);\n};\nexport default Img;\n","/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport { dark } from \"react-syntax-highlighter/dist/esm/styles/hljs\";\n\n//dark[\"powershell\"][\"color\"] = \"#37bb9b\";\ndark.hljs.color = \"#37bb9b\";\nexport default dark;"],"sourceRoot":""}