blob: 7f9bb0576722e5bf95b6481740b6f940e7c42179 [file] [log] [blame]
{"version":3,"sources":["/home/madhan/Apache/git/atlas/docs/target/src/documents/Hook/HookStorm.md","/home/madhan/Apache/git/atlas/docs/target/theme/styles/styled-colors.js"],"names":["layoutProps","MDXContent","components","props","mdxType","parentName","wrapLines","language","style","theme","isMDXComponent","dark","hljs","color"],"mappings":"ykBAWMA,EAAc,GAIL,SAASC,EAAW,GAG/B,IAFFC,EAAU,EAAVA,WACGC,EAAK,iBAER,OAAO,cALS,UAKC,iBAAKH,EAAiBG,EAAK,CAAED,WAAYA,EAAYE,QAAQ,cAE5E,oBACE,GAAM,sCAAoC,sCAE5C,oBACE,GAAM,gBAAc,gBAEtB,4RAG8B,oBAAIC,WAAW,KAAG,YAAkB,KAClE,qJAEA,wQAIA,4EACA,wBACE,oBAAIA,WAAW,MAAI,iDACnB,oBAAIA,WAAW,MAAI,iDAErB,oBACE,GAAM,oBAAkB,oBAE1B,4NAGA,kEACA,wBACE,oBAAIA,WAAW,MAAI,qKACnB,oBAAIA,WAAW,MAAI,qLACnB,oBAAIA,WAAW,MAAI,oEACnB,oBAAIA,WAAW,MAAI,4FAErB,mKAEA,qJAEA,oBACE,GAAM,oBAAkB,oBAE1B,kNAGA,2QAGA,oBACE,GAAM,eAAa,eAErB,wFACA,wBACE,oBAAIA,WAAW,MAAI,8GACnB,oBAAIA,WAAW,MAAI,2GACnB,oBAAIA,WAAW,MAAI,uFAErB,oBACE,GAAM,gBAAc,gBAEtB,2GACA,wBACE,oBAAIA,WAAW,MAAI,2DACnB,oBAAIA,WAAW,MAAI,iDACnB,oBAAIA,WAAW,MAAI,sHAErB,wKAEA,kGACA,oBACE,GAAM,iBAAe,iBAEvB,oBACE,GAAM,uBAAqB,uBAE7B,iGACC,oBAAIA,WAAW,KAAG,+BAAqC,QACxD,cAAC,IAAiB,CAACC,WAAW,EAAMC,SAAS,QAAQC,MAAOC,IAAYL,QAAQ,qBAAmB,iGAGnG,sMAEA,4jBAOA,cAAC,IAAiB,CAACE,WAAW,EAAMC,SAAS,QAAQC,MAAOC,IAAYL,QAAQ,qBAAmB,sCAGnG,6BAAU,oBAAIC,WAAW,KAAG,kCAAwC,6CACpE,cAAC,IAAiB,CAACC,WAAW,EAAMC,SAAS,QAAQC,MAAOC,IAAYL,QAAQ,qBAAmB,uDAGnG,oFACA,0FACA,cAAC,IAAiB,CAACE,WAAW,EAAMC,SAAS,QAAQC,MAAOC,IAAYL,QAAQ,qBAAmB,yMAOtG,qLAEDH,EAAWS,gBAAiB,G,+DC/H5B,iFAqBAC,IAAKC,KAAKC,MAAQ,UACHF,MAAI","file":"static/js/documents-hook-hook-storm.5cb01ef5.js","sourcesContent":["\nimport React from 'react'\nimport { mdx } from '@mdx-js/react'\n\n/* @jsxRuntime classic */\n/* @jsx mdx */\nimport themen from 'theme/styles/styled-colors';\nimport * as theme from 'react-syntax-highlighter/dist/esm/styles/hljs';\nimport SyntaxHighlighter from 'react-syntax-highlighter';\n\n\nconst layoutProps = {\n \n};\nconst MDXLayout = \"wrapper\"\nexport default function MDXContent({\n components,\n ...props\n}) {\n return <MDXLayout {...layoutProps} {...props} components={components} mdxType=\"MDXLayout\">\n\n <h1 {...{\n \"id\": \"apache-atlas-hook-for-apache-storm\"\n }}>{`Apache Atlas Hook for Apache Storm`}</h1>\n <h2 {...{\n \"id\": \"introduction\"\n }}>{`Introduction`}</h2>\n <p>{`Apache Storm is a distributed real-time computation system. Storm makes it\neasy to reliably process unbounded streams of data, doing for real-time\nprocessing what Hadoop did for batch processing. The process is essentially\na DAG of nodes, which is called `}<em parentName=\"p\">{`topology`}</em>{`.`}</p>\n <p>{`Apache Atlas is a metadata repository that enables end-to-end data lineage,\nsearch and associate business classification.`}</p>\n <p>{`The goal of this integration is to push the operational topology\nmetadata along with the underlying data source(s), target(s), derivation\nprocesses and any available business context so Atlas can capture the\nlineage for this topology.`}</p>\n <p>{`There are 2 parts in this process detailed below:`}</p>\n <ul>\n <li parentName=\"ul\">{`Data model to represent the concepts in Storm`}</li>\n <li parentName=\"ul\">{`Storm Atlas Hook to update metadata in Atlas`}</li>\n </ul>\n <h2 {...{\n \"id\": \"storm-data-model\"\n }}>{`Storm Data Model`}</h2>\n <p>{`A data model is represented as Types in Atlas. It contains the descriptions\nof various nodes in the topology graph, such as spouts and bolts and the\ncorresponding producer and consumer types.`}</p>\n <p>{`The following types are added in Atlas.`}</p>\n <ul>\n <li parentName=\"ul\">{`storm_topology - represents the coarse-grained topology. A storm_topology derives from an Atlas Process type and hence can be used to inform Atlas about lineage.`}</li>\n <li parentName=\"ul\">{`Following data sets are added - kafka_topic, jms_topic, hbase_table, hdfs_data_set. These all derive from an Atlas Dataset type and hence form the end points of a lineage graph.`}</li>\n <li parentName=\"ul\">{`storm_spout - Data Producer having outputs, typically Kafka, JMS`}</li>\n <li parentName=\"ul\">{`storm_bolt - Data Consumer having inputs and outputs, typically Hive, HBase, HDFS, etc.`}</li>\n </ul>\n <p>{`The Storm Atlas hook auto registers dependent models like the Hive data model\nif it finds that these are not known to the Atlas server.`}</p>\n <p>{`The data model for each of the types is described in\nthe class definition at org.apache.atlas.storm.model.StormDataModel.`}</p>\n <h2 {...{\n \"id\": \"storm-atlas-hook\"\n }}>{`Storm Atlas Hook`}</h2>\n <p>{`Atlas is notified when a new topology is registered successfully in\nStorm. Storm provides a hook, backtype.storm.ISubmitterHook, at the Storm client used to\nsubmit a storm topology.`}</p>\n <p>{`The Storm Atlas hook intercepts the hook post execution and extracts the metadata from the\ntopology and updates Atlas using the types defined. Atlas implements the\nStorm client hook interface in org.apache.atlas.storm.hook.StormAtlasHook.`}</p>\n <h2 {...{\n \"id\": \"limitations\"\n }}>{`Limitations`}</h2>\n <p>{`The following apply for the first version of the integration.`}</p>\n <ul>\n <li parentName=\"ul\">{`Only new topology submissions are registered with Atlas, any lifecycle changes are not reflected in Atlas.`}</li>\n <li parentName=\"ul\">{`The Atlas server needs to be online when a Storm topology is submitted for the metadata to be captured.`}</li>\n <li parentName=\"ul\">{`The Hook currently does not support capturing lineage for custom spouts and bolts.`}</li>\n </ul>\n <h2 {...{\n \"id\": \"installation\"\n }}>{`Installation`}</h2>\n <p>{`The Storm Atlas Hook needs to be manually installed in Storm on the client side.`}</p>\n <ul>\n <li parentName=\"ul\">{`untar apache-atlas-\\${project.version}-storm-hook.tar.gz`}</li>\n <li parentName=\"ul\">{`cd apache-atlas-storm-hook-\\${project.version}`}</li>\n <li parentName=\"ul\">{`Copy entire contents of folder apache-atlas-storm-hook-\\${project.version}/hook/storm to $ATLAS_PACKAGE/hook/storm`}</li>\n </ul>\n <p>{`Storm Atlas hook jars in $ATLAS_PACKAGE/hook/storm need to be copied to $STORM_HOME/extlib.\nReplace STORM_HOME with storm installation path.`}</p>\n <p>{`Restart all daemons after you have installed the atlas hook into Storm.`}</p>\n <h2 {...{\n \"id\": \"configuration\"\n }}>{`Configuration`}</h2>\n <h3 {...{\n \"id\": \"storm-configuration\"\n }}>{`Storm Configuration`}</h3>\n <p>{`The Storm Atlas Hook needs to be configured in Storm client config\nin `}<em parentName=\"p\">{`$STORM_HOME/conf/storm.yaml`}</em>{` as:`}</p>\n <SyntaxHighlighter wrapLines={true} language=\"shell\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\n {`storm.topology.submission.notifier.plugin.class: \"org.apache.atlas.storm.hook.StormAtlasHook\"`}\n </SyntaxHighlighter>\n <p>{`Also set a 'cluster name' that would be used as a namespace for objects registered in Atlas.\nThis name would be used for namespacing the Storm topology, spouts and bolts.`}</p>\n <p>{`The other objects like data sets should ideally be identified with the cluster name of\nthe components that generate them. For e.g. Hive tables and databases should be\nidentified using the cluster name set in Hive. The Storm Atlas hook will pick this up\nif the Hive configuration is available in the Storm topology jar that is submitted on\nthe client and the cluster name is defined there. This happens similarly for HBase\ndata sets. In case this configuration is not available, the cluster name set in the Storm\nconfiguration will be used.`}</p>\n <SyntaxHighlighter wrapLines={true} language=\"shell\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\natlas.cluster.name: \"cluster_name\"\n </SyntaxHighlighter>\n <p>{`In `}<em parentName=\"p\">{`$STORM_HOME/conf/storm_env.ini`}</em>{`, set an environment variable as follows:`}</p>\n <SyntaxHighlighter wrapLines={true} language=\"shell\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\nSTORM_JAR_JVM_OPTS:\"-Datlas.conf=$ATLAS_HOME/conf/\"\n </SyntaxHighlighter>\n <p>{`where ATLAS_HOME is pointing to where ATLAS is installed.`}</p>\n <p>{`You could also set this up programmatically in Storm Config as:`}</p>\n <SyntaxHighlighter wrapLines={true} language=\"shell\" style={theme.dark} mdxType=\"SyntaxHighlighter\">\n {`Config stormConf = new Config();\n ...\n stormConf.put(Config.STORM_TOPOLOGY_SUBMISSION_NOTIFIER_PLUGIN,\n org.apache.atlas.storm.hook.StormAtlasHook.class.getName());`}\n </SyntaxHighlighter>\n </MDXLayout>;\n}\n;\nMDXContent.isMDXComponent = true;","/**\n * Licensed to the Apache Software Foundation (ASF) under one\n * or more contributor license agreements. See the NOTICE file\n * distributed with this work for additional information\n * regarding copyright ownership. The ASF licenses this file\n * to you under the Apache License, Version 2.0 (the\n * \"License\"); you may not use this file except in compliance\n * with the License. You may obtain a copy of the License at\n *\n * http://www.apache.org/licenses/LICENSE-2.0\n *\n * Unless required by applicable law or agreed to in writing, software\n * distributed under the License is distributed on an \"AS IS\" BASIS,\n * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n * See the License for the specific language governing permissions and\n * limitations under the License.\n */\n\nimport { dark } from \"react-syntax-highlighter/dist/esm/styles/hljs\";\n\n//dark[\"powershell\"][\"color\"] = \"#37bb9b\";\ndark.hljs.color = \"#37bb9b\";\nexport default dark;"],"sourceRoot":""}