| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <meta name="author" content="Apache Software Foundation"> |
| <link rel="shortcut icon" href="../../img/favicon.ico"> |
| <title>High Level Consumer - Apache Gobblin</title> |
| <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'> |
| |
| <link rel="stylesheet" href="../../css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" /> |
| <link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css"> |
| <link href="../../css/extra.css" rel="stylesheet"> |
| |
| <script> |
| // Current page data |
| var mkdocs_page_name = "High Level Consumer"; |
| var mkdocs_page_input_path = "developer-guide/HighLevelConsumer.md"; |
| var mkdocs_page_url = null; |
| </script> |
| |
| <script src="../../js/jquery-2.1.1.min.js" defer></script> |
| <script src="../../js/modernizr-2.8.3.min.js" defer></script> |
| <script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> |
| <script>hljs.initHighlightingOnLoad();</script> |
| |
| <script> |
| (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); |
| |
| ga('create', 'UA-74333035-1', 'gobblin.readthedocs.org'); |
| ga('send', 'pageview'); |
| </script> |
| |
| </head> |
| |
| <body class="wy-body-for-nav" role="document"> |
| |
| <div class="wy-grid-for-nav"> |
| |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav"> |
| <div class="wy-side-nav-search"> |
| <a href="../.." class="icon icon-home"> Apache Gobblin</a> |
| <div role="search"> |
| <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" title="Type search term here" /> |
| </form> |
| </div> |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| <ul class="current"> |
| |
| |
| <li class="toctree-l1"> |
| |
| <a class="" href="/">Home</a> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <a class="" href="../../Powered-By/">Companies Powered By Gobblin</a> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <a class="" href="../../Getting-Started/">Getting Started</a> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <a class="" href="../../Gobblin-Architecture/">Architecture</a> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">User Guide</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Working-with-Job-Configuration-Files/">Job Configuration Files</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-Deployment/">Deployment</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-as-a-Library/">Gobblin as a Library</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-CLI/">Gobblin CLI</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-Compliance/">Gobblin Compliance</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-on-Yarn/">Gobblin on Yarn</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Compaction/">Compaction</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/State-Management-and-Watermarks/">State Management and Watermarks</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Working-with-the-ForkOperator/">Fork Operator</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Configuration-Properties-Glossary/">Configuration Glossary</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Source-schema-and-Converters/">Source schema and Converters</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Partitioned-Writers/">Partitioned Writers</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Monitoring/">Monitoring</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-template/">Template</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-Schedulers/">Schedulers</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Job-Execution-History-Store/">Job Execution History Store</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Building-Gobblin/">Building Gobblin</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-genericLoad/">Generic Configuration Loading</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Hive-Registration/">Hive Registration</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Config-Management/">Config Management</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Docker-Integration/">Docker Integration</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Troubleshooting/">Troubleshooting</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/FAQs/">FAQs</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Sources</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../sources/AvroFileSource/">Avro files</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/CopySource/">File copy</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/QueryBasedSource/">Query based</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/RestApiSource/">Rest Api</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/GoogleAnalyticsSource/">Google Analytics</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/GoogleDriveSource/">Google Drive</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/GoogleWebmaster/">Google Webmaster</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/HadoopTextInputSource/">Hadoop Text Input</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/HelloWorldSource/">Hello World</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/HiveAvroToOrcSource/">Hive Avro-to-ORC</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/HivePurgerSource/">Hive compliance purging</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/SimpleJsonSource/">JSON</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/KafkaSource/">Kafka</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/MySQLSource/">MySQL</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/OracleSource/">Oracle</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/SalesforceSource/">Salesforce</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/SftpSource/">SFTP</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/SqlServerSource/">SQL Server</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/TeradataSource/">Teradata</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sources/WikipediaSource/">Wikipedia</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Sinks (Writers)</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../sinks/AvroHdfsDataWriter/">Avro HDFS</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/ParquetHdfsDataWriter/">Parquet HDFS</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/SimpleBytesWriter/">HDFS Byte array</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/ConsoleWriter/">Console</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/CouchbaseWriter/">Couchbase</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/Http/">HTTP</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/Gobblin-JDBC-Writer/">JDBC</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/Kafka/">Kafka</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Gobblin Adaptors</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../adaptors/Gobblin-Distcp/">Gobblin Distcp</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../adaptors/Hive-Avro-To-ORC-Converter/">Hive Avro-To-Orc Converter</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Case Studies</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../case-studies/Kafka-HDFS-Ingestion/">Kafka-HDFS Ingestion</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../case-studies/Publishing-Data-to-S3/">Publishing Data to S3</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../case-studies/Writing-ORC-Data/">Writing ORC Data</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../case-studies/Hive-Distcp/">Hive Distcp</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Gobblin Data Management</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../data-management/Gobblin-Retention/">Retention</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../data-management/DistcpNgEvents/">Distcp-NG events</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Gobblin Metrics</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Gobblin-Metrics/">Quick Start</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Existing-Reporters/">Existing Reporters</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Metrics-for-Gobblin-ETL/">Metrics for Gobblin ETL</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Gobblin-Metrics-Architecture/">Gobblin Metrics Architecture</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Implementing-New-Reporters/">Implementing New Reporters</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Gobblin-Metrics-Performance/">Gobblin Metrics Performance</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Developer Guide</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../Customization-for-New-Source/">Customization for New Source</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../Customization-for-Converter-and-Operator/">Customization for Converter and Operator</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../CodingStyle/">Code Style Guide</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../Gobblin-Compliance-Design/">Gobblin Compliance Design</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../IDE-setup/">IDE setup</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../Monitoring-Design/">Monitoring Design</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../Documentation-Architecture/">Documentation Architecture</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../Contributing/">Contributing</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../GobblinModules/">Gobblin Modules</a> |
| </li> |
| <li class=" current"> |
| |
| <a class="current" href="./">High Level Consumer</a> |
| <ul class="subnav"> |
| |
| <li class="toctree-l3"><a href="#problem-statement">Problem Statement</a></li> |
| |
| |
| <li class="toctree-l3"><a href="#new-design-details">New Design &amp; Details</a></li> |
| |
| |
| </ul> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Project</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../project/Feature-List/">Feature List</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="/people">Contributors and Team</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../project/Talks-and-Tech-Blogs/">Talks and Tech Blog Posts</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../project/Posts/">Posts</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Miscellaneous</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../miscellaneous/Camus-to-Gobblin-Migration/">Camus to Gobblin Migration</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../miscellaneous/Exactly-Once-Support/">Exactly Once Support</a> |
| </li> |
| </ul> |
| </li> |
| |
| </ul> |
| </div> |
| |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" role="navigation" aria-label="top navigation"> |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="../..">Apache Gobblin</a> |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| <div class="rst-content"> |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| <ul class="wy-breadcrumbs"> |
| <li><a href="../..">Docs</a> »</li> |
| |
| |
| |
| <li>Developer Guide »</li> |
| |
| |
| |
| <li>High Level Consumer</li> |
| <li class="wy-breadcrumbs-aside"> |
| |
| <a href="https://github.com/apache/incubator-gobblin/edit/master/docs/developer-guide/HighLevelConsumer.md"> Edit on Gobblin</a> |
| |
| </li> |
| </ul> |
| <hr/> |
| </div> |
| <div role="main"> |
| <div class="section"> |
| |
| <h1 id="problem-statement">Problem Statement</h1> |
| <p>The current Gobblin Kafka <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-runtime/src/main/java/org/apache/gobblin/runtime/kafka/HighLevelConsumer.java"><code>High Level Consumer</code></a> uses the Kafka Consumer (0.8) APIs, and Gobblin support for them will be deprecated. The primary goal of the redesign is to replace old Kafka consumer APIs such as <a href="https://archive.apache.org/dist/kafka/0.8.2.2/scaladoc/index.html#kafka.consumer.ConsumerConnector"><code>ConsumerConnector</code></a> and <a href="https://archive.apache.org/dist/kafka/0.8.2.2/scaladoc/index.html#kafka.message.MessageAndMetadata"><code>MessageAndMetadata</code></a> with the consumer abstraction <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-modules/gobblin-kafka-common/src/main/java/org/apache/gobblin/kafka/client/GobblinKafkaConsumerClient.java"><code>GobblinKafkaConsumerClient</code></a>. |
| Additionally, the old design uses Kafka's auto-commit feature, which can lose messages when offsets are committed and the system fails before the messages are processed.</p> |
| <p>The detailed design and implementation are described in <a href="https://cwiki.apache.org/confluence/display/GOBBLIN/GIP+5%3A+High+Level+Consumer+Re-design">GIP 5: High Level Consumer Re-design</a>.</p> |
| <h1 id="new-design-details">New Design &amp; Details</h1> |
| <p>GobblinKafkaConsumerClient</p> |
| <p>The new design uses <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-modules/gobblin-kafka-common/src/main/java/org/apache/gobblin/kafka/client/GobblinKafkaConsumerClient.java"><code>GobblinKafkaConsumerClient</code></a>, a simplified, generic wrapper client for communicating with Kafka. This interface does not depend on classes defined in the kafka-clients library, which allows the high level consumer to work with different versions of Kafka. Concrete classes implementing this interface use a specific version of the kafka-clients library. See <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-modules/gobblin-kafka-09/src/main/java/org/apache/gobblin/kafka/client/Kafka09ConsumerClient.java"><code>Kafka09ConsumerClient</code></a> for an example.</p> |
| <p>Manual Offset Commit</p> |
| <p>The <code>GobblinKafkaConsumerClient</code> API has been enhanced to allow offsets to be committed manually.</p> |
| <pre><code class="java"> /** |
| * Commit offsets manually to Kafka asynchronously |
| */ |
| default void commitOffsetsAsync(Map&lt;KafkaPartition, Long&gt; partitionOffsets) { |
| return; |
| } |
| |
| /** |
| * Commit offsets manually to Kafka synchronously |
| */ |
| default void commitOffsetsSync(Map&lt;KafkaPartition, Long&gt; partitionOffsets) { |
| return; |
| } |
| |
| /** |
| * returns the last committed offset for a KafkaPartition |
| * @param partition |
| * @return last committed offset or -1 for invalid KafkaPartition |
| */ |
| default long committed(KafkaPartition partition) { |
| return -1L; |
| } |
| </code></pre> |
| |
| <p>The high level consumer records topic partitions and their offsets only after the messages are processed, and periodically commits them to Kafka. This ensures at-least-once delivery in case of a failure.</p> |
| <p>Additionally, APIs are provided to subscribe to a topic along with a <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-modules/gobblin-kafka-common/src/main/java/org/apache/gobblin/kafka/client/GobblinConsumerRebalanceListener.java"><code>GobblinConsumerRebalanceListener</code></a>, which provides hooks that are invoked when a consumer joins or leaves a consumer group. |
| When that happens, the remaining offsets are committed and the offset caches are cleared.</p> |
| <pre><code class="java"> /** |
| * Subscribe to a topic |
| * @param topic |
| */ |
| default void subscribe(String topic) { |
| return; |
| } |
| |
| /** |
| * Subscribe to a topic along with a GobblinKafkaRebalanceListener |
| * @param topic |
| */ |
| default void subscribe(String topic, GobblinConsumerRebalanceListener listener) { |
| return; |
| } |
| </code></pre> |
| |
| </div> |
| </div> |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="../../project/Feature-List/" class="btn btn-neutral float-right" title="Feature List">Next <span class="icon icon-circle-arrow-right"></span></a> |
| |
| |
| <a href="../GobblinModules/" class="btn btn-neutral" title="Gobblin Modules"><span class="icon icon-circle-arrow-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <!-- Copyright etc --> |
| |
| </div> |
| |
| Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| <div class="rst-versions" role="note" style="cursor: pointer"> |
| <span class="rst-current-version" data-toggle="rst-current-version"> |
| |
| |
| <span><a href="../GobblinModules/" style="color: #fcfcfc;">« Previous</a></span> |
| |
| |
| <span style="margin-left: 15px"><a href="../../project/Feature-List/" style="color: #fcfcfc">Next »</a></span> |
| |
| </span> |
| </div> |
| <script>var base_url = '../..';</script> |
| <script src="../../js/theme.js" defer></script> |
| <script src="../../js/extra.js" defer></script> |
| <script src="../../search/main.js" defer></script> |
| |
| </body> |
| </html> |