| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <meta name="author" content="Apache Software Foundation"> |
| <link rel="shortcut icon" href="../../img/favicon.ico"> |
| <title>Query based - Apache Gobblin</title> |
| <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'> |
| |
| <link rel="stylesheet" href="../../css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" /> |
| <link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/github.min.css"> |
| <link href="../../css/extra.css" rel="stylesheet"> |
| |
| <script> |
| // Current page data |
| var mkdocs_page_name = "Query based"; |
| var mkdocs_page_input_path = "sources/QueryBasedSource.md"; |
| var mkdocs_page_url = null; |
| </script> |
| |
| <script src="../../js/jquery-2.1.1.min.js" defer></script> |
| <script src="../../js/modernizr-2.8.3.min.js" defer></script> |
| <script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/highlight.min.js"></script> |
| <script>hljs.initHighlightingOnLoad();</script> |
| |
| |
| </head> |
| |
| <body class="wy-body-for-nav" role="document"> |
| |
| <div class="wy-grid-for-nav"> |
| |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav"> |
| <div class="wy-side-nav-search"> |
| <a href="../.." class="icon icon-home"> Apache Gobblin</a> |
| <div role="search"> |
| <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" title="Type search term here" /> |
| </form> |
| </div> |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| <ul class="current"> |
| |
| |
| <li class="toctree-l1"> |
| |
| <a class="" href="/">Home</a> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <a class="" href="../../Powered-By/">Companies Powered By Gobblin</a> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <a class="" href="../../Getting-Started/">Getting Started</a> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <a class="" href="../../Gobblin-Architecture/">Architecture</a> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">User Guide</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Working-with-Job-Configuration-Files/">Job Configuration Files</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-Deployment/">Deployment</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-as-a-Library/">Gobblin as a Library</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-CLI/">Gobblin CLI</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-Compliance/">Gobblin Compliance</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-on-Yarn/">Gobblin on Yarn</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Compaction/">Compaction</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/State-Management-and-Watermarks/">State Management and Watermarks</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Working-with-the-ForkOperator/">Fork Operator</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Configuration-Properties-Glossary/">Configuration Glossary</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Source-schema-and-Converters/">Source schema and Converters</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Partitioned-Writers/">Partitioned Writers</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Monitoring/">Monitoring</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-template/">Template</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-Schedulers/">Schedulers</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Job-Execution-History-Store/">Job Execution History Store</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Building-Gobblin/">Building Gobblin</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Gobblin-genericLoad/">Generic Configuration Loading</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Hive-Registration/">Hive Registration</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Config-Management/">Config Management</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Docker-Integration/">Docker Integration</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/Troubleshooting/">Troubleshooting</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../user-guide/FAQs/">FAQs</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Sources</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../AvroFileSource/">Avro files</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../CopySource/">File copy</a> |
| </li> |
| <li class=" current"> |
| |
| <a class="current" href="./">Query based</a> |
| <ul class="subnav"> |
| |
| <li class="toctree-l3"><a href="#introduction">Introduction</a></li> |
| |
| |
| <li class="toctree-l3"><a href="#constructs">Constructs</a></li> |
| |
| <ul> |
| |
| <li><a class="toctree-l4" href="#querybasedsource">QueryBasedSource</a></li> |
| |
| <li><a class="toctree-l4" href="#querybasedextractor">QueryBasedExtractor</a></li> |
| |
| </ul> |
| |
| |
| <li class="toctree-l3"><a href="#configuration">Configuration</a></li> |
| |
| |
| </ul> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../RestApiSource/">Rest Api</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../GoogleAnalyticsSource/">Google Analytics</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../GoogleDriveSource/">Google Drive</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../GoogleWebmaster/">Google Webmaster</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../HadoopTextInputSource/">Hadoop Text Input</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../HelloWorldSource/">Hello World</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../HiveAvroToOrcSource/">Hive Avro-to-ORC</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../HivePurgerSource/">Hive compliance purging</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../SimpleJsonSource/">JSON</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../KafkaSource/">Kafka</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../MySQLSource/">MySQL</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../OracleSource/">Oracle</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../SalesforceSource/">Salesforce</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../SftpSource/">SFTP</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../SqlServerSource/">SQL Server</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../TeradataSource/">Teradata</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../WikipediaSource/">Wikipedia</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Sinks (Writers)</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../sinks/AvroHdfsDataWriter/">Avro HDFS</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/ParquetHdfsDataWriter/">Parquet HDFS</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/SimpleBytesWriter/">HDFS Byte array</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/ConsoleWriter/">Console</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/CouchbaseWriter/">Couchbase</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/Http/">HTTP</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/Gobblin-JDBC-Writer/">JDBC</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../sinks/Kafka/">Kafka</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Gobblin Adaptors</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../adaptors/Gobblin-Distcp/">Gobblin Distcp</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../adaptors/Hive-Avro-To-ORC-Converter/">Hive Avro-To-Orc Converter</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Case Studies</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../case-studies/Kafka-HDFS-Ingestion/">Kafka-HDFS Ingestion</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../case-studies/Publishing-Data-to-S3/">Publishing Data to S3</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../case-studies/Writing-ORC-Data/">Writing ORC Data</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../case-studies/Hive-Distcp/">Hive Distcp</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Gobblin Data Management</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../data-management/Gobblin-Retention/">Retention</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../data-management/DistcpNgEvents/">Distcp-NG events</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Gobblin Metrics</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Gobblin-Metrics/">Quick Start</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Existing-Reporters/">Existing Reporters</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Metrics-for-Gobblin-ETL/">Metrics for Gobblin ETL</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Gobblin-Metrics-Architecture/">Gobblin Metrics Architecture</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Implementing-New-Reporters/">Implementing New Reporters</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../metrics/Gobblin-Metrics-Performance/">Gobblin Metrics Performance</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Developer Guide</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/Customization-for-New-Source/">Customization for New Source</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/Customization-for-Converter-and-Operator/">Customization for Converter and Operator</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/CodingStyle/">Code Style Guide</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/Gobblin-Compliance-Design/">Gobblin Compliance Design</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/IDE-setup/">IDE setup</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/Monitoring-Design/">Monitoring Design</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/Documentation-Architecture/">Documentation Architecture</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/Contributing/">Contributing</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/GobblinModules/">Gobblin Modules</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../developer-guide/HighLevelConsumer/">High Level Consumer</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Project</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../project/Feature-List/">Feature List</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="/people">Contributors and Team</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../project/Talks-and-Tech-Blogs/">Talks and Tech Blog Posts</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../project/Posts/">Posts</a> |
| </li> |
| </ul> |
| </li> |
| |
| <li class="toctree-l1"> |
| |
| <span class="caption-text">Miscellaneous</span> |
| <ul class="subnav"> |
| <li class=""> |
| |
| <a class="" href="../../miscellaneous/Camus-to-Gobblin-Migration/">Camus to Gobblin Migration</a> |
| </li> |
| <li class=""> |
| |
| <a class="" href="../../miscellaneous/Exactly-Once-Support/">Exactly Once Support</a> |
| </li> |
| </ul> |
| </li> |
| |
| </ul> |
| </div> |
| |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" role="navigation" aria-label="top navigation"> |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="../..">Apache Gobblin</a> |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| <div class="rst-content"> |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| <ul class="wy-breadcrumbs"> |
| <li><a href="../..">Docs</a> »</li> |
| |
| |
| |
| <li>Sources »</li> |
| |
| |
| |
| <li>Query based</li> |
| <li class="wy-breadcrumbs-aside"> |
| |
| <a href="https://github.com/apache/incubator-gobblin/edit/master/docs/sources/QueryBasedSource.md" rel="nofollow"> Edit on Gobblin</a> |
| |
| </li> |
| </ul> |
| <hr/> |
| </div> |
| <div role="main"> |
| <div class="section"> |
| |
| <div class="toc"> |
| <ul> |
| <li><a href="#introduction">Introduction</a></li> |
| <li><a href="#constructs">Constructs</a><ul> |
| <li><a href="#querybasedsource">QueryBasedSource</a></li> |
| <li><a href="#querybasedextractor">QueryBasedExtractor</a><ul> |
| <li><a href="#build">build</a></li> |
| <li><a href="#readrecord">readRecord</a></li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li><a href="#configuration">Configuration</a></li> |
| </ul> |
| </div> |
| <h1 id="introduction">Introduction</h1> |
| <p><a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/extract/QueryBasedSource.java" rel="nofollow"><code>QueryBasedSource</code></a> |
| represents a category of sources whose data is pulled by sending queries. A dataset of a source is identified as a |
| <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/extract/QueryBasedSource.java#L96" rel="nofollow"><code>SourceEntity</code></a>. |
| Query can be done by sending HTTP requests or SQL commands. A source often, but not always, has a corresponding |
| <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/extract/QueryBasedExtractor.java" rel="nofollow"><code>QueryBasedExtractor</code></a>, |
| which defines the way and implements common routines to extract data from the source.</p> |
| <h1 id="constructs">Constructs</h1> |
| <h2 id="querybasedsource"><code>QueryBasedSource</code></h2> |
| <p><p align="center"> |
| <figure> <br /> |
| <img src="../../img/Gobblin-Query-Based-Sources.png" alt="Query based sources" width="800"> |
| <figcaption><br>Figure 1: Query based sources<br></figcaption> |
| </figure> |
| </p></p> |
| <p>Like other categories of sources, a <code>QueryBasedSource</code> focuses on creating work units as well. The way it does follows the general pattern:</p> |
| <ul> |
| <li>calculate low watermark of current run based on previous runs</li> |
| <li>compute a high watermark</li> |
| <li>partition datasets of current run into work units</li> |
| <li>pick up previously failed work units.</li> |
| </ul> |
| <p>At last, it will group several work units as |
| <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-api/src/main/java/org/apache/gobblin/source/workunit/MultiWorkUnit.java" rel="nofollow"><code>MultiWorkUnit</code></a> |
| according to the <code>mr.job.max.mappers</code> configuration (Note: other categories of source might have a different approach to group work units into <code>MultiWorkUnit</code>). </p> |
| <h2 id="querybasedextractor"><code>QueryBasedExtractor</code></h2> |
| <p><p align="center"> |
| <figure> <br /> |
| <img src=../../img/Gobblin-Query-Based-Extractors.png alt="Query based extractors" width="800"> |
| <figcaption><br>Figure 2: Query based extractors<br></figcaption> |
| </figure> |
| </p></p> |
| <p>Currently in Gobblin, depending on how an extractor communicates with a source |
| (or <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/extract/ProtocolSpecificLayer.java" rel="nofollow">different communication protocols</a>), |
| a <code>QueryBasedExtractor</code> falls into 2 categories: |
| <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/extract/restapi/RestApiExtractor.java" rel="nofollow"><code>RestApiExtractor</code></a> |
| and |
| <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-modules/gobblin-sql/src/main/java/org/apache/gobblin/source/jdbc/JdbcExtractor.java" rel="nofollow"><code>JdbcExtractor</code></a>. |
| A specific extractor has to provide some |
| <a href="https://github.com/apache/incubator-gobblin/blob/master/gobblin-core/src/main/java/org/apache/gobblin/source/extractor/extract/SourceSpecificLayer.java" rel="nofollow">source specific logic</a> |
| in order to successfully extract information from the source.</p> |
| <h3 id="build"><code>build</code></h3> |
| <p>Building a query based extractor may involve three queries:</p> |
| <p align="center"> |
| <figure> |
| <img src=../../img/Gobblin-Query-Based-Extractor-Build-Queries.png alt="Query based extractor build queries" width="400"> |
| <figcaption><br>Figure 3: Query based extractor build queries<br></figcaption> |
| </figure> |
| </p> |
| |
| <ul> |
| <li><code>extractMetadata</code> sends a query to fetch the data schema. For example:</li> |
| </ul> |
| <pre><code class="sql">select col.column_name, col.data_type, |
| case when CHARACTER_OCTET_LENGTH is null then 0 else 0 end as length, |
| case when NUMERIC_PRECISION is null then 0 else NUMERIC_PRECISION end as precesion, |
| case when NUMERIC_SCALE is null then 0 else NUMERIC_SCALE end as scale, |
| case when is_nullable='NO' then 'false' else 'true' end as nullable, '' as format, |
| case when col.column_comment is null then '' else col.column_comment end as comment |
| from information_schema.COLUMNS col |
| WHERE upper(col.table_name)=upper(?) AND upper(col.table_schema)=upper(?) |
| order by col.ORDINAL_POSITION |
| </code></pre> |
| |
| <ul> |
| <li><code>getMaxWatermark</code> sends a query for calculating the latest high watermark. For example:</li> |
| </ul> |
| <pre><code class="sql">SELECT max(SystemModTime) FROM salesforce.campaign |
| where (SystemModTime >= '2014-01-01 00:00:00' and SystemModTime <= '2017-03-09 10:42:10') |
| </code></pre> |
| |
| <ul> |
| <li><code>getSourceCount</code> sends a query for the total count of records to be pulled from the source. For example:</li> |
| </ul> |
| <pre><code class="sql">SELECT COUNT(1) FROM salesforce.campaign |
| where (SystemModTime >= '2014-01-01 00:00:00' and SystemModTime <= '2017-03-01 19:03:07') |
| </code></pre> |
| |
| <p>The actual implementations of those methods are pushed to an upper layer, which uses its own protocol(e.g. <a href="../RestApiSource/">Rest Api</a> or Jdbc. The examples given are using Jdbc.) to query the source.</p> |
| <h3 id="readrecord"><code>readRecord</code></h3> |
| <p>While querying the record set for the last work unit, the upper bounds will be removed if appropriate. For a daily open-ended full dump job, it will fetch a more complete data set as there |
| might be some new data generated or existing data changes between the data query creation and execution. </p> |
| <p>Two separate approaches to fetch record set:</p> |
| <ul> |
| <li><code>getRecordSet</code>: A standard way to send a query, e.g. Rest api or Jdbc</li> |
| </ul> |
| <pre><code class="sql">SELECT id,name,budget,systemmodtime FROM salesforce.campaign |
| where (SystemModTime >= '2014-01-01 00:00:00') |
| </code></pre> |
| |
| <ul> |
| <li><code>getRecordSetFromSourceApi</code>: A specific way to send a query based on source api, e.g. Salesforce</li> |
| </ul> |
| <p>Likewise, the actual implementations of those methods are pushed to an upper layer. See chapters: <a href="../RestApiSource/">Rest Api</a>, <a href="../SalesforceSource/">Salesforce</a>.</p> |
| <h1 id="configuration">Configuration</h1> |
| <table> |
| <thead> |
| <tr> |
| <th>Configuration Key</th> |
| <th>Default Value</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td><code>source.querybased.schema</code></td> |
| <td>Must Provide</td> |
| <td>Database name</td> |
| </tr> |
| <tr> |
| <td><code>source.entity</code></td> |
| <td>Must Provide</td> |
| <td>Name of the source entity that will be pulled from the source. It could be a database table, a source topic, a restful entity, etc.</td> |
| </tr> |
| <tr> |
| <td><code>source.max.number.of.partitions</code></td> |
| <td>20</td> |
| <td>Maximum number of partitions or work units to split this current run across. Only used by the <code>QueryBasedSource</code> and <code>FileBasedSource</code>.</td> |
| </tr> |
| <tr> |
| <td><code>source.querybased.watermark.type</code></td> |
| <td>Must Provide</td> |
| <td>The format of the watermark that is used when extracting data from the source. Possible types are timestamp, date, hour, simple.</td> |
| </tr> |
| <tr> |
| <td><code>source.querybased.start.value</code></td> |
| <td>Must provide</td> |
| <td>Value for the watermark to start pulling data from, also the default watermark if the previous watermark cannot be found in the old task states.</td> |
| </tr> |
| <tr> |
| <td><code>source.querybased.end.value</code></td> |
| <td>Optional</td> |
| <td>The high watermark the job should pull up to.</td> |
| </tr> |
| <tr> |
| <td><code>extract.delta.fields</code></td> |
| <td>Optional</td> |
| <td>List of columns that are associated with the watermark.</td> |
| </tr> |
| <tr> |
| <td><code>source.querybased.skip.count.calc</code></td> |
| <td>False</td> |
| <td>Disable calculating the total counts of records to be pulled from the source.</td> |
| </tr> |
| <tr> |
| <td><code>source.querybased.is.specific.api.active</code></td> |
| <td>False</td> |
| <td>True if this pull needs to use source specific apis instead of standard protocols. Ex: Use salesforce bulk api instead of rest api</td> |
| </tr> |
| </tbody> |
| </table> |
| |
| </div> |
| </div> |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="../RestApiSource/" class="btn btn-neutral float-right" title="Rest Api">Next <span class="icon icon-circle-arrow-right"></span></a> |
| |
| |
| <a href="../CopySource/" class="btn btn-neutral" title="File copy"><span class="icon icon-circle-arrow-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <!-- Copyright etc --> |
| |
| </div> |
| |
| Built with <a href="http://www.mkdocs.org" rel="nofollow">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme" rel="nofollow">theme</a> provided by <a href="https://readthedocs.org" rel="nofollow">Read the Docs</a>. |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| <div class="rst-versions" role="note" style="cursor: pointer"> |
| <span class="rst-current-version" data-toggle="rst-current-version"> |
| |
| |
| <span><a href="../CopySource/" style="color: #fcfcfc;">« Previous</a></span> |
| |
| |
| <span style="margin-left: 15px"><a href="../RestApiSource/" style="color: #fcfcfc">Next »</a></span> |
| |
| </span> |
| </div> |
| <script>var base_url = '../..';</script> |
| <script src="../../js/theme.js" defer></script> |
| <script src="../../js/extra.js" defer></script> |
| <script src="../../search/main.js" defer></script> |
| |
| </body> |
| </html> |