| <!DOCTYPE html> |
| <!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]--> |
| <!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]--> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| |
| |
| <title>CSV Formatter - Apache Apex Malhar Documentation</title> |
| |
| |
| <link rel="shortcut icon" href="../../favicon.ico"> |
| |
| |
| |
| <link href='https://fonts.googleapis.com/css?family=Lato:400,700|Roboto+Slab:400,700|Inconsolata:400,700' rel='stylesheet' type='text/css'> |
| |
| <link rel="stylesheet" href="../../css/theme.css" type="text/css" /> |
| <link rel="stylesheet" href="../../css/theme_extra.css" type="text/css" /> |
| <link rel="stylesheet" href="../../css/highlight.css"> |
| |
| |
| <script> |
| // Current page data |
| var mkdocs_page_name = "CSV Formatter"; |
| var mkdocs_page_input_path = "operators/csvformatter.md"; |
| var mkdocs_page_url = "/operators/csvformatter/"; |
| </script> |
| |
| <script src="../../js/jquery-2.1.1.min.js"></script> |
| <script src="../../js/modernizr-2.8.3.min.js"></script> |
| <script type="text/javascript" src="../../js/highlight.pack.js"></script> |
| <script src="../../js/theme.js"></script> |
| |
| |
| </head> |
| |
| <body class="wy-body-for-nav" role="document"> |
| |
| <div class="wy-grid-for-nav"> |
| |
| |
| <nav data-toggle="wy-nav-shift" class="wy-nav-side stickynav"> |
| <div class="wy-side-nav-search"> |
| <a href="../.." class="icon icon-home"> Apache Apex Malhar Documentation</a> |
| <div role="search"> |
| <form id ="rtd-search-form" class="wy-form" action="../../search.html" method="get"> |
| <input type="text" name="q" placeholder="Search docs" /> |
| </form> |
| </div> |
| </div> |
| |
| <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation"> |
| <ul class="current"> |
| |
| <li> |
| <li class="toctree-l1 "> |
| <a class="" href="../..">Apache Apex Malhar</a> |
| |
| </li> |
| <li> |
| |
| <li> |
| <ul class="subnav"> |
| <li><span>APIs</span></li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../../apis/calcite/">SQL</a> |
| |
| </li> |
| |
| |
| </ul> |
| <li> |
| |
| <li> |
| <ul class="subnav"> |
| <li><span>Operators</span></li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../block_reader/">Block Reader</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 current"> |
| <a class="current" href="./">CSV Formatter</a> |
| |
| <ul> |
| |
| <li class="toctree-l3"><a href="#csvformatter">CsvFormatter</a></li> |
| |
| <li><a class="toctree-l4" href="#operator-objective">Operator Objective</a></li> |
| |
| <li><a class="toctree-l4" href="#operator-information">Operator Information</a></li> |
| |
| <li><a class="toctree-l4" href="#properties-attributes-and-ports">Properties, Attributes and Ports</a></li> |
| |
| <li><a class="toctree-l4" href="#limitations">Limitations</a></li> |
| |
| <li><a class="toctree-l4" href="#example">Example</a></li> |
| |
| <li><a class="toctree-l4" href="#advanced">Advanced</a></li> |
| |
| |
| </ul> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../csvParserOperator/">CSV Parser</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../deduper/">Deduper</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../enricher/">Enricher</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../fsInputOperator/">File Input</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../file_output/">File Output</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../file_splitter/">File Splitter</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../filter/">Filter</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../fixedWidthParserOperator/">Fixed Width Parser</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../ftpInputOperator/">FTP Input Operator</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../AbstractJdbcTransactionableOutputOperator/">Jdbc Output Operator</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../jdbcPollInputOperator/">JDBC Poller Input</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../jmsInputOperator/">JMS Input</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../jsonFormatter/">JSON Formatter</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../jsonParser/">JSON Parser</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../kafkaInputOperator/">Kafka Input</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../regexparser/">Regex Parser</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../s3outputmodule/">S3 Output Module</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../transform/">Transformer</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../windowedOperator/">Windowed Operator</a> |
| |
| </li> |
| |
| |
| |
| <li class="toctree-l1 "> |
| <a class="" href="../xmlParserOperator/">XML Parser</a> |
| |
| </li> |
| |
| |
| </ul> |
| <li> |
| |
| </ul> |
| </div> |
| |
| </nav> |
| |
| <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"> |
| |
| |
| <nav class="wy-nav-top" role="navigation" aria-label="top navigation"> |
| <i data-toggle="wy-nav-top" class="fa fa-bars"></i> |
| <a href="../..">Apache Apex Malhar Documentation</a> |
| </nav> |
| |
| |
| <div class="wy-nav-content"> |
| <div class="rst-content"> |
| <div role="navigation" aria-label="breadcrumbs navigation"> |
| <ul class="wy-breadcrumbs"> |
| <li><a href="../..">Docs</a> »</li> |
| |
| |
| |
| <li>Operators »</li> |
| |
| |
| |
| <li>CSV Formatter</li> |
| <li class="wy-breadcrumbs-aside"> |
| |
| </li> |
| </ul> |
| <hr/> |
| </div> |
| <div role="main"> |
| <div class="section"> |
| |
| <h1 id="csvformatter">CsvFormatter</h1> |
| <h2 id="operator-objective">Operator Objective</h2> |
| <p>This operator receives a POJO (<a href="https://en.wikipedia.org/wiki/Plain_Old_Java_Object">Plain Old Java Object</a>) as an incoming tuple, converts the data in |
| the incoming POJO to a custom delimited string and emits the delimited string.</p> |
| <p>CsvFormatter supports schema definition as a JSON string. </p> |
| <p>CsvFormatter does not hold any state and is <strong>idempotent</strong>, <strong>fault-tolerant</strong> and <strong>statically/dynamically partitionable</strong>.</p> |
| <h2 id="operator-information">Operator Information</h2> |
| <ol> |
| <li>Operator location: <strong><em>malhar-contrib</em></strong></li> |
| <li>Available since: <strong><em>3.2.0</em></strong></li> |
| <li>Operator state: <strong><em>Evolving</em></strong></li> |
| <li>Java Packages:<ul> |
| <li>Operator: <strong><em><a href="https://www.datatorrent.com/docs/apidocs/com/datatorrent/contrib/formatter/CsvFormatter.html">com.datatorrent.contrib.formatter.CsvFormatter</a></em></strong></li> |
| </ul> |
| </li> |
| </ol> |
| <h2 id="properties-attributes-and-ports">Properties, Attributes and Ports</h2> |
| <h3 id="properties-of-pojoenricher"><a name="props"></a>Properties of CsvFormatter</h3> |
| <table> |
| <thead> |
| <tr> |
| <th><strong>Property</strong></th> |
| <th><strong>Description</strong></th> |
| <th><strong>Type</strong></th> |
| <th><strong>Mandatory</strong></th> |
| <th><strong>Default Value</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td><em>schema</em></td> |
| <td>Contents of the schema. The schema is specified in JSON format.</td> |
| <td>String</td> |
| <td>Yes</td> |
| <td>N/A</td> |
| </tr> |
| </tbody> |
| </table> |
| <h3 id="platform-attributes-that-influences-operator-behavior">Platform Attributes that influence operator behavior</h3> |
| <table> |
| <thead> |
| <tr> |
| <th><strong>Attribute</strong></th> |
| <th><strong>Description</strong></th> |
| <th><strong>Type</strong></th> |
| <th><strong>Mandatory</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td><em>in.TUPLE_CLASS</em></td> |
| <td>TUPLE_CLASS attribute on input port which tells operator the class of POJO which will be incoming</td> |
| <td>Class or FQCN</td> |
| <td>Yes</td> |
| </tr> |
| </tbody> |
| </table> |
| <h3 id="ports">Ports</h3> |
| <table> |
| <thead> |
| <tr> |
| <th><strong>Port</strong></th> |
| <th><strong>Description</strong></th> |
| <th><strong>Type</strong></th> |
| <th><strong>Mandatory</strong></th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td><em>in</em></td> |
| <td>Tuples which need to be formatted are received on this port</td> |
| <td>Object (POJO)</td> |
| <td>Yes</td> |
| </tr> |
| <tr> |
| <td><em>out</em></td> |
| <td>Tuples that are formatted are emitted from this port</td> |
| <td>String</td> |
| <td>No</td> |
| </tr> |
| <tr> |
| <td><em>err</em></td> |
| <td>Tuples that could not be converted are emitted on this port</td> |
| <td>Object</td> |
| <td>No</td> |
| </tr> |
| </tbody> |
| </table> |
| <h2 id="limitations">Limitations</h2> |
| <p>CsvFormatter currently has the following limitations:</p> |
| <ol> |
| <li>The field names in the schema and the POJO field names must match. For example, if the name of the schema field is "customerName", then the POJO should contain a field with the same name.</li> |
| <li>Field wise validation/formatting is not yet supported.</li> |
| <li>The fields will be written to the file in the same order as specified in the schema JSON.</li> |
| </ol> |
| <h2 id="example">Example</h2> |
| <p>Example for CsvFormatter can be found at: <a href="https://github.com/DataTorrent/examples/tree/master/tutorials/csvformatter">https://github.com/DataTorrent/examples/tree/master/tutorials/csvformatter</a></p> |
| <h2 id="advanced">Advanced</h2> |
| <h3 id="schema-format-for-csvformatter"><a name="JSONFileFormat"></a> Schema format for CsvFormatter</h3> |
| <p>CsvFormatter expects schema to be a String in JSON format:</p> |
| <p>Example for format of schema:</p> |
| <pre><code class="json">{ |
| "separator": ",", |
| "quoteChar": "\"", |
| "lineDelimiter": "\n", |
| "fields": [ |
| { |
| "name": "campaignId", |
| "type": "Integer" |
| }, |
| { |
| "name": "startDate", |
| "type": "Date", |
| "constraints": { |
| "format": "yyyy-MM-dd" |
| } |
| } |
| ] |
| } |
| </code></pre> |
| |
| <h3 id="partitioning-of-csvformatter">Partitioning of CsvFormatter</h3> |
| <p>Because CsvFormatter is a stateless operator, the built-in partitioners present in the Malhar library can be used with it directly by setting properties as follows:</p> |
| <h4 id="stateless-partioning-of-csvformatter">Stateless partitioning of CsvFormatter</h4> |
| <p>Stateless partitioning will ensure that CsvFormatter will be partitioned right at the start of the application and will remain partitioned throughout the lifetime of the DAG. |
| CsvFormatter can be statelessly partitioned by adding the following lines to properties.xml:</p> |
| <pre><code class="xml"> &lt;property&gt; |
| &lt;name&gt;dt.operator.{OperatorName}.attr.PARTITIONER&lt;/name&gt; |
| &lt;value&gt;com.datatorrent.common.partitioner.StatelessPartitioner:2&lt;/value&gt; |
| &lt;/property&gt; |
| </code></pre> |
| |
| <p>where {OperatorName} is the name of the CsvFormatter operator. |
| The above lines will statically partition CsvFormatter into 2 partitions. The value can be changed to adjust the number of static partitions.</p> |
| <h4 id="dynamic-partitioning-of-csvformatter">Dynamic Partitioning of CsvFormatter</h4> |
| <p>Dynamic partitioning is a feature of the Apex platform which changes the partitioning of the operator based on certain conditions. |
| CsvFormatter can be dynamically partitioned using the out-of-the-box partitioner below:</p> |
| <h5 id="throughput-based">Throughput based</h5> |
| <p>The following code can be added to the populateDAG method of the application to dynamically partition CsvFormatter:</p> |
| <pre><code class="java"> StatelessThroughputBasedPartitioner&lt;CsvFormatter&gt; partitioner = new StatelessThroughputBasedPartitioner&lt;&gt;(); |
| partitioner.setCooldownMillis(conf.getLong(COOL_DOWN_MILLIS, 10000)); |
| partitioner.setMaximumEvents(conf.getLong(MAX_THROUGHPUT, 30000)); |
| partitioner.setMinimumEvents(conf.getLong(MIN_THROUGHPUT, 10000)); |
| dag.setAttribute(csvFormatter, OperatorContext.STATS_LISTENERS, Arrays.asList(new StatsListener[]{partitioner})); |
| dag.setAttribute(csvFormatter, OperatorContext.PARTITIONER, partitioner); |
| </code></pre> |
| |
| <p>The above code will dynamically partition CsvFormatter when the throughput changes. |
| If the overall throughput of CsvFormatter rises above 30000 or falls below 10000, the platform will repartition CsvFormatter |
| so that the throughput of a single partition stays between 10000 and 30000. |
| A CooldownMillis of 10000 will be used as the threshold time over which the throughput change is observed.</p> |
| |
| </div> |
| </div> |
| <footer> |
| |
| <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation"> |
| |
| <a href="../csvParserOperator/" class="btn btn-neutral float-right" title="CSV Parser">Next <span class="icon icon-circle-arrow-right"></span></a> |
| |
| |
| <a href="../block_reader/" class="btn btn-neutral" title="Block Reader"><span class="icon icon-circle-arrow-left"></span> Previous</a> |
| |
| </div> |
| |
| |
| <hr/> |
| |
| <div role="contentinfo"> |
| <!-- Copyright etc --> |
| |
| </div> |
| |
| Built with <a href="http://www.mkdocs.org">MkDocs</a> using a <a href="https://github.com/snide/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>. |
| </footer> |
| |
| </div> |
| </div> |
| |
| </section> |
| |
| </div> |
| |
| <div class="rst-versions" role="note" style="cursor: pointer"> |
| <span class="rst-current-version" data-toggle="rst-current-version"> |
| |
| |
| <span><a href="../block_reader/" style="color: #fcfcfc;">« Previous</a></span> |
| |
| |
| <span style="margin-left: 15px"><a href="../csvParserOperator/" style="color: #fcfcfc">Next »</a></span> |
| |
| </span> |
| </div> |
| |
| </body> |
| </html> |