blob: 58ba0a22a48d68805e5915e2d8914bb5a22cce64 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href='images/favicon.ico' rel='shortcut icon' type='image/x-icon'>
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>CarbonData</title>
<style>
</style>
<!-- Bootstrap -->
<link rel="stylesheet" href="css/bootstrap.min.css">
<link href="css/style.css" rel="stylesheet">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!-- Both shims are loaded from the same CDN host, oss.maxcdn.com. -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<script src="js/jquery.min.js"></script>
<script src="js/bootstrap.min.js"></script>
<script defer src="https://use.fontawesome.com/releases/v5.0.8/js/all.js"></script>
</head>
<body>
<header>
<nav class="navbar navbar-default navbar-custom cd-navbar-wrapper">
<div class="container">
<div class="navbar-header">
<button aria-controls="navbar" aria-expanded="false" data-target="#navbar" data-toggle="collapse"
class="navbar-toggle collapsed" type="button">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="index.html" class="logo">
<img src="images/CarbonDataLogo.png" alt="CarbonData logo" title="CarbonData logo"/>
</a>
</div>
<div class="navbar-collapse collapse cd_navcontnt" id="navbar">
<ul class="nav navbar-nav navbar-right navlist-custom">
<!-- Icon-only home link: the icon is aria-hidden, so aria-label supplies the accessible name. -->
<li><a href="index.html" class="hidden-xs" aria-label="Home"><i class="fa fa-home" aria-hidden="true"></i> </a>
</li>
<li><a href="index.html" class="hidden-lg hidden-md hidden-sm">Home</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle " data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false"> Download <span class="caret"></span></a>
<ul class="dropdown-menu">
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.2.0/"
target="_blank">Apache CarbonData 2.2.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.1.1/"
target="_blank">Apache CarbonData 2.1.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.1.0/"
target="_blank">Apache CarbonData 2.1.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.0.1/"
target="_blank">Apache CarbonData 2.0.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.0.0/"
target="_blank">Apache CarbonData 2.0.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.6.1/"
target="_blank">Apache CarbonData 1.6.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.6.0/"
target="_blank">Apache CarbonData 1.6.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.4/"
target="_blank">Apache CarbonData 1.5.4</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.3/"
target="_blank">Apache CarbonData 1.5.3</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.2/"
target="_blank">Apache CarbonData 1.5.2</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.1/"
target="_blank">Apache CarbonData 1.5.1</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/display/CARBONDATA/Releases"
target="_blank">Release Archive</a></li>
</ul>
</li>
<li><a href="documentation.html" class="active">Documentation</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Community <span class="caret"></span></a>
<ul class="dropdown-menu">
<li>
<a href="https://github.com/apache/carbondata/blob/master/docs/how-to-contribute-to-apache-carbondata.md"
target="_blank">Contributing to CarbonData</a></li>
<li>
<a href="https://github.com/apache/carbondata/blob/master/docs/release-guide.md"
target="_blank">Release Guide</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/display/CARBONDATA/PMC+and+Committers+member+list"
target="_blank">Project PMC and Committers</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=66850609"
target="_blank">CarbonData Meetups</a></li>
<li><a href="security.html">Apache CarbonData Security</a></li>
<li><a href="https://issues.apache.org/jira/browse/CARBONDATA" target="_blank">Apache
Jira</a></li>
<li><a href="videogallery.html">CarbonData Videos </a></li>
</ul>
</li>
<li class="dropdown">
<a href="http://www.apache.org/" class="apache_link hidden-xs dropdown-toggle"
data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Apache</a>
<ul class="dropdown-menu">
<li><a href="http://www.apache.org/" target="_blank">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/" target="_blank">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html"
target="_blank">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li>
</ul>
</li>
<li class="dropdown">
<a href="http://www.apache.org/" class="hidden-lg hidden-md hidden-sm dropdown-toggle"
data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Apache</a>
<ul class="dropdown-menu">
<li><a href="http://www.apache.org/" target="_blank">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/" target="_blank">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html"
target="_blank">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li>
</ul>
</li>
<!-- Icon-only search trigger: the icon is aria-hidden, so aria-label supplies the accessible name. -->
<li>
<a href="#" id="search-icon" aria-label="Search"><i class="fa fa-search" aria-hidden="true"></i></a>
</li>
</ul>
</div><!--/.nav-collapse -->
<!-- Site search: submits the query to Google over HTTPS; the checked box sends sitesearch=carbondata.apache.org. -->
<div id="search-box">
<form method="get" action="https://www.google.com/search" target="_blank">
<div class="search-block">
<!-- Layout-only table: role="presentation" keeps assistive technology from announcing it as a data table. -->
<table role="presentation" border="0" cellpadding="0" width="100%">
<tr>
<td style="width:80%">
<!-- A placeholder is not a label, so aria-label gives the field its accessible name. -->
<input type="text" name="q" size="5" maxlength="255" value=""
class="search-input" placeholder="Search...." aria-label="Search terms" required/>
</td>
<td style="width:20%">
<input type="submit" value="Search"/></td>
</tr>
<tr>
<td align="left" style="font-size:75%" colspan="2">
<input type="checkbox" name="sitesearch" value="carbondata.apache.org" checked/>
<span style=" position: relative; top: -3px;"> Only search for CarbonData</span>
</td>
</tr>
</table>
</div>
</form>
</div>
</div>
</nav>
</header> <!-- end Header part -->
<div class="fixed-padding"></div> <!-- top padding with fixed header -->
<section><!-- Dashboard nav -->
<div class="container-fluid q">
<div class="col-sm-12 col-md-12 maindashboard">
<div class="verticalnavbar">
<nav class="b-sticky-nav">
<div class="nav-scroller">
<div class="nav__inner">
<a class="b-nav__intro nav__item" href="./introduction.html">introduction</a>
<a class="b-nav__quickstart nav__item" href="./quick-start-guide.html">quick start</a>
<a class="b-nav__uses nav__item" href="./usecases.html">use cases</a>
<div class="nav__item nav__item__with__subs">
<a class="b-nav__docs nav__item nav__sub__anchor" href="./language-manual.html">Language Reference</a>
<a class="nav__item nav__sub__item" href="./ddl-of-carbondata.html">DDL</a>
<a class="nav__item nav__sub__item" href="./dml-of-carbondata.html">DML</a>
<a class="nav__item nav__sub__item" href="./streaming-guide.html">Streaming</a>
<a class="nav__item nav__sub__item" href="./configuration-parameters.html">Configuration</a>
<a class="nav__item nav__sub__item" href="./index-developer-guide.html">Indexes</a>
<a class="nav__item nav__sub__item" href="./supported-data-types-in-carbondata.html">Data Types</a>
</div>
<!-- Index Management group: every target is a sibling page, so all links use the same ./ prefix as the rest of the side navigation. -->
<div class="nav__item nav__item__with__subs">
<a class="b-nav__datamap nav__item nav__sub__anchor" href="./index-management.html">Index Management</a>
<a class="nav__item nav__sub__item" href="./bloomfilter-index-guide.html">Bloom Filter</a>
<a class="nav__item nav__sub__item" href="./lucene-index-guide.html">Lucene</a>
<a class="nav__item nav__sub__item" href="./secondary-index-guide.html">Secondary Index</a>
<a class="nav__item nav__sub__item" href="./spatial-index-guide.html">Spatial Index</a>
<a class="nav__item nav__sub__item" href="./mv-guide.html">MV</a>
</div>
<div class="nav__item nav__item__with__subs">
<a class="b-nav__api nav__item nav__sub__anchor" href="./sdk-guide.html">API</a>
<a class="nav__item nav__sub__item" href="./sdk-guide.html">Java SDK</a>
<a class="nav__item nav__sub__item" href="./csdk-guide.html">C++ SDK</a>
</div>
<a class="b-nav__perf nav__item" href="./performance-tuning.html">Performance Tuning</a>
<a class="b-nav__s3 nav__item" href="./s3-guide.html">S3 Storage</a>
<a class="b-nav__indexserver nav__item" href="./index-server.html">Index Server</a>
<a class="b-nav__prestodb nav__item" href="./prestodb-guide.html">PrestoDB Integration</a>
<a class="b-nav__prestosql nav__item" href="./prestosql-guide.html">PrestoSQL Integration</a>
<a class="b-nav__flink nav__item" href="./flink-integration-guide.html">Flink Integration</a>
<a class="b-nav__scd nav__item" href="./scd-and-cdc-guide.html">SCD & CDC</a>
<a class="b-nav__faq nav__item" href="./faq.html">FAQ</a>
<a class="b-nav__contri nav__item" href="./how-to-contribute-to-apache-carbondata.html">Contribute</a>
<a class="b-nav__security nav__item" href="./security.html">Security</a>
<a class="b-nav__release nav__item" href="./release-guide.html">Release Guide</a>
</div>
</div>
<!-- One indicator per top-level entry of the side navigation, in the same order as the b-nav__* links. -->
<div class="navindicator">
<div class="b-nav__intro navindicator__item"></div>
<div class="b-nav__quickstart navindicator__item"></div>
<div class="b-nav__uses navindicator__item"></div>
<div class="b-nav__docs navindicator__item"></div>
<div class="b-nav__datamap navindicator__item"></div>
<div class="b-nav__api navindicator__item"></div>
<div class="b-nav__perf navindicator__item"></div>
<div class="b-nav__s3 navindicator__item"></div>
<div class="b-nav__indexserver navindicator__item"></div>
<div class="b-nav__prestodb navindicator__item"></div>
<div class="b-nav__prestosql navindicator__item"></div>
<div class="b-nav__flink navindicator__item"></div>
<div class="b-nav__scd navindicator__item"></div>
<div class="b-nav__faq navindicator__item"></div>
<div class="b-nav__contri navindicator__item"></div>
<div class="b-nav__security navindicator__item"></div>
<!-- NOTE(review): added to pair with the b-nav__release link, which previously had no indicator; confirm against the CSS/JS that consumes these. -->
<div class="b-nav__release navindicator__item"></div>
</div>
</nav>
</div>
<div class="mdcontent">
<section>
<div style="padding:10px 15px;">
<div id="viewpage" name="viewpage">
<div class="row">
<div class="col-sm-12 col-md-12">
<div>
<h1>
<a id="sdk-guide" class="anchor" href="#sdk-guide" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>SDK Guide</h1>
<p>CarbonData provides an SDK to facilitate:</p>
<ol>
<li><a href="#sdk-writer">Writing carbondata files from other applications that do not use Spark</a></li>
<li><a href="#sdk-reader">Reading carbondata files from other applications that do not use Spark</a></li>
</ol>
<h1>
<a id="sdk-writer" class="anchor" href="#sdk-writer" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>SDK Writer</h1>
<p>The carbon jars package contains carbondata-sdk-x.x.x-SNAPSHOT.jar, which includes the SDK writer and reader.
To use the SDK, in addition to carbondata-sdk-x.x.x-SNAPSHOT.jar the user also needs
carbondata-core-x.x.x-SNAPSHOT.jar, carbondata-common-x.x.x-SNAPSHOT.jar,
carbondata-format-x.x.x-SNAPSHOT.jar, carbondata-hadoop-x.x.x-SNAPSHOT.jar and carbondata-processing-x.x.x-SNAPSHOT.jar.
The user can also use carbondata-sdk.jar directly.</p>
<p>The SDK writer writes carbondata and carbonindex files at a given path.
An external client can use this writer to convert data in other formats, or live data, into carbondata and index files.
The SDK writer output contains only carbondata and carbonindex files; no metadata folder is present.</p>
<h2>
<a id="quick-example" class="anchor" href="#quick-example" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Quick example</h2>
<h3>
<a id="example-with-csv-format" class="anchor" href="#example-with-csv-format" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example with csv format</h3>
<div class="highlight highlight-source-java"><pre><span class="pl-k">import</span> <span class="pl-smi">java.io.IOException</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.metadata.datatype.DataTypes</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.util.CarbonProperties</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.CarbonWriter</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.CarbonWriterBuilder</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.metadata.datatype.Field</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.Schema</span>;
<span class="pl-k">public</span> <span class="pl-k">class</span> <span class="pl-en">TestSdk</span> {
<span class="pl-c"><span class="pl-c">//</span> pass true or false while executing the main to use offheap memory or not</span>
<span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">main</span>(<span class="pl-k">String</span>[] <span class="pl-v">args</span>) <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
<span class="pl-k">if</span> (args<span class="pl-k">.</span>length <span class="pl-k">&gt;</span> <span class="pl-c1">0</span> <span class="pl-k">&amp;&amp;</span> args[<span class="pl-c1">0</span>] <span class="pl-k">!=</span> <span class="pl-c1">null</span>) {
testSdkWriter(args[<span class="pl-c1">0</span>]);
} <span class="pl-k">else</span> {
testSdkWriter(<span class="pl-s"><span class="pl-pds">"</span>true<span class="pl-pds">"</span></span>);
}
}
<span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">testSdkWriter</span>(<span class="pl-smi">String</span> <span class="pl-v">enableOffheap</span>) <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
<span class="pl-smi">String</span> path <span class="pl-k">=</span> <span class="pl-s"><span class="pl-pds">"</span>./target/testCSVSdkWriter<span class="pl-pds">"</span></span>;
<span class="pl-k">Field</span>[] fields <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>[<span class="pl-c1">2</span>];
fields[<span class="pl-c1">0</span>] <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>(<span class="pl-s"><span class="pl-pds">"</span>name<span class="pl-pds">"</span></span>, <span class="pl-smi">DataTypes</span><span class="pl-c1"><span class="pl-k">.</span>STRING</span>);
fields[<span class="pl-c1">1</span>] <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>(<span class="pl-s"><span class="pl-pds">"</span>age<span class="pl-pds">"</span></span>, <span class="pl-smi">DataTypes</span><span class="pl-c1"><span class="pl-k">.</span>INT</span>);
<span class="pl-smi">Schema</span> schema <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Schema</span>(fields);
<span class="pl-smi">CarbonProperties</span><span class="pl-k">.</span>getInstance()<span class="pl-k">.</span>addProperty(<span class="pl-s"><span class="pl-pds">"</span>enable.offheap.sort<span class="pl-pds">"</span></span>, enableOffheap);
<span class="pl-smi">CarbonWriterBuilder</span> builder <span class="pl-k">=</span> <span class="pl-smi">CarbonWriter</span><span class="pl-k">.</span>builder()<span class="pl-k">.</span>outputPath(path)<span class="pl-k">.</span>withCsvInput(schema)<span class="pl-k">.</span>writtenBy(<span class="pl-s"><span class="pl-pds">"</span>SDK<span class="pl-pds">"</span></span>);
<span class="pl-smi">CarbonWriter</span> writer <span class="pl-k">=</span> builder<span class="pl-k">.</span>build();
<span class="pl-k">int</span> rows <span class="pl-k">=</span> <span class="pl-c1">5</span>;
<span class="pl-k">for</span> (<span class="pl-k">int</span> i <span class="pl-k">=</span> <span class="pl-c1">0</span>; i <span class="pl-k">&lt;</span> rows; i<span class="pl-k">++</span>) {
writer<span class="pl-k">.</span>write(<span class="pl-k">new</span> <span class="pl-smi">String</span>[] { <span class="pl-s"><span class="pl-pds">"</span>robot<span class="pl-pds">"</span></span> <span class="pl-k">+</span> (i <span class="pl-k">%</span> <span class="pl-c1">10</span>), <span class="pl-smi">String</span><span class="pl-k">.</span>valueOf(i) });
}
writer<span class="pl-k">.</span>close();
}
}</pre></div>
<h3>
<a id="example-with-avro-format" class="anchor" href="#example-with-avro-format" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example with Avro format</h3>
<div class="highlight highlight-source-java"><pre><span class="pl-k">import</span> <span class="pl-smi">java.io.IOException</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.metadata.datatype.DataTypes</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.AvroCarbonWriter</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.CarbonWriter</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.metadata.datatype.Field</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.avro.generic.GenericData</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.commons.lang.CharEncoding</span>;
<span class="pl-k">import</span> <span class="pl-smi">tech.allegro.schema.json2avro.converter.JsonAvroConverter</span>;
<span class="pl-k">public</span> <span class="pl-k">class</span> <span class="pl-en">TestSdkAvro</span> {
<span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">main</span>(<span class="pl-k">String</span>[] <span class="pl-v">args</span>) <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
testSdkWriter();
}
<span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">testSdkWriter</span>() <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
<span class="pl-smi">String</span> path <span class="pl-k">=</span> <span class="pl-s"><span class="pl-pds">"</span>./AvroCarbonWriterSuiteWriteFiles<span class="pl-pds">"</span></span>;
<span class="pl-c"><span class="pl-c">//</span> Avro schema</span>
<span class="pl-smi">String</span> avroSchema <span class="pl-k">=</span>
<span class="pl-s"><span class="pl-pds">"</span>{<span class="pl-pds">"</span></span> <span class="pl-k">+</span>
<span class="pl-s"><span class="pl-pds">"</span> <span class="pl-cce">\"</span>type<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>record<span class="pl-cce">\"</span>,<span class="pl-pds">"</span></span> <span class="pl-k">+</span>
<span class="pl-s"><span class="pl-pds">"</span> <span class="pl-cce">\"</span>name<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>Acme<span class="pl-cce">\"</span>,<span class="pl-pds">"</span></span> <span class="pl-k">+</span>
<span class="pl-s"><span class="pl-pds">"</span> <span class="pl-cce">\"</span>fields<span class="pl-cce">\"</span> : [<span class="pl-pds">"</span></span>
<span class="pl-k">+</span> <span class="pl-s"><span class="pl-pds">"</span>{ <span class="pl-cce">\"</span>name<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>fname<span class="pl-cce">\"</span>, <span class="pl-cce">\"</span>type<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>string<span class="pl-cce">\"</span> },<span class="pl-pds">"</span></span>
<span class="pl-k">+</span> <span class="pl-s"><span class="pl-pds">"</span>{ <span class="pl-cce">\"</span>name<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>age<span class="pl-cce">\"</span>, <span class="pl-cce">\"</span>type<span class="pl-cce">\"</span> : <span class="pl-cce">\"</span>int<span class="pl-cce">\"</span> }]<span class="pl-pds">"</span></span> <span class="pl-k">+</span>
<span class="pl-s"><span class="pl-pds">"</span>}<span class="pl-pds">"</span></span>;
<span class="pl-smi">String</span> json <span class="pl-k">=</span> <span class="pl-s"><span class="pl-pds">"</span>{<span class="pl-cce">\"</span>fname<span class="pl-cce">\"</span>:<span class="pl-cce">\"</span>bob<span class="pl-cce">\"</span>, <span class="pl-cce">\"</span>age<span class="pl-cce">\"</span>:10}<span class="pl-pds">"</span></span>;
<span class="pl-c"><span class="pl-c">//</span> conversion to GenericData.Record</span>
<span class="pl-smi">JsonAvroConverter</span> converter <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">JsonAvroConverter</span>();
<span class="pl-smi">GenericData</span><span class="pl-k">.</span><span class="pl-smi">Record</span> record <span class="pl-k">=</span> converter<span class="pl-k">.</span>convertToGenericDataRecord(
json<span class="pl-k">.</span>getBytes(<span class="pl-smi">CharEncoding</span><span class="pl-c1"><span class="pl-k">.</span>UTF_8</span>), <span class="pl-k">new</span> <span class="pl-smi">org.apache.avro<span class="pl-k">.</span>Schema</span>.<span class="pl-smi">Parser</span>()<span class="pl-k">.</span>parse(avroSchema));
<span class="pl-k">try</span> {
<span class="pl-smi">CarbonWriter</span> writer <span class="pl-k">=</span> <span class="pl-smi">CarbonWriter</span><span class="pl-k">.</span>builder()
.outputPath(path)
.withAvroInput(<span class="pl-k">new</span> <span class="pl-smi">org.apache.avro<span class="pl-k">.</span>Schema</span>.<span class="pl-smi">Parser</span>()<span class="pl-k">.</span>parse(avroSchema))<span class="pl-k">.</span>writtenBy(<span class="pl-s"><span class="pl-pds">"</span>SDK<span class="pl-pds">"</span></span>)<span class="pl-k">.</span>build();
<span class="pl-k">for</span> (<span class="pl-k">int</span> i <span class="pl-k">=</span> <span class="pl-c1">0</span>; i <span class="pl-k">&lt;</span> <span class="pl-c1">100</span>; i<span class="pl-k">++</span>) {
writer<span class="pl-k">.</span>write(record);
}
writer<span class="pl-k">.</span>close();
} <span class="pl-k">catch</span> (<span class="pl-smi">Exception</span> e) {
e<span class="pl-k">.</span>printStackTrace();
}
}
}</pre></div>
<h3>
<a id="example-with-json-format" class="anchor" href="#example-with-json-format" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example with Json format</h3>
<!-- JSON writer sample. Both methods declare IOException, matching the CSV sample, which makes the same build()/write()/close() calls. -->
<div class="highlight highlight-source-java"><pre><span class="pl-k">import</span> <span class="pl-smi">java.io.IOException</span>;

<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.common.exceptions.sql.InvalidLoadOptionException</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.metadata.datatype.DataTypes</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.util.CarbonProperties</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.CarbonWriter</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.CarbonWriterBuilder</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.core.metadata.datatype.Field</span>;
<span class="pl-k">import</span> <span class="pl-smi">org.apache.carbondata.sdk.file.Schema</span>;

<span class="pl-k">public</span> <span class="pl-k">class</span> <span class="pl-en">TestSdkJson</span> {

  <span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">main</span>(<span class="pl-k">String</span>[] <span class="pl-v">args</span>) <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
    testJsonSdkWriter();
  }

  <span class="pl-k">public</span> <span class="pl-k">static</span> <span class="pl-k">void</span> <span class="pl-en">testJsonSdkWriter</span>() <span class="pl-k">throws</span> <span class="pl-smi">IOException</span>, <span class="pl-smi">InvalidLoadOptionException</span> {
    <span class="pl-smi">String</span> path <span class="pl-k">=</span> <span class="pl-s"><span class="pl-pds">"</span>./target/testJsonSdkWriter<span class="pl-pds">"</span></span>;

    <span class="pl-k">Field</span>[] fields <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>[<span class="pl-c1">2</span>];
    fields[<span class="pl-c1">0</span>] <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>(<span class="pl-s"><span class="pl-pds">"</span>name<span class="pl-pds">"</span></span>, <span class="pl-smi">DataTypes</span><span class="pl-c1"><span class="pl-k">.</span>STRING</span>);
    fields[<span class="pl-c1">1</span>] <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Field</span>(<span class="pl-s"><span class="pl-pds">"</span>age<span class="pl-pds">"</span></span>, <span class="pl-smi">DataTypes</span><span class="pl-c1"><span class="pl-k">.</span>INT</span>);

    <span class="pl-smi">Schema</span> <span class="pl-smi">CarbonSchema</span> <span class="pl-k">=</span> <span class="pl-k">new</span> <span class="pl-smi">Schema</span>(fields);

    <span class="pl-smi">CarbonWriterBuilder</span> builder <span class="pl-k">=</span> <span class="pl-smi">CarbonWriter</span><span class="pl-k">.</span>builder()<span class="pl-k">.</span>outputPath(path)<span class="pl-k">.</span>withJsonInput(<span class="pl-smi">CarbonSchema</span>)<span class="pl-k">.</span>writtenBy(<span class="pl-s"><span class="pl-pds">"</span>SDK<span class="pl-pds">"</span></span>);

    <span class="pl-c"><span class="pl-c">//</span> initialize json writer with carbon schema</span>
    <span class="pl-smi">CarbonWriter</span> writer <span class="pl-k">=</span> builder<span class="pl-k">.</span>build();
    <span class="pl-c"><span class="pl-c">//</span> one row of json Data as String</span>
    <span class="pl-smi">String</span> <span class="pl-smi">JsonRow</span> <span class="pl-k">=</span> <span class="pl-s"><span class="pl-pds">"</span>{<span class="pl-cce">\"</span>name<span class="pl-cce">\"</span>:<span class="pl-cce">\"</span>abcd<span class="pl-cce">\"</span>, <span class="pl-cce">\"</span>age<span class="pl-cce">\"</span>:10}<span class="pl-pds">"</span></span>;

    <span class="pl-k">int</span> rows <span class="pl-k">=</span> <span class="pl-c1">5</span>;
    <span class="pl-k">for</span> (<span class="pl-k">int</span> i <span class="pl-k">=</span> <span class="pl-c1">0</span>; i <span class="pl-k">&lt;</span> rows; i<span class="pl-k">++</span>) {
      writer<span class="pl-k">.</span>write(<span class="pl-smi">JsonRow</span>);
    }
    writer<span class="pl-k">.</span>close();
  }
}</pre></div>
<h2>
<a id="datatypes-mapping" class="anchor" href="#datatypes-mapping" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Datatypes Mapping</h2>
<p>SQL data types and Avro data types are each mapped to SDK data types. The mapping is as follows:</p>
<table>
<!-- Column headers of the datatype mapping table; scope="col" ties each header to its column for assistive technology. -->
<thead>
<tr>
<th scope="col">SQL DataTypes</th>
<th scope="col">Avro DataTypes</th>
<th scope="col">Mapped SDK DataTypes</th>
</tr>
</thead>
<tbody>
<tr>
<td>BOOLEAN</td>
<td>BOOLEAN</td>
<td>DataTypes.BOOLEAN</td>
</tr>
<tr>
<td>SMALLINT</td>
<td>-</td>
<td>DataTypes.SHORT</td>
</tr>
<tr>
<td>INTEGER</td>
<td>INTEGER</td>
<td>DataTypes.INT</td>
</tr>
<tr>
<td>BIGINT</td>
<td>LONG</td>
<td>DataTypes.LONG</td>
</tr>
<tr>
<td>DOUBLE</td>
<td>DOUBLE</td>
<td>DataTypes.DOUBLE</td>
</tr>
<tr>
<td>VARCHAR</td>
<td>-</td>
<td>DataTypes.STRING</td>
</tr>
<tr>
<td>BINARY</td>
<td>-</td>
<td>DataTypes.BINARY</td>
</tr>
<tr>
<td>FLOAT</td>
<td>FLOAT</td>
<td>DataTypes.FLOAT</td>
</tr>
<tr>
<td>BYTE</td>
<td>-</td>
<td>DataTypes.BYTE</td>
</tr>
<tr>
<td>DATE</td>
<td>DATE</td>
<td>DataTypes.DATE</td>
</tr>
<tr>
<td>TIMESTAMP</td>
<td>-</td>
<td>DataTypes.TIMESTAMP</td>
</tr>
<tr>
<td>STRING</td>
<td>STRING</td>
<td>DataTypes.STRING</td>
</tr>
<tr>
<td>DECIMAL</td>
<td>DECIMAL</td>
<td>DataTypes.createDecimalType(precision, scale)</td>
</tr>
<tr>
<td>ARRAY</td>
<td>ARRAY</td>
<td>DataTypes.createArrayType(elementType)</td>
</tr>
<tr>
<td>STRUCT</td>
<td>RECORD</td>
<td>DataTypes.createStructType(fields)</td>
</tr>
<tr>
<td>-</td>
<td>ENUM</td>
<td>DataTypes.STRING</td>
</tr>
<tr>
<td>-</td>
<td>UNION</td>
<td>DataTypes.createStructType(types)</td>
</tr>
<tr>
<td>-</td>
<td>MAP</td>
<td>DataTypes.createMapType(keyType, valueType)</td>
</tr>
<tr>
<td>-</td>
<td>TimeMillis</td>
<td>DataTypes.INT</td>
</tr>
<tr>
<td>-</td>
<td>TimeMicros</td>
<td>DataTypes.LONG</td>
</tr>
<tr>
<td>-</td>
<td>TimestampMillis</td>
<td>DataTypes.TIMESTAMP</td>
</tr>
<tr>
<td>-</td>
<td>TimestampMicros</td>
<td>DataTypes.TIMESTAMP</td>
</tr>
</tbody>
</table>
<p><strong>NOTE:</strong></p>
<ol>
<li>
<p>Carbon supports the following logical types of Avro:</p>
<ol type="a">
<li>
<p>Date</p>
<p>The date logical type represents a date within the calendar, with no reference to a particular time zone or time of day.
A date logical type annotates an Avro int, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar).</p>
</li>
<li>
<p>Timestamp (millisecond precision)</p>
<p>The timestamp-millis logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond.
A timestamp-millis logical type annotates an Avro long, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC.</p>
</li>
<li>
<p>Timestamp (microsecond precision)</p>
<p>The timestamp-micros logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one microsecond.
A timestamp-micros logical type annotates an Avro long, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC.</p>
</li>
<li>
<p>Decimal</p>
<p>The decimal logical type represents an arbitrary-precision signed decimal number of the form <em>unscaled × 10<sup>-scale</sup></em>.
A decimal logical type annotates Avro bytes or fixed types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute.</p>
</li>
<li>
<p>Time (millisecond precision)</p>
<p>The time-millis logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond.
A time-millis logical type annotates an Avro int, where the int stores the number of milliseconds after midnight, 00:00:00.000.</p>
</li>
<li>
<p>Time (microsecond precision)</p>
<p>The time-micros logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond.
A time-micros logical type annotates an Avro long, where the long stores the number of microseconds after midnight, 00:00:00.000000.</p>
</li>
</ol>
<p>Currently, the values of logical types are not validated by CarbonData.
It is expected that the Avro record passed by the user has already been validated by Avro record generator tools.</p>
</li>
<li>
<p>If the string data is more than 32K in length, use withTableProperties() with "long_string_columns" property
or directly use DataTypes.VARCHAR if it is carbon schema.</p>
</li>
<li>
<p>Avro Bytes, Fixed and Duration data types are not yet supported.</p>
</li>
</ol>
<h2>
<a id="run-sql-on-files-directly" class="anchor" href="#run-sql-on-files-directly" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Run SQL on files directly</h2>
<p>Instead of creating a table and querying it, you can also query the file directly with SQL.</p>
<h3>
<a id="example" class="anchor" href="#example" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example</h3>
<pre><code>SELECT * FROM carbonfile.`$Path`
</code></pre>
<p>Find example code at <a href="https://github.com/apache/carbondata/blob/master/examples/spark/src/main/scala/org/apache/carbondata/examples/DirectSQLExample.scala" target=_blank>DirectSQLExample</a> in the CarbonData repo.</p>
<h2>
<a id="api-list" class="anchor" href="#api-list" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>API List</h2>
<h3>
<a id="class-orgapachecarbondatasdkfilecarbonwriterbuilder" class="anchor" href="#class-orgapachecarbondatasdkfilecarbonwriterbuilder" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.CarbonWriterBuilder</h3>
<pre><code>/**
* Sets the output path of the writer builder
*
* @param path is the absolute path where output files are written
* This method must be called when building CarbonWriterBuilder
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder outputPath(String path);
</code></pre>
<pre><code>/**
* To set the timestamp in the carbondata and carbonindex index files
*
* @param UUID is a timestamp to be used in the carbondata and carbonindex index files.
* By default set to zero.
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder uniqueIdentifier(long UUID);
</code></pre>
<pre><code>/**
* To set the carbondata file size in MB between 1MB-2048MB
*
* @param blockSize is size in MB between 1MB to 2048 MB
* default value is 1024 MB
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder withBlockSize(int blockSize);
</code></pre>
<pre><code>/**
* To set the blocklet size of carbondata file
*
* @param blockletSize is blocklet size in MB
* default value is 64 MB
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder withBlockletSize(int blockletSize);
</code></pre>
<pre><code>/**
* @param enableLocalDictionary enable local dictionary , default is false
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder enableLocalDictionary(boolean enableLocalDictionary);
</code></pre>
<pre><code>/**
* @param localDictionaryThreshold is localDictionaryThreshold,default is 10000
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder localDictionaryThreshold(int localDictionaryThreshold) ;
</code></pre>
<pre><code>/**
* Sets the list of columns that needs to be in sorted order
*
* @param sortColumns is a string array of columns that needs to be sorted.
* If it is null or empty array, no columns are selected for sorting.
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder sortBy(String[] sortColumns);
</code></pre>
<pre><code>/**
* Sets the taskNo for the writer. SDKs concurrently running
* will set taskNo in order to avoid conflicts in file's name during write.
*
* @param taskNo is the TaskNo user wants to specify.
* by default it is system time in nano seconds.
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder taskNo(long taskNo);
</code></pre>
<pre><code>/**
* To support the load options for sdk writer
* @param options key,value pair of load options.
* supported keys values are
* a. bad_records_logger_enable -- true (write into separate logs), false
* b. bad_records_action -- FAIL, FORCE, IGNORE, REDIRECT
* c. bad_record_path -- path
* d. dateformat -- same as JAVA SimpleDateFormat
* e. timestampformat -- same as JAVA SimpleDateFormat
* f. complex_delimiter_level_1 -- value to Split the complexTypeData
* g. complex_delimiter_level_2 -- value to Split the nested complexTypeData
* h. quotechar
* i. escapechar
*
* Default values are as follows.
*
* a. bad_records_logger_enable -- "false"
* b. bad_records_action -- "FAIL"
* c. bad_record_path -- ""
* d. dateformat -- "" , uses from carbon.properties file
* e. timestampformat -- "", uses from carbon.properties file
* f. complex_delimiter_level_1 -- "$"
* g. complex_delimiter_level_2 -- ":"
* h. quotechar -- "\""
* i. escapechar -- "\\"
*
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder withLoadOptions(Map&lt;String, String&gt; options);
</code></pre>
<pre><code>/**
* To support the table properties for sdk writer
*
* @param options key,value pair of create table properties.
* supported keys values are
* a. table_blocksize -- [1-2048] values in MB. Default value is 1024
* b. table_blocklet_size -- values in MB. Default value is 64 MB
* c. local_dictionary_threshold -- positive value, default is 10000
* d. local_dictionary_enable -- true / false. Default is false
* e. sort_columns -- comma separated column. "c1,c2". Default no columns are sorted.
* f. sort_scope -- "local_sort", "no_sort". default value is "no_sort"
* g. long_string_columns -- comma separated string columns which are more than 32k length.
* default value is null.
* h. inverted_index -- comma separated string columns for which inverted index needs to be
* generated
* i. table_page_size_inmb -- [1-1755] MB.
*
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder withTableProperties(Map&lt;String, String&gt; options);
</code></pre>
<pre><code>/**
* To make sdk writer thread safe.
*
* @param numOfThreads should be the number of threads in which the writer is called in a multi-thread scenario
* default sdk writer is not thread safe.
* can use one writer instance in one thread only.
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder withThreadSafe(short numOfThreads);
</code></pre>
<pre><code>/**
* To support hadoop configuration
*
* @param conf hadoop configuration support, can set s3a AK,SK,end point and other conf with this
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder withHadoopConf(Configuration conf);
</code></pre>
<pre><code>/**
* Updates the hadoop configuration with the given key value
*
* @param key key word
* @param value value
* @return this object
*/
public CarbonWriterBuilder withHadoopConf(String key, String value);
</code></pre>
<pre><code>/**
* To build a {@link CarbonWriter}, which accepts row in CSV format
*
* @param schema carbon Schema object {org.apache.carbondata.sdk.file.Schema}
* @return CarbonWriterBuilder
*/
public CarbonWriterBuilder withCsvInput(Schema schema);
</code></pre>
<pre><code>/**
* To build a {@link CarbonWriter}, which accepts Avro object
*
* @param avroSchema avro Schema object {org.apache.avro.Schema}
* @return CarbonWriterBuilder
*/
public CarbonWriterBuilder withAvroInput(org.apache.avro.Schema avroSchema);
</code></pre>
<pre><code>/**
* To build a {@link CarbonWriter}, which accepts Json object
*
* @param carbonSchema carbon Schema object
* @return CarbonWriterBuilder
*/
public CarbonWriterBuilder withJsonInput(Schema carbonSchema);
</code></pre>
<pre><code>/**
* To support writing the ApplicationName which is writing the carbondata file
* This is a mandatory API to call, else the build() call will fail with error.
* @param appName application name which is writing the carbondata files
* @return CarbonWriterBuilder
*/
public CarbonWriterBuilder writtenBy(String appName);
</code></pre>
<pre><code>/**
* Sets the list of columns for which inverted index needs to be generated
*
* @param invertedIndexColumns is a string array of columns for which inverted index needs to
* be generated.
* If it is null or an empty array, inverted index will be generated for none of the columns
* @return updated CarbonWriterBuilder
*/
public CarbonWriterBuilder invertedIndexFor(String[] invertedIndexColumns);
</code></pre>
<pre><code>/**
* Build a {@link CarbonWriter}
* This writer is not thread safe,
* use withThreadSafe() configuration in multi thread environment
*
* @return CarbonWriter {AvroCarbonWriter/CSVCarbonWriter/JsonCarbonWriter based on Input Type }
* @throws IOException
* @throws InvalidLoadOptionException
*/
public CarbonWriter build() throws IOException, InvalidLoadOptionException;
</code></pre>
<pre><code>/**
* Configure Row Record Reader for reading.
*
*/
public CarbonReaderBuilder withRowRecordReader();
</code></pre>
<h3>
<a id="class-orgapachecarbondatasdkfilecarbonwriter" class="anchor" href="#class-orgapachecarbondatasdkfilecarbonwriter" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.CarbonWriter</h3>
<pre><code>/**
* Create a {@link CarbonWriterBuilder} to build a {@link CarbonWriter}
*/
public static CarbonWriterBuilder builder() {
return new CarbonWriterBuilder();
}
</code></pre>
<pre><code>/**
* Write an object to the file, the format of the object depends on the implementation
* If AvroCarbonWriter, object is of type org.apache.avro.generic.GenericData.Record,
* which is one row of data.
* If CSVCarbonWriter, object is of type String[], which is one row of data
* If JsonCarbonWriter, object is of type String, which is one row of json
*
* @param object
* @throws IOException
*/
public abstract void write(Object object) throws IOException;
</code></pre>
<pre><code>/**
* Flush and close the writer
*/
public abstract void close() throws IOException;
</code></pre>
<h3>
<a id="class-orgapachecarbondatacoremetadatadatatypefield" class="anchor" href="#class-orgapachecarbondatacoremetadatadatatypefield" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.core.metadata.datatype.Field</h3>
<pre><code>/**
* Field Constructor
*
* @param name name of the field
* @param type datatype of field, specified in strings.
*/
public Field(String name, String type);
</code></pre>
<pre><code>/**
* Field constructor
*
* @param name name of the field
* @param type datatype of the field of class DataType
*/
public Field(String name, DataType type);
</code></pre>
<h3>
<a id="class-orgapachecarbondatasdkfileschema" class="anchor" href="#class-orgapachecarbondatasdkfileschema" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.Schema</h3>
<pre><code>/**
* Construct a schema with fields
*
* @param fields
*/
public Schema(Field[] fields);
</code></pre>
<pre><code>/**
* Create a Schema using JSON string, for example:
* [
* {"name":"string"},
* {"age":"int"}
* ]
* @param json specified as string
* @return Schema
*/
public static Schema parseJson(String json);
</code></pre>
<h3>
<a id="class-orgapachecarbondatasdkfileavrocarbonwriter" class="anchor" href="#class-orgapachecarbondatasdkfileavrocarbonwriter" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.AvroCarbonWriter</h3>
<pre><code>/**
* Converts avro schema to carbon schema, required by carbonWriter
*
* @param avroSchemaString json formatted avro schema as string
* @return carbon sdk schema
*/
public static org.apache.carbondata.sdk.file.Schema getCarbonSchemaFromAvroSchema(String avroSchemaString);
</code></pre>
<h1>
<a id="sdk-reader" class="anchor" href="#sdk-reader" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>SDK Reader</h1>
<p>This SDK reader reads CarbonData files and carbonindex files at a given path.
External clients can make use of this reader to read CarbonData files without CarbonSession.</p>
<h2>
<a id="quick-example-1" class="anchor" href="#quick-example-1" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Quick example</h2>
<pre><code>// 1. Create carbon reader
String path = "./testWriteFiles";
CarbonReader reader = CarbonReader
.builder(path, "_temp")
.projection(new String[]{"stringField", "shortField", "intField", "longField",
"doubleField", "boolField", "dateField", "timeField", "decimalField"})
.build();
// 2. Read data
long day = 24L * 3600 * 1000;
int i = 0;
while (reader.hasNext()) {
Object[] row = (Object[]) reader.readNextRow();
System.out.println(String.format("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t",
i, row[0], row[1], row[2], row[3], row[4], row[5],
new Date((day * ((int) row[6]))), new Timestamp((long) row[7] / 1000), row[8]
));
i++;
}
// 3. Close this reader
reader.close();
</code></pre>
<p>Find example code at <a href="https://github.com/apache/carbondata/blob/master/examples/spark/src/main/java/org/apache/carbondata/examples/sdk/CarbonReaderExample.java" target="_blank" rel="noopener noreferrer">CarbonReaderExample</a> in the CarbonData repo.</p>
<p>SDK reader also supports reading carbondata files and filling them into Apache Arrow vectors.
Find example code at <a href="https://github.com/apache/carbondata/blob/master/sdk/sdk/src/test/java/org/apache/carbondata/sdk/file/ArrowCarbonReaderTest.java" target="_blank" rel="noopener noreferrer">ArrowCarbonReaderTest</a> in the CarbonData repo.</p>
<h2>
<a id="api-list-1" class="anchor" href="#api-list-1" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>API List</h2>
<h3>
<a id="class-orgapachecarbondatasdkfilecarbonreader" class="anchor" href="#class-orgapachecarbondatasdkfilecarbonreader" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.CarbonReader</h3>
<pre><code>/**
* Return a new {@link CarbonReaderBuilder} instance
*
* @param tablePath table store path
* @param tableName table name
* @return CarbonReaderBuilder object
*/
public static CarbonReaderBuilder builder(String tablePath, String tableName);
</code></pre>
<pre><code>/**
* Return a new CarbonReaderBuilder instance
* Default value of table name is table + tablePath + time
*
* @param tablePath table path
* @return CarbonReaderBuilder object
*/
public static CarbonReaderBuilder builder(String tablePath);
</code></pre>
<pre><code>/**
* Breaks the list of CarbonRecordReader in CarbonReader into multiple
* CarbonReader objects, each iterating through some 'carbondata' files
* and return that list of CarbonReader objects
*
* If the no. of files is greater than maxSplits, then break the
* CarbonReader into maxSplits splits, with each split iterating
* through &gt;= 1 file.
*
* If the no. of files is less than maxSplits, then return list of
* CarbonReader with size as the no. of files, with each CarbonReader
* iterating through exactly one file
*
* @param maxSplits: Int
* @return list of CarbonReader objects
*/
public List&lt;CarbonReader&gt; split(int maxSplits);
</code></pre>
<pre><code>/**
* Return true if has next row
*/
public boolean hasNext();
</code></pre>
<pre><code>/**
* Read and return next row object
*/
public T readNextRow();
</code></pre>
<pre><code>/**
* Read and return next batch row objects
*/
public Object[] readNextBatchRow();
</code></pre>
<pre><code>/**
* Close reader
*/
public void close();
</code></pre>
<h3>
<a id="class-orgapachecarbondatasdkfilearrowcarbonreader" class="anchor" href="#class-orgapachecarbondatasdkfilearrowcarbonreader" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.ArrowCarbonReader</h3>
<pre><code>/**
* Carbon reader will fill the arrow vector after reading the carbondata files.
* This arrow byte[] can be used to create arrow table and used for in memory analytics
* Note: create a reader at blocklet level, so that arrow byte[] will not exceed INT_MAX
*
* @param carbonSchema org.apache.carbondata.sdk.file.Schema
* @return Serialized byte array
* @throws Exception
*/
public byte[] readArrowBatch(Schema carbonSchema) throws Exception;
</code></pre>
<pre><code>/**
* Carbon reader will fill the arrow vector after reading the carbondata files.
* This arrow byte[] can be used to create arrow table and used for in memory analytics
* Note: create a reader at blocklet level, so that arrow byte[] will not exceed INT_MAX
* User need to close the VectorSchemaRoot after usage by calling VectorSchemaRoot.close()
*
* @param carbonSchema org.apache.carbondata.sdk.file.Schema
* @return Arrow VectorSchemaRoot
* @throws Exception
*/
public VectorSchemaRoot readArrowVectors(Schema carbonSchema) throws Exception;
</code></pre>
<pre><code>/**
* Carbon reader will fill the arrow vector after reading carbondata files.
* Here unsafe memory address will be returned instead of byte[],
* so that this address can be sent across java to python or c modules and
* can directly read the content from this unsafe memory
* Note: Create a carbon reader at blocklet level using CarbonReader.buildWithSplits(split) method,
* so that arrow byte[] will not exceed INT_MAX.
*
* @param carbonSchema org.apache.carbondata.sdk.file.Schema
* @return address of the unsafe memory where arrow buffer is stored
* @throws Exception
*/
public long readArrowBatchAddress(Schema carbonSchema) throws Exception;
</code></pre>
<pre><code>/**
* Free the unsafe memory allocated, if unsafe arrow batch is used.
*
* @param address address of the unsafe memory where arrow buffer is stored
*/
public void freeArrowBatchMemory(long address);
</code></pre>
<h3>
<a id="class-orgapachecarbondatasdkfilearrowarrowconverter" class="anchor" href="#class-orgapachecarbondatasdkfilearrowarrowconverter" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.arrow.ArrowConverter</h3>
<pre><code>/**
* To get the arrow vectors directly after filling from carbondata
*
* @return Arrow VectorSchemaRoot. which contains array of arrow vectors.
*/
public VectorSchemaRoot getArrowVectors() throws IOException;
</code></pre>
<pre><code>/**
* Utility API to convert back the arrow byte[] to arrow ArrowRecordBatch.
* User need to close the ArrowRecordBatch after usage by calling ArrowRecordBatch.close()
*
* @param batchBytes input byte array
* @param bufferAllocator arrow buffer allocator
* @return ArrowRecordBatch
* @throws IOException
*/
public static ArrowRecordBatch byteArrayToArrowBatch(byte[] batchBytes, BufferAllocator bufferAllocator) throws IOException;
</code></pre>
<h3>
<a id="class-orgapachecarbondatasdkfilecarbonreaderbuilder" class="anchor" href="#class-orgapachecarbondatasdkfilecarbonreaderbuilder" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.CarbonReaderBuilder</h3>
<pre><code>/**
* Construct a CarbonReaderBuilder with table path and table name
*
* @param tablePath table path
* @param tableName table name
*/
CarbonReaderBuilder(String tablePath, String tableName);
</code></pre>
<pre><code>/**
* Configure the projection column names of carbon reader
*
* @param projectionColumnNames projection column names
* @return CarbonReaderBuilder object
*/
public CarbonReaderBuilder projection(String[] projectionColumnNames);
</code></pre>
<pre><code>/**
* Configure the filter expression for carbon reader
*
* @param filterExpression filter expression
* @return CarbonReaderBuilder object
*/
public CarbonReaderBuilder filter(Expression filterExpression);
</code></pre>
<pre><code>/**
* Sets the batch size of records to read
*
* @param batch batch size
* @return updated CarbonReaderBuilder
*/
public CarbonReaderBuilder withBatch(int batch);
</code></pre>
<pre><code>/**
* To support hadoop configuration
*
* @param conf hadoop configuration support, can set s3a AK,SK,end point and other conf with this
* @return updated CarbonReaderBuilder
*/
public CarbonReaderBuilder withHadoopConf(Configuration conf);
</code></pre>
<pre><code>/**
* Updates the hadoop configuration with the given key value
*
* @param key key word
* @param value value
* @return this object
*/
public CarbonReaderBuilder withHadoopConf(String key, String value);
</code></pre>
<pre><code>/**
* Build CarbonReader
*
* @param &lt;T&gt;
* @return CarbonReader
* @throws IOException
* @throws InterruptedException
*/
public &lt;T&gt; CarbonReader&lt;T&gt; build() throws IOException, InterruptedException;
</code></pre>
<h3>
<a id="class-orgapachecarbondatasdkfilecarbonschemareader" class="anchor" href="#class-orgapachecarbondatasdkfilecarbonschemareader" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.CarbonSchemaReader</h3>
<pre><code>/**
* Read schema file and return the schema
*
* @param schemaFilePath complete path including schema file name
* @return schema object
* @throws IOException
*/
@Deprecated
public static Schema readSchemaInSchemaFile(String schemaFilePath);
</code></pre>
<pre><code>/**
* Read carbondata file and return the schema
*
* @param dataFilePath complete path including carbondata file name
* @return Schema object
*/
@Deprecated
public static Schema readSchemaInDataFile(String dataFilePath);
</code></pre>
<pre><code>/**
* Read carbonindex file and return the schema
*
* @param indexFilePath complete path including index file name
* @return schema object
* @throws IOException
*/
@Deprecated
public static Schema readSchemaInIndexFile(String indexFilePath);
</code></pre>
<pre><code>/**
* Read schema from path,
* path can be folder path, carbonindex file path, and carbondata file path
* and will not check all files schema
*
* @param path file/folder path
* @return schema
* @throws IOException
*/
public static Schema readSchema(String path);
</code></pre>
<pre><code>/**
* Read schema from path,
* path can be folder path, carbonindex file path, and carbondata file path
* and user can decide whether check all files schema
*
* @param path file/folder path
* @param validateSchema whether check all files schema
* @return schema
* @throws IOException
*/
public static Schema readSchema(String path, boolean validateSchema);
</code></pre>
<pre><code>/**
* Read schema from path,
* path can be folder path, carbonindex file path, and carbondata file path
* and will not check all files schema
*
* @param path file/folder path
* @param conf hadoop configuration support, can set s3a AK,SK,end point and other conf with this
* @return schema
* @throws IOException
*/
public static Schema readSchema(String path, Configuration conf);
</code></pre>
<pre><code>/**
* Read schema from path,
* path can be folder path, carbonindex file path, and carbondata file path
* and user can decide whether check all files schema
*
* @param path file/folder path
* @param validateSchema whether check all files schema
* @param conf hadoop configuration support, can set s3a AK,SK,
* end point and other conf with this
* @return schema
* @throws IOException
*/
public static Schema readSchema(String path, boolean validateSchema, Configuration conf);
</code></pre>
<pre><code>/**
* This method return the version details in formatted string by reading from carbondata file
* If application name is SDK_1.0.0 and this has written the carbondata file in carbondata 1.6 project version,
* then this API returns the String "SDK_1.0.0 in version: 1.6.0-SNAPSHOT"
*
* @param dataFilePath complete path including carbondata file name
* @return string with information of who has written this file in which carbondata project version
* @throws IOException
*/
public static String getVersionDetails(String dataFilePath);
</code></pre>
<h3>
<a id="class-orgapachecarbondatasdkfileschema-1" class="anchor" href="#class-orgapachecarbondatasdkfileschema-1" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.sdk.file.Schema</h3>
<pre><code>/**
* Construct a schema with fields
*
* @param fields
*/
public Schema(Field[] fields);
</code></pre>
<pre><code>/**
* Construct a schema with List&lt;ColumnSchema&gt;
*
* @param columnSchemaList column schema list
*/
public Schema(List&lt;ColumnSchema&gt; columnSchemaList);
</code></pre>
<pre><code>/**
* Create a Schema using JSON string, for example:
* [
* {"name":"string"},
* {"age":"int"}
* ]
* @param json specified as string
* @return Schema
*/
public static Schema parseJson(String json);
</code></pre>
<pre><code>/**
* Sort the schema order as original order
*
* @return Schema object
*/
public Schema asOriginOrder();
</code></pre>
<h3>
<a id="class-orgapachecarbondatacoremetadatadatatypefield-1" class="anchor" href="#class-orgapachecarbondatacoremetadatadatatypefield-1" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.core.metadata.datatype.Field</h3>
<pre><code>/**
* Field Constructor
*
* @param name name of the field
* @param type datatype of field, specified in strings.
*/
public Field(String name, String type);
</code></pre>
<pre><code>/**
* Construct Field from ColumnSchema
*
* @param columnSchema ColumnSchema, Store the information about the column meta data
*/
public Field(ColumnSchema columnSchema);
</code></pre>
<p>Find S3 example code at <a href="https://github.com/apache/carbondata/blob/master/examples/spark/src/main/java/org/apache/carbondata/examples/sdk/SDKS3Example.java" target="_blank" rel="noopener noreferrer">SDKS3Example</a> in the CarbonData repo.</p>
<h1>
<a id="common-api-list-for-carbonreader-and-carbonwriter" class="anchor" href="#common-api-list-for-carbonreader-and-carbonwriter" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Common API List for CarbonReader and CarbonWriter</h1>
<h3>
<a id="class-orgapachecarbondatacoreutilcarbonproperties" class="anchor" href="#class-orgapachecarbondatacoreutilcarbonproperties" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Class org.apache.carbondata.core.util.CarbonProperties</h3>
<pre><code>/**
* This method will be responsible to get the instance of CarbonProperties class
*
* @return carbon properties instance
*/
public static CarbonProperties getInstance();
</code></pre>
<pre><code>/**
* This method will be used to add a new property
*
* @param key is a property name to set for carbon.
* @param value is valid parameter corresponding to property.
* @return CarbonProperties object
*/
public CarbonProperties addProperty(String key, String value);
</code></pre>
<pre><code>/**
* This method will be used to get the property value. If property is not
* present, then it will return null.
*
* @param key is a property name to get user specified value.
* @return properties value for corresponding key. If not set, then returns null.
*/
public String getProperty(String key);
</code></pre>
<pre><code>/**
* This method will be used to get the property value. If property is not
* present, then it will return the default value.
*
* @param key is a property name to get user specified value.
* @param defaultValue value to be returned by the function if the corresponding key is not set.
* @return properties value for corresponding key. If not set, then returns specified defaultValue.
*/
public String getProperty(String key, String defaultValue);
</code></pre>
<p>Reference : <a href="./configuration-parameters.html">list of carbon properties</a></p>
<script>
$(function() {
// Show selected style on nav item
$('.b-nav__api').addClass('selected');
if (!$('.b-nav__api').parent().hasClass('nav__item__with__subs--expanded')) {
// Display api subnav items
$('.b-nav__api').parent().toggleClass('nav__item__with__subs--expanded');
}
});
</script></div>
</div>
</div>
</div>
<div class="doc-footer">
<a href="#top" class="scroll-top">Top</a>
</div>
</div>
</section>
</div>
</div>
</div>
</section><!-- End systemblock part -->
<script src="js/custom.js"></script>
</body>
</html>