blob: 1103071affcc20ac57637bd6126107d66ada9a47 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href='images/favicon.ico' rel='shortcut icon' type='image/x-icon'>
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>CarbonData</title>
<style>
</style>
<!-- Bootstrap -->
<link rel="stylesheet" href="css/bootstrap.min.css">
<link href="css/style.css" rel="stylesheet">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<script src="js/jquery.min.js"></script>
<script src="js/bootstrap.min.js"></script>
<script defer src="https://use.fontawesome.com/releases/v5.0.8/js/all.js"></script>
</head>
<body>
<header>
<nav class="navbar navbar-default navbar-custom cd-navbar-wrapper">
<div class="container">
<div class="navbar-header">
<button aria-controls="navbar" aria-expanded="false" data-target="#navbar" data-toggle="collapse"
class="navbar-toggle collapsed" type="button">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="index.html" class="logo">
<img src="images/CarbonDataLogo.png" alt="CarbonData logo" title="CarbonData logo"/>
</a>
</div>
<div class="navbar-collapse collapse cd_navcontnt" id="navbar">
<ul class="nav navbar-nav navbar-right navlist-custom">
<!-- Icon-only home link (carries hidden-xs); the icon itself is aria-hidden, so aria-label supplies the accessible name -->
<li><a href="index.html" class="hidden-xs" aria-label="Home"><i class="fa fa-home" aria-hidden="true"></i> </a>
</li>
<li><a href="index.html" class="hidden-lg hidden-md hidden-sm">Home</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle " data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false"> Download <span class="caret"></span></a>
<ul class="dropdown-menu">
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.2.0/"
target="_blank">Apache CarbonData 2.2.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.1.1/"
target="_blank">Apache CarbonData 2.1.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.1.0/"
target="_blank">Apache CarbonData 2.1.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.0.1/"
target="_blank">Apache CarbonData 2.0.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.0.0/"
target="_blank">Apache CarbonData 2.0.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.6.1/"
target="_blank">Apache CarbonData 1.6.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.6.0/"
target="_blank">Apache CarbonData 1.6.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.4/"
target="_blank">Apache CarbonData 1.5.4</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.3/"
target="_blank">Apache CarbonData 1.5.3</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.2/"
target="_blank">Apache CarbonData 1.5.2</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.1/"
target="_blank">Apache CarbonData 1.5.1</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/display/CARBONDATA/Releases"
target="_blank">Release Archive</a></li>
</ul>
</li>
<li><a href="documentation.html" class="active">Documentation</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Community <span class="caret"></span></a>
<ul class="dropdown-menu">
<li>
<a href="https://github.com/apache/carbondata/blob/master/docs/how-to-contribute-to-apache-carbondata.md"
target="_blank">Contributing to CarbonData</a></li>
<li>
<a href="https://github.com/apache/carbondata/blob/master/docs/release-guide.md"
target="_blank">Release Guide</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/display/CARBONDATA/PMC+and+Committers+member+list"
target="_blank">Project PMC and Committers</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=66850609"
target="_blank">CarbonData Meetups</a></li>
<li><a href="security.html">Apache CarbonData Security</a></li>
<li><a href="https://issues.apache.org/jira/browse/CARBONDATA" target="_blank">Apache
Jira</a></li>
<li><a href="videogallery.html">CarbonData Videos </a></li>
</ul>
</li>
<!-- Apache dropdown whose toggle carries hidden-xs; all apache.org links use https -->
<li class="dropdown">
<a href="https://www.apache.org/" class="apache_link hidden-xs dropdown-toggle"
data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Apache</a>
<ul class="dropdown-menu">
<li><a href="https://www.apache.org/" target="_blank" rel="noopener">Apache Homepage</a></li>
<li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener">License</a></li>
<li><a href="https://www.apache.org/foundation/sponsorship.html"
target="_blank" rel="noopener">Sponsorship</a></li>
<li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener">Thanks</a></li>
</ul>
</li>
<!-- Apache dropdown whose toggle carries hidden-lg/hidden-md/hidden-sm; all apache.org links use https -->
<li class="dropdown">
<a href="https://www.apache.org/" class="hidden-lg hidden-md hidden-sm dropdown-toggle"
data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Apache</a>
<ul class="dropdown-menu">
<li><a href="https://www.apache.org/" target="_blank" rel="noopener">Apache Homepage</a></li>
<li><a href="https://www.apache.org/licenses/" target="_blank" rel="noopener">License</a></li>
<li><a href="https://www.apache.org/foundation/sponsorship.html"
target="_blank" rel="noopener">Sponsorship</a></li>
<li><a href="https://www.apache.org/foundation/thanks.html" target="_blank" rel="noopener">Thanks</a></li>
</ul>
</li>
<!-- Icon-only search control; aria-label supplies the accessible name because the icon is aria-hidden.
     NOTE(review): presumably js/custom.js binds #search-icon to show/hide #search-box - confirm. -->
<li>
<a href="#" id="search-icon" aria-label="Search"><i class="fa fa-search" aria-hidden="true"></i></a>
</li>
</ul>
</div><!--/.nav-collapse -->
<div id="search-box">
<!-- Site search: submits the query (q) to Google over https in a new tab.
     While the checkbox is ticked the request also carries sitesearch=carbondata.apache.org.
     The table is layout-only, hence role="presentation". -->
<form method="get" action="https://www.google.com/search" target="_blank">
<div class="search-block">
<table border="0" cellpadding="0" width="100%" role="presentation">
<tr>
<td style="width:80%">
<input type="text" name="q" size="5" maxlength="255" value=""
class="search-input" placeholder="Search...." aria-label="Search" required/>
</td>
<td style="width:20%">
<input type="submit" value="Search"/></td>
</tr>
<tr>
<td align="left" style="font-size:75%" colspan="2">
<input type="checkbox" name="sitesearch" value="carbondata.apache.org"
aria-labelledby="sitesearch-label" checked/>
<span id="sitesearch-label" style=" position: relative; top: -3px;"> Only search for CarbonData</span>
</td>
</tr>
</table>
</div>
</form>
</div>
</div>
</nav>
</header> <!-- end Header part -->
<div class="fixed-padding"></div> <!-- top padding with fixed header -->
<section><!-- Dashboard nav -->
<div class="container-fluid q">
<div class="col-sm-12 col-md-12 maindashboard">
<div class="verticalnavbar">
<nav class="b-sticky-nav">
<div class="nav-scroller">
<div class="nav__inner">
<a class="b-nav__intro nav__item" href="./introduction.html">introduction</a>
<a class="b-nav__quickstart nav__item" href="./quick-start-guide.html">quick start</a>
<a class="b-nav__uses nav__item" href="./usecases.html">use cases</a>
<div class="nav__item nav__item__with__subs">
<a class="b-nav__docs nav__item nav__sub__anchor" href="./language-manual.html">Language Reference</a>
<a class="nav__item nav__sub__item" href="./ddl-of-carbondata.html">DDL</a>
<a class="nav__item nav__sub__item" href="./dml-of-carbondata.html">DML</a>
<a class="nav__item nav__sub__item" href="./streaming-guide.html">Streaming</a>
<a class="nav__item nav__sub__item" href="./configuration-parameters.html">Configuration</a>
<a class="nav__item nav__sub__item" href="./index-developer-guide.html">Indexes</a>
<a class="nav__item nav__sub__item" href="./supported-data-types-in-carbondata.html">Data Types</a>
</div>
<!-- Index Management sub-navigation group; every target is a sibling page, so all links use ./ like the other nav entries -->
<div class="nav__item nav__item__with__subs">
<a class="b-nav__datamap nav__item nav__sub__anchor" href="./index-management.html">Index Management</a>
<a class="nav__item nav__sub__item" href="./bloomfilter-index-guide.html">Bloom Filter</a>
<a class="nav__item nav__sub__item" href="./lucene-index-guide.html">Lucene</a>
<a class="nav__item nav__sub__item" href="./secondary-index-guide.html">Secondary Index</a>
<a class="nav__item nav__sub__item" href="./spatial-index-guide.html">Spatial Index</a>
<a class="nav__item nav__sub__item" href="./mv-guide.html">MV</a>
</div>
<div class="nav__item nav__item__with__subs">
<a class="b-nav__api nav__item nav__sub__anchor" href="./sdk-guide.html">API</a>
<a class="nav__item nav__sub__item" href="./sdk-guide.html">Java SDK</a>
<a class="nav__item nav__sub__item" href="./csdk-guide.html">C++ SDK</a>
</div>
<a class="b-nav__perf nav__item" href="./performance-tuning.html">Performance Tuning</a>
<a class="b-nav__s3 nav__item" href="./s3-guide.html">S3 Storage</a>
<a class="b-nav__indexserver nav__item" href="./index-server.html">Index Server</a>
<a class="b-nav__prestodb nav__item" href="./prestodb-guide.html">PrestoDB Integration</a>
<a class="b-nav__prestosql nav__item" href="./prestosql-guide.html">PrestoSQL Integration</a>
<a class="b-nav__flink nav__item" href="./flink-integration-guide.html">Flink Integration</a>
<a class="b-nav__scd nav__item" href="./scd-and-cdc-guide.html">SCD &amp; CDC</a>
<a class="b-nav__faq nav__item" href="./faq.html">FAQ</a>
<a class="b-nav__contri nav__item" href="./how-to-contribute-to-apache-carbondata.html">Contribute</a>
<a class="b-nav__security nav__item" href="./security.html">Security</a>
<a class="b-nav__release nav__item" href="./release-guide.html">Release Guide</a>
</div>
</div>
<div class="navindicator">
<div class="b-nav__intro navindicator__item"></div>
<div class="b-nav__quickstart navindicator__item"></div>
<div class="b-nav__uses navindicator__item"></div>
<div class="b-nav__docs navindicator__item"></div>
<div class="b-nav__datamap navindicator__item"></div>
<div class="b-nav__api navindicator__item"></div>
<div class="b-nav__perf navindicator__item"></div>
<div class="b-nav__s3 navindicator__item"></div>
<div class="b-nav__indexserver navindicator__item"></div>
<div class="b-nav__prestodb navindicator__item"></div>
<div class="b-nav__prestosql navindicator__item"></div>
<div class="b-nav__flink navindicator__item"></div>
<div class="b-nav__scd navindicator__item"></div>
<div class="b-nav__faq navindicator__item"></div>
<div class="b-nav__contri navindicator__item"></div>
<div class="b-nav__security navindicator__item"></div>
</div>
</nav>
</div>
<div class="mdcontent">
<section>
<div style="padding:10px 15px;">
<div id="viewpage" name="viewpage">
<div class="row">
<div class="col-sm-12 col-md-12">
<div>
<h1>
<a id="carbondata-as-sparks-datasource" class="anchor" href="#carbondata-as-sparks-datasource" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>CarbonData as Spark's Datasource</h1>
<p>The CarbonData file format is now integrated as a Spark datasource for read and write operations without using CarbonSession. This is useful for users who want to use CarbonData as Spark's data source.</p>
<p><strong>Note:</strong> You can only use the functions/features supported by the Spark datasource APIs; the supported functionality is similar to Parquet. CarbonSession features are not supported. When a select query is run on a binary column in spark-sql, the result is displayed in byte array format.</p>
<h1>
<a id="create-table-with-ddl" class="anchor" href="#create-table-with-ddl" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Create Table with DDL</h1>
<p>Now you can create Carbon table using Spark's datasource DDL syntax.</p>
<pre><code> CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name1 col_type1 [COMMENT col_comment1], ...)]
USING CARBON
[OPTIONS (key1=val1, key2=val2, ...)]
[PARTITIONED BY (col_name1, col_name2, ...)]
[CLUSTERED BY (col_name3, col_name4, ...) INTO num_buckets BUCKETS]
[LOCATION path]
[COMMENT table_comment]
[TBLPROPERTIES (key1=val1, key2=val2, ...)]
[AS select_statement]
</code></pre>
<h2>
<a id="supported-options" class="anchor" href="#supported-options" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Supported OPTIONS</h2>
<table>
<thead>
<tr>
<th>Property</th>
<th>Default Value</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>table_blocksize</td>
<td>1024</td>
<td>Size of blocks to write onto hdfs. For more details, see <a href="./ddl-of-carbondata.html#table-block-size-configuration">Table Block Size Configuration</a>.</td>
</tr>
<tr>
<td>table_blocklet_size</td>
<td>64</td>
<td>Size of blocklet to write.</td>
</tr>
<tr>
<td>table_page_size_inmb</td>
<td>0</td>
<td>Size of each page in the carbon table. If the page size crosses this value before 32000 rows, the page will be cut to that many rows. Helps keep the page size within the cache size.</td>
</tr>
<tr>
<td>local_dictionary_threshold</td>
<td>10000</td>
<td>Cardinality up to which the local dictionary can be generated. For more details, see <a href="./ddl-of-carbondata.html#local-dictionary-configuration">Local Dictionary Configuration</a>.</td>
</tr>
<tr>
<td>local_dictionary_enable</td>
<td>false</td>
<td>Enable local dictionary generation. For more details, see <a href="./ddl-of-carbondata.html#local-dictionary-configuration">Local Dictionary Configuration</a>.</td>
</tr>
<tr>
<td>sort_columns</td>
<td>all dimensions are sorted</td>
<td>Columns to include in sort and its order of sort. For more details, see <a href="./ddl-of-carbondata.html#sort-columns-configuration">Sort Columns Configuration</a>.</td>
</tr>
<tr>
<td>sort_scope</td>
<td>local_sort</td>
<td>Sort scope of the load. Options include no sort, local sort, batch sort, and global sort. For more details, see <a href="./ddl-of-carbondata.html#sort-scope-configuration">Sort Scope Configuration</a>.</td>
</tr>
<tr>
<td>long_string_columns</td>
<td>null</td>
<td>Comma separated string/char/varchar columns which are more than 32k length. For more details, see <a href="./ddl-of-carbondata.html#string-longer-than-32000-characters">String longer than 32000 characters</a>.</td>
</tr>
</tbody>
</table>
<p><strong>NOTE:</strong> Please set long_string_columns for varchar columns.</p>
<h2>
<a id="example" class="anchor" href="#example" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example</h2>
<pre><code> CREATE TABLE CARBON_TABLE (NAME STRING) USING CARBON OPTIONS('table_blocksize'='256')
</code></pre>
<h1>
<a id="using-dataframe" class="anchor" href="#using-dataframe" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Using DataFrame</h1>
<p>Carbon format can be used in dataframe also. Following are the ways to use carbon format in dataframe.</p>
<p>Write carbon using dataframe</p>
<pre><code>df.write.format("carbon").save(path)
</code></pre>
<p>Read carbon using dataframe</p>
<pre><code>val df = spark.read.format("carbon").load(path)
</code></pre>
<h2>
<a id="example-1" class="anchor" href="#example-1" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example</h2>
<pre><code>import org.apache.spark.sql.SparkSession
import scala.util.Random

val spark = SparkSession
  .builder()
  .appName("Spark SQL basic example")
  .config("spark.some.config.option", "some-value")
  .getOrCreate()

// For implicit conversions like converting RDDs to DataFrames
import spark.implicits._

// Random generator used to produce the ID column values
val r = new Random()

val df = spark.sparkContext.parallelize(1 to 10 * 10 * 1000)
  .map(x =&gt; (r.nextInt(100000), "name" + x % 8, "city" + x % 50, BigDecimal.apply(x % 60)))
  .toDF("ID", "name", "city", "age")

// Write to carbon format
df.write.format("carbon").save("/user/person_table")

// Read carbon using dataframe
val dfread = spark.read.format("carbon").load("/user/person_table")
dfread.show()
</code></pre>
<p>Reference : <a href="./configuration-parameters.html">list of carbon properties</a></p>
<script>
  // Runs once the DOM is ready (jQuery's $(fn) shorthand).
  $(function () {
    var expandedClass = 'nav__item__with__subs--expanded';
    var $docsNav = $('.b-nav__docs');

    // Mark the elements carrying .b-nav__docs as the selected nav entry.
    $docsNav.addClass('selected');

    // Expand the parent sub-nav container unless it already has the expanded class.
    var $group = $docsNav.parent();
    if ($group.hasClass(expandedClass) === false) {
      $group.toggleClass(expandedClass);
    }
  });
</script></div>
</div>
</div>
</div>
<div class="doc-footer">
<a href="#top" class="scroll-top">Top</a>
</div>
</div>
</section>
</div>
</div>
</div>
</section><!-- End systemblock part -->
<script src="js/custom.js"></script>
</body>
</html>