blob: 7a1789fb483dcab2cc8fd8196064fe923249b462 [file] [log] [blame]
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href='images/favicon.ico' rel='shortcut icon' type='image/x-icon'>
<!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
<title>CarbonData</title>
<style>
</style>
<!-- Bootstrap -->
<link rel="stylesheet" href="css/bootstrap.min.css">
<link href="css/style.css" rel="stylesheet">
<!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
<!-- WARNING: Respond.js doesn't work if you view the page via file:// -->
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.scom/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<script src="js/jquery.min.js"></script>
<script src="js/bootstrap.min.js"></script>
<script defer src="https://use.fontawesome.com/releases/v5.0.8/js/all.js"></script>
</head>
<body>
<header>
<nav class="navbar navbar-default navbar-custom cd-navbar-wrapper">
<div class="container">
<div class="navbar-header">
<button aria-controls="navbar" aria-expanded="false" data-target="#navbar" data-toggle="collapse"
class="navbar-toggle collapsed" type="button">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<a href="index.html" class="logo">
<img src="images/CarbonDataLogo.png" alt="CarbonData logo" title="CarbocnData logo"/>
</a>
</div>
<div class="navbar-collapse collapse cd_navcontnt" id="navbar">
<ul class="nav navbar-nav navbar-right navlist-custom">
<li><a href="index.html" class="hidden-xs"><i class="fa fa-home" aria-hidden="true"></i> </a>
</li>
<li><a href="index.html" class="hidden-lg hidden-md hidden-sm">Home</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle " data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false"> Download <span class="caret"></span></a>
<ul class="dropdown-menu">
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.2.0/"
target="_blank">Apache CarbonData 2.2.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.1.1/"
target="_blank">Apache CarbonData 2.1.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.1.0/"
target="_blank">Apache CarbonData 2.1.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.0.1/"
target="_blank">Apache CarbonData 2.0.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/2.0.0/"
target="_blank">Apache CarbonData 2.0.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.6.1/"
target="_blank">Apache CarbonData 1.6.1</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.6.0/"
target="_blank">Apache CarbonData 1.6.0</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.4/"
target="_blank">Apache CarbonData 1.5.4</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.3/"
target="_blank">Apache CarbonData 1.5.3</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.2/"
target="_blank">Apache CarbonData 1.5.2</a></li>
<li>
<a href="https://dist.apache.org/repos/dist/release/carbondata/1.5.1/"
target="_blank">Apache CarbonData 1.5.1</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/display/CARBONDATA/Releases"
target="_blank">Release Archive</a></li>
</ul>
</li>
<li><a href="documentation.html" class="active">Documentation</a></li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-haspopup="true"
aria-expanded="false">Community <span class="caret"></span></a>
<ul class="dropdown-menu">
<li>
<a href="https://github.com/apache/carbondata/blob/master/docs/how-to-contribute-to-apache-carbondata.md"
target="_blank">Contributing to CarbonData</a></li>
<li>
<a href="https://github.com/apache/carbondata/blob/master/docs/release-guide.md"
target="_blank">Release Guide</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/display/CARBONDATA/PMC+and+Committers+member+list"
target="_blank">Project PMC and Committers</a></li>
<li>
<a href="https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=66850609"
target="_blank">CarbonData Meetups</a></li>
<li><a href="security.html">Apache CarbonData Security</a></li>
<li><a href="https://issues.apache.org/jira/browse/CARBONDATA" target="_blank">Apache
Jira</a></li>
<li><a href="videogallery.html">CarbonData Videos </a></li>
</ul>
</li>
<li class="dropdown">
<a href="http://www.apache.org/" class="apache_link hidden-xs dropdown-toggle"
data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Apache</a>
<ul class="dropdown-menu">
<li><a href="http://www.apache.org/" target="_blank">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/" target="_blank">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html"
target="_blank">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li>
</ul>
</li>
<li class="dropdown">
<a href="http://www.apache.org/" class="hidden-lg hidden-md hidden-sm dropdown-toggle"
data-toggle="dropdown" role="button" aria-haspopup="true" aria-expanded="false">Apache</a>
<ul class="dropdown-menu">
<li><a href="http://www.apache.org/" target="_blank">Apache Homepage</a></li>
<li><a href="http://www.apache.org/licenses/" target="_blank">License</a></li>
<li><a href="http://www.apache.org/foundation/sponsorship.html"
target="_blank">Sponsorship</a></li>
<li><a href="http://www.apache.org/foundation/thanks.html" target="_blank">Thanks</a></li>
</ul>
</li>
<li>
<a href="#" id="search-icon"><i class="fa fa-search" aria-hidden="true"></i></a>
</li>
</ul>
</div><!--/.nav-collapse -->
<div id="search-box">
<form method="get" action="http://www.google.com/search" target="_blank">
<div class="search-block">
<table border="0" cellpadding="0" width="100%">
<tr>
<td style="width:80%">
<input type="text" name="q" size=" 5" maxlength="255" value=""
class="search-input" placeholder="Search...." required/>
</td>
<td style="width:20%">
<input type="submit" value="Search"/></td>
</tr>
<tr>
<td align="left" style="font-size:75%" colspan="2">
<input type="checkbox" name="sitesearch" value="carbondata.apache.org" checked/>
<span style=" position: relative; top: -3px;"> Only search for CarbonData</span>
</td>
</tr>
</table>
</div>
</form>
</div>
</div>
</nav>
</header> <!-- end Header part -->
<div class="fixed-padding"></div> <!-- top padding with fixde header -->
<section><!-- Dashboard nav -->
<div class="container-fluid q">
<div class="col-sm-12 col-md-12 maindashboard">
<div class="verticalnavbar">
<nav class="b-sticky-nav">
<div class="nav-scroller">
<div class="nav__inner">
<a class="b-nav__intro nav__item" href="./introduction.html">introduction</a>
<a class="b-nav__quickstart nav__item" href="./quick-start-guide.html">quick start</a>
<a class="b-nav__uses nav__item" href="./usecases.html">use cases</a>
<div class="nav__item nav__item__with__subs">
<a class="b-nav__docs nav__item nav__sub__anchor" href="./language-manual.html">Language Reference</a>
<a class="nav__item nav__sub__item" href="./ddl-of-carbondata.html">DDL</a>
<a class="nav__item nav__sub__item" href="./dml-of-carbondata.html">DML</a>
<a class="nav__item nav__sub__item" href="./streaming-guide.html">Streaming</a>
<a class="nav__item nav__sub__item" href="./configuration-parameters.html">Configuration</a>
<a class="nav__item nav__sub__item" href="./index-developer-guide.html">Indexes</a>
<a class="nav__item nav__sub__item" href="./supported-data-types-in-carbondata.html">Data Types</a>
</div>
<div class="nav__item nav__item__with__subs">
<a class="b-nav__datamap nav__item nav__sub__anchor" href="./index-management.html">Index Managament</a>
<a class="nav__item nav__sub__item" href="./bloomfilter-index-guide.html">Bloom Filter</a>
<a class="nav__item nav__sub__item" href="./lucene-index-guide.html">Lucene</a>
<a class="nav__item nav__sub__item" href="./secondary-index-guide.html">Secondary Index</a>
<a class="nav__item nav__sub__item" href="../spatial-index-guide.html">Spatial Index</a>
<a class="nav__item nav__sub__item" href="../mv-guide.html">MV</a>
</div>
<div class="nav__item nav__item__with__subs">
<a class="b-nav__api nav__item nav__sub__anchor" href="./sdk-guide.html">API</a>
<a class="nav__item nav__sub__item" href="./sdk-guide.html">Java SDK</a>
<a class="nav__item nav__sub__item" href="./csdk-guide.html">C++ SDK</a>
</div>
<a class="b-nav__perf nav__item" href="./performance-tuning.html">Performance Tuning</a>
<a class="b-nav__s3 nav__item" href="./s3-guide.html">S3 Storage</a>
<a class="b-nav__indexserver nav__item" href="./index-server.html">Index Server</a>
<a class="b-nav__prestodb nav__item" href="./prestodb-guide.html">PrestoDB Integration</a>
<a class="b-nav__prestosql nav__item" href="./prestosql-guide.html">PrestoSQL Integration</a>
<a class="b-nav__flink nav__item" href="./flink-integration-guide.html">Flink Integration</a>
<a class="b-nav__scd nav__item" href="./scd-and-cdc-guide.html">SCD & CDC</a>
<a class="b-nav__faq nav__item" href="./faq.html">FAQ</a>
<a class="b-nav__contri nav__item" href="./how-to-contribute-to-apache-carbondata.html">Contribute</a>
<a class="b-nav__security nav__item" href="./security.html">Security</a>
<a class="b-nav__release nav__item" href="./release-guide.html">Release Guide</a>
</div>
</div>
<div class="navindicator">
<div class="b-nav__intro navindicator__item"></div>
<div class="b-nav__quickstart navindicator__item"></div>
<div class="b-nav__uses navindicator__item"></div>
<div class="b-nav__docs navindicator__item"></div>
<div class="b-nav__datamap navindicator__item"></div>
<div class="b-nav__api navindicator__item"></div>
<div class="b-nav__perf navindicator__item"></div>
<div class="b-nav__s3 navindicator__item"></div>
<div class="b-nav__indexserver navindicator__item"></div>
<div class="b-nav__prestodb navindicator__item"></div>
<div class="b-nav__prestosql navindicator__item"></div>
<div class="b-nav__flink navindicator__item"></div>
<div class="b-nav__scd navindicator__item"></div>
<div class="b-nav__faq navindicator__item"></div>
<div class="b-nav__contri navindicator__item"></div>
<div class="b-nav__security navindicator__item"></div>
</div>
</nav>
</div>
<div class="mdcontent">
<section>
<div style="padding:10px 15px;">
<div id="viewpage" name="viewpage">
<div class="row">
<div class="col-sm-12 col-md-12">
<div>
<h1>
<a id="quick-start" class="anchor" href="#quick-start" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Quick Start</h1>
<p>This tutorial provides a quick introduction to using current integration/hive module.</p>
<h2>
<a id="prepare-carbondata-in-spark" class="anchor" href="#prepare-carbondata-in-spark" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Prepare CarbonData in Spark</h2>
<ul>
<li>
<p>Create a sample.csv file using the following commands. The CSV file is required for loading data into CarbonData.</p>
<pre><code>cd carbondata
cat &gt; sample.csv &lt;&lt; EOF
id,name,scale,country,salary
1,yuhai,1.77,china,33000.1
2,runlin,1.70,china,33000.2
EOF
</code></pre>
</li>
<li>
<p>copy data to HDFS</p>
</li>
</ul>
<pre><code>$HADOOP_HOME/bin/hadoop fs -put sample.csv &lt;hdfs store path&gt;/sample.csv
</code></pre>
<ul>
<li>Add the following params to $SPARK_CONF_DIR/conf/hive-site.xml</li>
</ul>
<div class="highlight highlight-text-xml"><pre>&lt;<span class="pl-ent">property</span>&gt;
&lt;<span class="pl-ent">name</span>&gt;hive.metastore.pre.event.listeners&lt;/<span class="pl-ent">name</span>&gt;
&lt;<span class="pl-ent">value</span>&gt;org.apache.carbondata.hive.CarbonHiveMetastoreListener&lt;/<span class="pl-ent">value</span>&gt;
&lt;/<span class="pl-ent">property</span>&gt;</pre></div>
<ul>
<li>Start Spark shell by running the following command in the Spark directory</li>
</ul>
<pre><code>./bin/spark-shell --jars &lt;carbondata assembly jar path, carbon hive jar path&gt;
</code></pre>
<pre><code>import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.CarbonSession._
val rootPath = "hdfs:///user/hadoop/carbon"
val storeLocation = s"$rootPath/store"
val warehouse = s"$rootPath/warehouse"
val metaStoreDB = s"$rootPath/metastore_db"
val carbon = SparkSession.builder().enableHiveSupport().config("spark.sql.warehouse.dir", warehouse).config(org.apache.carbondata.core.constants.CarbonCommonConstants.STORE_LOCATION, storeLocation).getOrCreateCarbonSession(storeLocation, metaStoreDB)
carbon.sql("create table hive_carbon(id int, name string, scale decimal, country string, salary double) STORED AS carbondata")
carbon.sql("LOAD DATA INPATH '&lt;hdfs store path&gt;/sample.csv' INTO TABLE hive_carbon")
scala&gt;carbon.sql("SELECT * FROM hive_carbon").show()
</code></pre>
<h2>
<a id="configure-carbon-in-hive" class="anchor" href="#configure-carbon-in-hive" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Configure Carbon in Hive</h2>
<h3>
<a id="configure-hive-classpath" class="anchor" href="#configure-hive-classpath" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Configure hive classpath</h3>
<pre><code>mkdir hive/auxlibs/
cp carbondata/assembly/target/scala-2.11/carbondata_2.11*.jar hive/auxlibs/
cp carbondata/integration/hive/target/carbondata-hive-*.jar hive/auxlibs/
cp $SPARK_HOME/jars/spark-catalyst*.jar hive/auxlibs/
cp $SPARK_HOME/jars/scala*.jar hive/auxlibs/
export HIVE_AUX_JARS_PATH=hive/auxlibs/
</code></pre>
<h3>
<a id="fix-snappy-issue" class="anchor" href="#fix-snappy-issue" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Fix snappy issue</h3>
<pre><code>copy snappy-java-xxx.jar from "./&lt;SPARK_HOME&gt;/jars/" to "./Library/Java/Extensions"
export HADOOP_OPTS="-Dorg.xerial.snappy.lib.path=/Library/Java/Extensions -Dorg.xerial.snappy.lib.name=libsnappyjava.jnilib -Dorg.xerial.snappy.tempdir=/Users/apple/DEMO/tmp"
</code></pre>
<h3>
<a id="carbon-jars-to-be-placed" class="anchor" href="#carbon-jars-to-be-placed" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Carbon Jars to be placed</h3>
<pre><code>hive/lib/ (for hive server)
yarn/lib/ (for MapReduce)
Carbon Jars to be copied to the above paths.
</code></pre>
<h3>
<a id="start-hive-beeline-to-query" class="anchor" href="#start-hive-beeline-to-query" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Start hive beeline to query</h3>
<pre><code>$HIVE_HOME/bin/beeline
</code></pre>
<h3>
<a id="write-data-from-hive" class="anchor" href="#write-data-from-hive" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Write data from hive</h3>
<ul>
<li>Write data from hive into carbondata format.</li>
</ul>
<pre><code>create table hive_carbon(id int, name string, scale decimal, country string, salary double) stored by 'org.apache.carbondata.hive.CarbonStorageHandler';
insert into hive_carbon select * from parquetTable;
</code></pre>
<p><strong>Note</strong>: Only non-transactional tables are supported when created through hive. This means that the standard carbon folder structure would not be followed and all files would be written in a flat folder structure.</p>
<h3>
<a id="query-data-from-hive" class="anchor" href="#query-data-from-hive" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Query data from hive</h3>
<ul>
<li>This is to read the carbon table through Hive. It is the integration of the carbon with Hive.</li>
</ul>
<pre><code>set hive.mapred.supports.subdirectories=true;
set mapreduce.dir.recursive=true;
These properties helps to recursively traverse through the directories to read the carbon folder structure.
</code></pre>
<h3>
<a id="example" class="anchor" href="#example" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Example</h3>
<pre><code> - Query the table
select * from hive_carbon;
select count(*) from hive_carbon;
select * from hive_carbon order by id;
</code></pre>
<h3>
<a id="note" class="anchor" href="#note" aria-hidden="true"><span aria-hidden="true" class="octicon octicon-link"></span></a>Note</h3>
<ul>
<li>Partition table support is not handled</li>
</ul>
<script>
// Show selected style on nav item
$(function() { $('.b-nav__quickstart').addClass('selected'); });
</script></div>
</div>
</div>
</div>
<div class="doc-footer">
<a href="#top" class="scroll-top">Top</a>
</div>
</div>
</section>
</div>
</div>
</div>
</section><!-- End systemblock part -->
<script src="js/custom.js"></script>
</body>
</html>