blob: 7a6752fb7221bde674705bfe60459608734f2831 [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"id": "910ebdc2",
"metadata": {},
"source": [
"## 1. Initialize Spark with CarbonExtensions"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f73459bd",
"metadata": {},
"outputs": [],
"source": [
"from pyspark.sql import SparkSession\n",
"\n",
"# Hive-enabled session with the CarbonData SQL extensions registered via\n",
"# `spark.sql.extensions`; later cells issue all their SQL through `spark`.\n",
"spark = (\n",
"    SparkSession.builder\n",
"    .enableHiveSupport()\n",
"    .config(\"spark.sql.extensions\", \"org.apache.spark.sql.CarbonExtensions\")\n",
"    .getOrCreate()\n",
")"
]
},
{
"cell_type": "markdown",
"id": "b73232b2",
"metadata": {},
"source": [
"## 2. Create a CarbonData table and insert data using Spark SQL"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d458dc3a",
"metadata": {},
"outputs": [],
"source": [
"# These statements take effect as soon as spark.sql() is called, so no .show()\n",
"# is needed; calling it on DROP TABLE only prints an empty table.\n",
"# Dropping first keeps the cell re-runnable from a fresh kernel.\n",
"spark.sql(\"DROP TABLE IF EXISTS carbondata_table\")\n",
"spark.sql(\"CREATE TABLE IF NOT EXISTS carbondata_table( age INT,adult BOOLEAN) USING carbon\")\n",
"spark.sql(\"INSERT INTO carbondata_table VALUES(20,true)\")\n",
"# Trailing semicolon keeps the empty `DataFrame[]` repr out of the cell output.\n",
"spark.sql(\"INSERT INTO carbondata_table VALUES(10,false)\");"
]
},
{
"cell_type": "markdown",
"id": "edfc1eeb",
"metadata": {},
"source": [
"## 3. Select data from the CarbonData table"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2c234b64",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---+-----+\n",
"|age|adult|\n",
"+---+-----+\n",
"| 10|false|\n",
"| 20| true|\n",
"+---+-----+\n",
"\n"
]
}
],
"source": [
"# Without ORDER BY the row order is not guaranteed and can differ between runs;\n",
"# sorting by age makes the displayed result reproducible.\n",
"spark.sql(\"SELECT * FROM carbondata_table ORDER BY age\").show()"
]
},
{
"cell_type": "markdown",
"id": "c5df896b",
"metadata": {},
"source": [
"## 4. Describe the CarbonData table"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ac65cba3",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+--------+---------+-------+\n",
"|col_name|data_type|comment|\n",
"+--------+---------+-------+\n",
"| age| int| null|\n",
"| adult| boolean| null|\n",
"+--------+---------+-------+\n",
"\n",
"+--------------------+--------------------+-------+\n",
"| col_name| data_type|comment|\n",
"+--------------------+--------------------+-------+\n",
"| age| int| null|\n",
"| adult| boolean| null|\n",
"| | | |\n",
"|# Detailed Table ...| | |\n",
"| Database| default| |\n",
"| Table| carbondata_table| |\n",
"| Owner| jovyan| |\n",
"| Created Time|Sat Apr 15 17:22:...| |\n",
"| Last Access| UNKNOWN| |\n",
"| Created By| Spark 3.1.1| |\n",
"| Type| MANAGED| |\n",
"| Provider| carbon| |\n",
"| Location|file:/home/jovyan...| |\n",
"| Serde Library|org.apache.carbon...| |\n",
"| InputFormat|org.apache.carbon...| |\n",
"| OutputFormat|org.apache.carbon...| |\n",
"+--------------------+--------------------+-------+\n",
"\n"
]
}
],
"source": [
"# Plain DESCRIBE lists only the columns; DESCRIBE FORMATTED also prints the\n",
"# detailed table information (database, owner, provider, location, ...).\n",
"for describe_sql in (\n",
"    \"DESCRIBE carbondata_table\",\n",
"    \"DESCRIBE FORMATTED carbondata_table\",\n",
"):\n",
"    spark.sql(describe_sql).show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bfdcae5a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}