blob: 23d7c7742f56f27a620e1c460fe6e1bc7c2806e5 [file] [log] [blame]
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "509d5a1e-10f2-4294-a104-9fdb85a29b0c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"SLF4J: Class path contains multiple SLF4J bindings.\n",
"SLF4J: Found binding in [jar:file:/root/spark-3.3.1-bin-hadoop2/jars/log4j-slf4j-impl-2.17.2.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n",
"SLF4J: Found binding in [jar:file:/root/hadoop-2.7.7/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]\n",
"SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.\n",
"SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]\n",
"24/04/03 22:04:56 WARN Utils: Your hostname, vsr542 resolves to a loopback address: 127.0.1.1; using 10.0.2.142 instead (on interface eno1)\n",
"24/04/03 22:04:56 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n",
"Setting default log level to \"WARN\".\n",
"To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n",
"24/04/03 22:04:57 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n",
"24/04/03 22:05:00 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.\n"
]
}
],
"source": [
"import findspark\n",
"findspark.init()\n",
"\n",
"from pyspark import SparkConf, SparkContext\n",
"nativesql_jars = \"/path/to/gluten-XXXX.jar\"\n",
"conf = SparkConf().setAppName(\"PySpark Gluten\").setMaster(\"yarn\")\n",
"conf.set(\"spark.executor.instances\", \"2\")\n",
"conf.set(\"spark.executor.memory\", \"6g\")\n",
"conf.set(\"spark.executor.cores\", \"2\")\n",
"conf.set(\"spark.driver.memory\", \"2g\")\n",
"conf.set(\"spark.memory.offHeap.enabled\", \"true\")\n",
"conf.set(\"spark.memory.offHeap.size\", \"2g\")\n",
"conf.set(\"spark.executor.memoryOverhead\", \"384M\")\n",
"conf.set(\"spark.driver.extraClassPath\", nativesql_jars)\n",
"conf.set(\"spark.executor.extraClassPath\", nativesql_jars)\n",
"conf.set(\"spark.plugins\", \"org.apache.gluten.GlutenPlugin\")\n",
"conf.set(\"spark.gluten.loadLibFromJar\", \"false\")\n",
"conf.set(\"spark.shuffle.manager\", \"org.apache.spark.shuffle.sort.ColumnarShuffleManager\")\n",
"sc = SparkContext(conf=conf)\n",
"from pyspark.sql import SparkSession\n",
"spark_session = SparkSession(sc)\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6c4ec66a-3b8c-4a65-b948-8420b492333d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"== Physical Plan ==\n",
"VeloxColumnarToRowExec\n",
"+- ^(1) FilterExecTransformer (isnotnull(category#1) AND (category#1 = Cellphone))\n",
" +- ^(1) InputIteratorTransformer[product#0, category#1, revenue#2L]\n",
" +- ^(1) InputAdapter\n",
" +- ^(1) RowToVeloxColumnar\n",
" +- *(1) Scan ExistingRDD[product#0,category#1,revenue#2L]\n",
"\n",
"\n"
]
}
],
"source": [
"df = spark_session.createDataFrame(\n",
" [\n",
" (\"Normal\", \"Cellphone\", 6000),\n",
" (\"Normal\", \"Tablet\", 1500),\n",
" (\"Mini\", \"Tablet\", 5500),\n",
" (\"Mini\", \"Cellphone\", 5000),\n",
" (\"Foldable\", \"Cellphone\", 6500),\n",
" (\"Foldable\", \"Tablet\", 2500),\n",
" (\"Pro\", \"Cellphone\", 3000),\n",
" (\"Pro\", \"Tablet\", 4000),\n",
" (\"Pro Max\", \"Cellphone\", 4500)\n",
" ],\n",
" [\"product\", \"category\", \"revenue\"]\n",
")\n",
"df.filter(\"category = 'Cellphone'\").explain()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.19"
},
"nbTranslate": {
"displayLangs": [
"*"
],
"hotkey": "alt-t",
"langInMainMenu": true,
"sourceLang": "en",
"targetLang": "fr",
"useGoogleTranslate": true
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 5
}