| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Initialization" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "tags": [ |
| "parameters" |
| ] |
| }, |
| "outputs": [], |
| "source": [ |
| "# Local path to gluten project.\n", |
| "gluten_home='/home/sparkuser/gluten'\n", |
| "\n", |
| "# Local path to gluten jar.\n", |
| "gluten_target_jar='/home/sparkuser/gluten-velox-bundle-spark3.3_2.12-centos_7_x86_64-1.5.0-SNAPSHOT.jar'\n", |
| "\n", |
| "# Spark app master. e.g. 'yarn'\n", |
| "master='yarn'\n", |
| "\n", |
| "# List of workers.\n", |
| "clients=['localhost']\n", |
| "\n", |
| "# List of block devices. e.g. ['nvme1n1', 'nvme2n1']\n", |
| "disk_dev=[]\n", |
| "\n", |
| "# List of network devices. e.g. ['ens787f0']\n", |
| "nic_dev=[]\n", |
| "\n", |
| "# Select workload. Can be either 'tpch' or 'tpcds'.\n", |
| "workload='tpch'\n", |
| "\n", |
| "# Run with gluten. If False, run Spark.\n", |
| "run_gluten=True\n", |
| "\n", |
| "# TPC tables\n", |
| "tpch_tabledir=''\n", |
| "tpcds_tabledir=''\n", |
| "\n", |
| "# Parallelism\n", |
| "executors_per_node=32\n", |
| "cores_per_executor=7\n", |
| "\n", |
| "gluten_tpch_task_per_core=2\n", |
| "gluten_tpcds_task_per_core=4\n", |
| "spark_tpch_task_per_core=8\n", |
| "spark_tpcds_task_per_core=8\n", |
| "\n", |
| "# Physical memory on each worker node.\n", |
| "memory_per_node='1000g'\n", |
| "\n", |
| "# Offheap ratio. 0 to disable offheap for Spark.\n", |
| "# onheap:offheap = 1:2\n", |
| "spark_offheap_ratio=2.0\n", |
| "# onheap:offheap = 1:7\n", |
| "gluten_offheap_ratio=7.0\n", |
| "\n", |
| "# spark.io.compression.codec\n", |
| "spark_codec='lz4'\n", |
| "# spark.gluten.sql.columnar.shuffle.codec\n", |
| "gluten_codec='lz4'\n", |
| "# spark.gluten.sql.columnar.shuffle.codecBackend\n", |
| "gluten_codec_backend=''\n", |
| "# spark.gluten.sql.columnar.maxBatchSize\n", |
| "max_batch_size=4096\n", |
| "# spark.app.name, empty to use default name.\n", |
| "app_name=''\n", |
| "\n", |
| "# Hostname or IP to server for perf analysis. Able to connect via ssh.\n", |
| "server=''\n", |
| "\n", |
| "# Gluten home on server.\n", |
| "server_gluten_home='/home/sparkuser/gluten'\n", |
| "\n", |
| "# Specify the directory on perf analysis server. Usually a codename for this run.\n", |
| "base_dir=''\n", |
| "\n", |
| "# Proxy used to connect to server for perf analysis.\n", |
| "proxy=''\n", |
| "\n", |
| "# Emon event file for `emon -i`. Set to empty string '' if emon is unavailable.\n", |
| "# Supported emon events on platform can be verified via `emon -i emon.list`\n", |
| "emon_list=''\n", |
| "\n", |
| "# Whether to run perf analysis scripts. Only takes effect if server is set.\n", |
| "analyze_perf=False\n", |
| "\n", |
| "# List of email addresses to receive perf analysis results.\n", |
| "emails = []\n", |
| "\n", |
| "# Pull request number.\n", |
| "pr=''" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Resolve the absolute path of native_sql_initialize.ipynb (relative to the current working\n", |
| "# directory) with the shell's realpath, print it, then execute that notebook with %run.\n", |
| "# NOTE(review): later cells call names this notebook never defines (e.g. localhost,\n", |
| "# create_cntx, draw_sar) -- presumably they come from the initialization notebook; confirm.\n", |
| "initialize_ipynb = !realpath native_sql_initialize.ipynb\n", |
| "print(f\"Running notebook: {initialize_ipynb[0]}\\n\")\n", |
| "%run {initialize_ipynb[0]}" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Replace every 'localhost' placeholder in the worker list with the value of `localhost`;\n", |
| "# all other entries are kept as they are.\n", |
| "# NOTE(review): `localhost` is not defined in this notebook -- presumably provided by the\n", |
| "# initialization notebook; confirm.\n", |
| "newClients = [localhost if host == 'localhost' else host for host in clients]\n", |
| "clients = newClients\n", |
| "\n", |
| "# Same substitution for the perf analysis server.\n", |
| "server = localhost if server == 'localhost' else server" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Pick a default Spark app name for Gluten runs when none was supplied:\n", |
| "#   - 'PR<number>' when `pr` consists only of digits,\n", |
| "#   - 'main' when `pr` is empty,\n", |
| "#   - otherwise app_name is left empty.\n", |
| "# `pr` is normalised with str() for the digit check because a typed papermill parameter\n", |
| "# (e.g. `-p pr 1234`) arrives as an int, which has no .isdigit().\n", |
| "if not app_name and run_gluten:\n", |
| "    if str(pr).isdigit():\n", |
| "        app_name = f'PR{pr}'\n", |
| "    elif not pr:\n", |
| "        app_name = 'main'" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "%%javascript\n", |
| "// Ask the kernel to assign this notebook's file name to the Python variable `nb_name`.\n", |
| "// NOTE(review): depends on the front-end `IPython.notebook` object -- presumably only present in the classic Notebook UI; confirm.\n", |
| "IPython.notebook.kernel.execute('nb_name = \"' + IPython.notebook.notebook_name + '\"')" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "deletable": false, |
| "editable": false, |
| "run_control": { |
| "frozen": true |
| } |
| }, |
| "outputs": [], |
| "source": [ |
| "# Use PAPERMILL_OUTPUT_PATH as the notebook name.\n", |
| "# NOTE(review): PAPERMILL_OUTPUT_PATH is not defined in this notebook -- presumably injected by\n", |
| "# papermill (e.g. --inject-output-path / --inject-paths); confirm how this cell gets enabled.\n", |
| "nb_name=PAPERMILL_OUTPUT_PATH" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Application Level Configuration" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Turn the case-insensitive `workload` parameter into two boolean flags.\n", |
| "tpch_workload = workload.lower() == 'tpch'\n", |
| "tpcds_workload = workload.lower() == 'tpcds'\n", |
| "\n", |
| "# Anything other than 'tpch' or 'tpcds' is rejected.\n", |
| "if not (tpch_workload or tpcds_workload):\n", |
| "    raise ValueError(f\"Unknown workload: {workload}\")\n", |
| "\n", |
| "def gluten_conf_overwrite(conf):\n", |
| "    \"\"\"Apply the Gluten-specific overrides to ``conf`` and return the same object.\n", |
| "\n", |
| "    Reads the notebook-level names gluten_codec, gluten_codec_backend, max_batch_size,\n", |
| "    tpch_workload and tpcds_workload. ``conf`` must provide a chainable ``set(key, value)``.\n", |
| "    \"\"\"\n", |
| "    # Executor JVM flags, passed as a single option string.\n", |
| "    executor_java_options = '-XX:+UseParallelOldGC -XX:ParallelGCThreads=2 -XX:NewRatio=1 -XX:SurvivorRatio=1 -XX:+UseCompressedOops -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:ErrorFile=/home/sparkuser/logs/java/hs_err_pid%p.log'\n", |
| "\n", |
| "    # Parenthesised chain rather than backslash continuations: a trailing backslash after the\n", |
| "    # last call would silently splice in whatever line comes next.\n", |
| "    (\n", |
| "        conf.set('spark.gluten.sql.columnar.shuffle.codec', gluten_codec)\n", |
| "            .set('spark.gluten.sql.columnar.shuffle.codecBackend', gluten_codec_backend)\n", |
| "            .set('spark.gluten.sql.columnar.maxBatchSize', max_batch_size)\n", |
| "            .set('spark.executor.extraJavaOptions', executor_java_options)\n", |
| "            .set('spark.gluten.memory.overAcquiredMemoryRatio', '0')\n", |
| "    )\n", |
| "\n", |
| "    # Placeholders for workload-specific settings; both branches are currently no-ops.\n", |
| "    if tpch_workload:\n", |
| "        pass\n", |
| "    elif tpcds_workload:\n", |
| "        pass\n", |
| "    return conf\n", |
| "\n", |
| "def spark_conf_overwrite(conf):\n", |
| "    \"\"\"Apply the vanilla-Spark overrides to ``conf`` and return the same object.\n", |
| "\n", |
| "    Reads the notebook-level names spark_codec, tpch_workload and tpcds_workload and the\n", |
| "    HADOOP_HOME environment variable. ``conf`` must provide a chainable ``set(key, value)``.\n", |
| "    \"\"\"\n", |
| "    # Built once and used for both the executor and the YARN application master.\n", |
| "    # NOTE: if HADOOP_HOME is unset, os.getenv returns None and this becomes 'None/lib/native/'.\n", |
| "    hadoop_native_dir = f\"{os.getenv('HADOOP_HOME')}/lib/native/\"\n", |
| "\n", |
| "    # Parenthesised chain rather than backslash continuations: a trailing backslash after the\n", |
| "    # last call would silently splice in whatever line comes next.\n", |
| "    (\n", |
| "        conf.set('spark.io.compression.codec', spark_codec)\n", |
| "            .set('spark.executorEnv.LD_LIBRARY_PATH', hadoop_native_dir)\n", |
| "            .set('spark.yarn.appMasterEnv.LD_LIBRARY_PATH', hadoop_native_dir)\n", |
| "    )\n", |
| "\n", |
| "    # Placeholders for workload-specific settings; both branches are currently no-ops.\n", |
| "    if tpch_workload:\n", |
| "        pass\n", |
| "    elif tpcds_workload:\n", |
| "        pass\n", |
| "    return conf\n", |
| "\n", |
| "def app_conf_overwrite(conf):\n", |
| "    \"\"\"Apply the Gluten overrides when ``run_gluten`` is truthy, else the Spark overrides.\"\"\"\n", |
| "    overwrite = gluten_conf_overwrite if run_gluten else spark_conf_overwrite\n", |
| "    return overwrite(conf)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Run Workload" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Config and clean pagecache before each run\n", |
| "# Config and clean pagecache before each run\n", |
| "# All three helpers receive the worker list; config_pagecache also gets the run_gluten flag.\n", |
| "# NOTE(review): the helpers are not defined in this notebook -- presumably from the initialization notebook; confirm.\n", |
| "config_pagecache(clients, run_gluten)\n", |
| "dropcache(clients)\n", |
| "print_kernel_params(clients)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Create SparkSession\n", |
| "# The call returns four values, unpacked as sc, spark, appid and test_tpc; test_tpc drives the\n", |
| "# monitoring, run and reporting cells below. app_conf_overwrite is passed as a callable.\n", |
| "sc, spark, appid, test_tpc=create_cntx(run_gluten, workload, app_conf_overwrite, server, base_dir, nb_name, app_name)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Only for Gluten runs: call config_mem_cgroup with the worker list.\n", |
| "# NOTE(review): presumably configures memory cgroups on each worker -- confirm against the helper.\n", |
| "if run_gluten:\n", |
| "    config_mem_cgroup(clients)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Start monitoring for the worker list; emon_list is forwarded as given in the parameters cell\n", |
| "# ('' when emon is unavailable).\n", |
| "test_tpc.start_monitor(clients, emon_list=emon_list)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Run the selected workload with explain and per-query result printing off and load_table on.\n", |
| "test_tpc.power_run(explain=False, print_result=False, load_table=True)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Counterpart of the start_monitor call above, for the same worker list.\n", |
| "test_tpc.stop_monitor(clients)" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Optional perf analysis, gated by the analyze_perf parameter (which, per the parameters cell,\n", |
| "# only takes effect if `server` is set).\n", |
| "if analyze_perf:\n", |
| "    test_tpc.run_perf_analysis(server_gluten_home, disk_dev, nic_dev, proxy, emails, pr)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Show Performance" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "# Print the results collected by test_tpc.\n", |
| "test_tpc.print_result()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "code_folding": [] |
| }, |
| "outputs": [], |
| "source": [ |
| "# Call draw_sar once per worker, passing the app id, test_tpc.result as qtime, and the\n", |
| "# configured disk and network device lists.\n", |
| "# NOTE(review): presumably renders sar charts for each worker -- confirm against the helper.\n", |
| "for client in clients:\n", |
| "    draw_sar(appid, qtime=test_tpc.result, disk_dev=disk_dev, nic_dev=nic_dev, client=client)" |
| ] |
| } |
| ], |
| "metadata": { |
| "celltoolbar": "Tags", |
| "hide_input": false, |
| "kernelspec": { |
| "display_name": "Python 3 (ipykernel)", |
| "language": "python", |
| "name": "python3" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 3 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython3", |
| "version": "3.10.12" |
| }, |
| "nbTranslate": { |
| "displayLangs": [ |
| "*" |
| ], |
| "hotkey": "alt-t", |
| "langInMainMenu": true, |
| "sourceLang": "en", |
| "targetLang": "fr", |
| "useGoogleTranslate": true |
| }, |
| "toc": { |
| "base_numbering": 1, |
| "nav_menu": {}, |
| "number_sections": true, |
| "sideBar": false, |
| "skip_h1_title": false, |
| "title_cell": "Table of Contents", |
| "title_sidebar": "Contents", |
| "toc_cell": false, |
| "toc_position": { |
| "height": "428.672px", |
| "left": "1339.91px", |
| "top": "374.297px", |
| "width": "456.969px" |
| }, |
| "toc_section_display": true, |
| "toc_window_display": true |
| }, |
| "varInspector": { |
| "cols": { |
| "lenName": 16, |
| "lenType": 16, |
| "lenVar": 40 |
| }, |
| "kernels_config": { |
| "python": { |
| "delete_cmd_postfix": "", |
| "delete_cmd_prefix": "del ", |
| "library": "var_list.py", |
| "varRefreshCmd": "print(var_dic_list())" |
| }, |
| "r": { |
| "delete_cmd_postfix": ") ", |
| "delete_cmd_prefix": "rm(", |
| "library": "var_list.r", |
| "varRefreshCmd": "cat(var_dic_list()) " |
| } |
| }, |
| "types_to_exclude": [ |
| "module", |
| "function", |
| "builtin_function_or_method", |
| "instance", |
| "_Feature" |
| ], |
| "window_display": false |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 4 |
| } |