| { |
| "cells": [ |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "# Count DB Tables Rows" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Getting spark session" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 1, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "from marvin_python_toolbox.common.data_source_provider import get_spark_session" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 2, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "spark = get_spark_session(enable_hive=True)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Getting all hive local dbs" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 3, |
| "metadata": {}, |
| "outputs": [], |
| "source": [ |
| "dbs = spark.sql(\"show databases\").collect()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 4, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "name": "stdout", |
| "output_type": "stream", |
| "text": [ |
| "Connecting with core db ...\n", |
| " bsc_product [14953204]\n", |
| " mis_product_hierarchy [5796251]\n", |
| "Connecting with default db ...\n", |
| "Connecting with marvin db ...\n", |
| " simple_product_classification_engine_core_bsc_product_120374ac16e58cdf8f0c050d0f698addadf2c41c [14953204]\n", |
| " simple_product_classification_engine_core_mis_product_hierarchy_0b8069f3ba31eedca44b30bc8a61130f5776d119 [5796251]\n" |
| ] |
| } |
| ], |
| "source": [ |
| "for db in dbs:\n", |
| " db_name = db['databaseName']\n", |
| " print(\"Connecting with {} db ...\".format(db_name)) \n", |
| " spark.sql(\"use {}\".format(db_name))\n", |
| " tables = spark.sql(\"show tables\").collect()\n", |
| " \n", |
| " for table in tables:\n", |
| " table_name = table['tableName']\n", |
| " count = spark.sql(\"select 1 from {}\".format(table_name)).count()\n", |
| " print \" {} [{}]\".format(table_name, count)" |
| ] |
| }, |
| { |
| "cell_type": "markdown", |
| "metadata": {}, |
| "source": [ |
| "## Stoping and realease spark session" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": { |
| "collapsed": true |
| }, |
| "outputs": [], |
| "source": [ |
| "spark.stop()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": 6, |
| "metadata": {}, |
| "outputs": [ |
| { |
| "data": { |
| "text/plain": [ |
| "14953204" |
| ] |
| }, |
| "execution_count": 6, |
| "metadata": {}, |
| "output_type": "execute_result" |
| } |
| ], |
| "source": [ |
| "spark.sql(\"select * from core.bsc_product\").count()" |
| ] |
| }, |
| { |
| "cell_type": "code", |
| "execution_count": null, |
| "metadata": {}, |
| "outputs": [], |
| "source": [] |
| } |
| ], |
| "metadata": { |
| "kernelspec": { |
| "display_name": "Python 2", |
| "language": "python", |
| "name": "python2" |
| }, |
| "language_info": { |
| "codemirror_mode": { |
| "name": "ipython", |
| "version": 2 |
| }, |
| "file_extension": ".py", |
| "mimetype": "text/x-python", |
| "name": "python", |
| "nbconvert_exporter": "python", |
| "pygments_lexer": "ipython2", |
| "version": "2.7.6" |
| } |
| }, |
| "nbformat": 4, |
| "nbformat_minor": 1 |
| } |