blob: a64928801df15ceb97f70afc9744e52285c8aa4d [file] [log] [blame]
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Count DB Tables Rows"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting spark session"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from marvin_python_toolbox.common.data_source_provider import get_spark_session"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"spark = get_spark_session(enable_hive=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting all hive local dbs"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"dbs = spark.sql(\"show databases\").collect()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Connecting with core db ...\n",
" bsc_product [14953204]\n",
" mis_product_hierarchy [5796251]\n",
"Connecting with default db ...\n",
"Connecting with marvin db ...\n",
" simple_product_classification_engine_core_bsc_product_120374ac16e58cdf8f0c050d0f698addadf2c41c [14953204]\n",
" simple_product_classification_engine_core_mis_product_hierarchy_0b8069f3ba31eedca44b30bc8a61130f5776d119 [5796251]\n"
]
}
],
"source": [
"for db in dbs:\n",
" db_name = db['databaseName']\n",
" print(\"Connecting with {} db ...\".format(db_name)) \n",
" spark.sql(\"use {}\".format(db_name))\n",
" tables = spark.sql(\"show tables\").collect()\n",
" \n",
" for table in tables:\n",
" table_name = table['tableName']\n",
" count = spark.sql(\"select 1 from {}\".format(table_name)).count()\n",
" print \" {} [{}]\".format(table_name, count)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Stoping and realease spark session"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"spark.stop()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"14953204"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"spark.sql(\"select * from core.bsc_product\").count()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 1
}