Count DB Table Rows

Getting the Spark session


In [1]:
from marvin_python_toolbox.common.data_source_provider import get_spark_session

In [2]:
# Create the Spark session shared by every cell below.
# NOTE(review): enable_hive=True presumably turns on Hive support so that the
# metastore databases are visible to spark.sql - confirm in marvin_python_toolbox.
spark = get_spark_session(enable_hive=True)

Getting all local Hive databases


In [3]:
# Ask Spark for the list of databases and pull the result rows to the driver;
# each row is later indexed by its 'databaseName' field.
databases_df = spark.sql("show databases")
dbs = databases_df.collect()

In [4]:
# Walk every database returned by "show databases" and print the row count of
# each table it contains.
for db in dbs:
    db_name = db['databaseName']
    print("Connecting with {} db ...".format(db_name))
    # Make db_name the current database so that "show tables" lists its tables
    # and the unqualified table names used below resolve against it.
    spark.sql("use {}".format(db_name))
    tables = spark.sql("show tables").collect()

    for table in tables:
        table_name = table['tableName']
        # Only the number of rows matters, so a constant is projected instead
        # of the table's columns.
        count = spark.sql("select 1 from {}".format(table_name)).count()
        # Use the print() call form, consistent with the print above; the
        # print statement form is a SyntaxError on Python 3.
        print("   {} [{}]".format(table_name, count))


Connecting with core db ...
   bsc_product [14953204]
   mis_product_hierarchy [5796251]
Connecting with default db ...
Connecting with marvin db ...
   simple_product_classification_engine_core_bsc_product_120374ac16e58cdf8f0c050d0f698addadf2c41c [14953204]
   simple_product_classification_engine_core_mis_product_hierarchy_0b8069f3ba31eedca44b30bc8a61130f5776d119 [5796251]

Stopping and releasing the Spark session


In [ ]:
# Stop the session once all queries are done. Keep this as the last executed
# cell: spark.sql(...) calls issued after stop() can no longer run.
spark.stop()

In [6]:
# Sanity check: count core.bsc_product directly, to compare with the figure
# printed by the per-table loop.
# NOTE(review): this cell sits below the spark.stop() cell but was executed
# (In [6]) while the session was still alive. On "Restart & Run All" it would
# run against a stopped session - move it above the stop cell.
bsc_product_df = spark.sql("select * from core.bsc_product")
bsc_product_df.count()


Out[6]:
14953204

In [ ]: