In [128]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
import data_access

%matplotlib inline

In [121]:
conn = sqlite3.connect(data_access.get_database())

Examine Instances


In [122]:
query = 'SELECT DISTINCT instance_name FROM check_results'

instances = pd.read_sql(query, conn)
instances


Out[122]:
instance_name
0 had-data-001

In [130]:
query = ('SELECT strftime("%Y-%m-%d", run_start_timestamp) as yr_mon_day, SUM(check_violation_cnt) as tot '
            'FROM check_results '
            'WHERE instance_name="{}" '
            'AND check_type NOT LIKE "setup_%" '
            'GROUP BY yr_mon_day '
            'ORDER BY yr_mon_day')
instance_dfs = []
for iname in instances['instance_name']:
    res = pd.read_sql(query.format(iname), conn)
    instance_dfs.append(res)

In [131]:
for df in instance_dfs:
    df.plot()


Examine Databases


In [114]:
query = 'SELECT DISTINCT database_name FROM check_results'

databases = pd.read_sql(query, conn)
databases


Out[114]:
database_name
0 rumprod

In [142]:
query = ('SELECT strftime("%Y-%m-%d", run_start_timestamp) as yr_mon_day, SUM(check_violation_cnt) as tot '
            'FROM check_results '
            'WHERE instance_name="{}" '
            'AND database_name="{}" '
            'AND check_type NOT LIKE "setup_%" '
            'GROUP BY yr_mon_day '
            'ORDER BY yr_mon_day')
database_dfs = []
for dname in databases['database_name']:
    res = pd.read_sql(query.format('had-data-001', dname), conn)
    database_dfs.append(res)
database_dfs


Out[142]:
[   yr_mon_day      tot
 0  2015-09-23     1086
 1  2015-09-24      357
 2  2015-09-25     5219
 3  2015-09-26  1952340]

In [143]:
for df in database_dfs:
    df.plot()



In [ ]:


In [ ]: