In [6]:
%pylab inline
Let's import psycopg2, sqlalchemy, pandas, numpy and datetime
In [1]:
import datetime as dt
import os

import numpy as np
import pandas as pd
import psycopg2
import sqlalchemy
from sqlalchemy import create_engine
from pandas.io import sql
In [2]:
print "numpy: ", np.__version__
print "pandas: ", pd.__version__
print "sqlalchamy: ", sqlalchemy.__version__
print "psycopg2: ", psycopg2.__version__
In [3]:
# Connection to the PostgreSQL billing database.
# NOTE(security): the connection URL (user/password) was hard-coded; read
# it from the environment instead, falling back to the previous value so
# existing local setups keep working. Do not commit real credentials.
#conn = psycopg2.connect("dbname=test user=postgres")
engine = create_engine(os.environ.get(
    'CDRSTATS_DB_URL',
    'postgresql://postgres:password@localhost:5433/cdrstats-billing'))
In [4]:
result = engine.execute("select * from voip_switch")
for row in result:
print "voip_switch:", row['name'], row['ipaddress']
In [5]:
# Pull the last 48 hours (plus a 2-hour lookahead) of hourly CDR
# aggregates. generate_series() builds a complete hourly grid so hours
# with no traffic still produce a row; the LEFT OUTER JOIN attaches the
# aggregated metrics and COALESCE turns the resulting NULLs into zeros.
# Uses the public pd.read_sql API instead of the pandas.io.sql module.
df = pd.read_sql("""SELECT
dateday,
switch_id,
coalesce(nbcalls,0) AS nbcalls,
coalesce(duration,0) AS duration,
coalesce(billsec,0) AS billsec,
coalesce(buy_cost,0) AS buy_cost,
coalesce(sell_cost,0) AS sell_cost
FROM
generate_series(
date_trunc('hour', current_timestamp - interval '48' hour),
date_trunc('hour', current_timestamp + interval '2' hour),
'1 hour')
as dateday
LEFT OUTER JOIN (
SELECT
date_trunc('hour', starting_date) as dayhour,
switch_id as switch_id,
SUM(nbcalls) as nbcalls,
SUM(duration) as duration,
SUM(billsec) as billsec,
SUM(buy_cost) as buy_cost,
SUM(sell_cost) as sell_cost
FROM matv_voip_cdr_aggr_hour
WHERE
starting_date > date_trunc('hour', current_timestamp - interval '48' hour) and
starting_date <= date_trunc('hour', current_timestamp + interval '2' hour)
GROUP BY dayhour, switch_id
) results
ON (dateday = results.dayhour)""", engine)
df.head()
Out[5]:
In [6]:
# Parse the dateday column into proper pandas Timestamps.
df['dateday'] = pd.to_datetime(df['dateday'])
df['dateday']
Out[6]:
In [7]:
# Promote the timestamp column to the index. Explicit reassignment is
# preferred over inplace=True (no performance benefit, and the
# reassignment form is idempotent and chain-friendly).
df = df.set_index('dateday')
In [8]:
# Inspect the first two index timestamps to confirm the hourly spacing.
df.index[0], df.index[1]
Out[8]:
In [9]:
# Zero-fill any timestamps missing from the index. The data is hourly,
# so the reindexing grid must be hourly too: pd.date_range defaults to
# daily frequency ('D'), which would discard every non-midnight row.
df = df.reindex(pd.date_range(df.index.min(), df.index.max(), freq='H'),
                fill_value=0)
In [13]:
# Preview the call-count column (head() avoids dumping the full series
# into the notebook output).
df['nbcalls'].head()
Out[13]:
In [12]:
# Spot-check the first value of the zero-filled series.
df['nbcalls'].iloc[0]
Out[12]:
In [14]:
# Inspect the index name(s). NOTE(review): the reindex above replaced the
# 'dateday' index with a date_range-generated one — confirm whether the
# name survived; downstream pivots reference 'dateday'.
df.index.names
Out[14]:
In [18]:
# Pivot the hourly metrics into one column per switch. Two fixes:
# - pd.pivot_table is the public API (pd.tools.pivot is a private module,
#   removed in modern pandas);
# - pivot columns are switch_id only: 'dateday' is already the row index,
#   so listing it under columns as well was redundant and broke the pivot
#   (the later cell with columns=['switch_id'] shows the intended form).
table = pd.pivot_table(df,
                       values=['nbcalls', 'duration', 'billsec', 'buy_cost', 'sell_cost'],
                       index=['dateday'],
                       columns=['switch_id'], fill_value=0)
table
Out[18]:
In [19]:
df = sql.read_sql("""SELECT
dateday,
switch_id,
coalesce(nbcalls,0) AS nbcalls,
coalesce(duration,0) AS duration,
coalesce(billsec,0) AS billsec,
coalesce(buy_cost,0) AS buy_cost,
coalesce(sell_cost,0) AS sell_cost
FROM
generate_series(
date_trunc('hour', current_timestamp - interval '50' hour),
date_trunc('hour', current_timestamp - interval '30' hour),
'1 hour')
as dateday
LEFT OUTER JOIN (
SELECT
date_trunc('hour', starting_date) as dayhour,
switch_id as switch_id,
SUM(nbcalls) as nbcalls,
SUM(duration) as duration,
SUM(billsec) as billsec,
SUM(buy_cost) as buy_cost,
SUM(sell_cost) as sell_cost
FROM matv_voip_cdr_aggr_hour
WHERE
starting_date > date_trunc('hour', current_timestamp - interval '50' hour) and
starting_date <= date_trunc('hour', current_timestamp - interval '30' hour)
GROUP BY dayhour, switch_id
) results
ON (dateday = results.dayhour)""", engine)
print df
totals = df.groupby('switch_id').size()
print totals
In [25]:
# Rows generated purely by the hourly grid have no switch attached; fill
# their NaN switch_ids with 0. Direct column assignment is clearer (and
# equivalent) to the original df.update(df.switch_id.fillna(0)) round-trip.
df['switch_id'] = df['switch_id'].fillna(0)
df
Out[25]:
In [26]:
# Pivot hourly metrics to one column per switch, zero-filling gaps.
# pd.pivot_table is the public API; pd.tools.pivot is a private module
# that was removed in modern pandas.
table = pd.pivot_table(df,
                       values=['nbcalls', 'duration', 'billsec', 'buy_cost', 'sell_cost'],
                       index=['dateday'],
                       columns=['switch_id'],
                       fill_value=0)
table
Out[26]:
In [15]:
# Check the dtype of each pivoted (metric, switch) column.
table.dtypes
Out[15]:
In [16]:
# Per-switch hourly call counts (one column per switch_id).
table.nbcalls
Out[16]:
In [17]:
# Summary statistics of hourly call counts, per switch.
table.nbcalls.describe()
Out[17]:
In [142]:
# Full pivoted table (all five metrics, one column group per metric).
table
Out[142]:
In [143]:
# Summary statistics across every metric/switch column.
table.describe()
Out[143]:
Transpose the pivoted table so that each switch becomes a row
In [13]:
# Move the dateday index level into the column axis (a transposed view
# of the call counts).
table.nbcalls.unstack(0)
Out[13]:
In [40]:
# Transposed call-count view: one row per switch, one column per hour.
ntable = table['nbcalls'].transpose()
ntable
Out[40]:
In [44]:
# After the transpose the columns are the hourly timestamps.
ntable.columns
Out[44]:
In [69]:
# Same transposed view computed inline (duplicates the ntable cell above).
table['nbcalls'].T
Out[69]:
In [ ]:
In [83]:
# Derive the switch column labels once instead of calling .tolist() four
# times on the same columns object.
# NOTE: assumes at least two switches are present in the pivot.
switch_columns = table.nbcalls.columns.tolist()
first_column = switch_columns[0]
second_column = switch_columns[1]
num_switch = len(switch_columns)
(first_column, second_column, num_switch, switch_columns)
Out[83]:
In [87]:
# Per-hour series for the first two switches. Use the column variables
# derived above rather than hard-coding the ids 1.0 and 2.0, which only
# happen to match the current database contents.
ntable.loc[first_column], ntable.loc[second_column]
Out[87]:
In [91]:
# Print each hourly value for the second switch, one per line.
for i in ntable.loc[second_column]: print i
In [90]:
# The same hourly values as a plain Python list.
list(ntable.loc[second_column])
Out[90]:
In [93]:
# Walk the hourly index of the pivoted table, one timestamp per line.
for i in table.index: print i
In [92]:
# Raw numpy array backing the per-switch call counts.
table.nbcalls.values
Out[92]:
In [17]:
# Running totals over time for every metric/switch column.
# NOTE(review): this overwrites `table` in place — re-running the cell
# compounds the cumsum, and every later cell now sees cumulative values
# instead of the raw pivot. Consider a new name (e.g. table_cum).
table = table.cumsum()
In [18]:
import matplotlib.pyplot as plt
In [19]:
# Plot the cumulative totals. table.plot() creates its own figure, so the
# original leading plt.figure() only produced an empty extra figure
# window; attach the legend to the axes that were actually drawn on.
ax = table.plot()
ax.legend(loc='best');
Out[19]:
In [37]:
# The switch_id labels of the pivoted call-count columns.
table.nbcalls.columns.tolist()
Out[37]: