In [1]:
%pylab inline
In [43]:
import psycopg2
import sqlalchemy
from sqlalchemy import create_engine
import pandas as pd
from pandas.io import sql
from pandas.io.sql import read_sql
import datetime as dt
import numpy as np
In [44]:
engine = create_engine('postgresql://postgres:password@localhost:5433/cdrstats-billing')
In [45]:
result = engine.execute("select * from events")
result
Out[45]:
In [84]:
query = """\
SELECT
date(starting_date) as date,
count(*) as count
FROM events
GROUP BY 1"""
df = read_sql(query, engine)
In [85]:
df.head()
Out[85]:
In [86]:
df.plot()
Out[86]:
In [87]:
df.date = pd.to_datetime(df.date)
df.set_index('date', inplace=True)
df = df.reindex(pd.date_range(min(df.index), max(df.index)), fill_value=0)
In [88]:
pd.date_range(min(df.index), max(df.index))
Out[88]:
In [89]:
df.plot()
Out[89]:
In [90]:
import matplotlib.pyplot as plt
import numpy as np
In [91]:
x = np.linspace(0, 3*np.pi, 500)
plt.plot(x, np.sin(x**2))
plt.title('A simple chirp');
In [98]:
a = np.arange(100, 150)
a24 = np.random.choice(a, size=23)
a24
Out[98]:
In [99]:
s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D'))
s
Out[99]:
In [100]:
type(s)
Out[100]:
In [101]:
td = pd.Series([ pd.Timedelta(hours=i) for i in range(23) ])
td
Out[101]:
In [185]:
tda = pd.Series(a24)
data = {'hour': td, 'value': tda}
df = pd.DataFrame(data=data)
df.head()
Out[185]:
In [147]:
td = pd.Series([ pd.Timedelta(hours=i) for i in range(24) ])
td.head()
Out[147]:
In [148]:
df.head()
df.value.tail()
Out[148]:
In [166]:
# data2 = df.tail()
data2 = df
# data2
We will now update the NaN values. In order to update a serie correctly, you need to apply update with an other series. sometimes it's confusing if you are working with serie or dataframe, use type() to find out
In [196]:
ser = data2['value']
index = ser[ser.isnull()].index
df_upd = pd.DataFrame(np.random.randn(len(index)), index=index, columns=['value'])
ser.update(df_upd.value)
type(df_upd.value)
ser
# df_upd
Out[196]:
In [184]:
# df.add(td, axis=df.hour, fill_value=1)
In [ ]: