In [1]:

    
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [43]:

    
import psycopg2
import sqlalchemy
from sqlalchemy import create_engine 
import pandas as pd
from pandas.io import sql
from pandas.io.sql import read_sql
import datetime as dt
import numpy as np



In [44]:

    
engine = create_engine('postgresql://postgres:password@localhost:5433/cdrstats-billing')



In [45]:

    
result = engine.execute("select * from events")
result









    Out[45]:





<sqlalchemy.engine.result.ResultProxy at 0x64dd9d0>



In [84]:

    
query = """\
SELECT 
    date(starting_date) as date, 
    count(*) as count
FROM events
GROUP BY 1"""
df = read_sql(query, engine)



In [85]:

    
df.head()



In [86]:

    
df.plot()









    Out[86]:





<matplotlib.axes._subplots.AxesSubplot at 0x65f4250>



In [87]:

    
df.date = pd.to_datetime(df.date)
df.set_index('date', inplace=True)
df = df.reindex(pd.date_range(min(df.index), max(df.index)), fill_value=0)



In [88]:

    
pd.date_range(min(df.index), max(df.index))









    Out[88]:





<class 'pandas.tseries.index.DatetimeIndex'>
[2015-02-24, ..., 2015-03-23]
Length: 28, Freq: D, Timezone: None



In [89]:

    
df.plot()









    Out[89]:





<matplotlib.axes._subplots.AxesSubplot at 0x6ca5790>



In [90]:

    
import matplotlib.pyplot as plt
import numpy as np



In [91]:

    
x = np.linspace(0, 3*np.pi, 500)
plt.plot(x, np.sin(x**2))
plt.title('A simple chirp');



In [98]:

    
a = np.arange(100, 150)
a24 = np.random.choice(a, size=23)
a24









    Out[98]:





array([112, 130, 144, 138, 118, 147, 114, 138, 101, 116, 122, 130, 103,
       105, 147, 145, 109, 112, 136, 117, 103, 126, 106])



In [99]:

    
s = pd.Series(pd.date_range('2012-1-1', periods=3, freq='D'))
s









    Out[99]:





0   2012-01-01
1   2012-01-02
2   2012-01-03
dtype: datetime64[ns]



In [100]:

    
type(s)









    Out[100]:





pandas.core.series.Series



In [101]:

    
td = pd.Series([ pd.Timedelta(hours=i) for i in range(23) ])
td









    Out[101]:





0    00:00:00
1    01:00:00
2    02:00:00
3    03:00:00
4    04:00:00
5    05:00:00
6    06:00:00
7    07:00:00
8    08:00:00
9    09:00:00
10   10:00:00
11   11:00:00
12   12:00:00
13   13:00:00
14   14:00:00
15   15:00:00
16   16:00:00
17   17:00:00
18   18:00:00
19   19:00:00
20   20:00:00
21   21:00:00
22   22:00:00
dtype: timedelta64[ns]



In [185]:

    
tda = pd.Series(a24)
data = {'hour': td, 'value': tda}
df = pd.DataFrame(data=data)
df.head()



In [147]:

    
td = pd.Series([ pd.Timedelta(hours=i) for i in range(24) ])
td.head()









    Out[147]:





0   00:00:00
1   01:00:00
2   02:00:00
3   03:00:00
4   04:00:00
dtype: timedelta64[ns]



In [148]:

    
df.head()
df.value.tail()









    Out[148]:





19    117
20    103
21    126
22    106
23    NaN
Name: value, dtype: float64



In [166]:

    
# data2 = df.tail()
data2 = df
# data2

We will now update the NaN values. In order to update a serie correctly, you need to apply update with an other series. sometimes it's confusing if you are working with serie or dataframe, use type() to find out



In [196]:

    
ser = data2['value']
index = ser[ser.isnull()].index
df_upd = pd.DataFrame(np.random.randn(len(index)), index=index, columns=['value'])
ser.update(df_upd.value)
type(df_upd.value)
ser
# df_upd









    Out[196]:





0     112.000000
1     130.000000
2     144.000000
3     138.000000
4     118.000000
5     147.000000
6     114.000000
7     138.000000
8     101.000000
9     116.000000
10    122.000000
11    130.000000
12    103.000000
13    105.000000
14    147.000000
15    145.000000
16    109.000000
17    112.000000
18    136.000000
19    117.000000
20    103.000000
21    126.000000
22    106.000000
23      0.713658
Name: value, dtype: float64



In [184]:

    
# df.add(td, axis=df.hour, fill_value=1)



In [ ]:

	date	count
0	2015-03-17	1
1	2015-02-27	3
2	2015-02-28	1
3	2015-03-16	1
4	2015-03-06	1

	hour	value
0	00:00:00	112
1	01:00:00	130
2	02:00:00	144
3	03:00:00	138
4	04:00:00	118