In [7]:
import os
import sys
sys.path.append('..')
from util import db_io
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from util.misc import iso_date, iso_week, iso_year
%matplotlib inline
In [2]:
con = sqlite3.connect('db/telemetry.db', detect_types=sqlite3.PARSE_DECLTYPES)
cursor = con.cursor()
def read_df_none(con):
df = pd.read_sql_query('SELECT timestamp, type, value FROM telemetry', con,
index_col='timestamp')
return df
print "No datetime conversion"
%timeit read_df_none(con)
def read_df_pd(con):
df = pd.read_sql_query('SELECT timestamp, type, value FROM telemetry', con,
index_col='timestamp',
parse_dates={'timestamp': {'unit': 's'}})
df.tz_localize('UTC', copy=False) \
.tz_convert('Europe/Amsterdam', copy=False)
return df
print "datetime via sql_query"
%timeit read_df_pd(con)
# def read_df_iter(con):
# pass
# %timeit read_df_iter(con)
def read_df_np(con):
df = pd.read_sql_query('SELECT timestamp, type, value FROM telemetry', con) # WHERE id<1000000
df.timestamp = df.timestamp.astype("datetime64[s]")
df.set_index('timestamp', inplace=True)
df.tz_localize('UTC', copy=False) \
.tz_convert('Europe/Amsterdam', copy=False)
return df
print "datetime via numpy"
%timeit read_df_np(con)
In [11]:
fig.autofmt_xdate(False)
df = read_df_np(con)
groups = df.groupby(['type']); groups.groups.keys()
color = ['r', 'g', 'b']
fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True)
for n in range(3):
groups.get_group(n+1)['value'].resample('6min').plot(ax=axes[n], style=color[n]);
axes[n].set_xlabel('')
In [15]:
group = df.value.groupby(df['type']).get_group(1)
#slice = df.value.loc[df.type==1]
%timeit group.resample('6min').plot()
##%timeit slice.resample('6min').plot()
In [43]:
# without conversion to datetime64
df = read_df_none(con)
%timeit max(df.index)
# With conversion to datetime64
df = read_df_np(con)
%timeit max(df.index)
%timeit max(df.index.astype(np.int64)/1e9)
In [42]:
## Doesnt: work
# df.index.astype(np.double)//1e9
## Does work:
%timeit df.index.astype(np.int64).astype(np.double)/1e9
%timeit df.index.astype(np.int64).astype(np.double)//1e9
%timeit df.index.astype(np.int64)/1e9
In [10]:
from sqlalchemy import create_engine
# Create your connection.
engine = create_engine('sqlite:///../local/db/telemetry.db')
# --> Slower than simple sqlite3!
In [44]:
# NOTE: Raspberry might be IO bound, rather than CPU bound, when reading in
# uncompressed tables.
df = pd.read_sql_query('SELECT timestamp, type, value FROM telemetry', con)
df.timestamp = df.timestamp.astype("datetime64[s]")
print df.head(5)
def test_sql_write(df):
if os.path.exists('../local/db/test.sql'):
os.remove('../local/db/test.sql')
with sqlite3.connect('../local/db/test.sql') as sql_db:
df.to_sql(name='telemetry', con=sql_db)
def test_sql_read():
with sqlite3.connect('../local/db/telemetry.db') as sql_db:
df = pd.read_sql_query("select * from telemetry", sql_db)
df.set_index('timestamp', inplace=True)
def test_hdf_fixed_write(df):
df.to_hdf('../local/db/test_fixed.hdf','test',mode='w')
def test_hdf_fixed_read():
df = pd.read_hdf('../local/db/test_fixed.hdf','test')
df.set_index('timestamp', inplace=True)
def test_hdf_fixed_write_compress(df):
df.to_hdf('../local/db/test_fixed_compress.hdf','test',mode='w',complib='blosc')
def test_hdf_fixed_read_compress():
df = pd.read_hdf('../local/db/test_fixed_compress.hdf','test')
df.set_index('timestamp', inplace=True)
print 'SQL'
%timeit test_sql_write(df)
%timeit test_sql_read()
print 'HDF'
%timeit test_hdf_fixed_write(df)
%timeit test_hdf_fixed_read()
print 'HDF_compressed'
%timeit test_hdf_fixed_write_compress(df)
%timeit test_hdf_fixed_read_compress()
In [23]:
t = pd.datetime.now()-pd.Timedelta('29 hours')
print t.isocalendar()[1]
print iso_week(t)
In [37]:
(dt.datetime.now()-dt.timedelta(hours=0)).isocalendar()[1]
Out[37]:
In [14]:
iso = iso_date(offset=29)
print '{iso[0]}_w{iso[1]:02}'.format(iso=iso)