notebook.community

Edit and run



In [1]:

    
import sys
import pandas
import datetime
import bigtempo.core as core
import bigtempo.tester as tester



In [2]:

    
# Short alias used to build the date-range boundaries passed to process() in later cells.
dt = datetime.datetime
# Symbol names; in the visible cells this list is only referenced by the
# commented-out export loop at the end of the notebook.
cities = ['CITY_A', 'CITY_B']



In [3]:

    
# Engine instance on which every datasource in this notebook is registered and queried.
engine = core.DatasourceEngine()



In [4]:

    
import os


def _get_test_data_dir():
    data_dir = os.path.abspath('acceptance_tests_data')
    return data_dir if not 'ipy-notebooks' in data_dir else os.path.abspath(os.path.join('..', 'bigtempo', 'tests', 'acceptance_tests_data'))


def _get_test_data_filename(reference, symbol=None):
    symbol_part = '' if not symbol else '{%s}' % symbol
    return '%s%s.csv' % (reference, symbol_part)


def _get_test_data_filepath(reference, symbol=None):
    """Return the path of a reference's CSV file inside the test data directory."""
    directory = _get_test_data_dir()
    filename = _get_test_data_filename(reference, symbol)
    return os.path.join(directory, filename)



In [5]:

    
@engine.datasource('SAMPLE',
                   tags=['SAMPLE_IN', 'DAILY'],
                   frequency='B')
class Sample(object):
    """Datasource 'SAMPLE': serves the sample data stored in a per-symbol CSV file."""

    def evaluate(self, context, symbol, start=None, end=None):
        """Load the complete CSV file for ``symbol`` as a DataFrame.

        ``context``, ``start`` and ``end`` are not used here; the whole file
        is returned regardless of the requested range.
        """
        # DataFrame.from_csv was deprecated and later removed from pandas.
        # read_csv with index_col=0 and parse_dates=True reproduces its
        # defaults (first column as a date-parsed index) on old and new pandas.
        return pandas.read_csv(_get_test_data_filepath('SAMPLE', symbol),
                               index_col=0,
                               parse_dates=True)



In [6]:

    
# Show every datasource registered so far (only SAMPLE at this point).
engine.select().all()









    Out[6]:





<selection 183543660 currently-with="[
    "SAMPLE"
]">



In [7]:

    
# Evaluate SAMPLE for symbol CITY_A over calendar year 2001 and plot the result.
engine.get("SAMPLE").process('CITY_A', dt(2001, 1, 1), dt(2001, 12, 31)).plot()









    Out[7]:





<matplotlib.axes.AxesSubplot at 0xaf16aec>



In [8]:

    
@engine.datasource('WEEKLY_SAMPLE',
                   dependencies=['SAMPLE'],
                   tags=['SAMPLE_IN', 'WEEKLY'],
                   frequency='W-FRI')
class Weekly(object):
    """Datasource 'WEEKLY_SAMPLE': the SAMPLE dependency resampled to 'W-FRI'."""

    def evaluate(self, context, symbol, start=None, end=None):
        """Resample SAMPLE to 'W-FRI', keeping the last element of each bin."""

        def last_of_bin(values):
            return values[-1]

        daily = context.dependencies('SAMPLE')
        # NOTE(review): `how=` is the legacy resample API (deprecated in
        # pandas 0.18, later removed); kept because the rest of this notebook
        # targets the same legacy pandas. Revisit when upgrading pandas.
        return daily.resample('W-FRI', how=last_of_bin)



In [9]:

    
# WEEKLY_SAMPLE is now registered alongside SAMPLE.
engine.select().all()









    Out[9]:





<selection 185954764 currently-with="[
    "SAMPLE", 
    "WEEKLY_SAMPLE"
]">



In [10]:

    
# Evaluate WEEKLY_SAMPLE for symbol CITY_A over calendar year 2001 and plot the result.
engine.get("WEEKLY_SAMPLE").process('CITY_A', dt(2001, 1, 1), dt(2001, 12, 31)).plot()









    Out[10]:





<matplotlib.axes.AxesSubplot at 0xb22252c>



In [11]:

    
@engine.for_each(engine.select('SAMPLE_IN'))
def _rolling_mean_factory(source_reference):
    """Register a 'ROLLING_MEAN:<source_reference>' datasource for one reference.

    Applied through ``engine.for_each`` to the selection of datasources
    tagged 'SAMPLE_IN'.
    """
    # One value shared by the declared lookback and the rolling window size.
    window = 7

    @engine.datasource('ROLLING_MEAN:%s' % source_reference,
                       dependencies=[source_reference],
                       lookback=window,
                       tags=['ROLLING_MEAN'])
    class RollingMean(object):
        """Rolling mean of ``source_reference`` over ``window`` rows."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return the rolling mean of the dependency's data."""
            source_data = context.dependencies(source_reference)
            # NOTE(review): pandas.rolling_mean is the legacy module-level
            # API (removed in later pandas); kept to match the pandas
            # version the rest of this notebook targets.
            return pandas.rolling_mean(source_data, window)



In [12]:

    
# Datasources tagged SAMPLE_IN, i.e. the selection the rolling-mean factory was applied to.
engine.select('SAMPLE_IN')









    Out[12]:





<selection 188901708 currently-with="[
    "SAMPLE", 
    "WEEKLY_SAMPLE"
]">



In [13]:

    
# One ROLLING_MEAN datasource exists per SAMPLE_IN reference.
engine.select('ROLLING_MEAN')









    Out[13]:





<selection 188899532 currently-with="[
    "ROLLING_MEAN:SAMPLE", 
    "ROLLING_MEAN:WEEKLY_SAMPLE"
]">



In [14]:

    
@engine.datasource('MONTHLY_SAMPLE',
                   dependencies=['SAMPLE'],
                   tags=['SAMPLE_IN', 'MONTHLY'],
                   frequency='M')
class Monthly(object):
    """Datasource 'MONTHLY_SAMPLE': the SAMPLE dependency resampled to 'M'."""

    def evaluate(self, context, symbol, start=None, end=None):
        """Resample SAMPLE to 'M', keeping the last element of each bin."""

        def last_of_bin(values):
            return values[-1]

        daily = context.dependencies('SAMPLE')
        # NOTE(review): `how=` is the legacy resample API (deprecated in
        # pandas 0.18, later removed); kept because the rest of this notebook
        # targets the same legacy pandas. Revisit when upgrading pandas.
        return daily.resample('M', how=last_of_bin)



In [15]:

    
# MONTHLY_SAMPLE carries the SAMPLE_IN tag, so it now appears in this selection.
engine.select('SAMPLE_IN')









    Out[15]:





<selection 188876876 currently-with="[
    "SAMPLE", 
    "MONTHLY_SAMPLE", 
    "WEEKLY_SAMPLE"
]">



In [16]:

    
# ROLLING_MEAN:MONTHLY_SAMPLE shows up here even though MONTHLY_SAMPLE was
# registered after the rolling-mean factory cell had already run.
engine.select('ROLLING_MEAN')









    Out[16]:





<selection 188876588 currently-with="[
    "ROLLING_MEAN:SAMPLE", 
    "ROLLING_MEAN:WEEKLY_SAMPLE", 
    "ROLLING_MEAN:MONTHLY_SAMPLE"
]">



In [17]:

    
# Monthly values for CITY_A requested for calendar year 2001 only.
m1 = engine.get("MONTHLY_SAMPLE").process('CITY_A', dt(2001, 1, 1), dt(2001, 12, 31))
m1.plot()









    Out[17]:





<matplotlib.axes.AxesSubplot at 0xb456a2c>



In [18]:

    
# Same datasource and symbol as m1, but requested over a wider range
# (2000-10-01 to 2002-02-01) that fully contains the range used for m1.
m2 = engine.get("MONTHLY_SAMPLE").process('CITY_A', dt(2000, 10, 1), dt(2002, 2, 1))
m2.plot()









    Out[18]:





<matplotlib.axes.AxesSubplot at 0xb62c38c>

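A possible test of time-range retrieval: the monthly values for 2001 should be identical whether they are requested on their own (`m1`) or as part of a wider range (`m2`):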



In [19]:

    
# Subtraction aligns both results on their index; rows present in only one of
# them become NaN and are discarded by dropna(), leaving the overlapping rows.
m = (m2 - m1).dropna()
# Compare by value: `is` tests object identity and only appears to work for
# small integers because of a CPython implementation detail.
assert len(m) == 12
# any(m) iterates a DataFrame's column labels rather than its cells, so it
# never inspected the differences; check the underlying values instead.
assert not m.values.any()



In [20]:

    
# Full registry so far: the three sample datasources plus one rolling mean for each.
engine.select().all()









    Out[20]:





<selection 190701164 currently-with="[
    "ROLLING_MEAN:SAMPLE", 
    "MONTHLY_SAMPLE", 
    "ROLLING_MEAN:WEEKLY_SAMPLE", 
    "WEEKLY_SAMPLE", 
    "SAMPLE", 
    "ROLLING_MEAN:MONTHLY_SAMPLE"
]">



In [21]:

    
@engine.for_each(engine.select('SAMPLE_IN').union('ROLLING_MEAN'))
def _percentual_change_factory(source_reference):
    """Register a 'PERCENTUALT_CHANGE:<source_reference>' datasource for one reference.

    Applied through ``engine.for_each`` to the union of the 'SAMPLE_IN' and
    'ROLLING_MEAN' selections.
    """

    # The 'PERCENTUALT_CHANGE' spelling is kept as is: other cells look the
    # generated datasources up by this exact name.
    @engine.datasource('PERCENTUALT_CHANGE:%s' % source_reference,
                       dependencies=[source_reference],
                       lookback=1,
                       tags=['PERCENTUALT_CHANGE'])
    class PctChange(object):
        """Percentage change between consecutive rows of ``source_reference``."""

        def evaluate(self, context, symbol, start=None, end=None):
            """Return ``pct_change()`` of the dependency's data."""
            source_data = context.dependencies(source_reference)
            return source_data.pct_change()



In [22]:

    
# Every SAMPLE_IN and ROLLING_MEAN reference now has a PERCENTUALT_CHANGE counterpart.
engine.select().all()









    Out[22]:





<selection 191059244 currently-with="[
    "PERCENTUALT_CHANGE:WEEKLY_SAMPLE", 
    "PERCENTUALT_CHANGE:ROLLING_MEAN:SAMPLE", 
    "PERCENTUALT_CHANGE:ROLLING_MEAN:WEEKLY_SAMPLE", 
    "ROLLING_MEAN:WEEKLY_SAMPLE", 
    "SAMPLE", 
    "PERCENTUALT_CHANGE:MONTHLY_SAMPLE", 
    "ROLLING_MEAN:SAMPLE", 
    "PERCENTUALT_CHANGE:ROLLING_MEAN:MONTHLY_SAMPLE", 
    "MONTHLY_SAMPLE", 
    "PERCENTUALT_CHANGE:SAMPLE", 
    "WEEKLY_SAMPLE", 
    "ROLLING_MEAN:MONTHLY_SAMPLE"
]">



In [23]:

    
# Percentage change of MONTHLY_SAMPLE for CITY_A over calendar year 2001;
# the bare `mp1` on the last line displays the resulting frame.
mp1 = engine.get("PERCENTUALT_CHANGE:MONTHLY_SAMPLE").process('CITY_A', dt(2001, 1, 1), dt(2001, 12, 31))
mp1



In [24]:

    
# Plot the percentage-change values computed in the previous cell.
mp1.plot()









    Out[24]:





<matplotlib.axes.AxesSubplot at 0xb63e4cc>



In [25]:

    
# Disabled export loop: for every city and every registered datasource it
# would write the processed result to '<reference>{<city>}.csv' in the
# current working directory.
# NOTE(review): presumably how the CSV test fixtures were generated - confirm
# before re-enabling or deleting.
#for city in cities:
#    for ds_ref in engine.select().all():
#        engine.get(ds_ref).process(city).to_csv('%s{%s}.csv' % (ds_ref, city))

	values
2001-01-31	-0.027719
2001-02-28	0.066506
2001-03-31	-0.032079
2001-04-30	0.011891
2001-05-31	-0.044768
2001-06-30	0.043208
2001-07-31	-0.017979
2001-08-31	-0.001635
2001-09-30	-0.070178
2001-10-31	0.055934
2001-11-30	0.014012
2001-12-31	0.053552