In [3]:
%matplotlib nbagg
%pylab


Using matplotlib backend: nbAgg
Populating the interactive namespace from numpy and matplotlib
/usr/local/lib/python2.7/site-packages/IPython/core/magics/pylab.py:161: UserWarning: pylab import has clobbered these variables: ['indices']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

In [11]:
from fondos import producer, series
import pandas as pd
from statsmodels.tsa.filters.hp_filter import hpfilter
from collections import namedtuple

# Notebook-level start date. get_data() takes its own `start_date` argument,
# so this value is only used where a cell refers to it explicitly.
start_date = '2016-01-01'

# Record returned by get_data(): the filtered table, the unfiltered table and
# the position -> ticker mapping.
Data = namedtuple('Data', ('data', 'orig_data', 'id2obj'))

def get_data(start_date='2017-01-01'):
    """Collect max-normalised returns for every ticker in ``producer.tickers``.

    For each ticker the returns come from
    ``series.Quote(ticker).get_returns(start_date=start_date)`` and are divided
    by their own maximum. Two versions are kept: the normalised series as-is,
    and the second component returned by ``hpfilter(..., lamb=160)``.

    Parameters
    ----------
    start_date : str
        Forwarded unchanged to ``Quote.get_returns``.

    Returns
    -------
    Data
        ``data``      -- DataFrame of the filtered series (one column per ticker
                         after the transpose).
        ``orig_data`` -- DataFrame of the unfiltered, normalised series.
        ``id2obj``    -- dict mapping each ticker's position in the loop to the
                         ticker itself.
    """
    filtered_series = []
    normalised_series = []
    position_to_ticker = {}

    for position, ticker in enumerate(producer.tickers):
        quote = series.Quote(ticker)
        normalised = quote.get_returns(start_date=start_date)
        normalised = normalised / normalised.max()
        normalised_series.append(normalised)

        # hpfilter returns a pair; the second element (the trend component in
        # statsmodels) is the smoothed series that gets clustered.
        _, smoothed = hpfilter(normalised, lamb=160)

        position_to_ticker[position] = ticker
        filtered_series.append(smoothed)

    filtered_frame = pd.DataFrame(filtered_series).T
    normalised_frame = pd.DataFrame(normalised_series).T
    return Data(
        data=filtered_frame,
        orig_data=normalised_frame,
        id2obj=position_to_ticker,
    )

In [12]:
from itertools import chain
import cluster_dataset

def cluster(data, id2obj):
    """Cluster ``data`` and return the clusters in human-readable form.

    Wraps ``data`` and ``id2obj`` in a ``cluster_dataset.ClusterDataset``, runs
    ``cluster_inconsistency(0.99, depth=3)`` on it and returns whatever
    ``get_human_readable_cluster`` produces for the resulting indices.
    """
    dataset = cluster_dataset.ClusterDataset(data, id2obj)
    member_indices = dataset.cluster_inconsistency(0.99, depth=3)
    readable_clusters = dataset.get_human_readable_cluster(member_indices)
    return readable_clusters

def plot_cluster(clusters, data=None, orig_data=None):
    """Draw one subplot per cluster, overlaying each ticker's two series.

    For every ticker in a cluster, ``data.T.loc[ticker]`` is plotted with the
    default style and ``orig_data.T.loc[ticker]`` is plotted on the same axes
    as a faint black line. Only the first line of each pair gets a legend
    entry.

    Parameters
    ----------
    clusters : iterable of iterables
        The clusters to draw, e.g. the return value of ``cluster()``. Each
        inner iterable holds the labels used to look series up in ``data`` and
        ``orig_data``.
    data : optional
        Object supporting ``data.T.loc[ticker].plot(ax=...)``. When omitted,
        the notebook global ``cdt.data`` is used (legacy behaviour).
    orig_data : optional
        Same contract as ``data``. When omitted, the notebook global
        ``orig_data`` is used (legacy behaviour).

    Raises
    ------
    NameError
        If an argument is omitted and the corresponding notebook global does
        not exist either.

    Notes
    -----
    ``subplots`` and ``ceil`` are expected in the namespace (the notebook gets
    them from ``%pylab``).
    """
    clusters = list(clusters)

    # Prefer explicit arguments; only fall back to kernel globals so that the
    # existing one-argument calls keep working.
    if data is None or orig_data is None:
        notebook_ns = globals()
        try:
            if data is None:
                data = notebook_ns['cdt'].data
            if orig_data is None:
                orig_data = notebook_ns['orig_data']
        except KeyError as missing:
            raise NameError(
                'plot_cluster needs `data` and `orig_data` (or the notebook '
                'globals `cdt` and `orig_data`); missing %s' % missing
            )

    n_rows = 3
    # At least one column, so an empty cluster list still yields a valid grid.
    n_cols = max(1, int(ceil(len(clusters) / 3.0)))
    # squeeze=False always returns a 2-D axes array, including for a single
    # column, so flattening below is safe.
    _, axs = subplots(n_rows, n_cols, sharex=True, figsize=(15, 10), squeeze=False)

    for ax, members in zip(axs.ravel(), clusters):
        for ticker in members:
            data.T.loc[ticker].plot(ax=ax)
            orig_data.T.loc[ticker].plot(c='k', alpha=0.125, label=None, ax=ax)

        # Lines alternate data/orig_data, so every second one starting at 0
        # belongs to `data`.
        ax.legend(
            ax.get_lines()[::2],
            members,
            loc='best',
            ncol=2,
            fontsize=9
        )

In [13]:
# Cluster and plot the series starting on 2017-01-01.
data_2017 = get_data(start_date='2017-01-01')
plot_cluster(cluster(data=data_2017.data, id2obj=data_2017.id2obj))



In [14]:
# Cluster and plot the series starting on 2016-01-01.
# NOTE(review): the recorded run of this cell failed inside get_data() with a
# gridfs NoFile error -- confirm the cached quotes exist before re-running.
data_2016 = get_data(start_date='2016-01-01')
plot_cluster(cluster(data=data_2016.data, id2obj=data_2016.id2obj))


---------------------------------------------------------------------------
NoFile                                    Traceback (most recent call last)
<ipython-input-14-b61d4d35b742> in <module>()
----> 1 data_2016 = get_data('2016-01-01')
      2 plot_cluster(cluster(data_2016.data, data_2016.id2obj))

<ipython-input-11-0194d25baf4b> in get_data(start_date)
     15     for t in producer.tickers:
     16         q = series.Quote(t)
---> 17         returns = q.get_returns(start_date=start_date)
     18         returns = returns / returns.max()
     19 

/Users/przivic/prog/fito/fito/data_store/base.pyc in __call__(_, *args, **kwargs)
    207             @wraps(to_wrap)
    208             def __call__(_, *args, **kwargs):
--> 209                 return self.cache_on.execute(AutosavedOperation(*args, **kwargs))
    210 
    211         return FunctionWrapper()

/Users/przivic/prog/fito/fito/operation_runner.pyc in execute(self, operation, force)
     55 
     56         for func in functions:
---> 57             res = func()
     58             if res is not None: break
     59 

/Users/przivic/prog/fito/fito/operation_runner.pyc in <lambda>()
     50         functions.append(
     51             lambda: operation.apply(
---> 52                 self.alias(force=force)
     53             )
     54         )

/Users/przivic/prog/fito/fito/operations/decorate.pyc in apply(self, runner)
    211     def apply(self, runner):
    212         this_args = self.get_this_args(runner)
--> 213         return func_to_execute(**this_args)
    214 
    215     cls_attrs = attrs.copy()

/Users/przivic/prog/fito/fito/operations/decorate.pyc in new_f(*args, **kwargs)
     34         @wraps(self.func)
     35         def new_f(*args, **kwargs):
---> 36             return self.func(first_arg, *args, **kwargs)
     37 
     38         cls = type(instance) if instance is not None else owner

/Users/przivic/prog/fondos/fondos/series.pyc in get_returns(self, field, start_date, end_date, max_normalize)
     67     @mem_cache.autosave(method_type='instance')
     68     def get_returns(self, field='close', start_date=None, end_date=None, max_normalize=False):
---> 69         series = self.data[field.capitalize()]
     70         if start_date is not None: series = series[start_date:]
     71 

/Users/przivic/prog/fondos/fondos/series.pyc in data(self)
     62     @property
     63     def data(self):
---> 64         series = pickle.loads(self.get_data().execute())
     65         if series is not None: return series.data
     66 

/Users/przivic/prog/fito/fito/operations/operation.pyc in execute(self, force)
      9 
     10     def execute(self, force=False):
---> 11         return OperationRunner().execute(self, force=force)
     12 
     13     def apply(self, runner):

/Users/przivic/prog/fito/fito/operation_runner.pyc in execute(self, operation, force)
     55 
     56         for func in functions:
---> 57             res = func()
     58             if res is not None: break
     59 

/Users/przivic/prog/fito/fito/operation_runner.pyc in <lambda>()
     43             functions = [
     44                 lambda: self._get_memory_cache(operation),
---> 45                 lambda: self._get_data_store_cache(operation),
     46             ]
     47         else:

/Users/przivic/prog/fito/fito/operation_runner.pyc in _get_data_store_cache(self, operation)
     74         out_data_store = operation.get_out_data_store()
     75         if out_data_store is not None:
---> 76             return out_data_store.get_or_none(operation)
     77 
     78 

/Users/przivic/prog/fito/fito/data_store/base.pyc in get_or_none(self, spec)
    127     def get_or_none(self, spec):
    128         try:
--> 129             return self.get(spec)
    130         except KeyError:
    131             return None

/Users/przivic/prog/fito/fito/data_store/base.pyc in get(self, spec)
     68                 return self.get_cache[spec]
     69             except KeyError:
---> 70                 res = _get()
     71                 self.get_cache.set(spec, res)
     72                 return res

/Users/przivic/prog/fito/fito/data_store/base.pyc in _get()
     50         def _get():
     51             try:
---> 52                 return self._get(spec)
     53             except KeyError, e:
     54                 # TODO: I don't like puting RehashUI.ignored_specs here

/Users/przivic/prog/fito/fito/data_store/mongo.pyc in _get(self, spec)
    169     def _get(self, spec):
    170         doc = self._get_doc(spec)
--> 171         return self._parse_doc(doc)[1]
    172 
    173     def get_id(self, spec):

/Users/przivic/prog/fito/fito/data_store/mongo.pyc in _parse_doc(self, doc)
    128         values = doc['values']
    129         if self.use_gridfs:
--> 130             values = self.gridfs.get(values).read()
    131         spec = Spec.dict2spec(doc['spec'])
    132         return spec, values

/usr/local/lib/python2.7/site-packages/gridfs/__init__.pyc in get(self, file_id)
    138 
    139         # Raise NoFile now, instead of on first attribute access.
--> 140         gout._ensure_file()
    141         return gout
    142 

/usr/local/lib/python2.7/site-packages/gridfs/grid_file.pyc in _ensure_file(self)
    433             if not self._file:
    434                 raise NoFile("no file in gridfs collection %r with _id %r" %
--> 435                              (self.__files, self.__file_id))
    436 
    437     def __getattr__(self, name):

NoFile: no file in gridfs collection Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), u'fondos'), u'stocks_cache.fs.files') with _id ObjectId('59208b9ef8e60e4ad447e6ba')