In [1]:
%matplotlib inline

In [2]:
import os, sys
sys.path.append(os.path.abspath('../../../main/python'))

In [3]:
import datetime as dt

import matplotlib.pyplot as plt
import pandas as pd

import thalesians.tsa.conversions as conv
import thalesians.tsa.finance as finance
import thalesians.tsa.pandasutils as pdutils
import thalesians.tsa.visual as visual

import thalesians.tsa.datasets.finratrace as finratrace

In [4]:
data_file_path = 'S:/Dropbox/Data/TRACE/fifteen-us-companies-trace-bond-trades.zip'
first_report_date = conv.str_to_date('2015.01.01')
last_report_date = conv.str_to_date('2015.12.31')
cusip = '060505DP6'

In [5]:
df = finratrace.load_df_from_file(data_file_path, cusip=cusip,
        first_report_date=first_report_date, last_report_date=last_report_date)

In [6]:
len(df)


Out[6]:
3234

In [7]:
def convert_ascii_rptd_vol_tx(s):
    try: return float(s)
    except:
        if s == '': return float('nan')
        elif s == '1MM+': return 1000000
        elif s == '5MM+': return 5000000
        else: raise ValueError('Unexpected value: "%s"' % str(s))
            
pdutils.convert_df_columns(df, {'ascii_rptd_vol_tx': convert_ascii_rptd_vol_tx}, in_place=True);


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-7-f2c4e76909c2> in convert_ascii_rptd_vol_tx(s)
      1 def convert_ascii_rptd_vol_tx(s):
----> 2     try: return float(s)
      3     except:

C:\Programs\Win64\Anaconda\V4.4.0_3.6\lib\site-packages\pandas\core\series.py in wrapper(self)
     96         raise TypeError("cannot convert the series to "
---> 97                         "{0}".format(str(converter)))
     98 

TypeError: cannot convert the series to <class 'float'>

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-7-f2c4e76909c2> in <module>()
      7         else: raise ValueError('Unexpected value: "%s"' % str(s))
      8 
----> 9 pdutils.convert_df_columns(df, {'ascii_rptd_vol_tx': convert_ascii_rptd_vol_tx}, in_place=True);

S:\dev\tsa\src\main\python\thalesians\tsa\pandasutils.py in convert_df_columns(df, conversions, in_place)
    115     assert len(unfamiliar_columns) == 0, 'Unfamiliar columns: %s' % str(unfamiliar_columns)
    116     for column, conversion in conversions.items():
--> 117         df[[column]] = df[[column]].apply(conversion)
    118     return df
    119 

C:\Programs\Win64\Anaconda\V4.4.0_3.6\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, args, **kwds)
   4358                         f, axis,
   4359                         reduce=reduce,
-> 4360                         ignore_failures=ignore_failures)
   4361             else:
   4362                 return self._apply_broadcast(f, axis)

C:\Programs\Win64\Anaconda\V4.4.0_3.6\lib\site-packages\pandas\core\frame.py in _apply_standard(self, func, axis, ignore_failures, reduce)
   4454             try:
   4455                 for i, v in enumerate(series_gen):
-> 4456                     results[i] = func(v)
   4457                     keys.append(v.name)
   4458             except Exception as e:

<ipython-input-7-f2c4e76909c2> in convert_ascii_rptd_vol_tx(s)
      2     try: return float(s)
      3     except:
----> 4         if s == '': return float('nan')
      5         elif s == '1MM+': return 1000000
      6         elif s == '5MM+': return 5000000

C:\Programs\Win64\Anaconda\V4.4.0_3.6\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
    951         raise ValueError("The truth value of a {0} is ambiguous. "
    952                          "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
--> 953                          .format(self.__class__.__name__))
    954 
    955     __bool__ = __nonzero__

ValueError: ('The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().', 'occurred at index ascii_rptd_vol_tx')

In [ ]:
pdutils.detect_df_column_types(df, convert=True, in_place=True)

In [ ]:
pdutils.get_column_types(df)

In [ ]:
daily_df = pdutils.sparsen(
        df,
        bucket='date', date='trd_exctn_dt', time='trd_exctn_tm',
        fix_points=dt.timedelta(hours=2),
        min_fix_point_count=3, min_min_fix_point_time=dt.time(10), min_max_fix_point_time=dt.time(14))

In [ ]:
daily_df.head()

In [ ]:
daily_df.tail()

In [ ]:
plt.plot(daily_df['trd_exctn_dt'], daily_df['rptd_pr']);

In [ ]:
df['trd_exctn_dttm'] = pd.to_datetime(pdutils.combine_date_time(df, 'trd_exctn_dt', 'trd_exctn_tm'))

In [ ]:
df['trc_st'].value_counts()

In [ ]:
visual.visualize_df(df)

In [ ]:
def visualize_trace_df(df):
    fig, ax = visual.visualize_df_sized_point_series(df[(df['contra_party_type'] == 'C') & (df['side'] == 'B')], 'trd_exctn_dttm', 'rptd_pr', 'ascii_rptd_vol_tx', scaling=finance.usd_trade_size_scaling, color='green')
    visual.visualize_df_sized_point_series(df[(df['contra_party_type'] == 'C') & (df['side'] == 'S')], 'trd_exctn_dttm', 'rptd_pr', 'ascii_rptd_vol_tx', fig=fig, ax=ax, scaling=finance.usd_trade_size_scaling, color='red')
    visual.visualize_df_sized_point_series(df[(df['contra_party_type'] == 'D') & (df['side'] == 'S')], 'trd_exctn_dttm', 'rptd_pr', 'ascii_rptd_vol_tx', fig=fig, ax=ax, scaling=finance.usd_trade_size_scaling, color='blue')

In [ ]:
visualize_trace_df(df[-500:])