In [ ]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook

import dir_constants as dc
In [ ]:
# Open the per-platform HDF5 store located under dc.home_path.
platform = 'lendingclub'
store_path = (
    dc.home_path
    + '/justin_tinkering/data_science/lendingclub/{0}_store.h5'.format(platform)
)
store = pd.HDFStore(store_path, append=True)
In [ ]:
# Setup for the chunked NPV computation in the next cell.

# Ids used as chunk boundaries; the next cell compares them against the
# `loan_id_num` column when slicing 'pmt_hist_intermediary_2'.
# NOTE(review): the boundary logic presumes these ids are sorted ascending and
# positive -- confirm against how 'pmt_hist_ids' was written.
pmt_hist_ids = store['pmt_hist_ids'].astype(int)
max_id = pmt_hist_ids.max()

# Number of ids per read from the store.
chunksize = 800

# Ceiling division. The previous `len // chunksize + 1` produced one chunk too
# many whenever the id count was an exact multiple of `chunksize`, which made
# the boundary lookup in the loop index one past the end of `pmt_hist_ids`.
n_chunks = (len(pmt_hist_ids) + chunksize - 1) // chunksize

# Columns requested from the payment-history table. 'outs_princp_beg' is kept
# first: the next cell reads the funded amount from the first column.
interesting_cols_over_time = [
    'outs_princp_beg',
    'all_cash_to_inv',
    'date',
    'fico_last',
    'm_on_books',
    'status_period_end',
    'loan_id',
    'loan_id_num',
]

# Discount rates 0.05 .. 0.35 in steps of 0.01; the next cell divides each by
# 12 before discounting.
disc_rates = np.arange(.05, .36, .01)
In [ ]:
def _npv_rois(cash_flows, funded, annual_rates):
    """Return NPV / funded for every rate in `annual_rates`.

    Each annual rate is divided by 12 to get the per-period rate. The
    discounting convention matches the removed ``np.npv``: the first cash
    flow is period 0 (undiscounted) and flow ``t`` is divided by
    ``(1 + r) ** t``.

    Parameters
    ----------
    cash_flows : sequence of float
        Cash flows in period order, starting with the initial outlay.
    funded : float
        Amount used to normalise the NPV into a return on investment.
    annual_rates : array-like of float
        Annual discount rates.

    Returns
    -------
    numpy.ndarray
        One NPV-based ROI per entry of `annual_rates`, in the same order.
    """
    flows = np.asarray(cash_flows, dtype=float)
    periods = np.arange(flows.size)
    per_period_rates = np.asarray(annual_rates, dtype=float) / 12
    # One row of discount factors per rate, one column per period.
    discount = (1 + per_period_rates[:, None]) ** periods
    return (flows / discount).sum(axis=1) / funded


# Unused in the visible cells; kept in case a cell outside this view reads it.
printed_cols = set()

# Maps loan_id -> {discount rate: NPV / funded amount}.
npv_roi_holder = {}

n_ids = len(pmt_hist_ids)
for n in tqdm_notebook(range(n_chunks)):
    left_pos = n * chunksize
    right_pos = left_pos + chunksize
    if n > 0 and left_pos >= n_ids:
        # Nothing left to read: happens when the id count is an exact
        # multiple of chunksize and n_chunks over-counts by one.
        break

    # Each chunk covers loan_id_num in (left_bound, right_bound].
    # Positional access (.iloc) is used because n * chunksize is a position,
    # not an index label.
    # NOTE(review): presumes pmt_hist_ids is sorted ascending -- confirm.
    left_bound = 0 if n == 0 else pmt_hist_ids.iloc[left_pos]
    if n == n_chunks - 1 or right_pos >= n_ids:
        right_bound = max_id
    else:
        right_bound = pmt_hist_ids.iloc[right_pos]

    # The `where` string refers to left_bound / right_bound by name, so those
    # variable names must stay as they are and stay in this scope.
    chunk = pd.read_hdf(
        store,
        'pmt_hist_intermediary_2',
        where='(loan_id_num > left_bound) & (loan_id_num <= right_bound)',
        columns=interesting_cols_over_time)

    for ids, group in chunk.groupby('loan_id'):
        # Funded amount = beginning outstanding principal on the loan's first
        # row; read by name so it does not depend on column order.
        # NOTE(review): presumes rows within a loan are in chronological
        # order -- confirm against how the table was written.
        funded = group['outs_princp_beg'].iat[0]
        cfs = [-funded] + group['all_cash_to_inv'].tolist()
        # np.npv was removed from NumPy, so the NPV is computed directly and
        # for all discount rates at once.
        npv_roi_holder[ids] = dict(
            zip(disc_rates, _npv_rois(cfs, funded, disc_rates)))
In [ ]:
# Outer dict keys (loan ids) become columns first; transpose so that each row
# is a loan and each column is a discount rate.
npv_roi_df = pd.DataFrame(npv_roi_holder).transpose()
# Label the columns with the rates rounded to two decimals.
npv_roi_df.columns = np.round(npv_roi_df.columns.values, 2)
In [ ]:
# Persist the per-loan NPV ROI table under 'loan_npv_rois', then release the
# HDF5 file handle.
store.put('loan_npv_rois', npv_roi_df)
store.close()
In [ ]:
In [ ]: