In [ ]:
# Platform name; it is substituted into the HDF5 store's file name below.
platform = 'lendingclub'

# Open the per-platform HDF5 store.
# NOTE(review): `append` is not one of HDFStore's named parameters (path,
# mode, complevel, complib, fletcher32) -- confirm it is intended.
store = pd.HDFStore(
    '/Users/justinhsi/justin_tinkering/data_science/lendingclub/{0}_store.h5'
    .format(platform),
    append=True,
)
In [ ]:
# Read the two stored objects this notebook works from.
loan_npv_rois = store.get('loan_npv_rois')
test = store.get('test_filtered_columns')

# Column used further down to compute each portfolio's 'pct_def'
# (presumably a 0/1 default flag -- TODO confirm).
default_series = test['target_strict']
In [ ]:
def basic_models(trials, port_size, available_loans, grade, test,
                 loan_npv_rois, default_series):
    """Simulate randomly picked loan portfolios and summarise each one.

    For every trial, ``available_loans`` index labels are drawn without
    replacement from ``test`` (restricted to rows whose ``'grade'`` equals
    ``grade`` when ``grade`` is truthy); ``port_size`` of those labels are
    then drawn, again without replacement, to form the portfolio.

    Args:
        trials: Number of random portfolios to simulate.
        port_size: Number of loans picked into each portfolio.
        available_loans: Size of the random pool each portfolio is picked
            from.
        grade: Value of the ``'grade'`` column to restrict ``test`` to. Any
            falsy value (e.g. ``None``) keeps every row and labels the
            output ``'baseline'``.
        test: DataFrame with a ``'grade'`` column; its index supplies the
            labels that are sampled.
        loan_npv_rois: DataFrame looked up by the sampled labels; the
            column-wise mean over each portfolio is recorded.
        default_series: Series looked up by the sampled labels; its sum over
            the portfolio divided by ``port_size`` is stored as ``'pct_def'``.

    Returns:
        DataFrame with one row per trial. Every column label is a
        ``(name, label)`` tuple, where ``name`` is a column of
        ``loan_npv_rois`` or ``'pct_def'`` and ``label`` is ``grade`` (or
        ``'baseline'`` when ``grade`` is falsy).

    Raises:
        ValueError: Propagated from ``numpy.random.choice`` when a sample
            larger than its population is requested.
    """
    # Resolve the output label up front instead of rebinding `grade` later.
    label = grade if grade else 'baseline'
    if grade:
        test = test[test['grade'] == grade]
    candidate_ids = test.index.values

    results = {}
    pct_default = {}
    for trial in tqdm_notebook(np.arange(trials)):
        loans_to_pick_from = np.random.choice(
            candidate_ids, available_loans, replace=False)
        picks = np.random.choice(loans_to_pick_from, port_size, replace=False)
        results[trial] = loan_npv_rois.loc[picks, :].mean().to_dict()
        pct_default[trial] = default_series.loc[picks].sum() / port_size

    results_df = pd.DataFrame(results).T
    results_df['pct_def'] = pd.Series(pct_default)

    # Build the labels as a concrete list of tuples. On Python 3 `zip` is a
    # lazy iterator, and handing that straight to `.columns` relies on
    # pandas' version-dependent coercion of iterators.
    results_df.columns = [(name, label) for name in results_df.columns.values]
    return results_df
In [ ]:
# Grades to simulate. `None` stands for the all-grades run and is kept first.
# It is deliberately left out of the sort: on Python 3, ordering `None`
# against strings raises TypeError (the original `[None, ...].sort()` did).
# NaN grades are dropped as well: they cannot be ordered against strings
# either, and an equality filter on the grade column never matches NaN.
grades = [None] + sorted(test.grade.dropna().unique().tolist())

# Row count per grade value, with missing grades counted too.
grade_counts = test.grade.value_counts(dropna=False)
In [ ]:
# Simulation settings: how many random portfolios to draw per grade, and the
# cap on the number of loans on offer in a single trial.
trials = 20000
available_loans = 3000

# Loans on offer per entry of `grades`: the full cap for the falsy (all-grades)
# entry, otherwise 20% of that grade's row count, limited to the cap.
grades_available_loans = [
    available_loans if not grade
    else int(min(grade_counts[grade] * 0.2, available_loans))
    for grade in grades
]

# Each portfolio takes 30% of the loans on offer, truncated to an int.
grades_port_size = [int(0.3 * n_loans) for n_loans in grades_available_loans]

# grade -> (available_loans, port_size)
grade_batch_loans = {
    key: (n_available, n_port)
    for key, n_available, n_port in zip(
        grades, grades_available_loans, grades_port_size)
}
In [ ]:
# Show the grade -> (available_loans, port_size) mapping as the cell output.
grade_batch_loans
In [ ]:
# Show the per-grade portfolio sizes as the cell output.
grades_port_size
In [ ]:
# Run the random-portfolio simulation once per entry of `grades`, keyed by
# grade. grade_batch_loans[grade] is (available_loans, port_size).
results_dict = {}
for grade in tqdm_notebook(grades):
    results_dict[grade] = basic_models(
        trials=trials,
        port_size=grade_batch_loans[grade][1],
        available_loans=grade_batch_loans[grade][0],
        grade=grade,
        test=test,
        loan_npv_rois=loan_npv_rois,
        default_series=default_series,
    )
In [ ]:
# Place the per-grade frames side by side, one column block per grade.
result = pd.concat(list(results_dict.values()), axis=1)

# Turn the tuple column labels into a two-level MultiIndex with named levels.
result.columns = pd.MultiIndex.from_tuples(
    result.columns.tolist(), names=['discount_rate', 'model'])

# Order the columns by their (discount_rate, model) labels.
result = result.sort_index(axis=1)
In [ ]:
# Show summary statistics of every result column as the cell output.
result.describe()
In [ ]:
# Save the combined results under the 'results' key. The write is wrapped in
# try/finally so the HDF5 file handle is released even if the write fails.
store.open()
try:
    store['results'] = result
finally:
    store.close()
In [ ]: