In [ ]:
# Name of the lending platform; it selects which HDF5 store file is opened.
platform = 'lendingclub'

# NOTE(review): this is an absolute, user-specific path, so the notebook only
# runs on the original author's machine as written.
# NOTE(review): `append=True` is not a documented HDFStore argument; it is
# presumably forwarded to PyTables unchanged -- confirm it is still needed.
store = pd.HDFStore(
    '/Users/justinhsi/justin_tinkering/data_science/lendingclub/'
    '{0}_store.h5'.format(platform),
    append=True)

In [ ]:
# Pull the two stored frames out of the HDF5 store.
loan_npv_rois = store.get('loan_npv_rois')
test = store.get('test_filtered_columns')

# Per-loan values of the 'target_strict' column; used downstream to compute
# the fraction of defaults in each simulated portfolio.
default_series = test['target_strict']

In [ ]:
def basic_models(trials, port_size, available_loans, grade, test,
                 loan_npv_rois, default_series, random_state=None):
    """Simulate randomly picked loan portfolios and summarise each trial.

    For every trial, ``available_loans`` loan ids are drawn without
    replacement from the index of ``test`` (restricted to rows whose
    ``'grade'`` equals ``grade`` when a truthy grade is given).  From that
    pool, ``port_size`` ids are drawn without replacement as the portfolio.
    Each trial records the column-wise mean of ``loan_npv_rois`` over the
    picked ids, plus the sum of ``default_series`` over the picked ids
    divided by ``port_size`` (stored as ``'pct_def'``).

    Parameters
    ----------
    trials : int
        Number of portfolios to simulate.
    port_size : int
        Number of loans in each simulated portfolio.
    available_loans : int
        Size of the intermediate pool the portfolio is picked from.
    grade : str or None
        Grade to restrict ``test`` to; a falsy value means no restriction
        and the output is labelled ``'baseline'``.
    test : pandas.DataFrame
        Frame whose index holds the loan ids; must have a ``'grade'`` column
        when ``grade`` is truthy.
    loan_npv_rois : pandas.DataFrame
        Frame indexed by loan id; every column is averaged over the picks.
    default_series : pandas.Series
        Series indexed by loan id; summed over the picks.
    random_state : None, int or numpy.random.RandomState, optional
        ``None`` (default) keeps using numpy's global generator, exactly as
        before.  An int seeds a private generator so a run can be
        reproduced; an existing ``RandomState`` is used as-is.

    Returns
    -------
    pandas.DataFrame
        One row per trial.  Column labels are ``(name, label)`` tuples where
        ``name`` is a ``loan_npv_rois`` column or ``'pct_def'`` and ``label``
        is ``grade`` or ``'baseline'``.

    Raises
    ------
    ValueError
        Raised by numpy when a draw without replacement asks for more items
        than the population holds.
    """
    if random_state is None:
        rng = np.random  # module-level generator: unchanged default behaviour
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    label = grade if grade else 'baseline'
    if grade:
        test = test[test['grade'] == grade]
    # Loop-invariant: the candidate ids do not change between trials.
    candidate_ids = test.index.values

    results = {}
    pct_default = {}
    for trial in tqdm_notebook(np.arange(trials)):
        loans_to_pick_from = rng.choice(
            candidate_ids, available_loans, replace=False)
        picks = rng.choice(loans_to_pick_from, port_size, replace=False)
        results[trial] = loan_npv_rois.loc[picks, :].mean().to_dict()
        # float() forces true division even on Python 2 with integer flags.
        pct_default[trial] = default_series.loc[picks].sum() / float(port_size)

    results_df = pd.DataFrame(results).T
    results_df['pct_def'] = pd.Series(pct_default)
    # Use a concrete list of tuples rather than a one-shot zip iterator.
    results_df.columns = [(name, label) for name in results_df.columns.values]
    return results_df

In [ ]:
# Groups to simulate: None stands for the ungraded baseline, followed by every
# distinct value of test.grade in sorted order.  None is prepended *after*
# sorting because Python 3 refuses to order None against strings (Python 2
# silently sorted None first, which is the order preserved here).
grades = [None] + sorted(test.grade.unique().tolist())

# Number of rows per grade (missing grades are counted as well).
grade_counts = test.grade.value_counts(dropna=False)

In [ ]:
# Number of simulated portfolios per group, and the pool size used for the
# ungraded baseline (also the cap applied to every graded pool).
trials = 20000
available_loans = 3000

# Pool size per entry of `grades`: the baseline (falsy grade) uses
# `available_loans`; a real grade uses 20% of that grade's row count,
# capped at `available_loans` and truncated to an int.
grades_available_loans = [
    available_loans if not grade
    else int(min(grade_counts[grade] * .2, available_loans))
    for grade in grades
]

# Portfolio size per entry of `grades`: 30% of the matching pool size,
# truncated to an int.
grades_port_size = [int(.3 * pool_size) for pool_size in grades_available_loans]

# grade -> (pool size, portfolio size)
grade_batch_loans = {
    grade: (pool_size, portfolio_size)
    for grade, pool_size, portfolio_size in zip(
        grades, grades_available_loans, grades_port_size)
}

In [ ]:
# Display the grade -> (pool size, portfolio size) lookup for inspection.
grade_batch_loans

In [ ]:
# Display the portfolio sizes, one per entry of `grades`.
grades_port_size

In [ ]:
# Run the simulation once per group (None is the ungraded baseline) and keep
# each group's per-trial results frame keyed by its grade.
# grade_batch_loans[grade] is (pool size, portfolio size).
results_dict = {}
for grade in tqdm_notebook(grades):
    results_dict[grade] = basic_models(
        trials=trials,
        port_size=grade_batch_loans[grade][1],
        available_loans=grade_batch_loans[grade][0],
        grade=grade,
        test=test,
        loan_npv_rois=loan_npv_rois,
        default_series=default_series)

In [ ]:
# Put every group's results side by side (one row per trial), turn the
# (name, label) column tuples into a named two-level column index, and order
# the columns.
result = pd.concat(list(results_dict.values()), axis=1)
result.columns = pd.MultiIndex.from_tuples(
    result.columns.values, names=['discount_rate', 'model'])
result = result.sort_index(axis=1)

In [ ]:
# Summary statistics across trials for every (discount_rate, model) column.
result.describe()

In [ ]:
# Persist the combined simulation results under the 'results' key.
store.open()
try:
    store['results'] = result
finally:
    # Always release the HDF5 file handle, even if the write fails.
    store.close()

In [ ]: