Author: Justin Hsi


In [ ]:
import os
import time

import pandas as pd

import dir_constants as dc

# Set some constants __________________________________________________________
now = time.strftime("%Y_%m_%d_%Hh_%Mm_%Ss")  # timestamp of when this cell ran
platform = 'lendingclub'

# Set data_path _______________________________________________________________
data_path = dc.home_path + '/rsync_dl_rig/unzipped_lc_csvs'

# Get the loan_info csvs to iterate over ______________________________________
# Files whose names start with any of these prefixes are not loan_info csvs.
skip_prefixes = ('.', 'lc_', 'PMTHIST', 'LCData')
files = os.listdir(data_path)
# os.listdir returns entries in arbitrary order; sort so the concatenated
# row order is the same on every run.
loan_info_files = sorted(
    file_ for file_ in files if not file_.startswith(skip_prefixes))

# header=1 takes the column names from the second line of each csv and
# skipfooter=2 drops the last two lines; skipfooter needs the python engine.
to_concat = [
    pd.read_csv(
        os.path.join(data_path, file_),
        header=1,
        engine='python',
        skipfooter=2)
    for file_ in loan_info_files
]

# ignore_index avoids carrying each file's own 0..n row labels (duplicates)
# into the merged frame.
loan_info = pd.concat(to_concat, ignore_index=True)

# Block to ensure that rows that aren't actually loans are dropped ____________
# All loans must have int/term/funded
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the concatenated data (SettingWithCopyWarning).
loan_info = loan_info[loan_info['term'].notnull()].copy()
# int_rate is read as text with a '%' sign; strip it and convert to float.
loan_info['int_rate'] = loan_info['int_rate'].str.strip('%').astype(float)
# Only the first three characters of term are parsed as the integer term.
loan_info['term'] = loan_info['term'].str[:3].astype(int)
is_real_loan = ((loan_info['int_rate'] > 0) & (loan_info['term'] > 0) &
                (loan_info['funded_amnt'] > 0))
loan_info = loan_info[is_real_loan].copy()

# Set id to string, also set index to id ______________________________________
# set_index replaces whatever index is present, so no reset_index is needed
# first. drop=False keeps 'id' available as a regular column as well.
loan_info['id'] = loan_info['id'].astype(str)
loan_info = loan_info.set_index('id', drop=False)

# Save in HDFStore ____________________________________________________________
# store = pd.HDFStore(
#     dc.home_path + '/justin_tinkering/data_science/lendingclub/{0}_store.h5'.
#     format(platform),
#     append=True)
# store['loan_info_merged'] = loan_info
# print("{:,}".format(len(loan_info)) + " loans saved " +
#       'for {0}'.format(platform))
# print(store.keys())
# store.close()

In [ ]:
loan_info.reset_index?

In [ ]:
# trying out feather data format
PATH = '/home/justin/justin_tinkering/data_science/lendingclub/data/'
# Create the output directory if it is missing so the write below does not
# depend on another cell having been run first.
os.makedirs(PATH, exist_ok=True)
# to_feather requires a default index. loan_info is indexed by 'id' and also
# keeps 'id' as a column, so a plain reset_index() would fail trying to
# re-insert 'id'. drop=True discards the index and loses nothing. The
# reset frame is written directly; loan_info itself stays indexed by 'id'.
loan_info.reset_index(drop=True).to_feather(f'{PATH}loan_info')

In [ ]:
# Create the data directory (and any missing parents). exist_ok=True makes
# re-running this cell a no-op instead of raising FileExistsError.
os.makedirs(
    '/home/justin/justin_tinkering/data_science/lendingclub/data',
    exist_ok=True)

In [ ]: