In [ ]:
import hashlib
import os
import subprocess
import time
from os import mkdir

from selenium import webdriver
# from selenium.webdriver import PhantomJS
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait

import dir_constants as dc
import lendingclub.account_info as acc_info
# --- Run configuration and browser start-up ---------------------------------
platform = 'lendingclub'
chrome_options = webdriver.ChromeOptions()
home = dc.home_path
# Parent directory that holds one timestamped sub-directory per download run.
ppath = home + '/justin_tinkering/data_science/lendingclub/lendingclub_csvs'
now = time.strftime("%Y_%m_%d_%Hh_%Mm_%Ss")
full_path = ppath + '/' + 'lc_' + now
# makedirs (rather than mkdir) also creates the parent directory when it does
# not exist yet, e.g. on the very first run.
os.makedirs(full_path, exist_ok=True)

# Login credentials come from the project's account_info module.
email = acc_info.email_throwaway
password = acc_info.password_throwaway

# Chrome preference that points browser downloads at this run's directory.
prefs = {'download.default_directory': '{0}'.format(full_path)}
chrome_options.add_experimental_option('prefs', prefs)
# driver_loc = "/home/justin/justin_tinkering/chromedriver"

# Create a new PhantomJS session.
# NOTE(review): chrome_options (and therefore the download-directory
# preference above) is never passed to this driver -- confirm whether a Chrome
# driver built from chrome_options was intended here.
driver = webdriver.PhantomJS()  # executable_path=driver_loc
driver.implicitly_wait(30)
driver.maximize_window()
# Open the data-download page and follow its "Sign in" link.
driver.get("https://www.lendingclub.com/info/download-data.action")
driver.find_element_by_link_text('Sign in').click()

# Locate both login fields first, then type into each after a fixed pause.
email_box = driver.find_element_by_name('email')
password_box = driver.find_element_by_name('password')
for field, text in ((email_box, email), (password_box, password)):
    time.sleep(5)
    field.send_keys(text)

# Submit the form and pause before continuing to navigate.
driver.find_element_by_class_name('form-button').click()
time.sleep(10)

# Follow the "Statistics" link, then the "Download Data" link.
for link_text in ('Statistics', 'Download Data'):
    driver.find_element_by_link_text(link_text).click()
# Collect the value attribute of every <option> in the loan-stats dropdown.
dropdown = driver.find_element_by_id('loanStatsDropdown')
optionsList = [
    entry.get_attribute("value")
    for entry in dropdown.find_elements_by_tag_name("option")
]

# Select each option in turn and click the download control for it. The
# dropdown is looked up again on every pass instead of reusing the element
# found above.
for value in optionsList:
    print("starting loop on option %s" % value)
    Select(driver.find_element_by_id('loanStatsDropdown')).select_by_value(value)
    driver.find_element_by_id('currentLoanStatsFileName').click()
    time.sleep(2)

# The payment-history file is linked from a separate statistics page.
driver.get('https://www.lendingclub.com/site/additional-statistics')
driver.find_element_by_link_text(
    'All payments (includes payments made to investors and to LendingClub)'
).click()
time.sleep(2)
# Poll the download directory until it holds the expected number of files and
# none of them is still a partial ("crdownload") download.
expected_file_count = len(optionsList) + 1  # +1 for one pmt history file
while True:
    files = os.listdir(full_path)
    if len(files) != expected_file_count:
        # Not every download has appeared in the directory yet.
        time.sleep(5)
        continue
    time.sleep(5)
    if any('crdownload' in filename for filename in files):
        # At least one file is still being written; wait once per poll
        # instead of once per partial file.
        print('waiting on downloads to finish.')
        time.sleep(60)
        continue
    time.sleep(2)
    break
print('done downloading')
# quit() ends the whole WebDriver session, including the driver process;
# close() would only close the current window.
driver.quit()
def _sha256_by_filename(directory):
    """Return ``{filename: SHA-256 hex digest}`` for the files in *directory*.

    Hidden entries (names starting with ``.``) and anything that is not a
    regular file are skipped, so both download directories are measured by the
    same rule. Digests are computed in-process with hashlib, so filenames
    containing spaces or shell metacharacters are handled safely.
    """
    digests = {}
    for name in sorted(os.listdir(directory)):
        path = os.path.join(directory, name)
        if name.startswith('.') or not os.path.isfile(path):
            continue
        hasher = hashlib.sha256()
        with open(path, 'rb') as handle:
            # Read in 1 MiB chunks so large files are never fully loaded.
            for chunk in iter(lambda: handle.read(1 << 20), b''):
                hasher.update(chunk)
        digests[name] = hasher.hexdigest()
    return digests


# Digests for the run that just finished. Values are hex strings.
just_dled_hashes = _sha256_by_filename(full_path)
# Defined up front so it exists even when there is no earlier run to compare.
previous_dled_hashes = {}

# Directory names embed a sortable timestamp, so after sorting the last entry
# is the newest run and the one before it is the previous run.
dirs = sorted(
    d for d in os.listdir(ppath) if os.path.isdir(os.path.join(ppath, d)))

if len(dirs) < 2:
    print('this is probably your first time downloading the csvs.')
else:
    previous_dled = dirs[-2]
    previous_full_path = ppath + '/' + previous_dled + '/'
    previous_dled_hashes = _sha256_by_filename(previous_full_path)
    # Dict equality covers a different number of files, different file names
    # and different contents in a single comparison.
    if just_dled_hashes == previous_dled_hashes:
        print('no change to previous downloaded lending club loan info csvs')
    else:
        print(
            'At least one of the files changed. Probably need to unzip csvs and re-run cleaning scripts.'
        )
In [ ]:
# Notebook display: filename -> SHA-256 digest mapping for the files of the
# run that just finished.
just_dled_hashes
In [ ]:
# Notebook display: filename -> SHA-256 digest mapping for the files of the
# previous download directory, as filled in by the comparison cell.
previous_dled_hashes
In [ ]:
# AWS credentials are read from the environment instead of being written into
# the notebook; each value is None when its variable is not set.
# NOTE(review): nothing in the visible cells passes these two names to boto3 --
# confirm whether they are still needed.
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')
In [ ]:
import boto3
# S3 service resource, created without explicit credentials or region.
# NOTE(review): presumably boto3 falls back to its default credential lookup
# here; the aws_* variables assigned in the earlier cell are not passed in.
s3 = boto3.resource('s3')
In [ ]:
# Sorted names of the sub-directories of ppath; the last one in sort order is
# taken as the most recent download run.
dirs = sorted(
    entry for entry in os.listdir(ppath)
    if os.path.isdir(os.path.join(ppath, entry))
)
most_recent_dls = dirs[-1]
In [ ]:
# Names of the entries inside the most recent download directory; the upload
# cell iterates over this list.
files_to_store = os.listdir(ppath+'/'+most_recent_dls)
In [ ]:
# Notebook display: full path of the most recent download directory.
ppath+'/'+most_recent_dls
In [ ]:
# Upload every file of the most recent download directory to S3 under the key
# "<download directory name>/<file name>".
bucket_name = ''  # blank in the notebook; set the destination bucket before running
bucket = s3.Bucket(bucket_name)
# NOTE(review): tqdm_notebook is not imported in any visible cell -- confirm it
# is brought into scope elsewhere (e.g. `from tqdm import tqdm_notebook`).
for file_ in tqdm_notebook(files_to_store):
    local_path = ppath + '/' + most_recent_dls + '/' + file_
    # The context manager closes each file as soon as its upload returns, so
    # handles do not pile up across the loop.
    with open(local_path, 'rb') as data:
        bucket.put_object(
            Key='{0}/{1}'.format(most_recent_dls, file_), Body=data)
In [ ]: