In [ ]:
import hashlib
import os
import subprocess
import time
from os import mkdir

from selenium import webdriver
# from selenium.webdriver import PhantomJS
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait

import dir_constants as dc
import lendingclub.account_info as acc_info

# --- Run configuration: target folder for this download run and login details ---
platform = 'lendingclub'
home = dc.home_path

# Root folder that holds one timestamped sub-folder per download run.
ppath = home + '/justin_tinkering/data_science/lendingclub/lendingclub_csvs'

# Zero-padded timestamp, so sorting folder names also sorts them by time.
now = time.strftime("%Y_%m_%d_%Hh_%Mm_%Ss")
full_path = ppath + '/' + 'lc_' + now

# makedirs instead of mkdir: the very first run must also create the root folder.
os.makedirs(full_path)

email = acc_info.email_throwaway
password = acc_info.password_throwaway

# Chrome preferences: send every download of this session into full_path.
chrome_options = webdriver.ChromeOptions()
prefs = {'download.default_directory': full_path}
chrome_options.add_experimental_option('prefs', prefs)
# driver_loc = "/home/justin/justin_tinkering/chromedriver"

# Start a Chrome session with chrome_options applied. The download directory is
# configured through chrome_options and the completion check later in this cell
# looks for Chrome's '.crdownload' partial files, so the browser must be Chrome;
# a PhantomJS session ignores those options and nothing reaches full_path.
driver = webdriver.Chrome(chrome_options=chrome_options)  # executable_path=driver_loc
driver.implicitly_wait(30)  # every find_element_* call polls for up to 30 seconds
driver.maximize_window()

# navigate to the application home page
driver.get("https://www.lendingclub.com/info/download-data.action")

# Open the login form.
sign_in = driver.find_element_by_link_text('Sign in')
sign_in.click()

email_box = driver.find_element_by_name('email')
password_box = driver.find_element_by_name('password')

# Fill in the credentials with a pause before each field.
time.sleep(5)
email_box.send_keys(email)
time.sleep(5)
password_box.send_keys(password)

# Submit the login form.
button = driver.find_element_by_class_name('form-button')
button.click()

# Give the post-login page time to load before navigating further.
time.sleep(10)

# Walk through the site menu to the data download page.
statistics = driver.find_element_by_link_text('Statistics')
statistics.click()

download_data = driver.find_element_by_link_text('Download Data')
download_data.click()

# Collect the value attribute of every <option> in the loan-stats dropdown
# before any download is triggered.
dropdown = driver.find_element_by_id('loanStatsDropdown')
optionsList = [
    opt.get_attribute("value")
    for opt in dropdown.find_elements_by_tag_name("option")
]

# Select each option in turn and click the matching download link.
for optionValue in optionsList:
    print("starting loop on option %s" % optionValue)

    Select(driver.find_element_by_id('loanStatsDropdown')).select_by_value(
        optionValue)
    driver.find_element_by_id('currentLoanStatsFileName').click()
    time.sleep(2)

# The payment-history file lives on a separate page.
driver.get('https://www.lendingclub.com/site/additional-statistics')
driver.find_element_by_link_text(
    'All payments (includes payments made to investors and to LendingClub)'
).click()
time.sleep(2)

# Block until every requested file is present in full_path and none of them is
# still a Chrome partial download ('.crdownload'), then shut the browser down.
expected_file_count = len(optionsList) + 1  # +1 for one pmt history file
download_timeout_s = 2 * 60 * 60  # give up instead of hanging forever
poll_interval_s = 5
deadline = time.time() + download_timeout_s

try:
    while True:
        # Hidden files (names starting with '.') are not downloads; counting
        # them would make the expected-count comparison unreliable.
        files = [f for f in os.listdir(full_path) if not f.startswith('.')]
        pending = [f for f in files if 'crdownload' in f]

        if len(files) == expected_file_count and not pending:
            time.sleep(2)  # short grace period before closing the browser
            break

        if time.time() > deadline:
            raise RuntimeError(
                'Timed out waiting for downloads: expected {0} files in {1}, '
                'found {2} ({3} still in progress).'.format(
                    expected_file_count, full_path, len(files), len(pending)))

        if pending:
            print('waiting on downloads to finish.')
        time.sleep(poll_interval_s)
finally:
    # quit() ends the whole driver session (close() only closes the window),
    # and the finally clause makes sure that happens on errors as well.
    driver.quit()

print('done downloading')

def _sha256_by_filename(directory):
    """Return {filename: SHA-256 hex digest} for the regular, non-hidden files in `directory`.

    Files are hashed in-process with hashlib, so no shell command is built from
    file paths and no external `shasum` binary is needed. Digests are hex
    strings.
    """
    digests = {}
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        if name.startswith('.') or not os.path.isfile(path):
            continue
        sha = hashlib.sha256()
        with open(path, 'rb') as fh:
            # Read in 1 MiB chunks so large files are not loaded whole.
            for chunk in iter(lambda: fh.read(1 << 20), b''):
                sha.update(chunk)
        digests[name] = sha.hexdigest()
    return digests


# Hashes of the files downloaded in this run.
just_dled_hashes = _sha256_by_filename(full_path)
# Defined up front so it exists even when there is no earlier download folder.
previous_dled_hashes = {}

# Download folders sorted by name; the last one is this run, the one before it
# is the previous run.
dirs = [d for d in os.listdir(ppath) if os.path.isdir(os.path.join(ppath, d))]
dirs.sort()

if len(dirs) < 2:
    print('this is probably your first time downloading the csvs.')
else:
    previous_dled = dirs[-2]
    previous_dled_hashes = _sha256_by_filename(
        os.path.join(ppath, previous_dled))

    # Dict equality covers a different file count, different file names and
    # different contents in one comparison.
    if just_dled_hashes == previous_dled_hashes:
        print('no change to previous downloaded lending club loan info csvs')
    else:
        print(
            'At least one of the files changed. Probably need to unzip csvs and re-run cleaning scripts.'
        )

In [ ]:
# Display the hashes of the files downloaded in this run, keyed by file name.
just_dled_hashes

In [ ]:
# Display the hashes of the files in the previous download folder, keyed by file name.
previous_dled_hashes

The cells below were used to store the downloaded files in an S3 bucket.


In [ ]:
# Read the AWS credentials from the environment instead of pasting them into
# the notebook; both values are None when the variables are not set.
aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')

In [ ]:
import boto3

# S3 resource handle used by the upload cell; no credentials are passed explicitly here.
s3 = boto3.resource('s3')

In [ ]:
# Sub-folders of ppath in name order; the last entry is taken as the most
# recent download folder.
dirs = sorted(
    entry for entry in os.listdir(ppath)
    if os.path.isdir(os.path.join(ppath, entry))
)
most_recent_dls = dirs[-1]

In [ ]:
# Names of the files inside the most recent download folder.
files_to_store = os.listdir('{0}/{1}'.format(ppath, most_recent_dls))

In [ ]:
# Display the full path of the folder whose files are listed in files_to_store.
ppath+'/'+most_recent_dls

In [ ]:
# Upload every file of the most recent download folder to S3 under the key
# '<folder name>/<file name>'.
try:
    from tqdm import tqdm_notebook
except ImportError:
    # The progress bar is optional; without tqdm just iterate normally.
    def tqdm_notebook(iterable, *args, **kwargs):
        return iterable

bucket_name = ''  # set this to the destination bucket before running the cell
if not bucket_name:
    raise ValueError('bucket_name is empty; set it to the target S3 bucket name.')

for file_ in tqdm_notebook(files_to_store):
    local_path = '{0}'.format(ppath+'/'+most_recent_dls+'/'+file_)
    # The with-block closes each file handle once its upload call returns.
    with open(local_path, 'rb') as data:
        s3.Bucket(bucket_name).put_object(
            Key='{0}/{1}'.format(most_recent_dls, file_), Body=data)

In [ ]: