Use the ZIP code



In [238]:

    
%matplotlib inline
import pandas as pd
import mpld3
mpld3.enable_notebook()
%matplotlib inline
import matplotlib
import seaborn as sns
matplotlib.rcParams['savefig.dpi'] = 2 * matplotlib.rcParams['savefig.dpi']



In [4]:

    
import pandas as pd



In [7]:

    
Maps = pd.read_csv('./zip_code_database.csv')



In [333]:

    
import dill

# Load the cleaned loan table. dill, like pickle, can execute arbitrary code
# while loading, so this file must come from a trusted source.
with open('../LendingClubPrediction/CleanedUpData.pkl', 'rb') as pickled_frame:
    df = dill.load(pickled_frame)



In [448]:

    
# Convert percentage strings such as '7.14%' to floats (7.14) by dropping the
# last character, in one vectorised pass instead of one Python-level `.iloc`
# lookup per row.
# The dtype guard makes the cell safe to re-run: once the column is numeric
# there is nothing left to strip, whereas slicing a float with [:-1] raises
# TypeError.
if df['int_rate'].dtype == object:
    df['int_rate'] = df['int_rate'].str[:-1].astype(float)



In [65]:

    
county = pd.read_csv('../zcta_county_rel_10.txt')



In [11]:

    
valide_zip= pd.unique(df.zip_code)



In [12]:

    
# Masked ZIP codes such as '624xx' -> integer prefix (624).
zip_number = list(map(lambda masked: int(masked.replace('xx', '')), valide_zip))



In [26]:

    
# ZIP prefix for every row of Maps: divide by 100 and floor, which drops the
# last two digits (e.g. 62401 -> 624). The result is stored on Maps as the
# 'zip_3' column in a later cell.
a=(Maps.zip/100).floordiv(1)



In [41]:

    
# Attach the ZIP prefix computed above to the ZIP reference table.
Maps['zip_3'] = a

# One-column frame holding the prefixes found in the loan data; it is merged
# against Maps on 'zip_3'.
zip_indata = pd.DataFrame(zip_number, columns=['zip_3'])



In [42]:

    
# Keep only the ZIP reference rows whose prefix also occurs in zip_indata.
after_merge = pd.merge(Maps, zip_indata, on='zip_3', how='inner')



In [46]:

    
after_merge.to_csv('trial_1.csv')



In [47]:

    
pd.unique(after_merge.zip).shape









    Out[47]:





(40055,)



In [ ]:



In [68]:

    
# Join the filtered ZIP rows to the ZCTA/county relationship table:
# 'zip' on the left is matched against 'ZCTA5' on the right.
county_zip = pd.merge(
    after_merge,
    county,
    left_on='zip',
    right_on='ZCTA5',
    how='inner',
)



In [72]:

    
county_zip.iloc[:500].to_csv('county_1.csv')



In [ ]:



In [ ]:



In [ ]:



In [70]:

    
county_zip.columns









    Out[70]:





Index([u'zip', u'type', u'primary_city', u'acceptable_cities',
       u'unacceptable_cities', u'state', u'county', u'timezone', u'area_codes',
       u'latitude', u'longitude', u'world_region', u'country',
       u'decommissioned', u'estimated_population', u'notes', u'zip_3',
       u'ZCTA5', u'STATE', u'COUNTY', u'GEOID', u'POPPT', u'HUPT', u'AREAPT',
       u'AREALANDPT', u'ZPOP', u'ZHU', u'ZAREA', u'ZAREALAND', u'COPOP',
       u'COHU', u'COAREA', u'COAREALAND', u'ZPOPPCT', u'ZHUPCT', u'ZAREAPCT',
       u'ZAREALANDPCT', u'COPOPPCT', u'COHUPCT', u'COAREAPCT',
       u'COAREALANDPCT'],
      dtype='object')



In [104]:

    
from bokeh.sampledata import us_states, us_counties, unemployment
from bokeh.plotting import figure, show, output_file, output_notebook



In [105]:

    
# States/territories left off the map. Defined once so the county coordinates
# and the county colours are always filtered by the same list.
EXCLUDED_STATES = ("ak", "hi", "pr", "gu", "vi", "mp", "as")

# On the first run these names are the bokeh sample-data modules; afterwards
# they are rebound to plain dicts, which have no `.data`. Falling back to the
# object itself keeps the cell re-runnable without re-importing.
us_states = getattr(us_states, "data", us_states).copy()
us_counties = getattr(us_counties, "data", us_counties).copy()
unemployment = getattr(unemployment, "data", unemployment)

# pop() rather than del so a second run, where the keys are already gone,
# does not raise KeyError.
us_states.pop("HI", None)
us_states.pop("AK", None)

state_xs = [state["lons"] for state in us_states.values()]
state_ys = [state["lats"] for state in us_states.values()]

colors = ["#F1EEF6", "#D4B9DA", "#C994C7", "#DF65B0", "#DD1C77", "#980043"]

# Single pass over the counties so the outlines and colours stay index-aligned.
county_xs, county_ys, county_colors = [], [], []
for county_id, county in us_counties.items():
    if county["state"] in EXCLUDED_STATES:
        continue
    county_xs.append(county["lons"])
    county_ys.append(county["lats"])
    try:
        rate = unemployment[county_id]
    except KeyError:
        # No unemployment figure for this county.
        county_colors.append("black")
        continue
    # One palette step per 2 units of rate, capped at the last colour.
    county_colors.append(colors[min(int(rate / 2), len(colors) - 1)])



In [112]:

    
len(county_xs[40])









    Out[112]:





183

LET US JUST DO STATES

PLANS:

1. include a page of exploring different features

    1.1 Need to get the map one working. 
        1.1.1 give options about further filtering based on FICO credit score, and LC  (MAYBE, depends on whether I have time or not.)
    1.2 Need bar graphs for FICO score, home ownership, and other features too. So the KEY is to make it PRETTY.

2. the prediction app

    2.1 Need to explain why I use the ensemble model. The simple answer is that it works better. LOL. Boosted decision trees seem to work better, though logistic regression seems to be enough.
    2.2 Another issue is that if I am going to deploy to Heroku, probably I can only use logistic regression. 
    2.3 Engineer some other features? Like the difference between the low and high score. Maybe some NLP stuff, though I am not sure this will work. Or the employment rate of the states? Liberal vs. conservative? This sort of thing. I should just factor it in using a mean model.

3. Maybe a portfolio, and a training program

    3.1 This is an afterthought. 
    3.2 Involves getting the API working



In [ ]:

some API stuff



In [699]:

    
import simplejson
from requests_oauthlib import OAuth1
import requests
# Read the API credentials from a local JSON file.
with open("./lendingclub_secrets.json.nogit") as secrets_file:
    secrets = simplejson.load(secrets_file)

# Build the auth object from the API key alone; api_secret, access_token and
# access_token_secret are not passed (they were commented out).
auth = OAuth1(secrets["api_key"])



In [700]:

    
# Query Parameters:
# showAll — A non-required Boolean parameter that defines the contents of the result.
# showAll= False



In [701]:

    
# showAll is an optional boolean query parameter of the listing endpoint.
params = {'showAll': True}

# Take the Authorization value from the secrets file loaded in an earlier cell
# instead of embedding the token in the notebook, where it leaks to everyone
# the notebook is shared with.
# NOTE(review): assumes secrets["api_key"] holds the same value as the token
# that used to be hardcoded here -- confirm, and rotate the exposed token.
r = requests.get(
    "https://api.lendingclub.com/api/investor/v1/loans/listing",
    headers={'Authorization': secrets["api_key"]},
    params=params,
    timeout=30,  # requests waits indefinitely unless a timeout is given
)

# Parenthesised form prints the same text under Python 2 and Python 3.
print(r)









    



<Response [200]>



In [704]:

    
# Decode the JSON body of the response, then report how many loans it lists.
loanlist = r.json()
loan_count = len(loanlist['loans'])
print(loan_count)



In [703]:

    
import dill

# Persist the decoded listing to disk.
with open('loanlist_3.pkl', 'wb') as snapshot_file:
    dill.dump(loanlist, snapshot_file)



In [712]:

    
loanlist['loans'][0]









    Out[712]:





{u'accNowDelinq': 0,
 u'accOpenPast24Mths': 7,
 u'acceptD': u'2015-09-22T07:38:48.000-07:00',
 u'addrState': u'IL',
 u'addrZip': u'624xx',
 u'annualInc': 75000.0,
 u'avgCurBal': 10216,
 u'bcOpenToBuy': 16025,
 u'bcUtil': 59.5,
 u'chargeoffWithin12Mths': 0,
 u'collections12MthsExMed': 0,
 u'creditPullD': u'2015-09-17T15:45:46.000-07:00',
 u'delinq2Yrs': 0,
 u'delinqAmnt': 0.0,
 u'desc': None,
 u'dti': 35.62,
 u'earliestCrLine': u'1997-06-16T17:00:00.000-07:00',
 u'empLength': None,
 u'empTitle': None,
 u'expD': u'2015-10-06T07:41:16.000-07:00',
 u'expDefaultRate': 10.0,
 u'ficoRangeHigh': 714,
 u'ficoRangeLow': 710,
 u'fundedAmount': 27475.0,
 u'grade': u'F',
 u'homeOwnership': u'MORTGAGE',
 u'id': 60970675,
 u'ilsExpD': u'2015-09-27T18:00:00.000-07:00',
 u'initialListStatus': u'F',
 u'inqLast6Mths': 1,
 u'installment': 760.06,
 u'intRate': 21.99,
 u'investorCount': None,
 u'isIncV': u'NOT_VERIFIED',
 u'listD': u'2015-09-27T18:00:00.000-07:00',
 u'loanAmount': 27525.0,
 u'memberId': 65012395,
 u'moSinOldIlAcct': 181,
 u'moSinOldRevTlOp': 219,
 u'moSinRcntRevTlOp': 1,
 u'moSinRcntTl': 1,
 u'mortAcc': 5,
 u'mthsSinceLastDelinq': 31,
 u'mthsSinceLastMajorDerog': None,
 u'mthsSinceLastRecord': None,
 u'mthsSinceRecentBc': 9,
 u'mthsSinceRecentBcDlq': None,
 u'mthsSinceRecentInq': 2,
 u'mthsSinceRecentRevolDelinq': 31,
 u'numAcctsEver120Ppd': 0,
 u'numActvBcTl': 4,
 u'numActvRevTl': 9,
 u'numBcSats': 6,
 u'numBcTl': 14,
 u'numIlTl': 11,
 u'numOpRevTl': 18,
 u'numRevAccts': 34,
 u'numRevTlBalGt0': 9,
 u'numSats': 23,
 u'numTl120dpd2m': 0,
 u'numTl30dpd': 0,
 u'numTl90gDpd24m': 0,
 u'numTlOpPast12m': 5,
 u'openAcc': 23,
 u'pctTlNvrDlq': 98,
 u'percentBcGt75': 60.0,
 u'pubRec': 0,
 u'pubRecBankruptcies': 0,
 u'purpose': u'credit_card',
 u'reviewStatus': u'NOT_APPROVED',
 u'reviewStatusD': None,
 u'revolBal': 34923.0,
 u'revolUtil': 53.7,
 u'serviceFeeRate': 0.71,
 u'subGrade': u'F1',
 u'taxLiens': 0,
 u'term': 60,
 u'totCollAmt': 0,
 u'totCurBal': 204317,
 u'totHiCredLim': 267778,
 u'totalAcc': 50,
 u'totalBalExMort': 78273,
 u'totalBcLimit': 39600,
 u'totalIlHighCreditLimit': 58224,
 u'totalRevHiLim': 65000}



In [185]:

    
len(loanlist['loans'])









    Out[185]:





348



In [187]:

    
# Member id of every listed loan. Iterating over the list itself, rather than
# range(348), keeps this correct whatever the size of the listing: a shorter
# listing no longer raises IndexError and a longer one is no longer truncated.
ids = [loan['memberId'] for loan in loanlist['loans']]



In [189]:

65013825



In [ ]:



In [ ]:



In [ ]:

    
# output_file("choropleth.html", title="choropleth.py example")
# output_notebook()
# TOOLS = ("hover,save")
# p = figure(title="US Unemployment 2009", toolbar_location="left",tools =TOOLS,
#     plot_width=1100, plot_height=700)

# p.patches(county_xs, county_ys, fill_color=county_colors, fill_alpha=0.7,
#     line_color="white", line_width=0.5)
# p.patches(state_xs, state_ys, fill_alpha=0.0,
#     line_color="#884444", line_width=2)

# show(p)



In [196]:

    
import dill

# Reload two previously saved listings. dill, like pickle, can execute
# arbitrary code while loading, so only open files from a trusted source.
with open('loanlist_2.pkl', 'rb') as snapshot_file:
    loanlist_2 = dill.load(snapshot_file)

with open('loanlist.pkl', 'rb') as snapshot_file:
    loanlist_1 = dill.load(snapshot_file)



In [203]:

    
# Loan ids of each saved listing. Both lists are built by iterating the loans
# directly; ids_1 previously assumed exactly 348 entries via range(348).
ids_1 = [loan['id'] for loan in loanlist_1['loans']]
ids_2 = [loan['id'] for loan in loanlist_2['loans']]



In [209]:

    
# Loan ids present in ids_1 but absent from ids_2, in ids_1 order.
# Membership is tested against a set so each lookup is constant time instead
# of a scan over the whole ids_2 list.
ids_2_lookup = set(ids_2)
missing = [loan_id for loan_id in ids_1 if loan_id not in ids_2_lookup]



In [210]:

    
len(missing)









    Out[210]:





61



In [211]:

    
loanlist_1['loans'][1]









    Out[211]:





{u'accNowDelinq': 0,
 u'accOpenPast24Mths': 4,
 u'acceptD': u'2015-09-21T11:55:30.000-07:00',
 u'addrState': u'HI',
 u'addrZip': u'968xx',
 u'annualInc': 75000.0,
 u'avgCurBal': 4375,
 u'bcOpenToBuy': 5541,
 u'bcUtil': 75.0,
 u'chargeoffWithin12Mths': 0,
 u'collections12MthsExMed': 0,
 u'creditPullD': u'2015-09-18T12:23:22.000-07:00',
 u'delinq2Yrs': 0,
 u'delinqAmnt': 0.0,
 u'desc': None,
 u'dti': 30.11,
 u'earliestCrLine': u'1995-11-17T16:00:00.000-08:00',
 u'empLength': 60,
 u'empTitle': u'Administrative Officer',
 u'expD': u'2015-10-05T12:00:54.000-07:00',
 u'expDefaultRate': 3.67,
 u'ficoRangeHigh': 664,
 u'ficoRangeLow': 660,
 u'fundedAmount': 20875.0,
 u'grade': u'B',
 u'homeOwnership': u'RENT',
 u'id': 60761061,
 u'ilsExpD': u'2015-09-24T10:00:00.000-07:00',
 u'initialListStatus': u'F',
 u'inqLast6Mths': 0,
 u'installment': 677.52,
 u'intRate': 9.99,
 u'investorCount': None,
 u'isIncV': u'SOURCE_VERIFIED',
 u'listD': u'2015-09-24T10:00:00.000-07:00',
 u'loanAmount': 21000.0,
 u'memberId': 64802803,
 u'moSinOldIlAcct': 91,
 u'moSinOldRevTlOp': 238,
 u'moSinRcntRevTlOp': 10,
 u'moSinRcntTl': 10,
 u'mortAcc': 0,
 u'mthsSinceLastDelinq': None,
 u'mthsSinceLastMajorDerog': None,
 u'mthsSinceLastRecord': None,
 u'mthsSinceRecentBc': 49,
 u'mthsSinceRecentBcDlq': None,
 u'mthsSinceRecentInq': 3,
 u'mthsSinceRecentRevolDelinq': None,
 u'numAcctsEver120Ppd': 0,
 u'numActvBcTl': 5,
 u'numActvRevTl': 11,
 u'numBcSats': 7,
 u'numBcTl': 15,
 u'numIlTl': 5,
 u'numOpRevTl': 14,
 u'numRevAccts': 26,
 u'numRevTlBalGt0': 11,
 u'numSats': 15,
 u'numTl120dpd2m': 0,
 u'numTl30dpd': 0,
 u'numTl90gDpd24m': 0,
 u'numTlOpPast12m': 1,
 u'openAcc': 15,
 u'pctTlNvrDlq': 100,
 u'percentBcGt75': 71.4,
 u'pubRec': 0,
 u'pubRecBankruptcies': 0,
 u'purpose': u'debt_consolidation',
 u'reviewStatus': u'APPROVED',
 u'reviewStatusD': u'2015-09-21T12:01:20.000-07:00',
 u'revolBal': 49284.0,
 u'revolUtil': 75.0,
 u'serviceFeeRate': 0.85,
 u'subGrade': u'B3',
 u'taxLiens': 0,
 u'term': 36,
 u'totCollAmt': 0,
 u'totCurBal': 65619,
 u'totHiCredLim': 85876,
 u'totalAcc': 32,
 u'totalBalExMort': 65619,
 u'totalBcLimit': 22200,
 u'totalIlHighCreditLimit': 20176,
 u'totalRevHiLim': 65700}



In [332]:

    
df.columns









    Out[332]:





Index([u'group', u'score'], dtype='object')

Build the data used for the figure



In [334]:

    
# Boolean flag column: True where `stat` equals 1.
df['paidoff'] = df['stat'].eq(1)



In [ ]:



In [257]:



In [335]:

    
# Per-state summary table. The frame is grouped once and only the needed
# columns are aggregated; previously a full-frame groupby().mean() over every
# column was recomputed for each output column.
by_state_groups = df.groupby(u'addr_state')

bystate = pd.DataFrame()
bystate['mean_rate'] = by_state_groups['paidoff'].mean()   # share with paidoff == True
bystate['default_rate'] = 1 - bystate['mean_rate']

bystate['count'] = by_state_groups['paidoff'].count()
# This column used to be assigned twice in a row with the same expression.
# NOTE(review): the second copy may have been meant for fico_range_low --
# confirm before adding that column.
bystate['fico_range_high'] = by_state_groups['fico_range_high'].mean()



In [336]:

    
bystate.to_csv('bystate.csv')



In [ ]:



In [283]:

    
df[df.addr_state=='IA']









    Out[283]:






  
    
      
      id
      member_id
      loan_amnt
      funded_amnt
      funded_amnt_inv
      term
      int_rate
      installment
      grade
      sub_grade
      ...
      last_pymnt_amnt
      next_pymnt_d
      last_credit_pull_d
      last_fico_range_high
      last_fico_range_low
      collections_12_mths_ex_med
      mths_since_last_major_derog
      policy_code
      stat
      paidoff
    
  
  
    
      29843
      518047
      669591
      9600
      9600
      9600.00
      36 months
      7.14%
      297.04
      A
      A3
      ...
      320.33
      NaN
      May-2013
      704
      700
      0
      NaN
      1
      1
      True
    
    
      39084
      281565
      281517
      7000
      7000
      3250.00
      36 months
      11.34%
      230.30
      C
      C2
      ...
      2889.87
      NaN
      May-2010
      519
      515
      0
      NaN
      1
      1
      True
    
    
      39312
      248498
      248495
      19500
      19500
      9232.12
      36 months
      9.76%
      627.02
      B
      B2
      ...
      635.07
      NaN
      Feb-2011
      799
      795
      0
      NaN
      1
      1
      True
    
    
      39423
      220023
      219944
      1850
      1850
      1575.00
      36 months
      7.12%
      57.23
      A
      A1
      ...
      3.53
      NaN
      Aug-2010
      754
      750
      0
      NaN
      1
      1
      True
    
    
      39425
      222488
      222393
      18500
      18500
      425.00
      36 months
      9.51%
      592.70
      B
      B2
      ...
      1.62
      NaN
      Mar-2010
      754
      750
      0
      NaN
      1
      1
      True
    
  

5 rows × 58 columns



In [ ]:



In [ ]:



In [339]:



In [ ]:



In [ ]:

    
from bokeh.plotting import figure, show, output_file



In [ ]:



In [382]:



In [ ]:

Loan by grade



In [478]:

    
from bokeh.models import HoverTool, ColumnDataSource
from collections import OrderedDict


# bygrade = pd.DataFrame()
# bygrade['mean_rate'] = df.groupby([u'sub_grade']).mean().paidoff
# bygrade['default_rate'] = 1-df.groupby([u'sub_grade']).mean().paidoff
# bygrade['int_rate'] = df.groupby([u'sub_grade']).mean().int_rate

# bygrade['counts'] = df.groupby([u'sub_grade']).count().paidoff

# Per-grade summary table. Group once and aggregate only the columns that are
# used, instead of recomputing a full-frame groupby().mean() per output column.
# int_rate must already be numeric here (see the cell that strips the '%').
by_grade_groups = df.groupby(u'grade')

bygrade = pd.DataFrame()
bygrade['mean_rate'] = by_grade_groups['paidoff'].mean()   # share with paidoff == True
bygrade['default_rate'] = 1 - bygrade['mean_rate']
bygrade['int_rate'] = by_grade_groups['int_rate'].mean()

bygrade['counts'] = by_grade_groups['paidoff'].count()

# Plot inputs: grade labels plus pay-off / default shares as percentages.
xgrades = list(bygrade.index)
pay_grades = bygrade['mean_rate'].values * 100
dft_grades = bygrade['default_rate'].values * 100



In [479]:

    
# xx = df.groupby([u'sub_grade']).mean()



In [ ]:



In [698]:

    
# TOOLS = "hover,save"

# p = figure(background_fill="#EFE8E2", 
#            x_range=xgrades,
#            x_axis_label='LC grade', y_axis_label=('Pay off rate (%)'),
#            y_range = [0, 100],
#            title="Loan Outcome by LendingClub Grade",
#            tools = TOOLS,
#            plot_width=800, 
#            plot_height=400)

# source1 = ColumnDataSource(
#     data=dict(pay_grades=pay_grades, dft_grades=dft_grades,int_rate=bygrade['int_rate'].values)
# )
# source2 = ColumnDataSource(
#     data=dict(pay_grades=pay_grades, dft_grades=dft_grades,int_rate=bygrade['int_rate'].values)
# )

# p.rect(xgrades, pay_grades/2,  0.6, pay_grades,
#     fill_color="#08c994", source = source1)
# p.rect(xgrades, dft_grades/2 + pay_grades,  0.6,dft_grades,
#     fill_color="#ff5a00", source = source2)


# hover = p.select(dict(type=HoverTool))
# hover.tooltips = OrderedDict([
#     ('Grade', "$x"),
#     ('Payoff rate (%)', '@pay_grades'),
#     ('Default rate (%)', '@dft_grades'),
#     ('Interest (%)','@int_rate'),
# ])

# show(p)



In [ ]:

Loan by credit score



In [ ]:



In [497]:

    
# Per-FICO-bucket summary, grouped once on fico_range_high.
paidoff_by_fico = df.groupby(u'fico_range_high')['paidoff']

# Drop the first two buckets by POSITION. `.iloc` is used on every column:
# a bare `[2:]` on a float-valued index is a label slice (labels >= 2), not a
# positional one, so the original mix of `.iloc[2:]` and `[2:]` only agreed
# because the later columns were re-aligned to the first column's index.
# NOTE(review): why the two lowest buckets are skipped is not visible here --
# presumably too few loans; confirm.
paid_share = paidoff_by_fico.mean().iloc[2:]

byfico = pd.DataFrame()
byfico['mean_rate'] = paid_share
byfico['default_rate'] = 1 - paid_share
byfico['count'] = paidoff_by_fico.count().iloc[2:]



In [ ]:



In [696]:

    
# TOOLS = "hover,save"
# xfico = [(str(int(byfico.index[i]-4)) + ' - ' +str(int(byfico.index[i]))) for i in xrange(len(byfico.index))]

# p = figure(background_fill="#EFE8E2", 
#            x_axis_label='Pay off rate (%)', y_axis_label='FICO score',
#            x_range = [0, 100] , 
#            y_range = xfico[::-1],
#            title="Loan Outcome by FICO score",
#            tools = TOOLS,
#            plot_width=800, 
#            plot_height=600)

# source1 = ColumnDataSource(
#     data=dict(fico_score=xfico, dft_rate=byfico['default_rate'].iloc[::-1]*100,payoff_rate=byfico['mean_rate'].iloc[::-1]*100)
# )
# source2 = ColumnDataSource(
#     data=dict(fico_score=xfico, dft_rate=byfico['default_rate'].iloc[::-1]*100,payoff_rate=byfico['mean_rate'].iloc[::-1]*100)
# )

# p.rect(byfico['mean_rate'].iloc[::-1]*100/2,  
#        xfico[::-1],byfico['mean_rate'].iloc[::-1]*100 ,0.8, 
#        fill_color="#08c994", source = source1)

# p.rect(byfico['default_rate'].iloc[::-1]*100/2 + byfico['mean_rate'].iloc[::-1]*100, 
#        xfico[::-1], byfico['default_rate'].iloc[::-1]*100, 0.8,
#        fill_color="#ff5a00", source = source2)

# hover = p.select(dict(type=HoverTool))
# hover.tooltips = OrderedDict([
#     ('FICO score', "$y"),
#     ('Payoff rate (%)', '@payoff_rate'),
#     ('Default rate (%)', '@dft_rate'),

# ])

# show(p)



In [513]:

    
df.columns









    Out[513]:





Index([u'id', u'member_id', u'loan_amnt', u'funded_amnt', u'funded_amnt_inv',
       u'term', u'int_rate', u'installment', u'grade', u'sub_grade',
       u'emp_title', u'emp_length', u'home_ownership', u'annual_inc',
       u'verification_status', u'issue_d', u'loan_status', u'pymnt_plan',
       u'url', u'desc', u'purpose', u'title', u'zip_code', u'addr_state',
       u'dti', u'delinq_2yrs', u'earliest_cr_line', u'fico_range_low',
       u'fico_range_high', u'inq_last_6mths', u'mths_since_last_delinq',
       u'mths_since_last_record', u'open_acc', u'pub_rec', u'revol_bal',
       u'revol_util', u'total_acc', u'initial_list_status', u'out_prncp',
       u'out_prncp_inv', u'total_pymnt', u'total_pymnt_inv',
       u'total_rec_prncp', u'total_rec_int', u'total_rec_late_fee',
       u'recoveries', u'collection_recovery_fee', u'last_pymnt_d',
       u'last_pymnt_amnt', u'next_pymnt_d', u'last_credit_pull_d',
       u'last_fico_range_high', u'last_fico_range_low',
       u'collections_12_mths_ex_med', u'mths_since_last_major_derog',
       u'policy_code', u'stat', u'paidoff'],
      dtype='object')



In [533]:

    
# None of the live cells shown imports numpy (it only appears in commented-out
# code), so on a fresh kernel the line below raised NameError.
import numpy as np

# Bucket dti into bins of width 5, labelled by their lower edge
# (e.g. 17.3 -> 15.0).
df['dti_bin'] = np.floor(df.dti/5)*5



In [543]:

    
(pd.unique(df.emp_length))









    Out[543]:





array(['10+ years', '< 1 year', '3 years', '9 years', '4 years', '5 years',
       '1 year', '6 years', '2 years', '7 years', '8 years', 'n/a'], dtype=object)



In [605]:



In [ ]:



In [ ]:



In [607]:

    
# N = 100
# x = np.random.random(size=N) * 100
# y = np.random.random(size=N) * 100
# radii = np.random.random(size=N) * 1.5
# colors = ["#%02x%02x%02x" % (r, g, 150) for r, g in zip(np.floor(50+2*x), np.floor(30+2*y))]
# colors



In [680]:









    Out[680]:





9

Payoff rate by home ownership and emp_length



In [694]:

    
# data =  df.groupby(['home_ownership','emp_length']).mean().paidoff

# from collections import OrderedDict

# import numpy as np

# from bokeh.plotting import ColumnDataSource, figure, show, output_file
# from bokeh.models import HoverTool

# # Read in the data with pandas. Convert the year column to string
# home_ownership = ['RENT', 'OWN', 'MORTGAGE']
# emp_lenght = [ 'n/a', '< 1 year', '1 year', '3 years', '2 years', '4 years', '5 years',
#         '6 years', '7 years', '8 years', '9 years','10+ years']

# # colors = [
# #     "#08C994", "#26BB81", "#45AD6F", "#649F5C", "#83914A",
# #     "#A28337", "#C17525", "#E06712", "#FF5A00"
# # ]
# colors = [
#     "#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce",
#     "#ddb7b1", "#cc7878", "#933b41", "#550b1d"
# ]
# a=sorted(data.values)[::7]

# home = []
# emp = []
# color = []
# rate = []
# for y in emp_lenght:
#     for m in home_ownership:
#         home.append(m)
#         emp.append(y)
#         rate_by_home_emp = data[m][y]
#         rate.append(rate_by_home_emp*100)
#         for i in xrange(1,9):
#             if rate_by_home_emp > a[i-1] and rate_by_home_emp<= a[i]:
#                 ci = 9-i
# #         ci = int((rate_by_home_emp - min(data))/(max(data)-min(data))*8.9)
        
#         color.append(colors[ci])

# output_notebook()

# TOOLS = "hover,save"

# p = figure(
#     x_axis_label='Employment length (%)', y_axis_label='House Ownership',
#     y_range=home_ownership, x_range=emp_lenght ,
#     x_axis_location="above", plot_width=800, plot_height=400,
#     toolbar_location="left", tools=TOOLS)

# source = ColumnDataSource(
#     data=dict(home=home, emp=emp,color=color, rate=rate)
# )
# p.rect("emp", "home", 1, 1, source=source,  color="color",line_color=None)

# p.grid.grid_line_color = None
# p.axis.axis_line_color = None
# p.axis.major_tick_line_color = None
# p.axis.major_label_text_font_size = "12pt"
# p.axis.major_label_standoff = 0
# p.xaxis.major_label_orientation = np.pi/3

# hover = p.select(dict(type=HoverTool))
# hover.tooltips = OrderedDict([
#     ('Pay off rate (%)', '@rate'),
# ])

# show(p)      # show the plot



In [ ]:



In [590]:

    
max(np.random.random(size=1000) * 100)









    Out[590]:





99.780742644565407



In [ ]:



In [540]:

    
354/4/8









    Out[540]:





11

Portfolio generator



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]:

	id	member_id	loan_amnt	funded_amnt	funded_amnt_inv	term	int_rate	installment	grade	sub_grade	...	last_pymnt_amnt	next_pymnt_d	last_credit_pull_d	last_fico_range_high	last_fico_range_low	mths_since_last_major_derog	policy_code	stat	paidoff
29843	518047	669591	9600	9600	9600.00	36 months	7.14%	297.04	A	A3	...	320.33	NaN	May-2013	704	700	NaN	1	1	True
39084	281565	281517	7000	7000	3250.00	36 months	11.34%	230.30	C	C2	...	2889.87	NaN	May-2010	519	515	NaN	1	1	True
39312	248498	248495	19500	19500	9232.12	36 months	9.76%	627.02	B	B2	...	635.07	NaN	Feb-2011	799	795	NaN	1	1	True
39423	220023	219944	1850	1850	1575.00	36 months	7.12%	57.23	A	A1	...	3.53	NaN	Aug-2010	754	750	NaN	1	1	True
39425	222488	222393	18500	18500	425.00	36 months	9.51%	592.70	B	B2	...	1.62	NaN	Mar-2010	754	750	NaN	1	1	True