In [47]:
import pandas as pd
from matplotlib import pyplot as plt
from scipy.stats import spearmanr
from sklearn.feature_selection import VarianceThreshold
%matplotlib inline
In [2]:
# Shell escape: list the contents of the local `data` directory.
!ls data
prosperLoanData.csv
In [3]:
# Load the Prosper loan CSV into `df`, print its (rows, columns) shape,
# and preview the first five records as the cell's displayed output.
df = pd.read_csv(filepath_or_buffer='data/prosperLoanData.csv')
print(df.shape)
df.head(n=5)
(113937, 81)
Out[3]:
ListingKey
ListingNumber
ListingCreationDate
CreditGrade
Term
LoanStatus
ClosedDate
BorrowerAPR
BorrowerRate
LenderYield
...
LP_ServiceFees
LP_CollectionFees
LP_GrossPrincipalLoss
LP_NetPrincipalLoss
LP_NonPrincipalRecoverypayments
PercentFunded
Recommendations
InvestmentFromFriendsCount
InvestmentFromFriendsAmount
Investors
0
1021339766868145413AB3B
193129
2007-08-26 19:09:29.263000000
C
36
Completed
2009-08-14 00:00:00
0.16516
0.1580
0.1380
...
-133.18
0.0
0.0
0.0
0.0
1.0
0
0
0.0
258
1
10273602499503308B223C1
1209647
2014-02-27 08:28:07.900000000
NaN
36
Current
NaN
0.12016
0.0920
0.0820
...
0.00
0.0
0.0
0.0
0.0
1.0
0
0
0.0
1
2
0EE9337825851032864889A
81716
2007-01-05 15:00:47.090000000
HR
36
Completed
2009-12-17 00:00:00
0.28269
0.2750
0.2400
...
-24.20
0.0
0.0
0.0
0.0
1.0
0
0
0.0
41
3
0EF5356002482715299901A
658116
2012-10-22 11:02:35.010000000
NaN
36
Current
NaN
0.12528
0.0974
0.0874
...
-108.01
0.0
0.0
0.0
0.0
1.0
0
0
0.0
158
4
0F023589499656230C5E3E2
909464
2013-09-14 18:38:39.097000000
NaN
36
Current
NaN
0.24614
0.2085
0.1985
...
-60.27
0.0
0.0
0.0
0.0
1.0
0
0
0.0
20
5 rows × 81 columns
In [4]:
# Display the column labels of the loaded DataFrame.
df.columns
Out[4]:
Index(['ListingKey', 'ListingNumber', 'ListingCreationDate', 'CreditGrade',
'Term', 'LoanStatus', 'ClosedDate', 'BorrowerAPR', 'BorrowerRate',
'LenderYield', 'EstimatedEffectiveYield', 'EstimatedLoss',
'EstimatedReturn', 'ProsperRating (numeric)', 'ProsperRating (Alpha)',
'ProsperScore', 'ListingCategory (numeric)', 'BorrowerState',
'Occupation', 'EmploymentStatus', 'EmploymentStatusDuration',
'IsBorrowerHomeowner', 'CurrentlyInGroup', 'GroupKey',
'DateCreditPulled', 'CreditScoreRangeLower', 'CreditScoreRangeUpper',
'FirstRecordedCreditLine', 'CurrentCreditLines', 'OpenCreditLines',
'TotalCreditLinespast7years', 'OpenRevolvingAccounts',
'OpenRevolvingMonthlyPayment', 'InquiriesLast6Months', 'TotalInquiries',
'CurrentDelinquencies', 'AmountDelinquent', 'DelinquenciesLast7Years',
'PublicRecordsLast10Years', 'PublicRecordsLast12Months',
'RevolvingCreditBalance', 'BankcardUtilization',
'AvailableBankcardCredit', 'TotalTrades',
'TradesNeverDelinquent (percentage)', 'TradesOpenedLast6Months',
'DebtToIncomeRatio', 'IncomeRange', 'IncomeVerifiable',
'StatedMonthlyIncome', 'LoanKey', 'TotalProsperLoans',
'TotalProsperPaymentsBilled', 'OnTimeProsperPayments',
'ProsperPaymentsLessThanOneMonthLate',
'ProsperPaymentsOneMonthPlusLate', 'ProsperPrincipalBorrowed',
'ProsperPrincipalOutstanding', 'ScorexChangeAtTimeOfListing',
'LoanCurrentDaysDelinquent', 'LoanFirstDefaultedCycleNumber',
'LoanMonthsSinceOrigination', 'LoanNumber', 'LoanOriginalAmount',
'LoanOriginationDate', 'LoanOriginationQuarter', 'MemberKey',
'MonthlyLoanPayment', 'LP_CustomerPayments',
'LP_CustomerPrincipalPayments', 'LP_InterestandFees', 'LP_ServiceFees',
'LP_CollectionFees', 'LP_GrossPrincipalLoss', 'LP_NetPrincipalLoss',
'LP_NonPrincipalRecoverypayments', 'PercentFunded', 'Recommendations',
'InvestmentFromFriendsCount', 'InvestmentFromFriendsAmount',
'Investors'],
dtype='object')
In [14]:
# Display the dtype pandas assigned to each column.
df.dtypes
Out[14]:
ListingKey object
ListingNumber int64
ListingCreationDate object
CreditGrade object
Term int64
LoanStatus object
ClosedDate object
BorrowerAPR float64
BorrowerRate float64
LenderYield float64
EstimatedEffectiveYield float64
EstimatedLoss float64
EstimatedReturn float64
ProsperRating (numeric) float64
ProsperRating (Alpha) object
ProsperScore float64
ListingCategory (numeric) int64
BorrowerState object
Occupation object
EmploymentStatus object
EmploymentStatusDuration float64
IsBorrowerHomeowner bool
CurrentlyInGroup bool
GroupKey object
DateCreditPulled object
CreditScoreRangeLower float64
CreditScoreRangeUpper float64
FirstRecordedCreditLine object
CurrentCreditLines float64
OpenCreditLines float64
...
TotalProsperLoans float64
TotalProsperPaymentsBilled float64
OnTimeProsperPayments float64
ProsperPaymentsLessThanOneMonthLate float64
ProsperPaymentsOneMonthPlusLate float64
ProsperPrincipalBorrowed float64
ProsperPrincipalOutstanding float64
ScorexChangeAtTimeOfListing float64
LoanCurrentDaysDelinquent int64
LoanFirstDefaultedCycleNumber float64
LoanMonthsSinceOrigination int64
LoanNumber int64
LoanOriginalAmount int64
LoanOriginationDate object
LoanOriginationQuarter object
MemberKey object
MonthlyLoanPayment float64
LP_CustomerPayments float64
LP_CustomerPrincipalPayments float64
LP_InterestandFees float64
LP_ServiceFees float64
LP_CollectionFees float64
LP_GrossPrincipalLoss float64
LP_NetPrincipalLoss float64
LP_NonPrincipalRecoverypayments float64
PercentFunded float64
Recommendations int64
InvestmentFromFriendsCount int64
InvestmentFromFriendsAmount float64
Investors int64
Length: 81, dtype: object
In [31]:
# Print summary statistics for every column of the DataFrame, one block per
# column separated by a blank line.
# Iterating a DataFrame yields its column labels, so the former row slice
# `df[:5]` never limited the loop to five columns; iterating `df.columns`
# states the actual behavior and avoids building a throwaway slice.
for col in df.columns:
    print(df[col].describe())
    print()
count 113937
unique 113066
top 17A93590655669644DB4C06
freq 6
Name: ListingKey, dtype: object
count 1.139370e+05
mean 6.278857e+05
std 3.280762e+05
min 4.000000e+00
25% 4.009190e+05
50% 6.005540e+05
75% 8.926340e+05
max 1.255725e+06
Name: ListingNumber, dtype: float64
count 113937
unique 113064
top 2013-10-02 17:20:16.550000000
freq 6
Name: ListingCreationDate, dtype: object
count 28953
unique 8
top C
freq 5649
Name: CreditGrade, dtype: object
count 113937.000000
mean 40.830248
std 10.436212
min 12.000000
25% 36.000000
50% 36.000000
75% 36.000000
max 60.000000
Name: Term, dtype: float64
count 113937
unique 12
top Current
freq 56576
Name: LoanStatus, dtype: object
count 55089
unique 2802
top 2014-03-04 00:00:00
freq 105
Name: ClosedDate, dtype: object
count 113912.000000
mean 0.218828
std 0.080364
min 0.006530
25% 0.156290
50% 0.209760
75% 0.283810
max 0.512290
Name: BorrowerAPR, dtype: float64
count 113937.000000
mean 0.192764
std 0.074818
min 0.000000
25% 0.134000
50% 0.184000
75% 0.250000
max 0.497500
Name: BorrowerRate, dtype: float64
count 113937.000000
mean 0.182701
std 0.074516
min -0.010000
25% 0.124200
50% 0.173000
75% 0.240000
max 0.492500
Name: LenderYield, dtype: float64
count 84853.000000
mean 0.168661
std 0.068467
min -0.182700
25% 0.115670
50% 0.161500
75% 0.224300
max 0.319900
Name: EstimatedEffectiveYield, dtype: float64
count 84853.000000
mean 0.080306
std 0.046764
min 0.004900
25% 0.042400
50% 0.072400
75% 0.112000
max 0.366000
Name: EstimatedLoss, dtype: float64
count 84853.000000
mean 0.096068
std 0.030403
min -0.182700
25% 0.074080
50% 0.091700
75% 0.116600
max 0.283700
Name: EstimatedReturn, dtype: float64
count 84853.000000
mean 4.072243
std 1.673227
min 1.000000
25% 3.000000
50% 4.000000
75% 5.000000
max 7.000000
Name: ProsperRating (numeric), dtype: float64
count 84853
unique 7
top C
freq 18345
Name: ProsperRating (Alpha), dtype: object
count 84853.000000
mean 5.950067
std 2.376501
min 1.000000
25% 4.000000
50% 6.000000
75% 8.000000
max 11.000000
Name: ProsperScore, dtype: float64
count 113937.000000
mean 2.774209
std 3.996797
min 0.000000
25% 1.000000
50% 1.000000
75% 3.000000
max 20.000000
Name: ListingCategory (numeric), dtype: float64
count 108422
unique 51
top CA
freq 14717
Name: BorrowerState, dtype: object
count 110349
unique 67
top Other
freq 28617
Name: Occupation, dtype: object
count 111682
unique 8
top Employed
freq 67322
Name: EmploymentStatus, dtype: object
count 106312.000000
mean 96.071582
std 94.480605
min 0.000000
25% 26.000000
50% 67.000000
75% 137.000000
max 755.000000
Name: EmploymentStatusDuration, dtype: float64
count 113937
unique 2
top True
freq 57478
Name: IsBorrowerHomeowner, dtype: object
count 113937
unique 2
top False
freq 101218
Name: CurrentlyInGroup, dtype: object
count 13341
unique 706
top 783C3371218786870A73D20
freq 1140
Name: GroupKey, dtype: object
count 113937
unique 112992
top 2013-12-23 09:38:12
freq 6
Name: DateCreditPulled, dtype: object
count 113346.000000
mean 685.567731
std 66.458275
min 0.000000
25% 660.000000
50% 680.000000
75% 720.000000
max 880.000000
Name: CreditScoreRangeLower, dtype: float64
count 113346.000000
mean 704.567731
std 66.458275
min 19.000000
25% 679.000000
50% 699.000000
75% 739.000000
max 899.000000
Name: CreditScoreRangeUpper, dtype: float64
count 113240
unique 11585
top 1993-12-01 00:00:00
freq 185
Name: FirstRecordedCreditLine, dtype: object
count 106333.000000
mean 10.317192
std 5.457866
min 0.000000
25% 7.000000
50% 10.000000
75% 13.000000
max 59.000000
Name: CurrentCreditLines, dtype: float64
count 106333.000000
mean 9.260164
std 5.022644
min 0.000000
25% 6.000000
50% 9.000000
75% 12.000000
max 54.000000
Name: OpenCreditLines, dtype: float64
count 113240.000000
mean 26.754539
std 13.637871
min 2.000000
25% 17.000000
50% 25.000000
75% 35.000000
max 136.000000
Name: TotalCreditLinespast7years, dtype: float64
count 113937.00000
mean 6.96979
std 4.63097
min 0.00000
25% 4.00000
50% 6.00000
75% 9.00000
max 51.00000
Name: OpenRevolvingAccounts, dtype: float64
count 113937.000000
mean 398.292161
std 447.159711
min 0.000000
25% 114.000000
50% 271.000000
75% 525.000000
max 14985.000000
Name: OpenRevolvingMonthlyPayment, dtype: float64
count 113240.000000
mean 1.435085
std 2.437507
min 0.000000
25% 0.000000
50% 1.000000
75% 2.000000
max 105.000000
Name: InquiriesLast6Months, dtype: float64
count 112778.000000
mean 5.584405
std 6.429946
min 0.000000
25% 2.000000
50% 4.000000
75% 7.000000
max 379.000000
Name: TotalInquiries, dtype: float64
count 113240.000000
mean 0.592052
std 1.978707
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 83.000000
Name: CurrentDelinquencies, dtype: float64
count 106315.000000
mean 984.507059
std 7158.270157
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 463881.000000
Name: AmountDelinquent, dtype: float64
count 112947.000000
mean 4.154984
std 10.160216
min 0.000000
25% 0.000000
50% 0.000000
75% 3.000000
max 99.000000
Name: DelinquenciesLast7Years, dtype: float64
count 113240.000000
mean 0.312646
std 0.727868
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 38.000000
Name: PublicRecordsLast10Years, dtype: float64
count 106333.000000
mean 0.015094
std 0.154092
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 20.000000
Name: PublicRecordsLast12Months, dtype: float64
count 1.063330e+05
mean 1.759871e+04
std 3.293640e+04
min 0.000000e+00
25% 3.121000e+03
50% 8.549000e+03
75% 1.952100e+04
max 1.435667e+06
Name: RevolvingCreditBalance, dtype: float64
count 106333.000000
mean 0.561309
std 0.317918
min 0.000000
25% 0.310000
50% 0.600000
75% 0.840000
max 5.950000
Name: BankcardUtilization, dtype: float64
count 106393.000000
mean 11210.225447
std 19818.361309
min 0.000000
25% 880.000000
50% 4100.000000
75% 13180.000000
max 646285.000000
Name: AvailableBankcardCredit, dtype: float64
count 106393.000000
mean 23.230034
std 11.871311
min 0.000000
25% 15.000000
50% 22.000000
75% 30.000000
max 126.000000
Name: TotalTrades, dtype: float64
count 106393.000000
mean 0.885897
std 0.148179
min 0.000000
25% 0.820000
50% 0.940000
75% 1.000000
max 1.000000
Name: TradesNeverDelinquent (percentage), dtype: float64
count 106393.000000
mean 0.802327
std 1.097637
min 0.000000
25% 0.000000
50% 0.000000
75% 1.000000
max 20.000000
Name: TradesOpenedLast6Months, dtype: float64
count 105383.000000
mean 0.275947
std 0.551759
min 0.000000
25% 0.140000
50% 0.220000
75% 0.320000
max 10.010000
Name: DebtToIncomeRatio, dtype: float64
count 113937
unique 8
top $25,000-49,999
freq 32192
Name: IncomeRange, dtype: object
count 113937
unique 2
top True
freq 105268
Name: IncomeVerifiable, dtype: object
count 1.139370e+05
mean 5.608026e+03
std 7.478497e+03
min 0.000000e+00
25% 3.200333e+03
50% 4.666667e+03
75% 6.825000e+03
max 1.750003e+06
Name: StatedMonthlyIncome, dtype: float64
count 113937
unique 113066
top CB1B37030986463208432A1
freq 6
Name: LoanKey, dtype: object
count 22085.000000
mean 1.421100
std 0.764042
min 0.000000
25% 1.000000
50% 1.000000
75% 2.000000
max 8.000000
Name: TotalProsperLoans, dtype: float64
count 22085.000000
mean 22.934345
std 19.249584
min 0.000000
25% 9.000000
50% 16.000000
75% 33.000000
max 141.000000
Name: TotalProsperPaymentsBilled, dtype: float64
count 22085.000000
mean 22.271949
std 18.830425
min 0.000000
25% 9.000000
50% 15.000000
75% 32.000000
max 141.000000
Name: OnTimeProsperPayments, dtype: float64
count 22085.000000
mean 0.613629
std 2.446827
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 42.000000
Name: ProsperPaymentsLessThanOneMonthLate, dtype: float64
count 22085.000000
mean 0.048540
std 0.556285
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 21.000000
Name: ProsperPaymentsOneMonthPlusLate, dtype: float64
count 22085.000000
mean 8472.311961
std 7395.507650
min 0.000000
25% 3500.000000
50% 6000.000000
75% 11000.000000
max 72499.000000
Name: ProsperPrincipalBorrowed, dtype: float64
count 22085.000000
mean 2930.313906
std 3806.635075
min 0.000000
25% 0.000000
50% 1626.550000
75% 4126.720000
max 23450.950000
Name: ProsperPrincipalOutstanding, dtype: float64
count 18928.000000
mean -3.223214
std 50.063567
min -209.000000
25% -35.000000
50% -3.000000
75% 25.000000
max 286.000000
Name: ScorexChangeAtTimeOfListing, dtype: float64
count 113937.000000
mean 152.816539
std 466.320254
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 2704.000000
Name: LoanCurrentDaysDelinquent, dtype: float64
count 16952.000000
mean 16.268464
std 9.005898
min 0.000000
25% 9.000000
50% 14.000000
75% 22.000000
max 44.000000
Name: LoanFirstDefaultedCycleNumber, dtype: float64
count 113937.000000
mean 31.896882
std 29.974184
min 0.000000
25% 6.000000
50% 21.000000
75% 65.000000
max 100.000000
Name: LoanMonthsSinceOrigination, dtype: float64
count 113937.000000
mean 69444.474271
std 38930.479610
min 1.000000
25% 37332.000000
50% 68599.000000
75% 101901.000000
max 136486.000000
Name: LoanNumber, dtype: float64
count 113937.00000
mean 8337.01385
std 6245.80058
min 1000.00000
25% 4000.00000
50% 6500.00000
75% 12000.00000
max 35000.00000
Name: LoanOriginalAmount, dtype: float64
count 113937
unique 1873
top 2014-01-22 00:00:00
freq 491
Name: LoanOriginationDate, dtype: object
count 113937
unique 33
top Q4 2013
freq 14450
Name: LoanOriginationQuarter, dtype: object
count 113937
unique 90831
top 63CA34120866140639431C9
freq 9
Name: MemberKey, dtype: object
count 113937.000000
mean 272.475783
std 192.697812
min 0.000000
25% 131.620000
50% 217.740000
75% 371.580000
max 2251.510000
Name: MonthlyLoanPayment, dtype: float64
count 113937.000000
mean 4183.079489
std 4790.907234
min -2.349900
25% 1005.760000
50% 2583.830000
75% 5548.400000
max 40702.390000
Name: LP_CustomerPayments, dtype: float64
count 113937.000000
mean 3105.536588
std 4069.527670
min 0.000000
25% 500.890000
50% 1587.500000
75% 4000.000000
max 35000.000000
Name: LP_CustomerPrincipalPayments, dtype: float64
count 113937.000000
mean 1077.542901
std 1183.414168
min -2.349900
25% 274.870000
50% 700.840100
75% 1458.540000
max 15617.030000
Name: LP_InterestandFees, dtype: float64
count 113937.000000
mean -54.725641
std 60.675425
min -664.870000
25% -73.180000
50% -34.440000
75% -13.920000
max 32.060000
Name: LP_ServiceFees, dtype: float64
count 113937.000000
mean -14.242698
std 109.232758
min -9274.750000
25% 0.000000
50% 0.000000
75% 0.000000
max 0.000000
Name: LP_CollectionFees, dtype: float64
count 113937.000000
mean 700.446342
std 2388.513831
min -94.200000
25% 0.000000
50% 0.000000
75% 0.000000
max 25000.000000
Name: LP_GrossPrincipalLoss, dtype: float64
count 113937.000000
mean 681.420499
std 2357.167068
min -954.550000
25% 0.000000
50% 0.000000
75% 0.000000
max 25000.000000
Name: LP_NetPrincipalLoss, dtype: float64
count 113937.000000
mean 25.142686
std 275.657937
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 21117.900000
Name: LP_NonPrincipalRecoverypayments, dtype: float64
count 113937.000000
mean 0.998584
std 0.017919
min 0.700000
25% 1.000000
50% 1.000000
75% 1.000000
max 1.012500
Name: PercentFunded, dtype: float64
count 113937.000000
mean 0.048027
std 0.332353
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 39.000000
Name: Recommendations, dtype: float64
count 113937.000000
mean 0.023460
std 0.232412
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 33.000000
Name: InvestmentFromFriendsCount, dtype: float64
count 113937.000000
mean 16.550751
std 294.545422
min 0.000000
25% 0.000000
50% 0.000000
75% 0.000000
max 25000.000000
Name: InvestmentFromFriendsAmount, dtype: float64
count 113937.000000
mean 80.475228
std 103.239020
min 1.000000
25% 2.000000
50% 44.000000
75% 115.000000
max 1189.000000
Name: Investors, dtype: float64
In [37]:
# Scatter BorrowerAPR against BorrowerRate to inspect how the two columns relate.
x = df['BorrowerAPR'].values
y = df['BorrowerRate'].values
plt.scatter(x, y)
# Label the figure so it can be read without referring back to the code.
plt.xlabel('BorrowerAPR')
plt.ylabel('BorrowerRate')
plt.title('BorrowerAPR vs BorrowerRate')
plt.show()
In [48]:
# For each float64 column, compare it with the NEXT float64 column (in
# DataFrame column order): print the Spearman rank correlation and draw a
# scatter plot of the pair.
#
# `Series.iteritems()` was removed in pandas 2.0; `Series.items()` is the
# supported equivalent and yields the same (label, dtype) pairs.
float_cols = [col for col, dtype in df.dtypes.items() if dtype == 'float64']

# zip(cols, cols[1:]) walks consecutive pairs: (c0, c1), (c1, c2), ...
for col_1, col_2 in zip(float_cols, float_cols[1:]):
    x = df[col_1].values
    y = df[col_2].values
    # nan_policy='omit' tells spearmanr to ignore NaN entries instead of
    # propagating NaN into the result.
    print(spearmanr(x, y, nan_policy='omit'))
    plt.title(f'Comparing {col_1} and {col_2}')
    plt.xlabel(col_1)
    plt.ylabel(col_2)
    plt.scatter(x, y)
    plt.show()
SpearmanrResult(correlation=0.9894193209482549, pvalue=0.0)
SpearmanrResult(correlation=0.9992520568276085, pvalue=0.0)
SpearmanrResult(correlation=0.9183727240571069, pvalue=0.0)
SpearmanrResult(correlation=0.8845522705361033, pvalue=0.0)
SpearmanrResult(correlation=0.7524821355958692, pvalue=0.0)
SpearmanrResult(correlation=-0.7461856431766154, pvalue=0.0)
SpearmanrResult(correlation=0.7059401334742064, pvalue=0.0)
SpearmanrResult(correlation=-0.005061259646310271, pvalue=0.14044276643702816)
SpearmanrResult(correlation=0.0811902285733982, pvalue=6.343740673150766e-155)
SpearmanrResult(correlation=1.0, pvalue=0.0)
SpearmanrResult(correlation=0.15643758592809445, pvalue=0.0)
SpearmanrResult(correlation=0.95545268346175, pvalue=0.0)
SpearmanrResult(correlation=0.5664641248678572, pvalue=0.0)
SpearmanrResult(correlation=0.42830538889371245, pvalue=0.0)
SpearmanrResult(correlation=-0.10727842206155681, pvalue=5.163799954683272e-287)
SpearmanrResult(correlation=0.6130672412160673, pvalue=0.0)
SpearmanrResult(correlation=0.13407117119356377, pvalue=0.0)
SpearmanrResult(correlation=0.8861378002593798, pvalue=0.0)
SpearmanrResult(correlation=0.375849841851563, pvalue=0.0)
SpearmanrResult(correlation=0.3604269013480139, pvalue=0.0)
SpearmanrResult(correlation=0.2238442855856443, pvalue=0.0)
SpearmanrResult(correlation=-0.08623285802009668, pvalue=1.3031536338917587e-174)
SpearmanrResult(correlation=0.4229848379794714, pvalue=0.0)
SpearmanrResult(correlation=-0.47788890784084465, pvalue=0.0)
SpearmanrResult(correlation=0.28730054216668194, pvalue=0.0)
SpearmanrResult(correlation=0.03706472460775611, pvalue=1.1394590120555092e-33)
SpearmanrResult(correlation=-0.00233145315140626, pvalue=0.44697722928324224)
SpearmanrResult(correlation=0.05729620875286312, pvalue=5.020323961806811e-72)
SpearmanrResult(correlation=-0.26315212541344307, pvalue=0.0)
SpearmanrResult(correlation=0.05328936828894872, pvalue=2.2901533146187827e-15)
SpearmanrResult(correlation=0.6186597716766913, pvalue=0.0)
SpearmanrResult(correlation=0.988382851833924, pvalue=0.0)
SpearmanrResult(correlation=0.15402174093882215, pvalue=2.5937468257258588e-117)
SpearmanrResult(correlation=0.3179686055633133, pvalue=0.0)
SpearmanrResult(correlation=0.01460631971603419, pvalue=0.029958287741594283)
SpearmanrResult(correlation=0.39079069212792067, pvalue=0.0)
SpearmanrResult(correlation=-0.2226582957394165, pvalue=2.7831255559073114e-211)
SpearmanrResult(correlation=0.20388000294480862, pvalue=3.3917757384622225e-24)
SpearmanrResult(correlation=-0.0003698671715093867, pvalue=0.9615942279839989)
SpearmanrResult(correlation=0.28662617121738615, pvalue=0.0)
SpearmanrResult(correlation=0.9768538315817462, pvalue=0.0)
SpearmanrResult(correlation=0.6503519127689852, pvalue=0.0)
SpearmanrResult(correlation=-0.90722088444604, pvalue=0.0)
SpearmanrResult(correlation=0.05966990259765053, pvalue=2.2413509894235067e-90)
SpearmanrResult(correlation=-0.3474817287718461, pvalue=0.0)
SpearmanrResult(correlation=0.9869767909912213, pvalue=0.0)
SpearmanrResult(correlation=0.3597547679820946, pvalue=0.0)
SpearmanrResult(correlation=0.0010356890578058506, pvalue=0.7266475475077019)
SpearmanrResult(correlation=0.002403584069907327, pvalue=0.41718687440520696)
In [19]:
# Print every column name next to its dtype, one per line (untruncated,
# unlike the default `df.dtypes` display).
# `Series.iteritems()` was removed in pandas 2.0; `.items()` is the
# supported equivalent.
for col, dtype in df.dtypes.items():
    print(col, dtype)
ListingKey object
ListingNumber int64
ListingCreationDate object
CreditGrade object
Term int64
LoanStatus object
ClosedDate object
BorrowerAPR float64
BorrowerRate float64
LenderYield float64
EstimatedEffectiveYield float64
EstimatedLoss float64
EstimatedReturn float64
ProsperRating (numeric) float64
ProsperRating (Alpha) object
ProsperScore float64
ListingCategory (numeric) int64
BorrowerState object
Occupation object
EmploymentStatus object
EmploymentStatusDuration float64
IsBorrowerHomeowner bool
CurrentlyInGroup bool
GroupKey object
DateCreditPulled object
CreditScoreRangeLower float64
CreditScoreRangeUpper float64
FirstRecordedCreditLine object
CurrentCreditLines float64
OpenCreditLines float64
TotalCreditLinespast7years float64
OpenRevolvingAccounts int64
OpenRevolvingMonthlyPayment float64
InquiriesLast6Months float64
TotalInquiries float64
CurrentDelinquencies float64
AmountDelinquent float64
DelinquenciesLast7Years float64
PublicRecordsLast10Years float64
PublicRecordsLast12Months float64
RevolvingCreditBalance float64
BankcardUtilization float64
AvailableBankcardCredit float64
TotalTrades float64
TradesNeverDelinquent (percentage) float64
TradesOpenedLast6Months float64
DebtToIncomeRatio float64
IncomeRange object
IncomeVerifiable bool
StatedMonthlyIncome float64
LoanKey object
TotalProsperLoans float64
TotalProsperPaymentsBilled float64
OnTimeProsperPayments float64
ProsperPaymentsLessThanOneMonthLate float64
ProsperPaymentsOneMonthPlusLate float64
ProsperPrincipalBorrowed float64
ProsperPrincipalOutstanding float64
ScorexChangeAtTimeOfListing float64
LoanCurrentDaysDelinquent int64
LoanFirstDefaultedCycleNumber float64
LoanMonthsSinceOrigination int64
LoanNumber int64
LoanOriginalAmount int64
LoanOriginationDate object
LoanOriginationQuarter object
MemberKey object
MonthlyLoanPayment float64
LP_CustomerPayments float64
LP_CustomerPrincipalPayments float64
LP_InterestandFees float64
LP_ServiceFees float64
LP_CollectionFees float64
LP_GrossPrincipalLoss float64
LP_NetPrincipalLoss float64
LP_NonPrincipalRecoverypayments float64
PercentFunded float64
Recommendations int64
InvestmentFromFriendsCount int64
InvestmentFromFriendsAmount float64
Investors int64
Content source: austinjalexander/sandbox
Similar notebooks: