Initialization.

In this notebook, we will need pandas to read/process data and matplotlib to plot data.



In [1]:

    
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn
import re
import datetime
pd.options.display.max_columns=200
matplotlib.rcParams['figure.figsize'] = (15.0, 12.0)

Column names of data file:



In [2]:

    
summaryfile_type = {'Unnamed':int,
                    'Unnamed2':int,
                    'LOAN_ID':object,
                    'ORIG_CHN':object,
                    'Seller.Name':object,
                    'ORIG_RT':float,
                    'ORIG_AMT':float,
                    'ORIG_TRM':float,
                    'ORIG_DTE':object,
                    'FRST_DTE':object,
                    'OLTV':float,
                    'OCLTV':float,
                    'NUM_BO':float,
                    'DTI':float,
                    'CSCORE_B':float,
                    'FTHB_FLG':object,
                    'PURPOSE':object,
                    'PROP_TYP':object,
                    'NUM_UNIT':int,
                    'OCC_STAT':object,
                    'STATE':object,
                    'ZIP_3':object,
                    'MI_PCT':float,
                    'Product.Type':object,
                    'CSCORE_C':float,
                    'Monthly.Rpt.Prd':object,
                    'Servicer.Name':object,
                    'LAST_RT':float,
                    'LAST_UPB':float,
                    'Loan.Age':float,
                    'Months.To.Legal.Mat':float,
                    'Adj.Month.To.Mat':float,
                    'Maturity.Date':object,
                    'MSA':int,
                    'Delq.Status':object,
                    'MOD_FLAG':object,
                    'Zero.Bal.Code':float,
                    'ZB_DTE':object,
                    'LPI_DTE':object,
                    'FCC_DTE':object,
                    'DISP_DT':object
}

Read data into memory and process the data



In [3]:

    
fannie1 = pd.read_csv('./processed/total/total_2012.csv', dtype=summaryfile_type, nrows=10000000)
fannie1.drop(fannie1.columns[:1], axis=1, inplace=True)
fannie1.rename(columns=lambda x: re.sub('[.]','_',x), inplace=True)
fannie1.head()









    Out[3]:






  
    
      
      LOAN_ID
      ORIG_CHN
      Seller_Name
      ORIG_RT
      ORIG_AMT
      ORIG_TRM
      ORIG_DTE
      FRST_DTE
      OLTV
      OCLTV
      NUM_BO
      DTI
      CSCORE_B
      FTHB_FLG
      PURPOSE
      PROP_TYP
      NUM_UNIT
      OCC_STAT
      STATE
      ZIP_3
      MI_PCT
      Product_Type
      CSCORE_C
      Monthly_Rpt_Prd
      Servicer_Name
      LAST_RT
      LAST_UPB
      Loan_Age
      Months_To_Legal_Mat
      Adj_Month_To_Mat
      Maturity_Date
      MSA
      Delq_Status
      MOD_FLAG
      Zero_Bal_Code
      ZB_DTE
      LPI_DTE
      FCC_DTE
      DISP_DT
    
  
  
    
      0
      100001726155
      B
      STEARNS LENDING, LLC
      3.500
      278000.0
      360.0
      12/2012
      02/2013
      77.0
      77.0
      1.0
      36.0
      802.0
      N
      R
      SF
      1
      P
      CA
      949
      NaN
      FRM
      NaN
      09/01/2015
      OTHER
      3.500
      258940.15
      32.0
      328.0
      320.0
      01/2043
      42220
      0
      N
      0.0
      NaN
      NaN
      NaN
      NaN
    
    
      1
      100003164216
      R
      OTHER
      3.625
      478000.0
      360.0
      11/2012
      01/2013
      68.0
      68.0
      2.0
      25.0
      763.0
      N
      R
      PU
      1
      P
      CA
      913
      NaN
      FRM
      742.0
      09/01/2015
      OTHER
      3.625
      452501.57
      33.0
      327.0
      327.0
      12/2042
      31080
      0
      N
      0.0
      NaN
      NaN
      NaN
      NaN
    
    
      2
      100008218145
      C
      FLAGSTAR CAPITAL MARKETS CORPORATION
      3.625
      403000.0
      360.0
      09/2012
      11/2012
      54.0
      61.0
      2.0
      33.0
      749.0
      N
      R
      SF
      1
      P
      WA
      981
      NaN
      FRM
      747.0
      08/01/2014
      MATRIX FINANCIAL SERVICES CORPORATION
      3.625
      389568.37
      22.0
      338.0
      0.0
      10/2042
      42660
      X
      N
      1.0
      08/2014
      NaN
      NaN
      NaN
    
    
      3
      100009252313
      C
      WELLS FARGO BANK, N.A.
      3.625
      626000.0
      360.0
      09/2012
      11/2012
      78.0
      78.0
      1.0
      24.0
      800.0
      N
      R
      SF
      1
      P
      VA
      220
      NaN
      FRM
      NaN
      09/01/2015
      WELLS FARGO BANK, N.A.
      3.625
      590002.26
      35.0
      325.0
      325.0
      10/2042
      47900
      0
      N
      0.0
      NaN
      NaN
      NaN
      NaN
    
    
      4
      100009333533
      C
      JPMORGAN CHASE BANK, NATIONAL ASSOCIATION
      3.625
      130000.0
      360.0
      10/2012
      12/2012
      80.0
      80.0
      2.0
      41.0
      758.0
      Y
      P
      SF
      1
      P
      IL
      601
      NaN
      FRM
      749.0
      09/01/2015
      JP MORGAN CHASE BANK, NA
      3.625
      122262.89
      34.0
      326.0
      322.0
      11/2042
      16980
      0
      N
      0.0
      NaN
      NaN
      NaN
      NaN



In [6]:

    
# All helper functions!
def extract_status_year(df, col, x):
    '''
    This function is to extract the year of last report time. For groupby purpose
    Instead of using the datetime object, let's use the string function to extract year
    '''
    return df.ix[x,col].split('/')[-1]

def extract_credit_score(x,y):
    temp = x.copy()
    temp[x.isnull()] = y.loc[x.isnull()]
    temp[x>y] = y[x>y]
    #x[x>y] = y[x>y]
    
    return temp
def extract_status_default(df, x):
    '''
    This function is to extract the year of last report time. For groupby purpose
    Instead of using the datetime object, let's use the string function to extract year
    '''
    if (df.ix[x,'Zero_Bal_Code'] == 0) or (df.ix[x,'Zero_Bal_Code'] == 1):
        return 'Healthy'
    elif (df.ix[x,'Zero_Bal_Code'] == 3) or (df.ix[x,'Zero_Bal_Code'] == 6) or (df.ix[x,'Zero_Bal_Code'] == 9):
        return 'Default'
    else:
        return 'NA'



In [7]:

    
# Processing the credit score: Take the lower credit score from two scores if both are available
credit = extract_credit_score(fannie1['CSCORE_B'],fannie1['CSCORE_C'])
fannie1.drop(['CSCORE_B','CSCORE_C'],axis = 1, inplace=True)
fannie1['CSCORE'] = credit

Analysis of data from 2000 to 2012

Evolution of default rate



In [8]:

    
# Total number of record
total_num = len(fannie1)
print("Total number of record is: {}".format(total_num))









    



Total number of record is: 10000000



In [9]:

    
# Number of prepaid
total_prepaid = len(fannie1.ix[fannie1['Zero_Bal_Code'] == 1])
print("Total number of prepaid is {}".format(total_prepaid))
print("Total prepaid ratio is {}".format(total_prepaid/total_num))









    



Total number of prepaid is 6282546
Total prepaid ratio is 0.6282546



In [10]:

    
fannie1_default = fannie1.ix[(fannie1['Zero_Bal_Code'] == 9) | (fannie1['Zero_Bal_Code'] == 6) |
                              (fannie1['Zero_Bal_Code'] == 9)]
# Number of default
total_default = len(fannie1_default)
print("Total number of default is: {}".format(total_default))
print("Total default ratio is {}".format(total_default/total_num))









    



Total number of default is: 247709
Total default ratio is 0.0247709



In [11]:

    
total_default_by_year = fannie1_default.groupby(lambda x:extract_status_year(fannie1,'Monthly_Rpt_Prd',x))['LOAN_ID'].count()
total_by_year = fannie1.groupby(lambda x:extract_status_year(fannie1,'ORIG_DTE',x))['LOAN_ID'].count()



In [12]:

    
total_default_by_year.name='Default number'
total_by_year.name='Total number'
default_rate_by_year = pd.concat([total_default_by_year,total_by_year],axis = 1, join='outer')
default_rate_by_year['Default_Rate'] = default_rate_by_year['Default number']/default_rate_by_year['Total number']
default_rate_by_year.plot(y='Default_Rate')









    Out[12]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe20f1b71d0>

Some comments about the default rate evolution

From the plot above, there is a tendency to say the default rate peaks at 2011. However, this dataset is just a subset of all Fannie's single family data. Also, many loan won't default in a short period of time. These factors all contribute to the increase of default rate at 2011. The dataset also cuts off in 2012, which probably explains why we see a decrease after 2011.

Number of applicants per year



In [13]:

    
total_applications_by_year = fannie1.groupby(lambda x:extract_status_year(fannie1,'ORIG_DTE',x))['LOAN_ID'].count()
total_applications_by_year.plot(kind='bar')









    Out[13]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe20f1216a0>

The evolution of interest rate by year



In [14]:

    
rate_by_year = fannie1.groupby(lambda x:extract_status_year(fannie1,'ORIG_DTE',x))['ORIG_RT'].mean()



In [15]:

    
rate_by_year.plot()









    Out[15]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe20f0c8b38>



In [16]:

    
# Any different in each state?
rate_by_year_state = fannie1.groupby([lambda x:extract_status_year(fannie1,'ORIG_DTE',x),'STATE'])['ORIG_RT'].mean()



In [17]:

    
rate_by_year_state = rate_by_year_state.unstack(level=0)

i = int(np.ceil(fannie1['STATE'].nunique()/4))
j = 4
fig, axes = plt.subplots(i,j, figsize=(10, 8))
plt.subplots_adjust(wspace=0.5, hspace=0.5)
i2 = 0
j2=0
for state in rate_by_year_state.index:
    axes[i2,j2].plot(rate_by_year_state.ix[state,])
    axes[i2,j2].set_title(state)
    axes[i2,j2].get_xaxis().set_visible(False)
    j2 += 1
    if j2 > 3:
        i2 += 1
        i2 = i2 % i
    j2 = j2 % j

Comments on rates by states plot

Some states, like North Dakota (ND) show an upward trend in early 2000s even the general trend of interest rate is going down. This is very interesting. I think this may related to its specific booming economy in that state, like oil (drilling in ND).

Default cases in each year

Still need to think about how to count the statistics of new application each year.



In [18]:

    
default_by_year = pd.concat([total_by_year,total_default_by_year], join='outer', axis=1)#.reset_index().plot()
default_by_year['Rate'] = default_by_year['Default number']/default_by_year['Total number']
default_by_year['Rate'].plot(kind='bar')









    Out[18]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe20f14e7f0>

Default cases in each state each year



In [19]:

    
default_by_year_state = fannie1_default.groupby([lambda x:extract_status_year(fannie1,'Monthly_Rpt_Prd',x), 
                                                 'STATE'])['LOAN_ID'].count()



In [20]:

    
total_by_year_state = fannie1.groupby([lambda x:extract_status_year(fannie1,'Monthly_Rpt_Prd',x), 
                                                 'STATE'])['LOAN_ID'].count()



In [21]:

    
default_by_year_state.name='default_cases_by_state'
total_by_year_state.name='total_cases_by_state'



In [22]:

    
default_stat_by_year_state = pd.concat([default_by_year_state, 
                                      total_by_year_state], join='outer', axis=1)

default_stat_by_year_state['Rate'] = default_stat_by_year_state['default_cases_by_state']/default_stat_by_year_state['total_cases_by_state']
default_stat_by_year_state.unstack(level=0).plot(y='Rate',kind='bar',subplots=True)









    Out[22]:





array([<matplotlib.axes._subplots.AxesSubplot object at 0x7fe21a3ebc88>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe1fe21afd0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe2b3bb8630>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe1fe232f98>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe20542a940>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe20cda6198>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe211116278>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe21729a3c8>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe21b38dd30>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe21ebccb38>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe2b3b65400>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x7fe2b3a77e80>], dtype=object)



In [23]:

    
default_stat_by_year_state.sort_values(by='Rate',ascending=False)['Rate']#.unstack(level=0).plot(y='Rate',kind='bar',subplots=True)









    Out[23]:





      STATE
2011  NV       0.278939
2010  NV       0.275126
      AZ       0.229078
2011  AZ       0.208608
      PR       0.191977
2014  FL       0.179002
2009  NV       0.175793
2010  FL       0.172649
      GA       0.142972
2004  AL       0.142857
2010  MI       0.142426
2013  VI       0.138462
2011  MI       0.135645
2009  AZ       0.129435
2011  ID       0.125779
2008  MI       0.125082
      NV       0.119600
2011  GA       0.117003
      FL       0.115594
2013  FL       0.114975
2014  MD       0.104205
2010  ID       0.100582
2009  MI       0.098306
2014  ME       0.095652
      IL       0.094153
      CT       0.092987
2009  FL       0.091376
2013  PR       0.088785
2011  MS       0.082735
2012  FL       0.081037
                 ...   
2004  WI            NaN
      WV            NaN
      WY            NaN
2005  DC            NaN
      ID            NaN
      KS            NaN
      ME            NaN
      MT            NaN
      ND            NaN
      NE            NaN
      RI            NaN
      UT            NaN
      VI            NaN
      VT            NaN
      WV            NaN
      WY            NaN
2006  AK            NaN
      GU            NaN
      ND            NaN
      PR            NaN
      VI            NaN
2007  GU            NaN
      VI            NaN
2008  GU            NaN
      VI            NaN
2009  GU            NaN
2011  GU            NaN
      VI            NaN
2012  GU            NaN
2014  GU            NaN
Name: Rate, dtype: float64

Default case by their credit score

Distribution of credit score



In [24]:

    
fannie1['CSCORE'].describe()









    Out[24]:





count    9.988642e+06
mean     7.434335e+02
std      5.308873e+01
min      3.200000e+02
25%      7.090000e+02
50%      7.570000e+02
75%      7.860000e+02
max      8.500000e+02
Name: CSCORE, dtype: float64

Credit score statistics

The average credit score of all applications is 743. This is a little bit surprising. I don't know what credit agency do these score come from. But if it is FICO, then average potential home owner has a excellent credit score based on the article here. If you have credit score less than 700 then you are already at only 25% percentile!



In [25]:

    
fannie1['CSCORE'].hist()









    Out[25]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe223a1aac8>



In [26]:

    
total_by_loan_status = fannie1.groupby([lambda x: extract_status_default(fannie1, x),
                                        lambda x:extract_status_year(fannie1,'ORIG_DTE',x)])['CSCORE'].mean()



In [27]:

    
total_by_loan_status.unstack(level=0).plot(kind='bar')









    Out[27]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe2072b3ac8>

Loan Status and Its credit score

This graph shows some interesting findings:

The overall trend of applicant's credit score is increasing by year.
Default cases does have lower average credit score compared to non-default cases. Therefore, credit score should be considered as one feature in the following machine learning model.
In the early days, you can get a loan with a low score less than 700 in 1999 and 2000. But this is not the case anymore. Tighter regulations make the rule of housing loan more and more strict, which is definitely a good thing.

Initial Loan-to-Value ratio by status



In [30]:

    
LTV_by_loan_status = fannie1.groupby([lambda x: extract_status_default(fannie1, x),
                                        lambda x:extract_status_year(fannie1,'ORIG_DTE',x)])['OLTV'].mean()



In [31]:

    
LTV_by_loan_status.unstack(level=0).plot(kind='bar')









    Out[31]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe207217ac8>

Loan to value ratio (LTV)

Another interesting plot. Clearly, healthy loan has LTVs around 70%, while default cases are at 80%. This makes sense too. As higher LTV means you borrow more money to buy the house, which makes the mortgage riskier.

Debt-to-Income ratio



In [34]:

    
DTI_by_loan_status = fannie1.groupby([lambda x: extract_status_default(fannie1, x),
                                        lambda x:extract_status_year(fannie1,'ORIG_DTE',x)])['DTI'].mean()



In [35]:

    
DTI_by_loan_status.unstack(level=0).plot(kind='bar')









    Out[35]:





<matplotlib.axes._subplots.AxesSubplot at 0x7fe20719ca90>

Debt-to-Income ratio (DTI)

There are a lot of websites out there to tell you how much you can afford to buy a house. Like this one. The conservative approach uses ~37% as DTI and the aggressive uses 41%. Analysis of Fannie's data kind of supports this theory. Default cases in general has higher DTI compared to those non-default cases. Also, before the sub-prime crisis, you can get a loan with DTI higher than 40%. Now, the general trend of DTI becomes more and more conservative. DTI rates are now between 30-35%. So next time when you use the online house affordability calculator. Keep this in mind that the conservative approach is really the approach you should take. Don't bother with the "aggressive" approach...

Loan purpose analysis



In [38]:

    
purpose_status = fannie1.groupby([lambda x: extract_status_default(fannie1, x), 'PURPOSE'])['LOAN_ID'].count()



In [39]:

    
purpose_status = purpose_status.unstack(level=0)



In [40]:

    
purpose_status['Default_Ratio'] = purpose_status['Default']/purpose_status['Healthy']
purpose_status









    Out[40]:






  
    
      
      Default
      Healthy
      Default_Ratio
    
    
      PURPOSE
      
      
      
    
  
  
    
      C
      144821
      2822240
      0.051314
    
    
      P
      118948
      3889445
      0.030582
    
    
      R
      74859
      2949208
      0.025383
    
    
      U
      30
      449
      0.066815

Loan purpose comments

Code	Purpose
C	Cash-out refinance
P	Purchase
R	Refinance
U	Unspecified refinance

Keep in mind that overall default rate is 0.0247709. Cash out refinance has relatively higher default rate compared to other types. If you think about why, click the link and read what "cash-out refinance" means and you would know why.

Occupancy status analysis



In [41]:

    
occupancy_status = fannie1.groupby([lambda x: extract_status_default(fannie1, x), 'OCC_STAT'])['LOAN_ID'].count()

occupancy_status = occupancy_status.unstack(level=0)



In [42]:

    
occupancy_status['Default_Ratio'] = occupancy_status['Default']/occupancy_status['Healthy']
occupancy_status









    Out[42]:






  
    
      
      Default
      Healthy
      Default_Ratio
    
    
      OCC_STAT
      
      
      
    
  
  
    
      I
      39882
      743727
      0.053625
    
    
      P
      283148
      8421442
      0.033622
    
    
      S
      15628
      496173
      0.031497

Occupancy status comments

Code	Status
I	Investment
P	Primary
S	Secondary

Very simple conclusion: Investment type occupancy status has a relatively higher default rate.

Geo-Location analysis



In [43]:

    
location_default = fannie1.groupby([lambda x: extract_status_default(fannie1, x), 'STATE'])['LOAN_ID'].count()
location_total = fannie1.groupby(['STATE'])['LOAN_ID'].count()
location_default_stat = pd.concat([location_default.unstack(level=0),location_total], axis = 1, join='inner')

location_default_stat.columns = ['Default', 'Healthy', 'Total']

location_default_stat['Default_Ratio'] = location_default_stat['Default']/location_default_stat['Total']



In [44]:

    
location_default_mean = location_default_stat['Default_Ratio'].mean()
location_default_std = location_default_stat['Default_Ratio'].std()

States have higher than mean + 1.5 sigma default rate?



In [45]:

    
location_default_stat[location_default_stat['Default_Ratio'] > location_default_mean + 1.5*location_default_std]









    Out[45]:






  
    
      
      Default
      Healthy
      Total
      Default_Ratio
    
    
      STATE
      
      
      
      
    
  
  
    
      AZ
      26272
      253459
      279731
      0.093919
    
    
      FL
      53461
      470288
      523749
      0.102074
    
    
      MI
      17551
      230956
      248507
      0.070626
    
    
      NV
      10551
      77884
      88435
      0.119308

Geo based default statistics

I am very surprised about this analysis. Those four states have higher default rate (more than 1.5$\sigma$ higher than the mean). I thought they would be CA, TX etc. But none of them are at the top of the list. In fact, Nevada is the state with highest default rate, followed by Florida.

	LOAN_ID	ORIG_CHN	Seller_Name	ORIG_RT	ORIG_AMT	ORIG_TRM	ORIG_DTE	FRST_DTE	OLTV	OCLTV	NUM_BO	DTI	CSCORE_B	FTHB_FLG	PURPOSE	PROP_TYP	NUM_UNIT	OCC_STAT	STATE	ZIP_3	MI_PCT	Product_Type	CSCORE_C	Monthly_Rpt_Prd	Servicer_Name	LAST_RT	LAST_UPB	Loan_Age	Months_To_Legal_Mat	Adj_Month_To_Mat	Maturity_Date	MSA	Delq_Status	MOD_FLAG	Zero_Bal_Code	ZB_DTE	LPI_DTE	FCC_DTE	DISP_DT
0	100001726155	B	STEARNS LENDING, LLC	3.500	278000.0	360.0	12/2012	02/2013	77.0	77.0	1.0	36.0	802.0	N	R	SF	1	P	CA	949	NaN	FRM	NaN	09/01/2015	OTHER	3.500	258940.15	32.0	328.0	320.0	01/2043	42220	0	N	0.0	NaN	NaN	NaN	NaN
1	100003164216	R	OTHER	3.625	478000.0	360.0	11/2012	01/2013	68.0	68.0	2.0	25.0	763.0	N	R	PU	1	P	CA	913	NaN	FRM	742.0	09/01/2015	OTHER	3.625	452501.57	33.0	327.0	327.0	12/2042	31080	0	N	0.0	NaN	NaN	NaN	NaN
2	100008218145	C	FLAGSTAR CAPITAL MARKETS CORPORATION	3.625	403000.0	360.0	09/2012	11/2012	54.0	61.0	2.0	33.0	749.0	N	R	SF	1	P	WA	981	NaN	FRM	747.0	08/01/2014	MATRIX FINANCIAL SERVICES CORPORATION	3.625	389568.37	22.0	338.0	0.0	10/2042	42660	X	N	1.0	08/2014	NaN	NaN	NaN
3	100009252313	C	WELLS FARGO BANK, N.A.	3.625	626000.0	360.0	09/2012	11/2012	78.0	78.0	1.0	24.0	800.0	N	R	SF	1	P	VA	220	NaN	FRM	NaN	09/01/2015	WELLS FARGO BANK, N.A.	3.625	590002.26	35.0	325.0	325.0	10/2042	47900	0	N	0.0	NaN	NaN	NaN	NaN
4	100009333533	C	JPMORGAN CHASE BANK, NATIONAL ASSOCIATION	3.625	130000.0	360.0	10/2012	12/2012	80.0	80.0	2.0	41.0	758.0	Y	P	SF	1	P	IL	601	NaN	FRM	749.0	09/01/2015	JP MORGAN CHASE BANK, NA	3.625	122262.89	34.0	326.0	322.0	11/2042	16980	0	N	0.0	NaN	NaN	NaN	NaN

	Default	Healthy	Default_Ratio
PURPOSE
C	144821	2822240	0.051314
P	118948	3889445	0.030582
R	74859	2949208	0.025383
U	30	449	0.066815

	Default	Healthy	Default_Ratio
OCC_STAT
I	39882	743727	0.053625
P	283148	8421442	0.033622
S	15628	496173	0.031497

	Default	Healthy	Total	Default_Ratio
STATE
AZ	26272	253459	279731	0.093919
FL	53461	470288	523749	0.102074
MI	17551	230956	248507	0.070626
NV	10551	77884	88435	0.119308