DBSCAN (Density-Based Spatial Clustering of Applications with Noise)

Find core samples of high density and expand clusters from them.

The minimum number of samples in a neighborhood for a point to be considered as a core point was set at 10 and the maximum distance between two samples within the same neighborhood was set at 0.1.


In [55]:
import pandas as pd


df1=pd.read_csv('team_out_1.csv')
df2=pd.read_csv('team_out_a2.csv')

df=df1.append(df2)
df.dropna(inplace=True)
df.reset_index(inplace=True,drop=True)
df=df[df.Total_Expenses>0]
df=df[df.Program_Exp<=1]
df


Out[55]:
Filename EIN Program_Exp Liabilities_To_Asset Working_Capital Surplus_Margin Total_Expenses
0 201523229349300327.xml 510311790.0 0.989619 0.091802 1.574677 -0.078663 4.643180e+05
1 201543089349301829.xml 261460932.0 0.965378 0.000000 3.910347 -0.042674 2.743900e+04
2 201533179349306298.xml 270609504.0 0.942276 0.049206 0.655152 0.088597 3.848280e+05
3 201533209349304768.xml 521548962.0 0.384526 0.000814 30.392756 0.633126 5.091200e+04
4 201533179349307343.xml 731653383.0 0.620560 0.029487 1.083739 0.105901 2.812300e+04
5 201533189349300608.xml 237324566.0 0.758902 0.736982 -0.176215 -0.003914 6.081260e+05
6 201523069349301367.xml 43259150.0 0.736312 0.181998 0.378592 0.057534 2.129680e+05
7 201533069349300963.xml 621273871.0 0.644822 0.234247 14.413163 0.496122 1.426413e+06
8 201523099349300542.xml 541897455.0 0.845659 0.138531 0.414986 -0.016769 4.615120e+05
9 201533099349301033.xml 251869168.0 0.762357 0.000000 8.673984 0.122244 1.020440e+05
10 201523169349304367.xml 376046335.0 0.745386 0.000000 16.463976 0.252579 3.717000e+04
11 201533099349301803.xml 50454409.0 0.915261 0.000000 1.158768 0.062649 5.573470e+05
12 201523069349300142.xml 272364809.0 0.920307 0.003606 0.240597 -0.029663 3.020610e+05
13 201543039349301204.xml 463812139.0 0.664088 0.648413 0.045823 0.088812 1.399740e+05
14 201523089349301462.xml 561797737.0 0.704623 0.432671 0.000000 -0.050384 2.147730e+05
15 201533069349300788.xml 590638867.0 0.000000 0.000000 0.360157 -0.055883 7.625220e+05
16 201533079349300238.xml 273314429.0 1.000000 0.000000 0.000000 0.107584 2.617100e+04
17 201523209349314257.xml 710801566.0 1.000000 0.000000 0.000000 -8.212337 1.145333e+06
18 201523209349311332.xml 752204594.0 0.896895 0.069715 0.535678 -0.064100 3.889691e+06
19 201533179349302173.xml 251374594.0 0.000000 0.313630 0.337537 0.014809 4.235120e+05
20 201533179349307048.xml 450448773.0 0.991205 0.051348 0.414930 0.161552 1.858940e+05
21 201533209349302633.xml 363789851.0 0.000000 0.232011 0.521133 -0.128997 7.329143e+06
22 201533099349301113.xml 411712804.0 0.477697 0.009887 0.751388 -0.014507 8.147200e+05
23 201523039349300127.xml 381557861.0 0.832159 0.063834 2.988120 0.170911 1.369609e+06
24 201523079349301652.xml 273741627.0 0.945120 0.000000 0.863663 0.094985 1.234798e+06
25 201533039349300813.xml 411495321.0 0.668161 0.040796 144.508488 -0.139777 1.108143e+06
26 201533139349300208.xml 60666277.0 0.000000 0.644904 0.654848 -0.296143 1.964943e+07
27 201533069349301413.xml 42616064.0 0.951218 0.338683 0.178517 0.041123 3.325420e+07
28 201533079349300003.xml 581651220.0 0.910347 0.026832 0.945357 -0.020293 3.247970e+05
29 201523069349300957.xml 341496171.0 0.850617 0.068838 1.125712 0.212899 6.727750e+05
... ... ... ... ... ... ... ...
38884 201533099349301698.xml 912130056.0 0.760828 0.610684 0.104169 -2.163589 7.340370e+05
38885 201543119349300204.xml 237198698.0 0.819953 0.549109 0.248479 0.060347 3.518240e+05
38886 201543139349302814.xml 462250934.0 0.131636 0.960863 0.001425 0.005347 2.279017e+06
38887 201533219349301213.xml 231416559.0 0.598760 0.802416 0.248929 -0.207427 8.819110e+05
38888 201543179349306629.xml 223128187.0 0.621104 0.040543 1.574808 -0.133304 6.866690e+05
38889 201533209349316263.xml 731585237.0 0.849584 0.133037 0.508528 -0.054146 2.441360e+05
38890 201543079349301044.xml 581736427.0 0.887224 1.390273 -1.014415 0.052991 1.413780e+05
38891 201533139349300123.xml 470841633.0 0.730878 0.237917 6.796036 0.615454 8.076930e+05
38892 201543179349305429.xml 943345498.0 0.570344 0.031022 3.196737 0.146015 5.820010e+05
38893 201543179349308719.xml 264795329.0 0.984882 1.067790 -0.013730 -0.013601 6.089029e+06
38894 201533179349302373.xml 262709818.0 1.000000 1.000000 0.000000 0.000000 3.369300e+04
38895 201620149349300127.xml 431129770.0 0.868905 0.203344 0.818979 0.041341 1.113451e+07
38896 201630129349300803.xml 592240502.0 0.858238 0.090567 1.552281 0.165716 6.398019e+06
38897 201620119349300422.xml 61462359.0 0.843057 0.087344 0.250055 -0.086286 6.898120e+05
38898 201630129349300723.xml 630652760.0 0.992892 0.301988 0.133494 0.000101 3.599537e+06
38899 201610139349300811.xml 341472960.0 1.000000 1.000000 0.000000 0.000000 2.762930e+05
38900 201600199349300500.xml 800182020.0 0.594838 0.141928 0.037224 -0.104980 1.481881e+06
38901 201600399349300955.xml 462754433.0 0.000000 0.000000 0.235223 0.095712 4.060960e+05
38902 201600369349301750.xml 742744885.0 0.975219 0.000000 0.452366 -0.045373 2.315450e+05
38903 201610399349300226.xml 471127421.0 0.138038 0.000000 2.097551 2.163548 1.495530e+05
38904 201620359349300612.xml 742244155.0 0.901125 0.559978 0.068062 0.008963 2.399374e+07
38905 201610429349302361.xml 940764760.0 0.992275 0.788509 0.034823 0.014275 5.107870e+05
38906 201610369349301226.xml 840588263.0 0.988925 0.080860 0.313222 0.166371 2.257280e+05
38907 201640359349300129.xml 20650275.0 0.681210 0.498341 0.213852 0.038597 3.040090e+05
38908 201600429349301735.xml 942418861.0 0.987789 0.015595 3.429593 0.526883 2.165501e+06
38909 201600359349302005.xml 223201871.0 0.907986 0.000000 3.052342 0.281392 2.319760e+05
38910 201600419349301130.xml 650599763.0 0.955581 4.173882 -0.414036 0.058141 1.489220e+05
38911 201640359349301614.xml 470461460.0 0.465235 0.014051 1.666884 0.182197 2.236610e+05
38912 201600429349302135.xml 990208381.0 0.852344 0.047928 0.815871 0.229352 1.879170e+05
38913 201610229349300741.xml 510216586.0 0.521938 0.746625 0.529489 0.026507 4.834544e+09

38657 rows × 7 columns

small_df=df[df.Total_Expenses<1000000]

med_df=df[df.Total_Expenses>1000000] med_df=med_df[df.Total_Expenses<10000000]

large_df=df[df.Total_Expenses<50000000] large_df=large_df[df.Total_Expenses>10000000]

national_df=df[df.Total_Expenses>50000000]


In [56]:
#Getting a list of positive businesses

temp=df[df.Program_Exp>.9] 
temp=temp[temp.Liabilities_To_Asset<.2]
lst_temp=list(temp['EIN'])

In [58]:
df.reset_index(drop=True,inplace=True)
norm_df=df.copy()

norm_df=norm_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin','Total_Expenses']]

from sklearn import preprocessing

x = norm_df.values #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
norm_df = pd.DataFrame(x_scaled)
norm_df["Filename"]=df['Filename']

norm_df["EIN"]=df['EIN']
norm_df.columns=['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin','Total_Expenses','Filename','EIN']
norm_df.set_index('EIN',inplace=True)
norm_df


Out[58]:
Program_Exp Liabilities_To_Asset Working_Capital Surplus_Margin Total_Expenses Filename
EIN
510311790.0 0.989619 0.013484 0.463650 0.957231 0.000068 201523229349300327.xml
261460932.0 0.965378 0.013479 0.463657 0.957231 0.000004 201543089349301829.xml
270609504.0 0.942276 0.013482 0.463647 0.957232 0.000056 201533179349306298.xml
521548962.0 0.384526 0.013479 0.463740 0.957234 0.000007 201533209349304768.xml
731653383.0 0.620560 0.013480 0.463649 0.957232 0.000004 201533179349307343.xml
237324566.0 0.758902 0.013522 0.463645 0.957231 0.000089 201533189349300608.xml
43259150.0 0.736312 0.013489 0.463646 0.957231 0.000031 201523069349301367.xml
621273871.0 0.644822 0.013492 0.463690 0.957233 0.000209 201533069349300963.xml
541897455.0 0.845659 0.013487 0.463646 0.957231 0.000068 201523099349300542.xml
251869168.0 0.762357 0.013479 0.463672 0.957232 0.000015 201533099349301033.xml
376046335.0 0.745386 0.013479 0.463696 0.957232 0.000005 201523169349304367.xml
50454409.0 0.915261 0.013479 0.463649 0.957231 0.000082 201533099349301803.xml
272364809.0 0.920307 0.013479 0.463646 0.957231 0.000044 201523069349300142.xml
463812139.0 0.664088 0.013517 0.463645 0.957232 0.000020 201543039349301204.xml
561797737.0 0.704623 0.013504 0.463645 0.957231 0.000031 201523089349301462.xml
590638867.0 0.000000 0.013479 0.463646 0.957231 0.000112 201533069349300788.xml
273314429.0 1.000000 0.013479 0.463645 0.957232 0.000004 201533079349300238.xml
710801566.0 1.000000 0.013479 0.463645 0.957200 0.000168 201523209349314257.xml
752204594.0 0.896895 0.013483 0.463647 0.957231 0.000569 201523209349311332.xml
251374594.0 0.000000 0.013497 0.463646 0.957231 0.000062 201533179349302173.xml
450448773.0 0.991205 0.013482 0.463646 0.957232 0.000027 201533179349307048.xml
363789851.0 0.000000 0.013492 0.463647 0.957231 0.001072 201533209349302633.xml
411712804.0 0.477697 0.013479 0.463648 0.957231 0.000119 201533099349301113.xml
381557861.0 0.832159 0.013482 0.463654 0.957232 0.000200 201523039349300127.xml
273741627.0 0.945120 0.013479 0.463648 0.957232 0.000181 201523079349301652.xml
411495321.0 0.668161 0.013481 0.464095 0.957231 0.000162 201533039349300813.xml
60666277.0 0.000000 0.013516 0.463647 0.957230 0.002874 201533139349300208.xml
42616064.0 0.951218 0.013499 0.463646 0.957231 0.004864 201533069349301413.xml
581651220.0 0.910347 0.013480 0.463648 0.957231 0.000048 201533079349300003.xml
341496171.0 0.850617 0.013483 0.463649 0.957232 0.000098 201523069349300957.xml
... ... ... ... ... ... ...
912130056.0 0.760828 0.013514 0.463645 0.957223 0.000107 201533099349301698.xml
237198698.0 0.819953 0.013511 0.463646 0.957231 0.000051 201543119349300204.xml
462250934.0 0.131636 0.013535 0.463645 0.957231 0.000333 201543139349302814.xml
231416559.0 0.598760 0.013526 0.463646 0.957230 0.000129 201533219349301213.xml
223128187.0 0.621104 0.013481 0.463650 0.957231 0.000100 201543179349306629.xml
731585237.0 0.849584 0.013486 0.463647 0.957231 0.000036 201533209349316263.xml
581736427.0 0.887224 0.013560 0.463642 0.957231 0.000021 201543079349301044.xml
470841633.0 0.730878 0.013493 0.463666 0.957234 0.000118 201533139349300123.xml
943345498.0 0.570344 0.013481 0.463655 0.957232 0.000085 201543179349305429.xml
264795329.0 0.984882 0.013541 0.463645 0.957231 0.000891 201543179349308719.xml
262709818.0 1.000000 0.013537 0.463645 0.957231 0.000005 201533179349302373.xml
431129770.0 0.868905 0.013491 0.463648 0.957231 0.001629 201620149349300127.xml
592240502.0 0.858238 0.013484 0.463650 0.957232 0.000936 201630129349300803.xml
61462359.0 0.843057 0.013484 0.463646 0.957231 0.000101 201620119349300422.xml
630652760.0 0.992892 0.013496 0.463646 0.957231 0.000526 201630129349300723.xml
341472960.0 1.000000 0.013537 0.463645 0.957231 0.000040 201610139349300811.xml
800182020.0 0.594838 0.013487 0.463645 0.957231 0.000217 201600199349300500.xml
462754433.0 0.000000 0.013479 0.463646 0.957232 0.000059 201600399349300955.xml
742744885.0 0.975219 0.013479 0.463647 0.957231 0.000034 201600369349301750.xml
471127421.0 0.138038 0.013479 0.463652 0.957239 0.000022 201610399349300226.xml
742244155.0 0.901125 0.013511 0.463645 0.957231 0.003509 201620359349300612.xml
940764760.0 0.992275 0.013525 0.463645 0.957231 0.000075 201610429349302361.xml
840588263.0 0.988925 0.013483 0.463646 0.957232 0.000033 201610369349301226.xml
20650275.0 0.681210 0.013508 0.463646 0.957231 0.000044 201640359349300129.xml
942418861.0 0.987789 0.013480 0.463656 0.957233 0.000317 201600429349301735.xml
223201871.0 0.907986 0.013479 0.463655 0.957232 0.000034 201600359349302005.xml
650599763.0 0.955581 0.013723 0.463644 0.957231 0.000022 201600419349301130.xml
470461460.0 0.465235 0.013480 0.463650 0.957232 0.000033 201640359349301614.xml
990208381.0 0.852344 0.013482 0.463648 0.957232 0.000027 201600429349302135.xml
510216586.0 0.521938 0.013522 0.463647 0.957231 0.707131 201610229349300741.xml

38657 rows × 6 columns


In [59]:
df2 = df.copy()
df2.reset_index(inplace=True, drop = True)
print(norm_df2)
Y_class_df = pd.DataFrame()
X_class_df=norm_df.loc[lst_temp]
X_class_df['Efficiency'] = 1


Y_class_df['Efficiency'] = X_class_df['Efficiency'] 
X_class_df.drop('Efficiency', axis=1, inplace=True)


       Program_Exp  Liabilities_To_Asset  Working_Capital  Surplus_Margin  \
0         0.684058              0.013484         0.463650        0.957231   
1         0.667302              0.013479         0.463657        0.957231   
2         0.651333              0.013482         0.463647        0.957232   
3         0.265798              0.013479         0.463740        0.957234   
4         0.428952              0.013480         0.463649        0.957232   
5         0.524579              0.013522         0.463645        0.957231   
6         0.508964              0.013489         0.463646        0.957231   
7         0.445723              0.013492         0.463690        0.957233   
8         0.584548              0.013487         0.463646        0.957231   
9         0.526967              0.013479         0.463672        0.957232   
10        0.515236              0.013479         0.463696        0.957232   
11        0.632659              0.013479         0.463649        0.957231   
12        0.636148              0.013479         0.463646        0.957231   
13        0.459040              0.013517         0.463645        0.957232   
14        0.487059              0.013504         0.463645        0.957231   
15        0.000000              0.013479         0.463646        0.957231   
16        0.691234              0.013479         0.463645        0.957232   
17        0.691234              0.013479         0.463645        0.957200   
18        0.619964              0.013483         0.463647        0.957231   
19        0.000000              0.013497         0.463646        0.957231   
20        0.685154              0.013482         0.463646        0.957232   
21        0.000000              0.013492         0.463647        0.957231   
22        0.330200              0.013479         0.463648        0.957231   
23        0.575217              0.013482         0.463654        0.957232   
24        0.653299              0.013479         0.463648        0.957232   
25        0.461856              0.013481         0.464095        0.957231   
26        0.000000              0.013516         0.463647        0.957230   
27        0.657514              0.013499         0.463646        0.957231   
28        0.629263              0.013480         0.463648        0.957231   
29        0.587975              0.013483         0.463649        0.957232   
...            ...                   ...              ...             ...   
38639     0.525910              0.013514         0.463645        0.957223   
38640     0.566779              0.013511         0.463646        0.957231   
38641     0.090991              0.013535         0.463645        0.957231   
38642     0.413883              0.013526         0.463646        0.957230   
38643     0.429328              0.013481         0.463650        0.957231   
38644     0.587261              0.013486         0.463647        0.957231   
38645     0.613279              0.013560         0.463642        0.957231   
38646     0.505208              0.013493         0.463666        0.957234   
38647     0.394241              0.013481         0.463655        0.957232   
38648     0.680784              0.013541         0.463645        0.957231   
38649     0.691234              0.013537         0.463645        0.957231   
38650     0.600616              0.013491         0.463648        0.957231   
38651     0.593243              0.013484         0.463650        0.957232   
38652     0.582750              0.013484         0.463646        0.957231   
38653     0.686320              0.013496         0.463646        0.957231   
38654     0.691234              0.013537         0.463645        0.957231   
38655     0.411172              0.013487         0.463645        0.957231   
38656     0.000000              0.013479         0.463646        0.957232   
38657     0.674104              0.013479         0.463647        0.957231   
38658     0.095417              0.013479         0.463652        0.957239   
38659     0.622888              0.013511         0.463645        0.957231   
38660     0.685894              0.013525         0.463645        0.957231   
38661     0.683578              0.013483         0.463646        0.957232   
38662     0.470875              0.013508         0.463646        0.957231   
38663     0.682793              0.013480         0.463656        0.957233   
38664     0.627631              0.013479         0.463655        0.957232   
38665     0.660530              0.013723         0.463644        0.957231   
38666     0.321586              0.013480         0.463650        0.957232   
38667     0.589169              0.013482         0.463648        0.957232   
38668     0.360782              0.013522         0.463647        0.957231   

       Total_Expenses                Filename  
0            0.000068  201523229349300327.xml  
1            0.000004  201543089349301829.xml  
2            0.000056  201533179349306298.xml  
3            0.000007  201533209349304768.xml  
4            0.000004  201533179349307343.xml  
5            0.000089  201533189349300608.xml  
6            0.000031  201523069349301367.xml  
7            0.000209  201533069349300963.xml  
8            0.000068  201523099349300542.xml  
9            0.000015  201533099349301033.xml  
10           0.000005  201523169349304367.xml  
11           0.000082  201533099349301803.xml  
12           0.000044  201523069349300142.xml  
13           0.000020  201543039349301204.xml  
14           0.000031  201523089349301462.xml  
15           0.000112  201533069349300788.xml  
16           0.000004  201533079349300238.xml  
17           0.000168  201523209349314257.xml  
18           0.000569  201523209349311332.xml  
19           0.000062  201533179349302173.xml  
20           0.000027  201533179349307048.xml  
21           0.001072  201533209349302633.xml  
22           0.000119  201533099349301113.xml  
23           0.000200  201523039349300127.xml  
24           0.000181  201523079349301652.xml  
25           0.000162  201533039349300813.xml  
26           0.002874  201533139349300208.xml  
27           0.004864  201533069349301413.xml  
28           0.000048  201533079349300003.xml  
29           0.000098  201523069349300957.xml  
...               ...                     ...  
38639        0.000107  201533099349301698.xml  
38640        0.000051  201543119349300204.xml  
38641        0.000333  201543139349302814.xml  
38642        0.000129  201533219349301213.xml  
38643        0.000100  201543179349306629.xml  
38644        0.000036  201533209349316263.xml  
38645        0.000021  201543079349301044.xml  
38646        0.000118  201533139349300123.xml  
38647        0.000085  201543179349305429.xml  
38648        0.000891  201543179349308719.xml  
38649        0.000005  201533179349302373.xml  
38650        0.001629  201620149349300127.xml  
38651        0.000936  201630129349300803.xml  
38652        0.000101  201620119349300422.xml  
38653        0.000526  201630129349300723.xml  
38654        0.000040  201610139349300811.xml  
38655        0.000217  201600199349300500.xml  
38656        0.000059  201600399349300955.xml  
38657        0.000034  201600369349301750.xml  
38658        0.000022  201610399349300226.xml  
38659        0.003509  201620359349300612.xml  
38660        0.000075  201610429349302361.xml  
38661        0.000033  201610369349301226.xml  
38662        0.000044  201640359349300129.xml  
38663        0.000317  201600429349301735.xml  
38664        0.000034  201600359349302005.xml  
38665        0.000022  201600419349301130.xml  
38666        0.000033  201640359349301614.xml  
38667        0.000027  201600429349302135.xml  
38668        0.707131  201610229349300741.xml  

[38669 rows x 6 columns]
C:\Anaconda3\lib\site-packages\ipykernel\__main__.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
C:\Anaconda3\lib\site-packages\ipykernel\__main__.py:10: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [60]:
new_df=norm_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin']]
new_df.reset_index(inplace=True,drop=True)
X_class_df=X_class_df[['Program_Exp','Liabilities_To_Asset','Working_Capital','Surplus_Margin']]
# X_class_df=X_class_df.drop(X_class_df.index[2])   #OUTLIER REMOVER
X_class_df.reset_index(inplace=True,drop=True)

In [61]:
import numpy as np

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets.samples_generator import make_blobs
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD

svd = TruncatedSVD(n_components=2, n_iter=7)
X = svd.fit_transform(new_df)

In [62]:
db = DBSCAN(eps=0.1, min_samples=10).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)

In [79]:
print(n_clusters_)

outliers_index = []
for i in range(len(labels)):
    if(labels[i]==-1):
        outliers_index.append(i)
        #print(df2.loc[i])
outliers = df2.loc[outliers_index]
import matplotlib.pyplot as plt

# Black removed and is used for noise instead.
unique_labels = set(labels)
colors = 'cyan'
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = 'k'

    class_member_mask = (labels == k)

    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=14)

    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=col,
             markeredgecolor='k', markersize=6)

plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

outliers


1
Out[79]:
Filename EIN Program_Exp Liabilities_To_Asset Working_Capital Surplus_Margin Total_Expenses
2035 201502969349300235.xml 680383921.0 0.000000 0.000000 172324.300000 0.000000 10.0
9921 201513179349308861.xml 311040228.0 0.000000 0.540167 149559.470000 1.000333 100.0
12048 201513179349305901.xml 166050703.0 1.000000 33.472201 -148963.563600 0.000000 220.0
18753 201513139349303196.xml 943152652.0 1.000000 254.451508 -7.144765 -250550.000000 250551.0
30433 201512599349300126.xml 232799695.0 0.985825 0.426822 2.387819 -54849.571430 202757.0

Results:

Outliers appear to be failing in different metrics. The first 2 fail at Program Expenses and have extremeley inflated Working Capitals. The last two, despite having positive program expenses, have very high and negative surplus margins and are thus losing a lot of money.