notebook.community

Edit and run



In [1]:

    
%pylab inline
from IPython.display import HTML
%matplotlib inline

import os
import sys
from StringIO import StringIO
import scipy
import seaborn as sns

from pandas import read_csv
import matplotlib.pyplot as plt

cwd = os.getcwd()

#MedData = cwd +"/medicare_data/Medicare-Physician-and-Other-Supplier-PUF-CY2012.csv"
#data = read_csv(MedData, sep="\t")

#g_IL=data[data["nppes_provider_state"]=="IL"] 
#f_IL=g_IL[g_IL["provider_type"]=="Pathology"]

IL_data = cwd +"/medicare_data/Medicare_Data_IL_2012.csv"
f_IL = read_csv(IL_data)









    



Populating the interactive namespace from numpy and matplotlib



In [2]:

    
f_IL.describe()









    Out[2]:






  
    
      
      Unnamed: 0
      npi
      nppes_provider_zip
      line_srvc_cnt
      bene_unique_cnt
      bene_day_srvc_cnt
      average_Medicare_allowed_amt
      stdev_Medicare_allowed_amt
      average_submitted_chrg_amt
      stdev_submitted_chrg_amt
      average_Medicare_payment_amt
      stdev_Medicare_payment_amt
    
  
  
    
      count
        387623.000000
       3.876230e+05
       3.876230e+05
        387623.000000
       387623.000000
       387623.000000
       387623.000000
       387623.000000
       387623.000000
       387623.000000
       387623.000000
       387623.000000
    
    
      mean
       4595125.077805
       1.501417e+09
       5.391950e+08
           232.597919
           87.695124
          148.701775
           98.801347
            6.524754
          343.511687
           23.801261
           76.870100
           12.313640
    
    
      std
       2632511.542281
       2.868009e+08
       1.934934e+08
          4054.240115
          599.760320
         1105.904720
          192.197134
           36.973404
          943.696945
          169.014097
          152.143044
           33.027860
    
    
      min
             8.000000
       1.003000e+09
       6.170000e+02
            11.000000
           11.000000
           11.000000
            0.010000
            0.000000
            0.010000
            0.000000
            0.000000
            0.000000
    
    
      25%
       2314023.500000
       1.255329e+09
       6.009875e+08
            21.000000
           18.000000
           21.000000
           24.330000
            0.000000
           62.000000
            0.000000
           19.580000
            0.103002
    
    
      50%
       4568112.000000
       1.497861e+09
       6.052138e+08
            47.000000
           33.000000
           44.000000
           64.430000
            0.000000
          142.000000
            0.000000
           47.549091
            4.963882
    
    
      75%
       6883154.500000
       1.750335e+09
       6.110337e+08
           131.000000
           79.000000
          118.000000
          111.890000
            0.919908
          281.000000
            3.962470
           85.820834
           15.742280
    
    
      max
       9153107.000000
       1.992998e+09
       6.313661e+08
       1092721.000000
       170152.000000
       279254.000000
        32799.949219
        10326.988384
        98000.000000
        27576.714201
        25545.394688
         8950.803036



In [3]:

    
f_IL.head(5)









    Out[3]:






  
    
      
      Unnamed: 0
      npi
      nppes_provider_last_org_name
      nppes_provider_first_name
      nppes_provider_mi
      nppes_credentials
      nppes_provider_gender
      nppes_entity_code
      nppes_provider_street1
      nppes_provider_street2
      ...
      hcpcs_description
      line_srvc_cnt
      bene_unique_cnt
      bene_day_srvc_cnt
      average_Medicare_allowed_amt
      stdev_Medicare_allowed_amt
      average_submitted_chrg_amt
      stdev_submitted_chrg_amt
      average_Medicare_payment_amt
      stdev_Medicare_payment_amt
    
  
  
    
      0
        8
       1003000134
       CIBULL
       THOMAS
       L
       M.D.
       M
       I
       2650 RIDGE AVE
       EVANSTON HOSPITAL
      ...
       Tissue exam by pathologist
        226
        207
        209
       11.640000
       0.000000
       115
       0
        8.980442
       1.720341
    
    
      1
        9
       1003000134
       CIBULL
       THOMAS
       L
       M.D.
       M
       I
       2650 RIDGE AVE
       EVANSTON HOSPITAL
      ...
       Tissue exam by pathologist
       6070
       3624
       4416
       37.729960
       0.001257
       170
       0
       28.984504
       5.626832
    
    
      2
       10
       1003000134
       CIBULL
       THOMAS
       L
       M.D.
       M
       I
       2650 RIDGE AVE
       EVANSTON HOSPITAL
      ...
                 Decalcify tissue
         13
         13
         13
       12.700000
       0.000000
        39
       0
        7.815385
       4.280662
    
    
      3
       11
       1003000134
       CIBULL
       THOMAS
       L
       M.D.
       M
       I
       2650 RIDGE AVE
       EVANSTON HOSPITAL
      ...
           Special stains group 1
        330
        231
        238
       27.149576
       0.005433
        88
       0
       21.391364
       2.646612
    
    
      4
       12
       1003000134
       CIBULL
       THOMAS
       L
       M.D.
       M
       I
       2650 RIDGE AVE
       EVANSTON HOSPITAL
      ...
           Special stains group 2
         51
         46
         48
       12.340000
       0.000000
        68
       0
        9.676471
       1.368460
    
  

5 rows × 28 columns



In [4]:

    
#### 
# THIS IS FOR QUIZ

#g_CA=data[data["nppes_provider_state"]=="CA"] 
#f_CA=g_CA[g_CA["provider_type"]=="Pathology"]
CA_data = cwd +"/medicare_data/Medicare_Data_CA_2012.csv"
f_CA = read_csv(CA_data)



In [5]:

    
# THIS IS FOR QUIZ
f_CA.describe()









    Out[5]:






  
    
      
      Unnamed: 0
      npi
      nppes_provider_zip
      line_srvc_cnt
      bene_unique_cnt
      bene_day_srvc_cnt
      average_Medicare_allowed_amt
      stdev_Medicare_allowed_amt
      average_submitted_chrg_amt
      stdev_submitted_chrg_amt
      average_Medicare_payment_amt
      stdev_Medicare_payment_amt
    
  
  
    
      count
        716330.000000
       7.163300e+05
       7.163300e+05
        716330.000000
       716330.000000
       716330.000000
       716330.000000
       716330.000000
       716330.000000
       716330.000000
       716330.000000
       716330.000000
    
    
      mean
       4580263.934586
       1.499738e+09
       8.288257e+08
           282.169835
          102.604347
          179.941160
          111.330433
            7.816640
          331.460201
           25.219743
           87.042262
           13.322745
    
    
      std
       2640094.334631
       2.877652e+08
       2.890370e+08
          6456.171891
         1366.542781
         2532.800384
          251.931260
           42.168859
          913.840495
          179.111304
          200.610982
           36.041472
    
    
      min
            82.000000
       1.003001e+09
       6.851000e+03
             7.700000
           11.000000
           11.000000
            0.002338
            0.000000
            0.002338
            0.000000
            0.000000
            0.000000
    
    
      25%
       2289376.250000
       1.245491e+09
       9.074523e+08
            22.000000
           18.000000
           21.000000
           25.680000
            0.000000
           53.730000
            0.000000
           20.773003
            0.001540
    
    
      50%
       4589869.500000
       1.508063e+09
       9.250628e+08
            47.000000
           33.000000
           44.000000
           68.920000
            0.000000
          130.562500
            0.000000
           51.222150
            4.812060
    
    
      75%
       6858338.000000
       1.740455e+09
       9.430123e+08
           133.000000
           79.000000
          119.000000
          122.560000
            0.971116
          280.000000
            3.717458
           93.305593
           16.669437
    
    
      max
       9153143.000000
       1.992998e+09
       9.924343e+08
       3707234.000000
       256034.000000
       508209.000000
        36489.655263
         5643.068806
        73526.000000
        19444.525000
        29191.722895
         4702.487861



In [6]:

    
# THIS IS FOR QUIZ
f_CA.head(10)









    Out[6]:






  
    
      
      Unnamed: 0
      npi
      nppes_provider_last_org_name
      nppes_provider_first_name
      nppes_provider_mi
      nppes_credentials
      nppes_provider_gender
      nppes_entity_code
      nppes_provider_street1
      nppes_provider_street2
      ...
      hcpcs_description
      line_srvc_cnt
      bene_unique_cnt
      bene_day_srvc_cnt
      average_Medicare_allowed_amt
      stdev_Medicare_allowed_amt
      average_submitted_chrg_amt
      stdev_submitted_chrg_amt
      average_Medicare_payment_amt
      stdev_Medicare_payment_amt
    
  
  
    
      0
        82
       1003000712
       MALLORY
         SHEILA
       O
       N. P.
       F
       I
       1867 E FIR AVE STE 104
            NaN
      ...
           Us guide vascular access
         14
        14
        14
        12.820000
       0.000000
        57.000000
       0.000000
        10.260000
        0.000000
    
    
      1
       108
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
            Destruct premalg lesion
        110
        62
       110
        91.570000
       6.172714
       100.000000
       0.000000
        66.659818
       20.146418
    
    
      2
       109
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
          Destruct premalg les 2-14
        262
        46
        71
         8.030000
       0.000000
        10.000000
       0.000000
         6.042405
        2.879763
    
    
      3
       110
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
        Destroy premal lesions 15/>
        430
       158
       430
       191.490000
       0.000000
       220.000000
       0.000000
       146.658651
       24.311615
    
    
      4
       111
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
        Photochemotherapy with UV-B
        121
        31
       121
        86.185455
       7.060642
        95.000000
       0.000000
        68.319752
        6.533262
    
    
      5
       112
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
        Office/outpatient visit new
        103
       103
       103
       115.940000
       0.000000
       125.000000
       0.000000
        80.572718
       30.276042
    
    
      6
       113
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
        Office/outpatient visit est
        577
       180
       577
        77.990000
       0.000000
        80.008666
       0.207972
        57.328977
       15.841657
    
    
      7
       114
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
              Initial hospital care
         26
        25
        26
       175.000000
       0.000000
       175.000000
       0.000000
       135.692308
       21.538462
    
    
      8
       115
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
       Betamethasone acet&sod phosp
       2088
       133
       522
         5.547783
       0.071203
         7.500000
       0.000000
         4.231475
        1.856469
    
    
      9
       116
       1003001017
       NICHOLS
       LAWRENCE
       M
         M.D
       M
       I
            5471 LA PALMA AVE
       STE. 202
      ...
         Triamcinolone acet inj NOS
       3632
       120
       455
         1.687673
       0.065430
         2.497247
       0.165566
         1.289675
        0.785567
    
  

10 rows × 28 columns



In [7]:

    
len(f_IL.columns)









    Out[7]:





28



In [8]:

    
for c in f_IL.columns : print c









    



Unnamed: 0
npi
nppes_provider_last_org_name
nppes_provider_first_name
nppes_provider_mi
nppes_credentials
nppes_provider_gender
nppes_entity_code
nppes_provider_street1
nppes_provider_street2
nppes_provider_city
nppes_provider_zip
nppes_provider_state
nppes_provider_country
provider_type
medicare_participation_indicator
place_of_service
hcpcs_code
hcpcs_description
line_srvc_cnt
bene_unique_cnt
bene_day_srvc_cnt
average_Medicare_allowed_amt
stdev_Medicare_allowed_amt
average_submitted_chrg_amt
stdev_submitted_chrg_amt
average_Medicare_payment_amt
stdev_Medicare_payment_amt



In [9]:

    
#pal1 = dict(M="#4682B4", F="#CD5C5C")
print len(f_IL.provider_type.unique())
print len(f_IL.nppes_provider_city.unique())
print len(f_IL.hcpcs_description.unique())



In [10]:

    
f0 = f_IL.average_submitted_chrg_amt.values
f1 = f_IL.average_Medicare_payment_amt.values
f2 = f_IL.average_Medicare_allowed_amt.values



In [11]:

    
n0, bins0, patches0=plt.hist(f0,50,normed=1, range=(0,1000),
                             histtype='stepfilled')
n2, bins2, patches2=plt.hist(f2,50,normed=1, range=(0,1000),
                             histtype='stepfilled')
plt.setp(patches0, 'facecolor', 'g', 'alpha', 0.75)
plt.setp(patches2, 'facecolor', 'b', 'alpha', 0.75)









    Out[11]:





[None, None]



In [12]:

    
n0, bins0, patches0=plt.hist(f0,50,normed=1, log=0,range=(0,1000),
                             histtype='stepfilled')
n1, bins1, patches1=plt.hist(f1,50,normed=1, log=0,range=(0,1000),
                             histtype='stepfilled')
plt.setp(patches1, 'facecolor', 'r', 'alpha', 0.75)
plt.setp(patches0, 'facecolor', 'g', 'alpha', 0.75)









    Out[12]:





[None, None]



In [13]:

    
#### 
# THIS IS FOR QUIZ

g0 = f_CA.average_submitted_chrg_amt.values
g1 = f_CA.average_Medicare_payment_amt.values
g2 = f_CA.average_Medicare_allowed_amt.values



In [14]:

    
#### 
# THIS IS FOR QUIZ
n0, bins0, patches0=plt.hist(g0,50,normed=0, range=(0,1000),
                             histtype='stepfilled')
n2, bins2, patches2=plt.hist(g2,50,normed=0, range=(0,1000),
                             histtype='stepfilled')
plt.setp(patches0, 'facecolor', 'g', 'alpha', 0.75)
plt.setp(patches2, 'facecolor', 'b', 'alpha', 0.75)









    Out[14]:





[None, None]



In [15]:

    
#### 
# THIS IS FOR QUIZ
n0, bins0, patches0=plt.hist(g0,50,normed=1, log=0,range=(0,1000),
                             histtype='stepfilled')
n1, bins1, patches1=plt.hist(g1,50,normed=1, log=0,range=(0,1000),
                             histtype='stepfilled')
plt.setp(patches1, 'facecolor', 'r', 'alpha', 0.75)
plt.setp(patches0, 'facecolor', 'g', 'alpha', 0.75)









    Out[15]:





[None, None]



In [19]:

    
n0, bins0, patches0=plt.hist((f0-f0.min())/(f0.max() - f0.min()),40,
                             normed=1,log=0,range=(0.,1.02),
                             histtype='stepfilled')
n1, bins1, patches1=plt.hist((f1-f1.min())/(f1.max() - f1.min()),40,
                             normed=1,log=0,range=(0,1.02),
                             histtype='stepfilled')
plt.setp(patches0, 'facecolor', 'g', 'alpha', 0.75)
plt.setp(patches1, 'facecolor', 'r', 'alpha', 0.75)









    Out[19]:





[None, None]



In [20]:

    
n0, bins0, patches0=plt.hist((f0-f0.min())/(f0.max() - f0.min()),50,
                             normed=1, log=1,range=(-0.2,1.2),
                             histtype='stepfilled')
n1, bins1, patches1=plt.hist((f2-f2.min())/(f2.max() - f2.min()),40,
                             normed=1, log=1,range=(-0.2,1.2),
                             histtype='stepfilled')
plt.setp(patches0, 'facecolor', 'g', 'alpha', 0.75)
plt.setp(patches1, 'facecolor', 'r', 'alpha', 0.75)









    Out[20]:





[None, None]



In [23]:

    
plt.scatter((f1 - f1.min())/(f1.max() - f1.min()),(f2-f2.min())/(f2.max() - f2.min()),
            marker="o",color="g")









    Out[23]:





<matplotlib.collections.PathCollection at 0x10f03ae90>



In [24]:

    
plt.scatter((f0 - f0.min())/(f0.max() - f0.min()),(f1-f1.min())/(f1.max() - f1.min()),
            marker="x",color="b")









    Out[24]:





<matplotlib.collections.PathCollection at 0x10b7c2ed0>



In [25]:

    
from scipy.stats import pearsonr
pearsonr(f1,f2)









    Out[25]:





(0.99892537506204215, 0.0)



In [28]:

    
sns.set(style="darkgrid")
f, ax = plt.subplots(figsize=(10, 10))
sns.corrplot(f_IL, annot=False, sig_stars=True,
             diag_names=False, ax=ax)
f.tight_layout()



In [29]:

    
##for quiz
#
from scipy.stats import pearsonr
pearsonr((f1 - f1.min())/(f1.max() - f1.min()),f2/(f2.max() - f2.min()))









    Out[29]:





(0.99892537506177492, 0.0)



In [30]:

    
x = abs(f0-f1)/f0
n0, bins0, patches0=plt.hist(x,100,normed=0,range=(0,1),histtype='stepfilled')
plt.setp(patches0, 'facecolor', 'g', 'alpha', 0.75)









    Out[30]:





[None, None]



In [33]:

    
## For QUIZ
y = abs(g0-g1)/g0
n1, bins1, patches1=plt.hist(x,200,normed=1,range=(-0.05,1),log=0,histtype='stepfilled')
n2, bins2, patches2=plt.hist(y,200,normed=1,range=(-0.5,1),log=0,histtype='stepfilled')
plt.setp(patches1, 'facecolor', 'g', 'alpha', 0.75)
plt.setp(patches2, 'facecolor', 'r', 'alpha', 0.75)









    Out[33]:





[None, None]



In [35]:

    
from scipy import stats
from functools import partial
def my_kde_bandwidth(obj, fac=1./5):
    """We use Scott's Rule, multiplied by a constant factor."""
    return np.power(obj.n, -1./(obj.d+4)) * fac

def getKDE(data, name="", bwfac = 0.2):
    x2=data
    x_eval = np.linspace(x2.min() - 1, x2.max() + 1, 500)
    kde = stats.gaussian_kde(x2, bw_method=partial(my_kde_bandwidth, fac=bwfac))
    fig1 = plt.figure(figsize=(8, 6))
    ax = fig1.add_subplot(111)
    plt.yscale=('log')
    plt.grid(True)
    x2h1, x2h2 =np.histogram(x2,bins=[0.,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],normed=True)
    ax.plot(x2, np.zeros(x2.shape), 'b+', ms=12)
    ax.plot(x_eval, kde(x_eval), 'g-', label="Scott *"+str(bwfac))
    ax.plot(x2h2[:-1], x2h1, 'r--', label="Actual PDF")
    ax.set_xlim([-0.5,1.5])
    ax.legend(loc=2)
    ax.set_xlabel('x')
    ax.set_ylabel('Density Estimate')
    plt.savefig(cwd+"/Plots/KDE_"+name+".png")
    return plt



In [36]:

    
plt.clf()
plt=getKDE(x,"Fractional Diff of Claimed vs Paid",bwfac=0.1)









    





<matplotlib.figure.Figure at 0x109f80410>



In [37]:

    
plt.clf()
plt=getKDE(x,"Fractional Diff of Claimed vs Paid",bwfac=0.2)









    





<matplotlib.figure.Figure at 0x10bc39a90>



In [38]:

    
plt.clf()
plt=getKDE(x,"Fractional Diff of Claimed vs Paid",bwfac=0.7)









    





<matplotlib.figure.Figure at 0x10baa0610>



In [39]:

    
plt.clf()
plt=getKDE(x,"Fractional Diff of Claimed vs Paid",bwfac=0.9)









    





<matplotlib.figure.Figure at 0x10a6f8690>



In [40]:

    
plt.clf()
plt=getKDE(x,"Fractional Diff of Claimed vs Paid",bwfac=0.99)









    





<matplotlib.figure.Figure at 0x109f80410>



In [43]:

    
def getAllKDE(data, name=""):
    x2=data
    x_eval = np.linspace(x2.min() - 1, x2.max() + 1, 500)
    
    kde1 = stats.gaussian_kde(x2)
    kde2 = stats.gaussian_kde(x2, bw_method='silverman')
    kde3 = stats.gaussian_kde(x2, bw_method=partial(my_kde_bandwidth, fac=0.2))
    kde4 = stats.gaussian_kde(x2, bw_method=partial(my_kde_bandwidth, fac=0.5))
    pdf = stats.norm.pdf
    fig1 = plt.figure(figsize=(12, 10))
    ax = fig1.add_subplot(111)
    
    plt.yscale=('log')
    plt.grid(True)
    x2h1, x2h2 =np.histogram(x2,bins=[0.,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0],normed=True)
    ax.plot(x2, np.zeros(x2.shape), 'b+', ms=12)
    ax.plot(x_eval, kde1(x_eval), 'k-', label="Scott's Rule")
    ax.plot(x_eval, kde2(x_eval), 'b-', label="Silverman's Rule")
    ax.plot(x_eval, kde3(x_eval), 'g-', label="Scott * 0.2")
    ax.plot(x_eval, kde4(x_eval), 'c-', label="Scott * 0.9")
    ax.plot(x2h2[:-1], x2h1, 'r--', label="Actual PDF")
    
    
    #ax.set_xlim([x_eval.min(), x_eval.max()])
    ax.set_xlim([-0.5,1.5])
    ax.legend(loc=2)
    ax.set_xlabel('x')
    ax.set_ylabel('Density Estimate')
    plt.savefig(cwd+"/Plots/KDE_"+name+".png")
    return plt



In [44]:

    
plt.clf()
plt=getAllKDE(x,"Fractional Diff of Claimed vs Paid")









    





<matplotlib.figure.Figure at 0x10214c3d0>



In [45]:

    
xbar = abs(f0 - f2)/f0
g = sns.jointplot(x, xbar, kind="kde", size=7, space=0)



In [46]:

    
def MahalanobisDist(x, y):
    covariance_xy = np.cov(x,y, rowvar=0)
    inv_covariance_xy = np.linalg.inv(covariance_xy)
    xy_mean = np.mean(x),np.mean(y)
    x_diff = np.array([x_i - xy_mean[0] for x_i in x])
    y_diff = np.array([y_i - xy_mean[1] for y_i in y])
    diff_xy = np.transpose([x_diff, y_diff])
    md = []
    for i in range(len(diff_xy)):
        md.append(np.sqrt(np.dot(np.dot(np.transpose(diff_xy[i]),
                                        inv_covariance_xy),diff_xy[i])))
    return md



In [47]:

    
md = MahalanobisDist(x,xbar)



In [48]:

    
def FindOutliers(x, y, p):
    MD = MahalanobisDist(x, y)
    nx, ny, outliers = [], [], []
    threshold = -2*log(1-p)
    for i in range(len(MD)):
        if MD[i]*MD[i] < threshold:
            nx.append(x[i])
            ny.append(y[i])
            outliers.append(i) # position of removed pair
    return (np.array(nx), np.array(ny), np.array(outliers))



In [49]:

    
Outliers = FindOutliers(x,xbar,0.00000243)



In [50]:

    
#print Outliers

def PlotOutliers(Outliers):
    print "Total Outliers found :", len(Outliers[2])
    print "The index of the variables are :", Outliers[2]
    fig2 = plt.figure(figsize=(8, 6))
    ax2 = fig2.add_subplot(111)
    ax2.set_xlim([0.,1.])
    ax2.legend(loc=2)
    ax2.set_xlabel('1 - Allowed Amount/Paid Amount')
    ax2.set_ylabel('1 - Submitted Amount/Paid Amount')
    plt.scatter(Outliers[0],Outliers[1])
    return plt



In [52]:

    
plt.clf()
plt = PlotOutliers(Outliers)









    



Total Outliers found : 4
The index of the variables are : [ 64740 232865 275868 351303]






    





<matplotlib.figure.Figure at 0x10b298050>



In [ ]:

    
# Quiz 
#SHow the indeces using g with y and ybar

ybar = abs(g0-g2)/g0
Outliers = FindOutliers(y,ybar,0.00000243)
plt.clf()
plt = PlotOutliers(y,ybar)



In [ ]:

	Unnamed: 0	npi	nppes_provider_zip	line_srvc_cnt	bene_unique_cnt	bene_day_srvc_cnt	average_Medicare_allowed_amt	stdev_Medicare_allowed_amt	average_submitted_chrg_amt	stdev_submitted_chrg_amt	average_Medicare_payment_amt	stdev_Medicare_payment_amt
count	387623.000000	3.876230e+05	3.876230e+05	387623.000000	387623.000000	387623.000000	387623.000000	387623.000000	387623.000000	387623.000000	387623.000000	387623.000000
mean	4595125.077805	1.501417e+09	5.391950e+08	232.597919	87.695124	148.701775	98.801347	6.524754	343.511687	23.801261	76.870100	12.313640
std	2632511.542281	2.868009e+08	1.934934e+08	4054.240115	599.760320	1105.904720	192.197134	36.973404	943.696945	169.014097	152.143044	33.027860
min	8.000000	1.003000e+09	6.170000e+02	11.000000	11.000000	11.000000	0.010000	0.000000	0.010000	0.000000	0.000000	0.000000
25%	2314023.500000	1.255329e+09	6.009875e+08	21.000000	18.000000	21.000000	24.330000	0.000000	62.000000	0.000000	19.580000	0.103002
50%	4568112.000000	1.497861e+09	6.052138e+08	47.000000	33.000000	44.000000	64.430000	0.000000	142.000000	0.000000	47.549091	4.963882
75%	6883154.500000	1.750335e+09	6.110337e+08	131.000000	79.000000	118.000000	111.890000	0.919908	281.000000	3.962470	85.820834	15.742280
max	9153107.000000	1.992998e+09	6.313661e+08	1092721.000000	170152.000000	279254.000000	32799.949219	10326.988384	98000.000000	27576.714201	25545.394688	8950.803036

	Unnamed: 0	npi	nppes_provider_last_org_name	nppes_provider_first_name	nppes_provider_mi	nppes_credentials	nppes_provider_gender	nppes_entity_code	nppes_provider_street1	nppes_provider_street2	...	hcpcs_description	line_srvc_cnt	bene_unique_cnt	bene_day_srvc_cnt	average_Medicare_allowed_amt	stdev_Medicare_allowed_amt	average_submitted_chrg_amt	average_Medicare_payment_amt	stdev_Medicare_payment_amt
0	8	1003000134	CIBULL	THOMAS	L	M.D.	M	I	2650 RIDGE AVE	EVANSTON HOSPITAL	...	Tissue exam by pathologist	226	207	209	11.640000	0.000000	115	8.980442	1.720341
1	9	1003000134	CIBULL	THOMAS	L	M.D.	M	I	2650 RIDGE AVE	EVANSTON HOSPITAL	...	Tissue exam by pathologist	6070	3624	4416	37.729960	0.001257	170	28.984504	5.626832
2	10	1003000134	CIBULL	THOMAS	L	M.D.	M	I	2650 RIDGE AVE	EVANSTON HOSPITAL	...	Decalcify tissue	13	13	13	12.700000	0.000000	39	7.815385	4.280662
3	11	1003000134	CIBULL	THOMAS	L	M.D.	M	I	2650 RIDGE AVE	EVANSTON HOSPITAL	...	Special stains group 1	330	231	238	27.149576	0.005433	88	21.391364	2.646612
4	12	1003000134	CIBULL	THOMAS	L	M.D.	M	I	2650 RIDGE AVE	EVANSTON HOSPITAL	...	Special stains group 2	51	46	48	12.340000	0.000000	68	9.676471	1.368460

	Unnamed: 0	npi	nppes_provider_zip	line_srvc_cnt	bene_unique_cnt	bene_day_srvc_cnt	average_Medicare_allowed_amt	stdev_Medicare_allowed_amt	average_submitted_chrg_amt	stdev_submitted_chrg_amt	average_Medicare_payment_amt	stdev_Medicare_payment_amt
count	716330.000000	7.163300e+05	7.163300e+05	716330.000000	716330.000000	716330.000000	716330.000000	716330.000000	716330.000000	716330.000000	716330.000000	716330.000000
mean	4580263.934586	1.499738e+09	8.288257e+08	282.169835	102.604347	179.941160	111.330433	7.816640	331.460201	25.219743	87.042262	13.322745
std	2640094.334631	2.877652e+08	2.890370e+08	6456.171891	1366.542781	2532.800384	251.931260	42.168859	913.840495	179.111304	200.610982	36.041472
min	82.000000	1.003001e+09	6.851000e+03	7.700000	11.000000	11.000000	0.002338	0.000000	0.002338	0.000000	0.000000	0.000000
25%	2289376.250000	1.245491e+09	9.074523e+08	22.000000	18.000000	21.000000	25.680000	0.000000	53.730000	0.000000	20.773003	0.001540
50%	4589869.500000	1.508063e+09	9.250628e+08	47.000000	33.000000	44.000000	68.920000	0.000000	130.562500	0.000000	51.222150	4.812060
75%	6858338.000000	1.740455e+09	9.430123e+08	133.000000	79.000000	119.000000	122.560000	0.971116	280.000000	3.717458	93.305593	16.669437
max	9153143.000000	1.992998e+09	9.924343e+08	3707234.000000	256034.000000	508209.000000	36489.655263	5643.068806	73526.000000	19444.525000	29191.722895	4702.487861

	Unnamed: 0	npi	nppes_provider_last_org_name	nppes_provider_first_name	nppes_provider_mi	nppes_credentials	nppes_provider_gender	nppes_entity_code	nppes_provider_street1	nppes_provider_street2	...	hcpcs_description	line_srvc_cnt	bene_unique_cnt	bene_day_srvc_cnt	average_Medicare_allowed_amt	stdev_Medicare_allowed_amt	average_submitted_chrg_amt	stdev_submitted_chrg_amt	average_Medicare_payment_amt	stdev_Medicare_payment_amt
0	82	1003000712	MALLORY	SHEILA	O	N. P.	F	I	1867 E FIR AVE STE 104	NaN	...	Us guide vascular access	14	14	14	12.820000	0.000000	57.000000	0.000000	10.260000	0.000000
1	108	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Destruct premalg lesion	110	62	110	91.570000	6.172714	100.000000	0.000000	66.659818	20.146418
2	109	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Destruct premalg les 2-14	262	46	71	8.030000	0.000000	10.000000	0.000000	6.042405	2.879763
3	110	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Destroy premal lesions 15/>	430	158	430	191.490000	0.000000	220.000000	0.000000	146.658651	24.311615
4	111	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Photochemotherapy with UV-B	121	31	121	86.185455	7.060642	95.000000	0.000000	68.319752	6.533262
5	112	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Office/outpatient visit new	103	103	103	115.940000	0.000000	125.000000	0.000000	80.572718	30.276042
6	113	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Office/outpatient visit est	577	180	577	77.990000	0.000000	80.008666	0.207972	57.328977	15.841657
7	114	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Initial hospital care	26	25	26	175.000000	0.000000	175.000000	0.000000	135.692308	21.538462
8	115	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Betamethasone acet&sod phosp	2088	133	522	5.547783	0.071203	7.500000	0.000000	4.231475	1.856469
9	116	1003001017	NICHOLS	LAWRENCE	M	M.D	M	I	5471 LA PALMA AVE	STE. 202	...	Triamcinolone acet inj NOS	3632	120	455	1.687673	0.065430	2.497247	0.165566	1.289675	0.785567