In [1]:
%matplotlib inline
import pandas as pd
from scipy import stats
import seaborn as sns
from stemgraphic import stem_graphic
In [2]:
!wget http://download.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/Medicare-Provider-Charge-Data/Downloads/Medicare_Provider_Util_Payment_PUF_CY2012_update.zip
In [3]:
# Extract the downloaded archive into the working directory.
# -o overwrites already-extracted files without prompting, so the cell can be
# re-run non-interactively.
!unzip -o Medicare_Provider_Util_Payment_PUF_CY2012_update.zip
In [4]:
# Load the tab-delimited extract into a DataFrame.
# - header=0 takes the first physical line as the column names.
# - skiprows=[1] drops the second physical line (presumably a non-data notice
#   row in this file -- confirm against the raw text).
# - HCPCS_CODE is pinned to str: the column is later compared with the string
#   '94640', and letting pandas infer the type chunk-by-chunk can leave some
#   values as integers, which that comparison would silently miss.
df = pd.read_table(
    "Medicare_Provider_Util_Payment_PUF_CY2012.txt",
    skiprows=[1],
    header=0,
    dtype={'HCPCS_CODE': str},
)
In [5]:
# Summary statistics for the loaded frame, shown as the cell output.
df.describe()
Out[5]:
In [6]:
# Keep only the rows whose HCPCS code is '94640'; the rest of the notebook
# refers to this subset as the nebulizer data.
nebulizer = df.loc[df['HCPCS_CODE'] == '94640']
In [7]:
# Pandas histogram of the average submitted charge for the nebulizer subset.
nebulizer.AVERAGE_SUBMITTED_CHRG_AMT.hist()
Out[7]:
In [8]:
# Rug plot: one tick per observed average submitted charge.
sns.rugplot(nebulizer.AVERAGE_SUBMITTED_CHRG_AMT)
Out[8]:
In [9]:
# Jittered strip plot of the average submitted charge along the x axis.
# The Series is passed as x= explicitly: older seaborn takes the first
# positional argument as x, newer releases take it as `data`, so the keyword
# form keeps the same horizontal plot on either API.
sns.stripplot(x=nebulizer['AVERAGE_SUBMITTED_CHRG_AMT'], jitter=True)
Out[9]:
In [10]:
# Seaborn histogram of the average submitted charge, KDE curve turned off.
sns.distplot(nebulizer.AVERAGE_SUBMITTED_CHRG_AMT, kde=False)
Out[10]:
In [11]:
# Same distribution plot with a rug of individual observations added.
sns.distplot(nebulizer.AVERAGE_SUBMITTED_CHRG_AMT, rug=True)
Out[11]:
In [12]:
# Distribution plot of the nebulizer charges with a fitted normal density
# overlaid for comparison against the KDE.
# Reuses the `nebulizer` subset built earlier rather than re-filtering the full
# frame, so this cell plots exactly the rows the preceding cells used and
# avoids a second scan of `df`.
ax = sns.distplot(nebulizer['AVERAGE_SUBMITTED_CHRG_AMT'], fit=stats.norm)
ax.set_title('kde vs normal')
Out[12]:
In [13]:
# Distribution plot of the nebulizer charges with a fitted double gamma
# density (scipy.stats.dgamma) overlaid for comparison against the KDE.
# Reuses the `nebulizer` subset built earlier rather than re-filtering the full
# frame, so this cell plots exactly the rows the preceding cells used and
# avoids a second scan of `df`.
ax = sns.distplot(nebulizer['AVERAGE_SUBMITTED_CHRG_AMT'], fit=stats.dgamma)
ax.set_title('kde vs double gamma')
Out[13]:
In [14]:
# Horizontal violin plot of average submitted charge per provider type,
# restricted to rows with a charge above zero. split=True draws the levels of
# MEDICARE_PARTICIPATION_INDICATOR as the two halves of each violin.
g = sns.factorplot(
    kind='violin',
    x='AVERAGE_SUBMITTED_CHRG_AMT',
    y='PROVIDER_TYPE',
    hue='MEDICARE_PARTICIPATION_INDICATOR',
    data=nebulizer[nebulizer['AVERAGE_SUBMITTED_CHRG_AMT'] > 0],
    orient="h",
    split=True,
    bw=.2,
    cut=0,
    palette="Set3",
    size=7,
    aspect=2,
)
In [15]:
# Narrow the nebulizer rows to four provider types for a side-by-side look.
sub_nebulizer = nebulizer[
    nebulizer['PROVIDER_TYPE'].isin([
        'Pediatric Medicine',
        'Pulmonary Disease',
        'Family Practice',
        'Emergency Medicine',
    ])
]
In [16]:
# Horizontal box plot of average submitted charge for each of the selected
# provider types.
g = sns.factorplot(
    kind='box',
    x='AVERAGE_SUBMITTED_CHRG_AMT',
    y='PROVIDER_TYPE',
    data=sub_nebulizer,
    orient="h",
    palette="Set3",
    size=6,
    aspect=3,
)
In [18]:
# Stem-and-leaf style graphic of the selected providers' average submitted
# charges, labelled with '$' as the unit.
stem_graphic(sub_nebulizer.AVERAGE_SUBMITTED_CHRG_AMT, unit='$')
Out[18]:
In [ ]: