In [ ]:
# Fetch an intermediate dataset of an Azure ML experiment and load it as `Income`.
from azureml import Workspace
# Workspace() is called without arguments; presumably the workspace id and
# authorization token come from the notebook environment -- TODO confirm.
ws = Workspace()
# Look the experiment up by its id string.
experiment = ws.experiments['6aa07b1af33b4b2bb32adc11b6e40bdc.f-id.299df9aa87674e0da3349774b2df8879']
# Select the 'Results dataset' output port of one experiment node, read as GenericCSV.
ds = experiment.get_intermediate_dataset(
node_id='e77d289d-9cde-4a3f-a13d-42425508c22b-314',
port_name='Results dataset',
data_type_id='GenericCSV'
)
# Materialize the dataset as a DataFrame (presumably pandas; the cells below use
# .dtypes, .columns and .boxplot on it).
Income = ds.to_dataframe()
In [ ]:
# Show each column's dtype; the plotting helpers in this file choose columns
# by whether their dtype is int64/int32/float64 or not.
Income.dtypes
In [ ]:
## Plot categorical variables as bar plots
def income_barplot(df):
    """Draw paired bar charts of every non-numeric column, split by income.

    For each column except the last one whose dtype is not int64, int32 or
    float64, a new 12x6 figure is created with two panels sharing the same
    y-axis limits: value counts for rows where ``df['income'] == ' <=50K'``
    on the left and for rows where ``df['income'] == ' >50K'`` on the right.

    Parameters
    ----------
    df : DataFrame with an ``'income'`` column whose values are the strings
        ``' <=50K'`` and ``' >50K'`` (note the leading space).

    Returns
    -------
    The string ``'Done'``.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    numeric_dtypes = [np.int64, np.int32, np.float64]

    # The last column is skipped; presumably it is the 'income' label itself
    # -- TODO confirm against the dataset's column order.
    cols = df.columns.tolist()[:-1]
    for col in cols:
        if df[col].dtype in numeric_dtypes:
            continue

        # .loc replaces DataFrame.ix, which was deprecated in pandas 0.20
        # and removed in pandas 1.0.
        low_counts = df.loc[df['income'] == ' <=50K', col].value_counts()
        high_counts = df.loc[df['income'] == ' >50K', col].value_counts()

        # Share one y-range across both panels so bar heights are comparable.
        # An income class with no rows yields an empty Series, for which
        # max() would raise; fall back to a unit range in that case.
        peaks = [counts.max() for counts in (low_counts, high_counts)
                 if len(counts) > 0]
        ylim = [0, max(peaks) if peaks else 1]

        fig = plt.figure(figsize=(12, 6))
        fig.clf()
        ax_low = fig.add_subplot(1, 2, 1)
        ax_high = fig.add_subplot(1, 2, 2)

        # Plotting an empty Series fails, so leave that panel blank but titled.
        if len(low_counts) > 0:
            low_counts.plot(kind='bar', ax=ax_low, ylim=ylim)
        ax_low.set_title('Values of ' + col + '\n for income <= 50K')

        if len(high_counts) > 0:
            high_counts.plot(kind='bar', ax=ax_high, ylim=ylim)
        ax_high.set_title('Values of ' + col + '\n for income > 50K')
    return 'Done'
In [ ]:
# Draw the per-income bar charts for every non-numeric column of Income.
income_barplot(Income)
In [ ]:
## Plot numeric variables as box plots
def income_boxplot(df):
    """Draw one box plot per numeric column, grouped by income.

    For each column except the last one whose dtype is int64, int32 or
    float64, a new 6x6 figure is created and the column is box-plotted with
    one box per distinct value of ``df['income']``.

    Parameters
    ----------
    df : DataFrame with an ``'income'`` column used as the grouping key.

    Returns
    -------
    The string ``'Done'``.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    numeric_dtypes = [np.int64, np.int32, np.float64]

    # The last column is skipped; presumably it is the 'income' label itself
    # -- TODO confirm against the dataset's column order.
    cols = df.columns.tolist()[:-1]
    for col in cols:
        if df[col].dtype not in numeric_dtypes:
            continue
        fig = plt.figure(figsize=(6, 6))
        fig.clf()
        ax = fig.gca()
        # Group by 'income' (no leading space): that is the column name
        # income_barplot reads from the same DataFrame; the previous
        # ' income' key does not match it and raises KeyError.
        df.boxplot(column=[col], ax=ax, by=['income'])
    return 'Done'
In [ ]:
# Draw the per-income box plots for every int64/int32/float64 column of Income.
income_boxplot(Income)