In [ ]:
import pandas as pd
from azureml import Workspace

## Open the Azure ML workspace. Workspace() is called with no arguments, so
## the connection settings must come from outside this notebook
## (presumably environment/config -- TODO confirm).
ws = Workspace()
## Look the dataset up by name and materialise it via to_dataframe().
ds = ws.datasets["Automobile price data (Raw)"]
frame = ds.to_dataframe()

In [ ]:
## Display the frame for a first visual inspection.
frame

In [ ]:
## Columns to coerce to a numeric dtype.
cols = ["price", "bore","stroke", "horsepower", "peak-rpm"]
## DataFrame.convert_objects() was deprecated and later removed from pandas.
## pd.to_numeric with errors="coerce" is the supported replacement: values
## that cannot be parsed as numbers become NaN.
frame[cols] = frame[cols].apply(pd.to_numeric, errors="coerce")
## Drop every row that has a missing value in any column (in place, so the
## object bound to `frame` stays the same).
frame.dropna(axis = 0, inplace = True)
## Show the resulting dtypes to confirm the conversion.
frame.dtypes

In [ ]:
def describe(df, col):
    """Return summary statistics for one column, labelling the 50% row "median".

    Parameters
    ----------
    df : object supporting ``df[col]`` (e.g. a pandas DataFrame)
        Source of the data.
    col : column label
        Column to summarise.

    Returns
    -------
    The Series produced by ``df[col].describe()`` with the ``"50%"`` index
    entry renamed to ``"median"``. When the summary has no ``"50%"`` entry
    (for example a non-numeric column) it is returned with its labels
    unchanged.
    """
    ## Compute the summary stats
    desc = df[col].describe()

    ## Rename by label, not by position: a positional rename would fail or
    ## relabel the wrong row whenever the summary is not the 8-row numeric one.
    return desc.rename(index={"50%": "median"})

In [ ]:
## Summary statistics for the "price" column.
describe(frame, "price")

In [ ]:
def plot_stats(df, col):
    """Draw a horizontal box plot above a 30-bin histogram of one column.

    Parameters
    ----------
    df : pandas DataFrame
        Data to plot.
    col : column label
        Column to plot; also used as the histogram's x-axis label.

    Returns
    -------
    list
        A one-element list containing ``col``.

    Notes
    -----
    The box plot is drawn from ``df.dropna()`` (rows with a missing value in
    any column removed); the histogram is drawn from the non-missing values
    of ``col`` only.
    """
    import matplotlib.pyplot as plt
    ## Setup for plotting two charts one over the other
    fig, axes = plt.subplots(2, 1, figsize = (12,8))

    ## First a box plot
    df.dropna().boxplot(col, ax = axes[0], vert=False,
                        return_type="dict")
    ## Plot the histogram. Series.as_matrix() no longer exists in pandas;
    ## .values gives the same ndarray. Missing values are removed first so
    ## the histogram never receives NaNs.
    values = df[col].dropna().values
    axes[1].hist(values, bins = 30, alpha = 0.7)
    ## Label the histogram axes explicitly rather than relying on pyplot's
    ## notion of the "current" axes.
    axes[1].set_ylabel("Number of Cars")
    axes[1].set_xlabel(col)
    return [col]

In [ ]:
## Box plot and histogram of the "price" column.
plot_stats(frame, "price")

In [ ]:
## Summary statistics for the "horsepower" column.
describe(frame, "horsepower")

In [ ]:
## Box plot and histogram of the "horsepower" column.
plot_stats(frame, "horsepower")