Ch16 Figure1


In [1]:
# Synthetic survey data backing a statement such as
# "data suggest that 65% of our customers don't run."
#
# The CSV on disk is the source of truth for the figure. The simulation below
# only runs when that file is missing, so re-running this cell never silently
# throws away freshly generated rows or changes the published numbers.
from pathlib import Path

CSV_PATH = Path('csv_output/ch16_fig1.csv')
N_RESPONDENTS = 5000

if CSV_PATH.exists():
    df = pd.read_csv(CSV_PATH)
else:
    # NOTE(review): `rd` is presumably the stdlib `random` module (inclusive
    # randint bounds) -- confirm against the import cell. No seed is set, so a
    # regenerated file will differ from any previously saved one.
    data = []
    for i in range(N_RESPONDENTS):
        # how many times the respondent runs in a week
        if rd.random() <= .65:
            # 65% of respondents do not run at all
            r_times = 0
        elif rd.random() <= .85:
            # 85% of the runners fall in the 1-3 runs bucket
            r_times = rd.randint(1, 3)
        else:
            # the remaining runners fall in the 4-5 runs bucket
            r_times = rd.randint(4, 5)

        data.append([i, r_times])

    df = pd.DataFrame(data, columns=['id', 'run-times'])
    # Persist so later runs (and the plotting cell) read the same sample.
    CSV_PATH.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(CSV_PATH, index=False)

df.head()


Out[1]:
id run-times
0 0 2
1 1 0
2 2 0
3 3 0
4 4 0

In [2]:
df = pd.read_csv('csv_output/ch16_fig1.csv')

%matplotlib inline
sns.set_style("white")
cm = sns.color_palette('Blues', 6)

f, ax = plt.subplots(1,1, figsize=(8,8))
dgb = df.groupby('run-times')
d2 = dgb.id.count().reset_index()
explode = (0.15, 0, 0, 0, 0, 0)
labels = ['runs %s times a week' %str(x) for x in d2['run-times']]

ax.pie(d2.id, explode=explode, labels=labels, colors=cm,
        autopct='%1.1f%%', shadow=False);
ax.set_aspect('equal')
ax.set_title('Proportion by how many times the respondent runs in a week');

f.savefig('svg_output/ch16_fig1.svg', format='svg')


Assume the generated data comes from a survey that the company ran on its customers; the second column holds the answers to the question 'How many times on average do you run per week?' Looking at the data, about 65% of the respondents don't run at all, while the remaining 35% or so run at least once per week.