In [1]:

    
# Think about how your data science team might also apply these concepts. Let's go back to our running shoe website. Imagine that the team identified that there was a big increase in sales in January. There's a strong correlation between January and the number of people buying new shoes.
# The team discusses the questions and decides to create reports. The reports suggest that most of these customers are new customers buying expensive shoes. Because of these reports, the team feels confident that the cause of the new sales is that new customers have more money in January. Maybe they received gift cards or credit from other stores.
# A few months later, the team looks over the data again. They find that their promotions and discounts had no impact: roughly the same number of people bought the same number of shoes. Even worse, last year's new customers seem to have no interest in new running shoes.
# They go back and create more reports. The reports show that these were all new customers who bought one pair and then stopped visiting the site in the middle of the year. This suggests that they bought expensive shoes and then gave up running. The team guesses that the expensive shoes might have been bought as a motivation to keep running.

# create data
import pandas as pd

# Month abbreviations, one per row of the resulting DataFrame.
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# (column name, twelve monthly values) pairs, in the column order of the frame.
monthly_metrics = [
    ('2015 total sales quantity', [30, 18, 6, 9, 12, 22, 25, 28, 16, 10, 24, 27]),
    ('2016 total sales quantity', [26, 29, 18, 8, 6, 18, 16, 17, 23, 8, 28, 31]),
    ('2015 revenue', [2400, 900, 243, 480, 704, 1125, 960, 1030, 960, 725, 1820, 2040]),
    ('2016 revenue', [2483, 2040, 1105, 530, 310, 1300, 1080, 1255, 1670, 360, 1930, 2600]),
    ('2015 total customers', [28, 6, 3, 3, 3, 5, 8, 8, 6, 3, 6, 16]),
    ('2016 total customers', [17, 12, 5, 5, 2, 6, 13, 4, 5, 4, 6, 10]),
    ('2015 first time runner buyer ratio', [17, 8, 5, 6, 3, 1, 3, 5, 2, 6, 8, 3]),
    ('2016 first time runner buyer ratio', [19, 7, 3, 5, 2, 2, 6, 7, 1, 4, 6, 2]),
    ('2015 running shoes sales ratio', [67, 5, 8, 13, 15, 36, 27, 38, 16, 13, 4, 8]),
    ('2016 running shoes sales ratio', [63, 1, 5, 4, 16, 25, 34, 36, 23, 21, 8, 2]),
    ('2015 first time runner sales', [19, 2, 1, 0.3, 0.6, 0.8, 0.7, 0.9, 5, 4, 8, 7]),
    ('2016 first time runner sales', [17, 5, 1, 0.9, 0.8, 1, 1.5, 6, 3, 2, 8, 9]),
    ('new customer in 2015', [83, 62, 54, 68, 71, 55, 48, 61, 38, 42, 47, 61]),
    ('2015 new customer returning in 2016', [2, 8, 3, 10, 5, 2, 3, 7, 9, 6, 5, 3]),
]

# Build the frame column-wise. Putting the string month labels into the same
# 2-D list as the numbers and transposing would leave every column (and every
# ratio derived below) with dtype `object`; this way pandas infers int/float
# per column. `columns=` pins the column order explicitly.
df = pd.DataFrame(dict(monthly_metrics),
                  columns=[name for name, _ in monthly_metrics])
df['month'] = month_labels

# Derived per-month ratios for each year.
df['2015 average price per pair'] = df['2015 revenue'] / df['2015 total sales quantity']
df['2016 average price per pair'] = df['2016 revenue'] / df['2016 total sales quantity']
df['2015 average pair per customer'] = df['2015 total sales quantity'] / df['2015 total customers']
df['2016 average pair per customer'] = df['2016 total sales quantity'] / df['2016 total customers']


%matplotlib inline
sns.set_style("whitegrid")

f, ax = plt.subplots(4,2, figsize=(8,12))
ax[0][0].plot(df['2015 total sales quantity'], 'o-');
ax[0][0].plot(df['2016 total sales quantity'], 'o-');
ax[0][0].set_xlim(0,11);
ax[0][0].set_xticks([i for i in range(12)]);
ax[0][0].set_xticklabels(df['month']);
ax[0][0].set_title('Total sales quantity by Month (10,000)');
ax[0][0].legend([2015,2016])

ax[0][1].plot(df['2015 revenue'], 'o-');
ax[0][1].plot(df['2016 revenue'], 'o-');
ax[0][1].set_title('Total sales revenue by Month (10,000)');
ax[0][1].set_xlim(0,11);
ax[0][1].set_xticks([i for i in range(12)]);
ax[0][1].set_xticklabels(df['month']);

ax[1][0].plot(df['2015 average price per pair'], 'o-');
ax[1][0].plot(df['2016 average price per pair'], 'o-');
ax[1][0].set_xlim(0,11);
ax[1][0].set_xticks([i for i in range(12)]);
ax[1][0].set_xticklabels(df['month']);
ax[1][0].set_title('Average price per pair of shoes ($)')
f.tight_layout();

sns.barplot(y=df['new customer in 2015'], x=df['month'], ax=ax[1][1], color='cornflowerblue');
sns.barplot(y=df['2015 new customer returning in 2016'], x=df['month'], ax=ax[1][1], color='green');
ax[1][1].set_ylabel('Customer (%)')
ax[1][1].set_title('2015 New customer (%) and \n returning customer from 2015 in 2016')

ax[2][0].plot(df['2015 first time runner buyer ratio'], '-o');
ax[2][0].plot(df['2016 first time runner buyer ratio'], '-o');
ax[2][0].set_xlim(0,11)
ax[2][0].set_xticks([i for i in range(12)]);
ax[2][0].set_xticklabels(df['month']);
ax[2][0].set_title('First time runner buyer ratio (%)')

ax[2][1].plot(df['2015 average pair per customer'], 'o-')
ax[2][1].plot(df['2016 average pair per customer'], 'o-')
ax[2][1].set_xlim(0,11)
ax[2][1].set_xticks([i for i in range(12)]);
ax[2][1].set_xticklabels(df['month']);
ax[2][1].set_title('Average pair purchased per customer (pairs)')

ax[3][0].plot(df['2015 running shoes sales ratio'], '-o');
ax[3][0].plot(df['2016 running shoes sales ratio'], '-o');
ax[3][0].set_xlim(0,11)
ax[3][0].set_xticks([i for i in range(12)]);
ax[3][0].set_xticklabels(df['month']);
ax[3][0].set_title('Running shoes sales (%)')

ax[3][1].plot(df['2015 first time runner sales'], '-o');
ax[3][1].plot(df['2016 first time runner sales'], '-o');
ax[3][1].set_xlim(0,11)
ax[3][1].set_xticks([i for i in range(12)]);
ax[3][1].set_xticklabels(df['month']);
ax[3][1].set_title('First time runner gross sales (% of total sales)')

f.tight_layout()
f.savefig('svg_output/ch4_fig4.svg', format='svg')
Ch4 Figure4