In [1]:
# code written in python_3. (for py_2.7 users some changes may be required)

import pandas # load pandas dataframe lib
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import numpy as np

# find path to your Concessions.xlsx
# df = short for dataframe == excel worksheet
# zero indexing in python, so first worksheet = 0
df_sales = pandas.read_excel(open('C:/Users/craigrshenton/Desktop/Dropbox/excel_data_sci/ch01_complete/Concessions.xlsx','rb'), sheetname=0) 
df_sales = df_sales.iloc[0:, 0:4]
df_sales.head() # use .head() to just show top 4 results


Out[1]:
Item Category Price Profit
0 Beer Beverages 4.0 0.500000
1 Hamburger Hot Food 3.0 0.666667
2 Popcorn Hot Food 5.0 0.800000
3 Pizza Hot Food 2.0 0.250000
4 Bottled Water Beverages 3.0 0.833333

In [2]:
df_sales.dtypes # explore the dataframe


Out[2]:
Item         object
Category     object
Price       float64
Profit      float64
dtype: object

In [3]:
df_sales['Item'].head() # how to select a col


Out[3]:
0             Beer
1        Hamburger
2          Popcorn
3            Pizza
4    Bottled Water
Name: Item, dtype: object

In [4]:
df_sales['Price'].describe() # basic stats


Out[4]:
count    199.000000
mean       2.829146
std        0.932551
min        1.500000
25%        2.000000
50%        3.000000
75%        3.000000
max        5.000000
Name: Price, dtype: float64
1.2 Calculate Actual Profit

In [5]:
df_sales = df_sales.assign(Actual_Profit = df_sales['Price']*df_sales['Profit']) # adds new col
df_sales.head()


Out[5]:
Item Category Price Profit Actual_Profit
0 Beer Beverages 4.0 0.500000 2.0
1 Hamburger Hot Food 3.0 0.666667 2.0
2 Popcorn Hot Food 5.0 0.800000 4.0
3 Pizza Hot Food 2.0 0.250000 0.5
4 Bottled Water Beverages 3.0 0.833333 2.5
1.3 Load data from 'Calories' worksheet and plot

In [45]:
# find path to your Concessions.xlsx 
df_cals = pandas.read_excel(open('C:/Users/craigrshenton/Desktop/Dropbox/excel_data_sci/ch01_complete/Concessions.xlsx','rb'), sheetname=1) 
df_cals = df_cals.iloc[0:14, 0:2] # take data from 'Calories' worksheet
df_cals.head()


Out[45]:
Item Calories
0 Beer 200
1 Bottled Water 0
2 Chocolate Bar 255
3 Chocolate Dipped Cone 300
4 Gummy Bears 300

In [46]:
df_cals = df_cals.set_index('Item') # index df by items
# Items ranked by calories = .sort_values(by='Calories',ascending=True) 
# rot = axis rotation
ax = df_cals.sort_values(by='Calories',ascending=True).plot(kind='bar', title ="Calories",figsize=(15,5),legend=False, fontsize=10, alpha=0.75, rot=20,)
plt.xlabel("") # no x-axis lable
plt.show()


1.4 add calorie data to sales worksheet

In [8]:
df_sales = df_sales.assign(Calories=df_sales['Item'].map(df_cals['Calories'])) # map num calories from df_cals per item in df_sales (==Vlookup)
df_sales.head()


Out[8]:
Item Category Price Profit Actual_Profit Calories
0 Beer Beverages 4.0 0.500000 2.0 200
1 Hamburger Hot Food 3.0 0.666667 2.0 320
2 Popcorn Hot Food 5.0 0.800000 4.0 500
3 Pizza Hot Food 2.0 0.250000 0.5 480
4 Bottled Water Beverages 3.0 0.833333 2.5 0
1.5 pivot table: number of sales per item

In [9]:
pivot = pandas.pivot_table(df_sales, index=["Item"], values=["Price"], aggfunc=len) # len == 'count of price'
pivot.columns = ['Count'] # renames col
pivot.index.name = None # removes intex title which is not needed
pivot


Out[9]:
Count
Beer 20.0
Bottled Water 13.0
Chocolate Bar 13.0
Chocolate Dipped Cone 11.0
Gummy Bears 14.0
Hamburger 16.0
Hot Dog 15.0
Ice Cream Sandwich 10.0
Licorice Rope 13.0
Nachos 15.0
Pizza 17.0
Popcorn 16.0
Popsicle 13.0
Soda 13.0
1.6 pivot table: revenue per item / category

In [10]:
# revenue = price * number of sales
pivot = pandas.pivot_table(df_sales, index=["Item"], values=["Price"], columns=["Category"], aggfunc=np.sum, fill_value='')
pivot.index.name = None
pivot.columns = pivot.columns.get_level_values(1) # sets cols to product categories
pivot


Out[10]:
Category Beverages Candy Frozen Treats Hot Food
Beer 80
Bottled Water 39
Chocolate Bar 26
Chocolate Dipped Cone 33
Gummy Bears 28
Hamburger 48
Hot Dog 22.5
Ice Cream Sandwich 30
Licorice Rope 26
Nachos 45
Pizza 34
Popcorn 80
Popsicle 39
Soda 32.5

In [64]:
# set up decision variables
items = df_cals.index.tolist()
items


Out[64]:
['Beer',
 'Bottled Water',
 'Chocolate Bar',
 'Chocolate Dipped Cone',
 'Gummy Bears',
 'Hamburger',
 'Hot Dog',
 'Ice Cream Sandwich',
 'Licorice Rope',
 'Nachos',
 'Pizza',
 'Popcorn',
 'Popsicle',
 'Soda']

In [66]:
cost = dict(zip(df_cals.index, df_cals.Calories)) # calarific cost of each item
cost


Out[66]:
{'Beer': 200,
 'Bottled Water': 0,
 'Chocolate Bar': 255,
 'Chocolate Dipped Cone': 300,
 'Gummy Bears': 300,
 'Hamburger': 320,
 'Hot Dog': 265,
 'Ice Cream Sandwich': 240,
 'Licorice Rope': 280,
 'Nachos': 560,
 'Pizza': 480,
 'Popcorn': 500,
 'Popsicle': 150,
 'Soda': 120}

In [109]:
from pulp import *
# create the LinProg object, set up as a minimisation problem
prob = pulp.LpProblem('Diet', pulp.LpMinimize)

vars = LpVariable.dicts("Number of",items, lowBound = 0, cat='Integer')
# Obj Func
prob += lpSum([cost[c]*vars[c] for c in items])

prob += sum(vars[c] for c in items)

# add constraint representing demand for soldiers
prob += (lpSum([cost[c]*vars[c] for c in items]) == 2400)

print(prob)


Diet:
MINIMIZE
1*Number_of_Beer + 1*Number_of_Bottled_Water + 1*Number_of_Chocolate_Bar + 1*Number_of_Chocolate_Dipped_Cone + 1*Number_of_Gummy_Bears + 1*Number_of_Hamburger + 1*Number_of_Hot_Dog + 1*Number_of_Ice_Cream_Sandwich + 1*Number_of_Licorice_Rope + 1*Number_of_Nachos + 1*Number_of_Pizza + 1*Number_of_Popcorn + 1*Number_of_Popsicle + 1*Number_of_Soda + 0
SUBJECT TO
_C1: 200 Number_of_Beer + 255 Number_of_Chocolate_Bar
 + 300 Number_of_Chocolate_Dipped_Cone + 300 Number_of_Gummy_Bears
 + 320 Number_of_Hamburger + 265 Number_of_Hot_Dog
 + 240 Number_of_Ice_Cream_Sandwich + 280 Number_of_Licorice_Rope
 + 560 Number_of_Nachos + 480 Number_of_Pizza + 500 Number_of_Popcorn
 + 150 Number_of_Popsicle + 120 Number_of_Soda = 2400

VARIABLES
0 <= Number_of_Beer Integer
0 <= Number_of_Bottled_Water Integer
0 <= Number_of_Chocolate_Bar Integer
0 <= Number_of_Chocolate_Dipped_Cone Integer
0 <= Number_of_Gummy_Bears Integer
0 <= Number_of_Hamburger Integer
0 <= Number_of_Hot_Dog Integer
0 <= Number_of_Ice_Cream_Sandwich Integer
0 <= Number_of_Licorice_Rope Integer
0 <= Number_of_Nachos Integer
0 <= Number_of_Pizza Integer
0 <= Number_of_Popcorn Integer
0 <= Number_of_Popsicle Integer
0 <= Number_of_Soda Integer


In [110]:
prob.solve()

# Is the solution optimal?
print("Status:", LpStatus[prob.status])
# Each of the variables is printed with it's value
for v in prob.variables():
    print(v.name, "=", v.varValue)
# The optimised objective function value is printed to the screen    
print("Minimum Number of Items = ", value(prob.objective))


Status: Optimal
Number_of_Beer = 0.0
Number_of_Bottled_Water = 0.0
Number_of_Chocolate_Bar = 0.0
Number_of_Chocolate_Dipped_Cone = 0.0
Number_of_Gummy_Bears = 0.0
Number_of_Hamburger = 0.0
Number_of_Hot_Dog = 0.0
Number_of_Ice_Cream_Sandwich = 0.0
Number_of_Licorice_Rope = 0.0
Number_of_Nachos = 0.0
Number_of_Pizza = 5.0
Number_of_Popcorn = 0.0
Number_of_Popsicle = 0.0
Number_of_Soda = 0.0
Minimum Number of Items =  5.0

In [ ]: