1.0 Load data from http://media.wiley.com/product_ancillary/6X/11186614/DOWNLOAD/ch01.zip, Concessions.xlsx



In [1]:

    
# code written in python_3. (for py_2.7 users some changes may be required)

import pandas # load pandas dataframe lib
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import numpy as np

# find path to your Concessions.xlsx
# df = short for dataframe == excel worksheet
# zero indexing in python, so first worksheet = 0
df_sales = pandas.read_excel(open('C:/Users/craigrshenton/Desktop/Dropbox/excel_data_sci/ch01_complete/Concessions.xlsx','rb'), sheetname=0) 
df_sales = df_sales.iloc[0:, 0:4]
df_sales.head() # use .head() to just show top 4 results









    Out[1]:






  
    
      
      Item
      Category
      Price
      Profit
    
  
  
    
      0
      Beer
      Beverages
      4.0
      0.500000
    
    
      1
      Hamburger
      Hot Food
      3.0
      0.666667
    
    
      2
      Popcorn
      Hot Food
      5.0
      0.800000
    
    
      3
      Pizza
      Hot Food
      2.0
      0.250000
    
    
      4
      Bottled Water
      Beverages
      3.0
      0.833333



In [2]:

    
df_sales.dtypes # explore the dataframe









    Out[2]:





Item         object
Category     object
Price       float64
Profit      float64
dtype: object



In [3]:

    
df_sales['Item'].head() # how to select a col









    Out[3]:





0             Beer
1        Hamburger
2          Popcorn
3            Pizza
4    Bottled Water
Name: Item, dtype: object



In [4]:

    
df_sales['Price'].describe() # basic stats









    Out[4]:





count    199.000000
mean       2.829146
std        0.932551
min        1.500000
25%        2.000000
50%        3.000000
75%        3.000000
max        5.000000
Name: Price, dtype: float64

1.2 Calculate Actual Profit



In [5]:

    
df_sales = df_sales.assign(Actual_Profit = df_sales['Price']*df_sales['Profit']) # adds new col
df_sales.head()









    Out[5]:






  
    
      
      Item
      Category
      Price
      Profit
      Actual_Profit
    
  
  
    
      0
      Beer
      Beverages
      4.0
      0.500000
      2.0
    
    
      1
      Hamburger
      Hot Food
      3.0
      0.666667
      2.0
    
    
      2
      Popcorn
      Hot Food
      5.0
      0.800000
      4.0
    
    
      3
      Pizza
      Hot Food
      2.0
      0.250000
      0.5
    
    
      4
      Bottled Water
      Beverages
      3.0
      0.833333
      2.5

1.3 Load data from 'Calories' worksheet and plot



In [45]:

    
# find path to your Concessions.xlsx 
df_cals = pandas.read_excel(open('C:/Users/craigrshenton/Desktop/Dropbox/excel_data_sci/ch01_complete/Concessions.xlsx','rb'), sheetname=1) 
df_cals = df_cals.iloc[0:14, 0:2] # take data from 'Calories' worksheet
df_cals.head()









    Out[45]:






  
    
      
      Item
      Calories
    
  
  
    
      0
      Beer
      200
    
    
      1
      Bottled Water
      0
    
    
      2
      Chocolate Bar
      255
    
    
      3
      Chocolate Dipped Cone
      300
    
    
      4
      Gummy Bears
      300



In [46]:

    
df_cals = df_cals.set_index('Item') # index df by items
# Items ranked by calories = .sort_values(by='Calories',ascending=True) 
# rot = axis rotation
ax = df_cals.sort_values(by='Calories',ascending=True).plot(kind='bar', title ="Calories",figsize=(15,5),legend=False, fontsize=10, alpha=0.75, rot=20,)
plt.xlabel("") # no x-axis lable
plt.show()

1.4 add calorie data to sales worksheet



In [8]:

    
df_sales = df_sales.assign(Calories=df_sales['Item'].map(df_cals['Calories'])) # map num calories from df_cals per item in df_sales (==Vlookup)
df_sales.head()









    Out[8]:






  
    
      
      Item
      Category
      Price
      Profit
      Actual_Profit
      Calories
    
  
  
    
      0
      Beer
      Beverages
      4.0
      0.500000
      2.0
      200
    
    
      1
      Hamburger
      Hot Food
      3.0
      0.666667
      2.0
      320
    
    
      2
      Popcorn
      Hot Food
      5.0
      0.800000
      4.0
      500
    
    
      3
      Pizza
      Hot Food
      2.0
      0.250000
      0.5
      480
    
    
      4
      Bottled Water
      Beverages
      3.0
      0.833333
      2.5
      0

1.5 pivot table: number of sales per item



In [9]:

    
pivot = pandas.pivot_table(df_sales, index=["Item"], values=["Price"], aggfunc=len) # len == 'count of price'
pivot.columns = ['Count'] # renames col
pivot.index.name = None # removes intex title which is not needed
pivot









    Out[9]:






  
    
      
      Count
    
  
  
    
      Beer
      20.0
    
    
      Bottled Water
      13.0
    
    
      Chocolate Bar
      13.0
    
    
      Chocolate Dipped Cone
      11.0
    
    
      Gummy Bears
      14.0
    
    
      Hamburger
      16.0
    
    
      Hot Dog
      15.0
    
    
      Ice Cream Sandwich
      10.0
    
    
      Licorice Rope
      13.0
    
    
      Nachos
      15.0
    
    
      Pizza
      17.0
    
    
      Popcorn
      16.0
    
    
      Popsicle
      13.0
    
    
      Soda
      13.0

1.6 pivot table: revenue per item / category



In [10]:

    
# revenue = price * number of sales
pivot = pandas.pivot_table(df_sales, index=["Item"], values=["Price"], columns=["Category"], aggfunc=np.sum, fill_value='')
pivot.index.name = None
pivot.columns = pivot.columns.get_level_values(1) # sets cols to product categories
pivot









    Out[10]:






  
    
      Category
      Beverages
      Candy
      Frozen Treats
      Hot Food
    
  
  
    
      Beer
      80
      
      
      
    
    
      Bottled Water
      39
      
      
      
    
    
      Chocolate Bar
      
      26
      
      
    
    
      Chocolate Dipped Cone
      
      
      33
      
    
    
      Gummy Bears
      
      28
      
      
    
    
      Hamburger
      
      
      
      48
    
    
      Hot Dog
      
      
      
      22.5
    
    
      Ice Cream Sandwich
      
      
      30
      
    
    
      Licorice Rope
      
      26
      
      
    
    
      Nachos
      
      
      
      45
    
    
      Pizza
      
      
      
      34
    
    
      Popcorn
      
      
      
      80
    
    
      Popsicle
      
      
      39
      
    
    
      Soda
      32.5



In [64]:

    
# set up decision variables
items = df_cals.index.tolist()
items









    Out[64]:





['Beer',
 'Bottled Water',
 'Chocolate Bar',
 'Chocolate Dipped Cone',
 'Gummy Bears',
 'Hamburger',
 'Hot Dog',
 'Ice Cream Sandwich',
 'Licorice Rope',
 'Nachos',
 'Pizza',
 'Popcorn',
 'Popsicle',
 'Soda']



In [66]:

    
cost = dict(zip(df_cals.index, df_cals.Calories)) # calarific cost of each item
cost









    Out[66]:





{'Beer': 200,
 'Bottled Water': 0,
 'Chocolate Bar': 255,
 'Chocolate Dipped Cone': 300,
 'Gummy Bears': 300,
 'Hamburger': 320,
 'Hot Dog': 265,
 'Ice Cream Sandwich': 240,
 'Licorice Rope': 280,
 'Nachos': 560,
 'Pizza': 480,
 'Popcorn': 500,
 'Popsicle': 150,
 'Soda': 120}



In [109]:

    
from pulp import *
# create the LinProg object, set up as a minimisation problem
prob = pulp.LpProblem('Diet', pulp.LpMinimize)

vars = LpVariable.dicts("Number of",items, lowBound = 0, cat='Integer')
# Obj Func
prob += lpSum([cost[c]*vars[c] for c in items])

prob += sum(vars[c] for c in items)

# add constraint representing demand for soldiers
prob += (lpSum([cost[c]*vars[c] for c in items]) == 2400)

print(prob)









    



Diet:
MINIMIZE
1*Number_of_Beer + 1*Number_of_Bottled_Water + 1*Number_of_Chocolate_Bar + 1*Number_of_Chocolate_Dipped_Cone + 1*Number_of_Gummy_Bears + 1*Number_of_Hamburger + 1*Number_of_Hot_Dog + 1*Number_of_Ice_Cream_Sandwich + 1*Number_of_Licorice_Rope + 1*Number_of_Nachos + 1*Number_of_Pizza + 1*Number_of_Popcorn + 1*Number_of_Popsicle + 1*Number_of_Soda + 0
SUBJECT TO
_C1: 200 Number_of_Beer + 255 Number_of_Chocolate_Bar
 + 300 Number_of_Chocolate_Dipped_Cone + 300 Number_of_Gummy_Bears
 + 320 Number_of_Hamburger + 265 Number_of_Hot_Dog
 + 240 Number_of_Ice_Cream_Sandwich + 280 Number_of_Licorice_Rope
 + 560 Number_of_Nachos + 480 Number_of_Pizza + 500 Number_of_Popcorn
 + 150 Number_of_Popsicle + 120 Number_of_Soda = 2400

VARIABLES
0 <= Number_of_Beer Integer
0 <= Number_of_Bottled_Water Integer
0 <= Number_of_Chocolate_Bar Integer
0 <= Number_of_Chocolate_Dipped_Cone Integer
0 <= Number_of_Gummy_Bears Integer
0 <= Number_of_Hamburger Integer
0 <= Number_of_Hot_Dog Integer
0 <= Number_of_Ice_Cream_Sandwich Integer
0 <= Number_of_Licorice_Rope Integer
0 <= Number_of_Nachos Integer
0 <= Number_of_Pizza Integer
0 <= Number_of_Popcorn Integer
0 <= Number_of_Popsicle Integer
0 <= Number_of_Soda Integer



In [110]:

    
prob.solve()

# Is the solution optimal?
print("Status:", LpStatus[prob.status])
# Each of the variables is printed with it's value
for v in prob.variables():
    print(v.name, "=", v.varValue)
# The optimised objective function value is printed to the screen    
print("Minimum Number of Items = ", value(prob.objective))









    



Status: Optimal
Number_of_Beer = 0.0
Number_of_Bottled_Water = 0.0
Number_of_Chocolate_Bar = 0.0
Number_of_Chocolate_Dipped_Cone = 0.0
Number_of_Gummy_Bears = 0.0
Number_of_Hamburger = 0.0
Number_of_Hot_Dog = 0.0
Number_of_Ice_Cream_Sandwich = 0.0
Number_of_Licorice_Rope = 0.0
Number_of_Nachos = 0.0
Number_of_Pizza = 5.0
Number_of_Popcorn = 0.0
Number_of_Popsicle = 0.0
Number_of_Soda = 0.0
Minimum Number of Items =  5.0



In [ ]:

	Item	Category	Price	Profit
0	Beer	Beverages	4.0	0.500000
1	Hamburger	Hot Food	3.0	0.666667
2	Popcorn	Hot Food	5.0	0.800000
3	Pizza	Hot Food	2.0	0.250000
4	Bottled Water	Beverages	3.0	0.833333

	Item	Calories
0	Beer	200
1	Bottled Water	0
2	Chocolate Bar	255
3	Chocolate Dipped Cone	300
4	Gummy Bears	300

	Count
Beer	20.0
Bottled Water	13.0
Chocolate Bar	13.0
Chocolate Dipped Cone	11.0
Gummy Bears	14.0
Hamburger	16.0
Hot Dog	15.0
Ice Cream Sandwich	10.0
Licorice Rope	13.0
Nachos	15.0
Pizza	17.0
Popcorn	16.0
Popsicle	13.0
Soda	13.0