In [1]:
# code written in python_3. (for py_2.7 users some changes may be required)
import pandas # load pandas dataframe lib
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import numpy as np
# find path to your Concessions.xlsx
# df = short for dataframe == excel worksheet
# zero indexing in python, so first worksheet = 0
df_sales = pandas.read_excel(open('C:/Users/craigrshenton/Desktop/Dropbox/excel_data_sci/ch01_complete/Concessions.xlsx','rb'), sheetname=0)
df_sales = df_sales.iloc[0:, 0:4]
df_sales.head() # use .head() to just show top 4 results
Out[1]:
In [2]:
df_sales.dtypes # explore the dataframe
Out[2]:
In [3]:
df_sales['Item'].head() # how to select a col
Out[3]:
In [4]:
df_sales['Price'].describe() # basic stats
Out[4]:
In [5]:
df_sales = df_sales.assign(Actual_Profit = df_sales['Price']*df_sales['Profit']) # adds new col
df_sales.head()
Out[5]:
In [45]:
# find path to your Concessions.xlsx
df_cals = pandas.read_excel(open('C:/Users/craigrshenton/Desktop/Dropbox/excel_data_sci/ch01_complete/Concessions.xlsx','rb'), sheetname=1)
df_cals = df_cals.iloc[0:14, 0:2] # take data from 'Calories' worksheet
df_cals.head()
Out[45]:
In [46]:
df_cals = df_cals.set_index('Item') # index df by items
# Items ranked by calories = .sort_values(by='Calories',ascending=True)
# rot = axis rotation
ax = df_cals.sort_values(by='Calories',ascending=True).plot(kind='bar', title ="Calories",figsize=(15,5),legend=False, fontsize=10, alpha=0.75, rot=20,)
plt.xlabel("") # no x-axis lable
plt.show()
In [8]:
df_sales = df_sales.assign(Calories=df_sales['Item'].map(df_cals['Calories'])) # map num calories from df_cals per item in df_sales (==Vlookup)
df_sales.head()
Out[8]:
In [9]:
pivot = pandas.pivot_table(df_sales, index=["Item"], values=["Price"], aggfunc=len) # len == 'count of price'
pivot.columns = ['Count'] # renames col
pivot.index.name = None # removes intex title which is not needed
pivot
Out[9]:
In [10]:
# revenue = price * number of sales
pivot = pandas.pivot_table(df_sales, index=["Item"], values=["Price"], columns=["Category"], aggfunc=np.sum, fill_value='')
pivot.index.name = None
pivot.columns = pivot.columns.get_level_values(1) # sets cols to product categories
pivot
Out[10]:
In [64]:
# set up decision variables
items = df_cals.index.tolist()
items
Out[64]:
In [66]:
cost = dict(zip(df_cals.index, df_cals.Calories)) # calarific cost of each item
cost
Out[66]:
In [109]:
from pulp import *
# create the LinProg object, set up as a minimisation problem
prob = pulp.LpProblem('Diet', pulp.LpMinimize)
vars = LpVariable.dicts("Number of",items, lowBound = 0, cat='Integer')
# Obj Func
prob += lpSum([cost[c]*vars[c] for c in items])
prob += sum(vars[c] for c in items)
# add constraint representing demand for soldiers
prob += (lpSum([cost[c]*vars[c] for c in items]) == 2400)
print(prob)
In [110]:
prob.solve()
# Is the solution optimal?
print("Status:", LpStatus[prob.status])
# Each of the variables is printed with it's value
for v in prob.variables():
print(v.name, "=", v.varValue)
# The optimised objective function value is printed to the screen
print("Minimum Number of Items = ", value(prob.objective))
In [ ]: