In [13]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import pulp
sb.set()
pd.set_option('precision', 2)
macro_nutrients = [
'kcal per 100 g ready to eat',
'Protein_(g)',
'Lipid_Tot_(g)',
'FA_Sat_(g)',
'Carbohydrt_(g)']
micro_nutrients = [
'Fiber_TD_(g)',
'Vit_A_RAE',
'Vit_B6_(mg)',
'Vit_B12_(mcg)',
'Vit_C_(mg)',
'Vit_E_(mg)',
'Folate_DFE_(mcg)',
'Vit_K_(mcg)',
'Riboflavin_(mg)',
'Calcium_(mg)',
'Niacin_(mg)',
'Cholestrl_(mg)',
'FA_Mono_(g)',
'FA_Poly_(g)',
'Sugar_Tot_(g)',
'soluble fiber',
'total Flavonoid',
'Iron_(mg)',
'Alpha_Carot_(mcg)',
'Beta_Carot_(mcg)',
'Lycopene_(mcg)',
'Lut+Zea_(mcg)',
'Beta_Crypt_(mcg)',
'Sodium_(mg)',
'Selenium_(mcg)',
'Potassium_(mg)',
'Phytosterols(mg)Per 100 g']
nutrient_list = macro_nutrients + micro_nutrients
food_abbrev = ['almond',
'apples',
'apricot',
'asparag.',
'avocado',
'banana',
'barley',
'kidney bn.',
'snap bn.',
'blueberry',
'broccoli',
'cabbage',
'canola oil',
'cantaloupe',
'carrots',
'cauliflower',
'celery',
'cherry',
'collard',
'corn flr',
'corn starch',
'corn grits',
'swt corn',
'cucumber',
'garlic',
'grapefruit',
'grapes',
'hazelnuts',
'honeydew',
'kiwi',
'lemon',
'lettuce',
'macadam.',
'oats',
'olive oil',
'onions',
'orange',
'peach',
'peanut',
'pear',
'peas',
'grn pepper',
'pineapple',
'pistachio',
'potato',
'pumpkin',
'raspberry',
'rice',
'soy oil',
'spinach',
'squash',
'strawberry',
'sweet potato',
'tomato',
'walnut',
'watermelon',
'wheat',
'chickpeas',
'lentils',
'soybeans',
'tofu',
'buckwheat',
'sorghum',
'rye',
'spelt',
'saflwer oil',
'syrup cane',
'hfcs'
]
ANIMALS = ['beef',
'dairy',
'egg',
'chicken',
'pork',
'salmon',
'tuna']
food_abbrev += ANIMALS
UDSA_COLNAME = 'USDA groups'
MAD_COLNAME = 'MAD (kcal/cap/d)'
ENERGY_COLNAME = 'Energ_Kcal'
data_frame = pd.DataFrame.from_csv('plVals.csv')
index2abbrev = dict(zip(data_frame.index, food_abbrev))
data_frame = data_frame.rename(index=index2abbrev)
In [14]:
# find the corresponding index of the columns in the dataframe to the nutrient list.
# it is enough if the prefix of the column name matches exactly the nutrien name
col_names = list(data_frame.columns)
col_ind_order = np.zeros((1, len(nutrient_list)))
nutrient_list_updated = [''] * len(nutrient_list)
for i, nutr in enumerate(nutrient_list):
for j, col in enumerate(col_names):
l_min = min(len(nutr), len(col))
if nutr[:l_min] == col[:l_min]:
col_ind_order[0, i] = j
nutrient_list_updated[i] = col
continue
nutrient_list = nutrient_list_updated
data = data_frame[nutrient_list].copy()
usda_group = data_frame[UDSA_COLNAME]
# correct to eaten mass basis, so that D entries will be in "per g ready2eat"
data.loc[:, nutrient_list[0]] = data[[nutrient_list[0]]] * 0.01 # convert first column from kcal/100g to kcal/g
for nutr in nutrient_list[1:]:
# multiply each column the vector of kcal per 100 g ready2eat / kcal per 100 g
data.loc[:, nutr] *= data.loc[:, nutrient_list[0]] / data_frame.loc[:, ENERGY_COLNAME]
data = data.rename(columns={nutrient_list[0]:'kcal'})
nutrient_list[0] = 'kcal'
data[pd.isnull(data)] = 0
In [15]:
# calculate how many grams of a certain animal-based food product is consumed in MAD
MAD = pd.Series(data_frame.loc[:, MAD_COLNAME] / data.loc[:, 'kcal'], food_abbrev)
MAD[pd.isnull(MAD)] = 0
animals = ['beef', 'egg', 'chicken', 'pork', 'dairy']
MAD_to_replace = MAD[animals]
for animal in animals:
print '%.1f grams of %s are in MAD per day' % (MAD_to_replace[animal], animal)
# calculate the fraction of this animal portion from the total MAD diet
MAD_fraction = sum(data_frame.loc[animals, MAD_COLNAME]) / data_frame[[MAD_COLNAME]].sum(numeric_only=True)
# adjust the vector of required nutrients to more realistic values
b_MAD_to_replace = data.loc[animals, :].transpose().dot(MAD_to_replace)
b_const = b_MAD_to_replace.copy()
b_const['Sugar_Tot_(g)'] = 90 # set a more realistic sugar upper bound
b_const['Sodium_(mg)'] = 600 # set a more realistic sodium upper bound (note that the rightful by-mass portion of the total 2300 mg/d of MAD_a is 790 mg/d
b_const['Calcium_(mg)'] = 1100.0 * MAD_fraction # Calcium, see https://www.nlm.nih.gov/medlineplus/magazine/issues/winter11/articles/winter11pg12.html
b_const['Niacin_(mg)'] = 15.0 * MAD_fraction # niacin, see https://www.nlm.nih.gov/medlineplus/ency/article/002409.htm
b_const['Selenium_(mcg)'] = 55 * MAD_fraction # set a more realistic selenium lower bound, see https://ods.od.nih.gov/factsheets/Selenium-HealthProfessional/
b_const['Lipid_Tot_(g)'] = 0 # we don't care about fat specifically, just calories
b_const['Fiber_TD_(g)'] = 0 # doesn't exist in animals anyway
b_const['Vit_B12_(mcg)'] = 0 # impossible to get from plants
In [16]:
# create the LP using PuLP
const_sign = 'ULLULLLLLLLLLLLLULLULLLLLLLLULLL'
C = -np.ones((1, data.shape[1]))
lb = np.zeros((data.shape[1], 1))
ub = np.ones((data.shape[1], 1)) * 130.0
lp = pulp.LpProblem('replace_animal_diet', pulp.LpMinimize)
x = pulp.LpVariable.dicts('mass', food_abbrev)
for i, nutr in enumerate(nutrient_list):
row = [data[nutr][f] * x[f] for f in food_abbrev]
if const_sign[i] == 'U':
lp += pulp.lpSum(row) <= b_const[nutr], nutr
elif const_sign[i] == 'L':
lp += pulp.lpSum(row) >= b_const[nutr], nutr
elif const_sign[i] == 'S':
lp += pulp.lpSum(row) == b_const[nutr], nutr
# add constraints on all animal foods to be 0
for f in food_abbrev:
if f in ANIMALS:
lp += x[f] == 0, 'no_%s' % f
else:
lp += x[f] >= 0, 'lower_%s' % f
lp += x[f] <= 50, 'upper_%s' % f
In [17]:
# minimize total mass of new diet
lp.setObjective(pulp.lpSum(x))
pulp_solver = pulp.CPLEX(msg=0)
lp.solve(pulp_solver)
if lp.status != pulp.LpStatusOptimal:
raise pulp.solvers.PulpSolverError("cannot replace these animal foods")
SANE1 = pd.Series(map(pulp.value, map(x.get, food_abbrev)), food_abbrev)
SANE1.sum()
Out[17]:
In [18]:
# minimize change in diet compared to MAD
weights = (100.0 + 1.0 / (MAD + 0.00001))
weighted_sum = pulp.lpSum([x[f] * weights[f] for f in food_abbrev])
lp.setObjective(weighted_sum)
pulp_solver = pulp.CPLEX(msg=0)
lp.solve(pulp_solver)
if lp.status != pulp.LpStatusOptimal:
raise pulp.solvers.PulpSolverError("cannot replace these animal foods")
SANE2 = pd.Series(map(pulp.value, map(x.get, food_abbrev)), food_abbrev)
print SANE2.sum()
In [19]:
report_food = pd.concat([MAD, 1.0/(MAD + 0.01), SANE1, SANE2], axis=1, keys=['MAD', 'Weights', 'Opt. Min Mass', 'Opt. Weighted'])
print 'Report of diet alternatives to MAD'
print '-' * 50
print report_food.loc[(report_food.loc[:, report_food.columns[2:4]] > 0).any(axis=1), :]
b_SANE1 = data.transpose().dot(SANE1)
b_SANE2 = data.transpose().dot(SANE2)
report_nutr = pd.concat([b_MAD_to_replace, b_const, b_SANE1, b_SANE2], axis=1, keys=['to repl.', 'constraint', 'Opt. Min Mass', 'Opt. Weighted'])
report_nutr = report_nutr.rename(index={'total Flavonoid (mg/100g edible portion)':'Flavonoid_(mg)',
'soluble fiber (g per 100g eaten)':'Fiber_(g)',
'Phytosterols(mg)Per 100 g':'Phytosterols_(mg)'})
report_nutr.loc['Total mass', :] = [MAD_to_replace.sum(), np.nan, SANE1.sum(), SANE2.sum()]
print '\n\nReport of nutrient content in all diets'
print '-' * 100
print report_nutr
In [ ]:
In [ ]: