In [13]:
%matplotlib inline
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import pulp
sb.set()

pd.set_option('precision', 2)

macro_nutrients = [
              'kcal per 100 g ready to eat',
              'Protein_(g)',
              'Lipid_Tot_(g)',
              'FA_Sat_(g)',
              'Carbohydrt_(g)']

micro_nutrients = [
              'Fiber_TD_(g)',
              'Vit_A_RAE',
              'Vit_B6_(mg)',
              'Vit_B12_(mcg)',
              'Vit_C_(mg)',
              'Vit_E_(mg)',
              'Folate_DFE_(mcg)',
              'Vit_K_(mcg)',
              'Riboflavin_(mg)',
              'Calcium_(mg)',
              'Niacin_(mg)',
              'Cholestrl_(mg)',
              'FA_Mono_(g)',
              'FA_Poly_(g)',
              'Sugar_Tot_(g)',
              'soluble fiber',
              'total Flavonoid',
              'Iron_(mg)',
              'Alpha_Carot_(mcg)',
              'Beta_Carot_(mcg)',
              'Lycopene_(mcg)',
              'Lut+Zea_(mcg)',
              'Beta_Crypt_(mcg)',
              'Sodium_(mg)',
              'Selenium_(mcg)',
              'Potassium_(mg)',
              'Phytosterols(mg)Per 100 g']

nutrient_list = macro_nutrients + micro_nutrients

food_abbrev = ['almond',
                'apples',
                'apricot',
                'asparag.',
                'avocado',
                'banana',
                'barley',
                'kidney bn.',
                'snap bn.',
                'blueberry',
                'broccoli',
                'cabbage',
                'canola oil',
                'cantaloupe',
                'carrots',
                'cauliflower',
                'celery',
                'cherry',
                'collard',
                'corn flr',
                'corn starch',
                'corn grits',
                'swt corn',
                'cucumber',
                'garlic',
                'grapefruit',
                'grapes',
                'hazelnuts',
                'honeydew',
                'kiwi',
                'lemon',
                'lettuce',
                'macadam.',       
                'oats',            
                'olive oil',       
                'onions',          
                'orange',          
                'peach',           
                'peanut',         
                'pear',            
                'peas',            
                'grn pepper',     
                'pineapple',       
                'pistachio',       
                'potato',          
                'pumpkin',         
                'raspberry',     
                'rice',            
                'soy oil',         
                'spinach',         
                'squash',          
                'strawberry',    
                'sweet potato',    
                'tomato',        
                'walnut',         
                'watermelon',      
                'wheat',           
                'chickpeas',       
                'lentils',         
                'soybeans',        
                'tofu',            
                'buckwheat',   
                'sorghum',
                'rye',             
                'spelt',           
                'saflwer oil',
                'syrup cane',
                'hfcs'
                ]

ANIMALS =      ['beef',
                'dairy',
                'egg',
                'chicken',
                'pork',
                'salmon',
                'tuna']

food_abbrev += ANIMALS

UDSA_COLNAME = 'USDA groups'
MAD_COLNAME = 'MAD (kcal/cap/d)'
ENERGY_COLNAME = 'Energ_Kcal'
data_frame = pd.DataFrame.from_csv('plVals.csv')
index2abbrev = dict(zip(data_frame.index, food_abbrev))
data_frame = data_frame.rename(index=index2abbrev)

In [14]:
# find the corresponding index of the columns in the dataframe to the nutrient list.
# it is enough if the prefix of the column name matches exactly the nutrien name
col_names = list(data_frame.columns)
col_ind_order = np.zeros((1, len(nutrient_list)))
nutrient_list_updated = [''] * len(nutrient_list)
for i, nutr in enumerate(nutrient_list):
    for j, col in enumerate(col_names):
        l_min = min(len(nutr), len(col))
        if nutr[:l_min] == col[:l_min]:
            col_ind_order[0, i] = j
            nutrient_list_updated[i] = col
            continue
nutrient_list = nutrient_list_updated
data = data_frame[nutrient_list].copy()
usda_group = data_frame[UDSA_COLNAME]

# correct to eaten mass basis, so that D entries will be in "per g ready2eat"
data.loc[:, nutrient_list[0]] = data[[nutrient_list[0]]] * 0.01 # convert first column from kcal/100g to kcal/g
for nutr in nutrient_list[1:]:
    # multiply each column the vector of kcal per 100 g ready2eat / kcal per 100 g
    data.loc[:, nutr] *= data.loc[:, nutrient_list[0]] / data_frame.loc[:, ENERGY_COLNAME]
data = data.rename(columns={nutrient_list[0]:'kcal'})
nutrient_list[0] = 'kcal'
data[pd.isnull(data)] = 0

In [15]:
# calculate how many grams of a certain animal-based food product is consumed in MAD
MAD = pd.Series(data_frame.loc[:, MAD_COLNAME] / data.loc[:, 'kcal'], food_abbrev)
MAD[pd.isnull(MAD)] = 0
animals = ['beef', 'egg', 'chicken', 'pork', 'dairy']
MAD_to_replace = MAD[animals]
for animal in animals:
    print '%.1f grams of %s are in MAD per day' % (MAD_to_replace[animal], animal)

# calculate the fraction of this animal portion from the total MAD diet
MAD_fraction = sum(data_frame.loc[animals, MAD_COLNAME]) / data_frame[[MAD_COLNAME]].sum(numeric_only=True)

# adjust the vector of required nutrients to more realistic values
b_MAD_to_replace = data.loc[animals, :].transpose().dot(MAD_to_replace)
b_const = b_MAD_to_replace.copy()
b_const['Sugar_Tot_(g)'] = 90                    # set a more realistic sugar  upper bound
b_const['Sodium_(mg)'] = 600                     # set a more realistic sodium upper bound (note that the rightful by-mass portion of the total 2300 mg/d of MAD_a is 790 mg/d 
b_const['Calcium_(mg)'] = 1100.0 * MAD_fraction  # Calcium, see https://www.nlm.nih.gov/medlineplus/magazine/issues/winter11/articles/winter11pg12.html
b_const['Niacin_(mg)'] = 15.0 * MAD_fraction     # niacin,  see https://www.nlm.nih.gov/medlineplus/ency/article/002409.htm
b_const['Selenium_(mcg)'] = 55 * MAD_fraction    # set a more realistic selenium lower bound, see https://ods.od.nih.gov/factsheets/Selenium-HealthProfessional/

b_const['Lipid_Tot_(g)'] = 0                     # we don't care about fat specifically, just calories
b_const['Fiber_TD_(g)'] = 0                      # doesn't exist in animals anyway
b_const['Vit_B12_(mcg)'] = 0                     # impossible to get from plants


65.5 grams of beef are in MAD per day
24.3 grams of egg are in MAD per day
74.3 grams of chicken are in MAD per day
29.8 grams of pork are in MAD per day
219.5 grams of dairy are in MAD per day

In [16]:
# create the LP using PuLP
const_sign = 'ULLULLLLLLLLLLLLULLULLLLLLLLULLL'
C  = -np.ones((1, data.shape[1]))
lb = np.zeros((data.shape[1], 1))
ub = np.ones((data.shape[1], 1)) * 130.0

lp = pulp.LpProblem('replace_animal_diet', pulp.LpMinimize)
x = pulp.LpVariable.dicts('mass', food_abbrev)

for i, nutr in enumerate(nutrient_list):
    row = [data[nutr][f] * x[f] for f in food_abbrev]
    if const_sign[i] == 'U':
        lp += pulp.lpSum(row) <= b_const[nutr], nutr
    elif const_sign[i] == 'L':
        lp += pulp.lpSum(row) >= b_const[nutr], nutr
    elif const_sign[i] == 'S':
        lp += pulp.lpSum(row) == b_const[nutr], nutr

# add constraints on all animal foods to be 0
for f in food_abbrev:
    if f in ANIMALS:
        lp += x[f] == 0, 'no_%s' % f
    else:
        lp += x[f] >= 0, 'lower_%s' % f
        lp += x[f] <= 50, 'upper_%s' % f

In [17]:
# minimize total mass of new diet

lp.setObjective(pulp.lpSum(x))
pulp_solver = pulp.CPLEX(msg=0)
lp.solve(pulp_solver)
if lp.status != pulp.LpStatusOptimal:
    raise pulp.solvers.PulpSolverError("cannot replace these animal foods")
SANE1 = pd.Series(map(pulp.value, map(x.get, food_abbrev)), food_abbrev)
SANE1.sum()


Out[17]:
447.91622145288864

In [18]:
# minimize change in diet compared to MAD
weights = (100.0 + 1.0 / (MAD + 0.00001))
weighted_sum = pulp.lpSum([x[f] * weights[f] for f in food_abbrev])
lp.setObjective(weighted_sum)
pulp_solver = pulp.CPLEX(msg=0)
lp.solve(pulp_solver)
if lp.status != pulp.LpStatusOptimal:
    raise pulp.solvers.PulpSolverError("cannot replace these animal foods")
SANE2 = pd.Series(map(pulp.value, map(x.get, food_abbrev)), food_abbrev)
print SANE2.sum()


676.058242286

In [19]:
report_food = pd.concat([MAD, 1.0/(MAD + 0.01), SANE1, SANE2], axis=1, keys=['MAD', 'Weights', 'Opt. Min Mass', 'Opt. Weighted'])
print 'Report of diet alternatives to MAD'
print '-' * 50
print report_food.loc[(report_food.loc[:, report_food.columns[2:4]] > 0).any(axis=1), :]

b_SANE1 = data.transpose().dot(SANE1)
b_SANE2 = data.transpose().dot(SANE2)
report_nutr = pd.concat([b_MAD_to_replace, b_const, b_SANE1, b_SANE2], axis=1, keys=['to repl.', 'constraint', 'Opt. Min Mass', 'Opt. Weighted'])

report_nutr = report_nutr.rename(index={'total Flavonoid (mg/100g edible portion)':'Flavonoid_(mg)',
                                        'soluble fiber (g per 100g eaten)':'Fiber_(g)',
                                        'Phytosterols(mg)Per 100 g':'Phytosterols_(mg)'})
report_nutr.loc['Total mass', :] = [MAD_to_replace.sum(), np.nan, SANE1.sum(), SANE2.sum()]
print '\n\nReport of nutrient content in all diets'
print '-' * 100
print report_nutr


Report of diet alternatives to MAD
--------------------------------------------------
              MAD  Weights  Opt. Min Mass  Opt. Weighted
almond        1.0      1.0           20.5           21.7
asparag.      0.6      1.6           34.7           50.0
kidney bn.    7.6      0.1           50.0           50.0
snap bn.      2.5      0.4            0.0           44.2
broccoli      4.4      0.2            0.0           50.0
cauliflower   0.7      1.4            0.0           50.0
collard       0.1      9.6           30.7           50.0
garlic        1.1      0.9           38.8           34.3
grapes       18.6      0.1           21.4            3.9
lettuce      11.0      0.1            0.0           50.0
peanut        7.1      0.1           50.0           50.0
peas          0.4      2.3            1.9           50.0
spinach       1.6      0.6           50.0           50.0
squash        2.5      0.4            0.0           50.0
lentils       0.0    100.0           50.0            0.0
soybeans      0.0    100.0           50.0           22.0
tofu          0.0    100.0           50.0           50.0


Report of nutrient content in all diets
----------------------------------------------------------------------------------------------------
                   to repl.  constraint  Opt. Min Mass  Opt. Weighted
kcal                  766.3       766.3          766.3          766.3
Protein_(g)            46.0        46.0           46.0           46.0
Lipid_Tot_(g)          55.0         0.0           43.6           43.0
FA_Sat_(g)             21.9        21.9            5.5            5.4
Carbohydrt_(g)         19.9        19.9           62.2           66.1
Fiber_TD_(g)            0.0         0.0           23.9           25.7
Vit_A_RAE_(mcg)_      351.4       351.4          351.4          644.0
Vit_B6_(mg)             0.8         0.8            1.1            1.5
Vit_B12_(mcg)           4.5         0.0            0.0            0.0
Vit_C_(mg)              8.4         8.4           43.5          153.9
Vit_E_(mg)              0.3         0.3           11.7           13.4
Folate_DFE_(mcg)       60.6        60.6          488.7          562.4
Vit_K_(mcg)             0.1         0.1          401.5          629.5
Riboflavin_(mg)         1.1         1.1            1.1            1.1
Calcium_(mg)          535.9       334.1          708.8          812.6
Niacin_(mg)             8.8         4.6            9.2           11.3
Cholestrl_(mg)        287.1       287.1            0.0            0.0
FA_Mono_(g)            20.4        20.4           20.4           20.4
FA_Poly_(g)             5.4         5.4           15.1           14.4
Sugar_Tot_(g)          19.8        90.0            5.1           12.6
Fiber_(g)               0.0         0.0            5.6            7.4
Flavonoid_(mg)          0.0         0.0           31.8           39.8
Iron_(mg)               3.0         3.0           10.8           11.9
Alpha_Carot_(mcg)       0.0         0.0            8.0           65.1
Beta_Carot_(mcg)        0.0         0.0         3909.9         7390.6
Lycopene_(mcg)          0.0         0.0            0.0            0.0
Lut+Zea_(mcg)         122.5       122.5         7738.9        12774.7
Beta_Crypt_(mcg)        2.2         2.2            8.8           14.5
Sodium_(mg)           344.9       600.0           74.4          127.4
Selenium_(mcg)         45.0        16.7           16.7           16.7
Potassium_(mg)       1062.7      1062.7         1881.8         2374.7
Phytosterols_(mg)       0.0         0.0           63.7          161.1
Total mass            413.4         NaN          447.9          676.1

In [ ]:


In [ ]: