In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from bokeh.layouts import row, widgetbox
from bokeh.models import CustomJS, Slider, HoverTool, BoxAnnotation
from bokeh.models.widgets import Slider, Select, TextInput
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from collections import OrderedDict
#init_notebook_mode()
In [19]:
dffile = "./Data/sr28abxl/ABBREV_FoodDatabase.xlsx"
dvfile = "./Playdata.xlsx"
#df = pd.read_excel(file, sheetname = 'Sheet1', header = 0, index_col = 0, convert_float = True)
#dv = pd.read_excel(file, sheetname = 'Sheet3', header = 0, index_col = 0, convert_float = True)
df = pd.read_excel(dffile, sheetname = 'ABBREV', header = 0, convert_float = True)
dv = pd.read_excel(dvfile, sheetname = 'Sheet3', header = 0, index_col = 0, convert_float = True)
df.columns = df.dtypes.index.str.replace("(", "").str.replace(")", "").str.replace("µ", "p")
Searchstring = "SHAKE|BEVERAGES|LIQUOR|FORMULA|BABY|SALTED|UNSWTND|PDR|FREEZE|GENERAL MILLS|FRUIT-FLAV|CEREALS|SNACKFO|OIL,|LARD|CANDIES|YEAST|SNACKS|BACKFAT|SHORTENING|COOKIES|BASIL,DRIED|SYRUP"
df = df[df.Shrt_Desc.str.contains(Searchstring) == False]
#dffout = dff[dff.Shrt_Desc.str.contains(Searchstring) == True]
#foodgroups = ['Bread','Candy', 'Dessert','Dressing','Fruit','Meat','Pasta','Pizza','Spread','Vegetable','fruit'}
# This file can be used to calculate nutrient values for food portions from the
#values provided per 100 g of food. The following formula is used to calculate the nutrient
#content per household measure:
#N = (V*W)/100
#where:
#N = nutrient value per household measure,
#V = nutrient value per 100 g (Nutr_Val in the Nutrient Data file), and
#W = g weight of portion (Gm_Wgt in the Weight file).
df['protein_rda'] = df['Protein_g']/dv['DV']['Protein'] * (df['GmWt_1']/100)
df['Calories_rda'] = df['Energ_Kcal']/dv['DV']['Caloriesm'] * (df['GmWt_1']/100)
df['Calories'] = df['Energ_Kcal'] * (df['GmWt_1']/100)
#df['foodtext'] = df.Shrt_Desc
df['Totfat_rda'] = df['Lipid_Tot_g']/dv['DV']['Total Fat'] * (df['GmWt_1']/100)
df['Satfat_rda'] = df['FA_Sat_g']/dv['DV']['Saturated Fat'] * (df['GmWt_1']/100)
df['Colesterol_rda'] = df['Cholestrl_mg']/dv['DV']['Cholesterol'] * (df['GmWt_1']/100)
df['Potassium_rda'] = df['Potassium_mg']/dv['DV']['Potassium'] * (df['GmWt_1']/100)
df['Sodium_rda'] = df['Sodium_mg']/dv['DV']['Sodium'] * (df['GmWt_1']/100)
#df['TotCarbs_rda'] = df['Lipid_Tot_g']/dv['DV']['Total Carbohydrate'] * (df['GmWt_1']/100)
df['Dietfiber_rda'] = df['Fiber_TD_g']/dv['DV']['Dietary Fiber'] * (df['GmWt_1']/100)
df['VitaminA_rda'] = df['Vit_A_IU']/dv['DV']['Vitamin A'] * (df['GmWt_1']/100)
df['VitaminC_rda'] = df['Vit_C_mg']/dv['DV']['Vitamin C'] * (df['GmWt_1']/100)
df['Calcium_rda'] = df['Calcium_mg']/dv['DV']['Calcium'] * (df['GmWt_1']/100)
df['Iron_rda'] = df['Iron_mg']/dv['DV']['Iron'] * (df['GmWt_1']/100)
df['VitaminD_rda'] = df['Vit_D_pg']/dv['DV']['Vitamin D'] * (df['GmWt_1']/100)
df['VitaminE_rda'] = df['Vit_E_mg']/dv['DV']['Vitamin E'] * (df['GmWt_1']/100)
df['VitaminK_rda'] = df['Vit_K_pg']/dv['DV']['Vitamin K'] * (df['GmWt_1']/100)
df['Thiamin_rda'] = df['Thiamin_mg']/dv['DV']['Thiamin'] * (df['GmWt_1']/100)
df['Riboflavin_rda'] = df['Riboflavin_mg']/dv['DV']['Riboflavin'] * (df['GmWt_1']/100)
df['Niacin_rda'] = df['Niacin_mg']/dv['DV']['Niacin'] * (df['GmWt_1']/100)
df['VitaminB6_rda'] = df['Vit_B6_mg']/dv['DV']['Vitamin B6'] * (df['GmWt_1']/100)
df['Folate_rda'] = df['Folate_Tot_pg']/dv['DV']['Folate'] * (df['GmWt_1']/100)
df['VitaminB12_rda'] = df['Vit_B12_pg']/dv['DV']['Vitamin B12'] * (df['GmWt_1']/100)
df['PantothenicAcid_rda'] = df['Panto_Acid_mg']/dv['DV']['Pantothenic acid'] * (df['GmWt_1']/100)
df['Phosphorus_rda'] = df['Phosphorus_mg']/dv['DV']['Phosphorus'] * (df['GmWt_1']/100)
df['Magnesium_rda'] = df['Magnesium_mg']/dv['DV']['Magnesium'] * (df['GmWt_1']/100)
df['Zinc_rda'] = df['Zinc_mg']/dv['DV']['Zinc'] * (df['GmWt_1']/100)
df['Selenium_rda'] = df['Selenium_pg']/dv['DV']['Selenium'] * (df['GmWt_1']/100)
df['Copper_rda'] = df['Copper_mg']/dv['DV']['Copper'] * (df['GmWt_1']/100)
df['Manganese_rda'] = df['Manganese_mg']/dv['DV']['Manganese'] * (df['GmWt_1']/100)
df['Molybdenum_rda'] = df['Energ_Kcal']/dv['DV']['Molybdenum'] * (df['GmWt_1']/100)
##df["Quantity"] = df["GmWt_1"].map(str) + " @ " + df["GmWt_Desc1"]
df["Quantity"] = df["GmWt_Desc1"]
df['Healthy'] = df['protein_rda']+ df['Dietfiber_rda']+ df['Thiamin_rda'] + df['Riboflavin_rda'] + df['Niacin_rda'] + df['PantothenicAcid_rda'] + df['Phosphorus_rda'] + df['Magnesium_rda'] + df['Selenium_rda'] + df['Copper_rda'] + df['Manganese_rda'] + df['Molybdenum_rda'] + (df['VitaminA_rda'] + df['VitaminB6_rda'] + df['VitaminC_rda'] + df['Calcium_rda'] + df['Iron_rda'] + df['VitaminD_rda'] + df['VitaminE_rda'] + df['VitaminK_rda'] + df['Folate_rda'] + df['VitaminB12_rda'] + df['Zinc_rda'])*2 - (10*df['Sodium_rda'])
#df['Calories_rda']+ df['Totfat_rda']+df['Satfat_rda']+df['Colesterol_rda'] +df['Sodium_rda'] +df['TotCarbs_rda']+
#foodgrp_key= list(set(df['foodgroup']))
#grp_val = list(map(int,range(len(foodgrp_key))))
#grpdict = dict(zip(foodgrp_key,grp_val))
#df.head()
#Get rid of the NaNs
dff = df[np.isfinite(df['Healthy'])]
#FiFORMULAstrings in dffin and dFORMULA
dffin = dff
#Searchstring = "SHAKE|BEVERAGES|LIQUOR|FORMULA|BABY|SALTED|UNSWTND|PDR|FREEZE|GENERAL MILLS|FRUIT-FLAV|CEREALS|SNACKFO|OIL,|LARD|CANDIES|YEAST|SNACKS|BACKFAT|SHORTENING|COOKIES|BASIL,DRIED|SYRUP"
#dffin = dff[dff.Shrt_Desc.str.contains(Searchstring) == False]
#dffout = dff[dff.Shrt_Desc.str.contains(Searchstring) == True]
In [ ]:
# Create a dictionary out of the dataframe so we can select them later
#dffin.set_index("Shrt_Desc", drop=True, inplace=True)
#dictionary = df.to_dict(orient="index")
In [23]:
#keep only the top 50 healthy items
dffin = dffin.sort_values('Healthy', ascending=False)
dffin = dffin.head(500)
## Modifying Max calories to fit my whims
## will loop in nutritionist later
Maxgoodcal = 400
Redboxhigh = 600
Yellowboxlow = 400
x= dffin.Calories.values.tolist()
h= dffin.Healthy.values.tolist()
y= dffin.Healthy.values.tolist()
n=dffin.Shrt_Desc.values.tolist()
s=dffin.Sodium_mg.values.tolist()
k=dffin.Vit_K_pg.values.tolist()
phos=dffin.Phosphorus_mg.values.tolist()
por=dffin.Quantity.values.tolist()
pro=dffin.Protein_g.values.tolist()
pot=dffin.Potassium_mg.values.tolist()
ylim = max(h)+50
source = ColumnDataSource(data=dict(x=x,y=y,n=n,s=s,h=h,pro=pro,phos=phos, por=por,k=k, pot=pot))
TOOLS="resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,box_select,poly_select,lasso_select,hover"
#plot = figure(plot_height=400, plot_width=400, title="my sine wave",
#
# tools="crosshair,pan,reset,save,wheel_zoom",
# x_range=[0, 4*np.pi], y_range=[-2.5, 2.5])
#plot = figure(title= "Nutrition Filter Test",tools = TOOLS)
plot = figure(tools = TOOLS)
plot.scatter('x', 'y', source=source, radius= dffin.Healthy ,line_color=None)
# Setup the plot
high_box = BoxAnnotation( left=Redboxhigh, fill_alpha=0.1, fill_color='red')
mid_box = BoxAnnotation( left=Yellowboxlow, right=Redboxhigh, fill_alpha=0.1, fill_color='yellow')
low_box = BoxAnnotation( right=Yellowboxlow, fill_alpha=0.1, fill_color='green')
plot.add_layout(low_box)
plot.add_layout(mid_box)
plot.add_layout(high_box)
#plot.title.text = "Nutrition Filter"
#p.xgrid[0].grid_line_color=None
#p.ygrid[0].grid_line_alpha=0.5
plot.xaxis.axis_label = 'Calories(lower tends to be better)'
plot.yaxis.axis_label = 'Healthy (Higher is Better)'
# Set up callbacks
callback = CustomJS(args=dict(source=source), code="""
var data = source.data;
var S = salt.value;
var Prot = protein.value;
var Phos = phosphorus.value;
var K = vit_k.value;
var Pot = pot.value;
h = data['h']
x = data['x']
y = data['y']
s = data['s']
k = data['k']
pot = data['pot']
pro = data['pro']
phos = data['phos']
por = data['por']
for (i = 0; i < x.length; i++) {
y[i] = h[i]-((s[i]-100)*S)- ((pro[i]- 10)*Prot *3) - ((phos[i]- 100)*Phos *2) - ((k[i] - 200)* K *1 ) - ((pot[i] - 200)* Pot *1 );
if (y[i] < 0) {
y[i] = 0
}
}
source.trigger('change');
""")
salt_slider = Slider(start=0, end=1, value=0, step=.005, title="Salt", callback=callback)
callback.args["salt"] = salt_slider
protein_slider = Slider(start=0, end=1, value=0, step=.005, title="Protein", callback=callback)
callback.args["protein"] = protein_slider
phos_slider = Slider(start=0, end=1, value=0, step=.005, title="Phosphorus", callback=callback)
callback.args["phosphorus"] = phos_slider
k_slider = Slider(start=0, end=1, value=0, step=.005, title="Vitamin K", callback=callback)
callback.args["vit_k"] = k_slider
pot_slider = Slider(start=0, end=1, value=0, step=.005, title="Potassium", callback=callback)
callback.args["pot"] = pot_slider
hover = plot.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([('Food', '@n'),('Nutrition', '@y'),('Sodium', '@s'),('Phosphorus', '@phos'),('Calories', '@x'),('Protein', '@pro'),('Potassium', '@pot'),('Vitamin K', '@k'),('Quantity','@por')])
layout = row(
plot,
widgetbox(salt_slider,protein_slider, phos_slider, k_slider, pot_slider),
)
output_file("ffslidertext.html", title="Food Filters")
show(layout)
In [ ]:
#df["Quantity"] = df["GmWt_1"].map(str) + " @ " + df["GmWt_Desc1"]
#dffin['Healthy'][dffin['Calories' > Maxgoodcal].map()
In [ ]:
df["Quantity"] = df["GmWt_1"].map(str)
In [ ]:
df.GmWt_Desc1.head()
In [ ]:
df.Quantity.head(25)
In [ ]:
dff['Phosphorus_mg']
In [ ]:
#print("rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")")
#df.columns
#df.Healthy
#df.head().totalcal
#for i in df["foodgroup"]:
# df["grpnumbers"] = grphash[df["foodgroup"][i]]
#grpdict
#grpdict["Burger"]
#grpdict
In [ ]:
In [ ]:
#len(dffin)
In [ ]:
In [ ]:
x.length
In [ ]:
In [ ]:
#df.Shrt_Desc.head()
#df['foodgroup'] = df[df['Shrt_Desc'].str.contains("BUTTER|MILK")==TRUE
#df.foodgroup.head()
#converter = {}
#i=1
#for item in df["foodgroup"]:
# if item not in converter:
#converter[item] =i
# converter[item]= "rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")"
# i += 1
#df['foodgrpnum'] = [converter[i] for i in df["foodgroup"]]
#print("rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")")
#df.foodgrpnum
In [ ]:
#TOOLS="resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,box_select,poly_select,lasso_select"
#output_file("color_scatter.html", title="color_scatter.py example")
#p = figure(tools=TOOLS)
#p.scatter(x, y, radius=radii, fill_color=colors, fill_alpha=0.6, line_color=None)
#x=dffin.Healthy.values.tolist()
#y = dffin.Calories.values.tolist()
#p.annulus(x, y, inner_radius=0, outer_radius=2,color="orange", alpha=0.6)
#p.annulus(x=dffin.Calories_rda.tolist(), y=dffin.Healthy, inner_radius=0.1, outer_radius=0.25, color="orange", alpha=0.6)
#p.scatter(x,y, radius= 1 , fill_alpha=0.6, line_color=None)
#show(p)
In [ ]:
# plot({
# 'data': [
# Scatter(x=dffin['Calories_rda'],
# y=dffin['Healthy'],
# text=dffin.foodtext ,
## #marker=Marker(size=df['calperserv'], sizemode='area'), #, sizeref=131868,),
# marker = dict(size= dffin.Healthy ,
# line= dict(width=1),
# color= "rgb(178,16,243)" ,
# opacity= 0.8
# ),
# #name= y[i],
# #mode='markers'
## mode="markers"
# )
# ],
# 'layout': Layout(title='Nutrition and Calorie Count',xaxis=XAxis(title='Calories per Serving'), yaxis=YAxis(title='Nutrition 1-10'))
#}, show_link=False)