In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from bokeh.layouts import row, widgetbox
from bokeh.models import CustomJS, Slider, HoverTool, BoxAnnotation
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from collections import OrderedDict
#from plotly.offline import download_plotlyjs, init_notebook_mode, plot
#from plotly.graph_objs import *
#%matplotlib inline
#init_notebook_mode()
In [2]:
dffile = "./Data/sr28abxl/ABBREV_FoodDatabase.xlsx"
dvfile = "./Playdata.xlsx"
#df = pd.read_excel(file, sheetname = 'Sheet1', header = 0, index_col = 0, convert_float = True)
#dv = pd.read_excel(file, sheetname = 'Sheet3', header = 0, index_col = 0, convert_float = True)
df = pd.read_excel(dffile, sheetname = 'ABBREV', header = 0, convert_float = True)
dv = pd.read_excel(dvfile, sheetname = 'Sheet3', header = 0, index_col = 0, convert_float = True)
df.columns = df.dtypes.index.str.replace("(", "").str.replace(")", "").str.replace("µ", "p")
Searchstring = "SHAKE|BEVERAGES|LIQUOR|FORMULA|BABY|SALTED|UNSWTND|PDR|FREEZE|GENERAL MILLS|FRUIT-FLAV|CEREALS|SNACKFO|OIL,|LARD|CANDIES|YEAST|SNACKS|BACKFAT|SHORTENING|COOKIES|BASIL,DRIED|SYRUP"
df = df[df.Shrt_Desc.str.contains(Searchstring) == False]
#dffout = dff[dff.Shrt_Desc.str.contains(Searchstring) == True]
#foodgroups = ['Bread','Candy', 'Dessert','Dressing','Fruit','Meat','Pasta','Pizza','Spread','Vegetable','fruit'}
# This file can be used to calculate nutrient values for food portions from the
#values provided per 100 g of food. The following formula is used to calculate the nutrient
#content per household measure:
#N = (V*W)/100
#where:
#N = nutrient value per household measure,
#V = nutrient value per 100 g (Nutr_Val in the Nutrient Data file), and
#W = g weight of portion (Gm_Wgt in the Weight file).
df['protein_rda'] = df['Protein_g']/dv['DV']['Protein'] * (df['GmWt_1']/100)
df['Calories_rda'] = df['Energ_Kcal']/dv['DV']['Caloriesm'] * (df['GmWt_1']/100)
df['Calories'] = df['Energ_Kcal'] * (df['GmWt_1']/100)
#df['foodtext'] = df.Shrt_Desc
df['Totfat_rda'] = df['Lipid_Tot_g']/dv['DV']['Total Fat'] * (df['GmWt_1']/100)
df['Satfat_rda'] = df['FA_Sat_g']/dv['DV']['Saturated Fat'] * (df['GmWt_1']/100)
df['Colesterol_rda'] = df['Cholestrl_mg']/dv['DV']['Cholesterol'] * (df['GmWt_1']/100)
df['Potassium_rda'] = df['Potassium_mg']/dv['DV']['Potassium'] * (df['GmWt_1']/100)
df['Sodium_rda'] = df['Sodium_mg']/dv['DV']['Sodium'] * (df['GmWt_1']/100)
#df['TotCarbs_rda'] = df['Lipid_Tot_g']/dv['DV']['Total Carbohydrate'] * (df['GmWt_1']/100)
df['Dietfiber_rda'] = df['Fiber_TD_g']/dv['DV']['Dietary Fiber'] * (df['GmWt_1']/100)
df['VitaminA_rda'] = df['Vit_A_IU']/dv['DV']['Vitamin A'] * (df['GmWt_1']/100)
df['VitaminC_rda'] = df['Vit_C_mg']/dv['DV']['Vitamin C'] * (df['GmWt_1']/100)
df['Calcium_rda'] = df['Calcium_mg']/dv['DV']['Calcium'] * (df['GmWt_1']/100)
df['Iron_rda'] = df['Iron_mg']/dv['DV']['Iron'] * (df['GmWt_1']/100)
df['VitaminD_rda'] = df['Vit_D_pg']/dv['DV']['Vitamin D'] * (df['GmWt_1']/100)
df['VitaminE_rda'] = df['Vit_E_mg']/dv['DV']['Vitamin E'] * (df['GmWt_1']/100)
df['VitaminK_rda'] = df['Vit_K_pg']/dv['DV']['Vitamin K'] * (df['GmWt_1']/100)
df['Thiamin_rda'] = df['Thiamin_mg']/dv['DV']['Thiamin'] * (df['GmWt_1']/100)
df['Riboflavin_rda'] = df['Riboflavin_mg']/dv['DV']['Riboflavin'] * (df['GmWt_1']/100)
df['Niacin_rda'] = df['Niacin_mg']/dv['DV']['Niacin'] * (df['GmWt_1']/100)
df['VitaminB6_rda'] = df['Vit_B6_mg']/dv['DV']['Vitamin B6'] * (df['GmWt_1']/100)
df['Folate_rda'] = df['Folate_Tot_pg']/dv['DV']['Folate'] * (df['GmWt_1']/100)
df['VitaminB12_rda'] = df['Vit_B12_pg']/dv['DV']['Vitamin B12'] * (df['GmWt_1']/100)
df['PantothenicAcid_rda'] = df['Panto_Acid_mg']/dv['DV']['Pantothenic acid'] * (df['GmWt_1']/100)
df['Phosphorus_rda'] = df['Phosphorus_mg']/dv['DV']['Phosphorus'] * (df['GmWt_1']/100)
df['Magnesium_rda'] = df['Magnesium_mg']/dv['DV']['Magnesium'] * (df['GmWt_1']/100)
df['Zinc_rda'] = df['Zinc_mg']/dv['DV']['Zinc'] * (df['GmWt_1']/100)
df['Selenium_rda'] = df['Selenium_pg']/dv['DV']['Selenium'] * (df['GmWt_1']/100)
df['Copper_rda'] = df['Copper_mg']/dv['DV']['Copper'] * (df['GmWt_1']/100)
df['Manganese_rda'] = df['Manganese_mg']/dv['DV']['Manganese'] * (df['GmWt_1']/100)
df['Molybdenum_rda'] = df['Energ_Kcal']/dv['DV']['Molybdenum'] * (df['GmWt_1']/100)
##df["Quantity"] = df["GmWt_1"].map(str) + " @ " + df["GmWt_Desc1"]
df["Quantity"] = df["GmWt_Desc1"]
df['Healthy'] = df['protein_rda']+ df['Dietfiber_rda']+ df['Thiamin_rda'] + df['Riboflavin_rda'] + df['Niacin_rda'] + df['PantothenicAcid_rda'] + df['Phosphorus_rda'] + df['Magnesium_rda'] + df['Selenium_rda'] + df['Copper_rda'] + df['Manganese_rda'] + df['Molybdenum_rda'] + (df['VitaminA_rda'] + df['VitaminB6_rda'] + df['VitaminC_rda'] + df['Calcium_rda'] + df['Iron_rda'] + df['VitaminD_rda'] + df['VitaminE_rda'] + df['VitaminK_rda'] + df['Folate_rda'] + df['VitaminB12_rda'] + df['Zinc_rda'])*2 - (10*df['Sodium_rda'])
#df['Calories_rda']+ df['Totfat_rda']+df['Satfat_rda']+df['Colesterol_rda'] +df['Sodium_rda'] +df['TotCarbs_rda']+
#foodgrp_key= list(set(df['foodgroup']))
#grp_val = list(map(int,range(len(foodgrp_key))))
#grpdict = dict(zip(foodgrp_key,grp_val))
#df.head()
#Get rid of the NaNs
dff = df[np.isfinite(df['Healthy'])]
#FiFORMULAstrings in dffin and dFORMULA
dffin = dff
#Searchstring = "SHAKE|BEVERAGES|LIQUOR|FORMULA|BABY|SALTED|UNSWTND|PDR|FREEZE|GENERAL MILLS|FRUIT-FLAV|CEREALS|SNACKFO|OIL,|LARD|CANDIES|YEAST|SNACKS|BACKFAT|SHORTENING|COOKIES|BASIL,DRIED|SYRUP"
#dffin = dff[dff.Shrt_Desc.str.contains(Searchstring) == False]
#dffout = dff[dff.Shrt_Desc.str.contains(Searchstring) == True]
#keep only the top 50 healthy items
dffin = dffin.sort_values('Healthy', ascending=False)
dffin = dffin.head(500)
In [3]:
## Modifying h to fit my whims
Maxgoodcal = 400
#dffin['Healthy'][dffin['Calories' > Maxgoodcal] =
#x = np.linspace(0, 10, 500)
x= dffin.Calories.values.tolist()
h= dffin.Healthy.values.tolist()
y= dffin.Healthy.values.tolist()
n=dffin.Shrt_Desc.values.tolist()
s=dffin.Sodium_mg.values.tolist()
k=dffin.Vit_K_pg.values.tolist()
phos=dffin.Phosphorus_mg.values.tolist()
por=dffin.Quantity.values.tolist()
pro=dffin.Protein_g.values.tolist()
pot=dffin.Potassium_mg.values.tolist()
ylim = max(h)+50
source = ColumnDataSource(data=dict(x=x,y=y,n=n,s=s,h=h,pro=pro,phos=phos, por=por,k=k, pot=pot))
dffin[dffin.Sodium_mg >35]
TOOLS="resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,box_select,poly_select,lasso_select,hover"
#plot = figure(y_range=(0,ylim),tools = TOOLS)
plot = figure(y_range=(0,ylim),tools = TOOLS)
plot.scatter('x', 'y', source=source, radius= dffin.Healthy ,line_color=None)
callback = CustomJS(args=dict(source=source), code="""
var data = source.data;
var S = salt.value;
var Prot = protein.value;
var Phos = phosphorus.value;
var K = vit_k.value;
var Pot = pot.value;
h = data['h']
x = data['x']
y = data['y']
s = data['s']
k = data['k']
pot = data['pot']
pro = data['pro']
phos = data['phos']
por = data['por']
for (i = 0; i < x.length; i++) {
y[i] = h[i]-((s[i]-100)*S)- ((pro[i]- 10)*Prot *3) - ((phos[i]- 100)*Phos *2) - ((k[i] - 200)* K *1 ) - ((pot[i] - 200)* Pot *1 );
}
source.trigger('change');
""")
salt_slider = Slider(start=0, end=1, value=0, step=.005, title="Salt", callback=callback)
callback.args["salt"] = salt_slider
protein_slider = Slider(start=0, end=1, value=0, step=.005, title="Protein", callback=callback)
callback.args["protein"] = protein_slider
phos_slider = Slider(start=0, end=1, value=0, step=.005, title="Phosphorus", callback=callback)
callback.args["phosphorus"] = phos_slider
k_slider = Slider(start=0, end=1, value=0, step=.005, title="Vitamin K", callback=callback)
callback.args["vit_k"] = k_slider
pot_slider = Slider(start=0, end=1, value=0, step=.005, title="Potassium", callback=callback)
callback.args["pot"] = pot_slider
hover = plot.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([('Food', '@n'),('Nutrition', '@y'),('Sodium', '@s'),('Phosphorus', '@phos'),('Calories', '@x'),('Protein', '@pro'),('Potassium', '@pot'),('Vitamin K', '@k'),('Quantity','@por')])
layout = row(
plot,
widgetbox(salt_slider,protein_slider, phos_slider, k_slider, pot_slider),
)
high_box = BoxAnnotation( left=600, fill_alpha=0.1, fill_color='red')
mid_box = BoxAnnotation( left=400, right=600, fill_alpha=0.1, fill_color='yellow')
low_box = BoxAnnotation( right=400, fill_alpha=0.1, fill_color='green')
plot.add_layout(low_box)
plot.add_layout(mid_box)
plot.add_layout(high_box)
plot.title.text = "Nutrition Filter"
#p.xgrid[0].grid_line_color=None
#p.ygrid[0].grid_line_alpha=0.5
plot.xaxis.axis_label = 'Calories'
plot.yaxis.axis_label = 'Healthy (Higher is Better)'
output_file("ffslider.html", title="Food Filter with Sliders")
show(layout)
In [ ]:
dff.columns
In [ ]:
#df["Quantity"] = df["GmWt_1"].map(str) + " @ " + df["GmWt_Desc1"]
#dffin['Healthy'][dffin['Calories' > Maxgoodcal].map()
In [ ]:
df["Quantity"] = df["GmWt_1"].map(str)
In [ ]:
df.GmWt_Desc1.head()
In [ ]:
df.Quantity.head(25)
In [ ]:
dff['Phosphorus_mg']
In [ ]:
#print("rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")")
#df.columns
#df.Healthy
#df.head().totalcal
#for i in df["foodgroup"]:
# df["grpnumbers"] = grphash[df["foodgroup"][i]]
#grpdict
#grpdict["Burger"]
#grpdict
In [ ]:
In [ ]:
#len(dffin)
In [ ]:
In [ ]:
x.length
In [ ]:
In [ ]:
#df.Shrt_Desc.head()
#df['foodgroup'] = df[df['Shrt_Desc'].str.contains("BUTTER|MILK")==TRUE
#df.foodgroup.head()
#converter = {}
#i=1
#for item in df["foodgroup"]:
# if item not in converter:
#converter[item] =i
# converter[item]= "rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")"
# i += 1
#df['foodgrpnum'] = [converter[i] for i in df["foodgroup"]]
#print("rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")")
#df.foodgrpnum
In [ ]:
#TOOLS="resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,box_select,poly_select,lasso_select"
#output_file("color_scatter.html", title="color_scatter.py example")
#p = figure(tools=TOOLS)
#p.scatter(x, y, radius=radii, fill_color=colors, fill_alpha=0.6, line_color=None)
#x=dffin.Healthy.values.tolist()
#y = dffin.Calories.values.tolist()
#p.annulus(x, y, inner_radius=0, outer_radius=2,color="orange", alpha=0.6)
#p.annulus(x=dffin.Calories_rda.tolist(), y=dffin.Healthy, inner_radius=0.1, outer_radius=0.25, color="orange", alpha=0.6)
#p.scatter(x,y, radius= 1 , fill_alpha=0.6, line_color=None)
#show(p)
In [ ]:
# plot({
# 'data': [
# Scatter(x=dffin['Calories_rda'],
# y=dffin['Healthy'],
# text=dffin.foodtext ,
## #marker=Marker(size=df['calperserv'], sizemode='area'), #, sizeref=131868,),
# marker = dict(size= dffin.Healthy ,
# line= dict(width=1),
# color= "rgb(178,16,243)" ,
# opacity= 0.8
# ),
# #name= y[i],
# #mode='markers'
## mode="markers"
# )
# ],
# 'layout': Layout(title='Nutrition and Calorie Count',xaxis=XAxis(title='Calories per Serving'), yaxis=YAxis(title='Nutrition 1-10'))
#}, show_link=False)