In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from bokeh.layouts import row, widgetbox
from bokeh.models import CustomJS, Slider, HoverTool, BoxAnnotation
from bokeh.models.widgets import Slider, Select, TextInput
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from collections import OrderedDict
#from plotly.offline import download_plotlyjs, init_notebook_mode, plot
#from plotly.graph_objs import *
#%matplotlib inline
#init_notebook_mode()
In [2]:
dffile = "./Data/sr28abxl/ABBREV_FoodDatabase.xlsx"
dvfile = "./Playdata.xlsx"
#df = pd.read_excel(file, sheetname = 'Sheet1', header = 0, index_col = 0, convert_float = True)
#dv = pd.read_excel(file, sheetname = 'Sheet3', header = 0, index_col = 0, convert_float = True)
df = pd.read_excel(dffile, sheetname = 'ABBREV', header = 0, convert_float = True)
dv = pd.read_excel(dvfile, sheetname = 'Sheet3', header = 0, index_col = 0, convert_float = True)
df.columns = df.dtypes.index.str.replace("(", "").str.replace(")", "").str.replace("µ", "p")
Searchstring = "SHAKE|BEVERAGES|LIQUOR|FORMULA|BABY|SALTED|UNSWTND|PDR|FREEZE|GENERAL MILLS|FRUIT-FLAV|CEREALS|SNACKFO|OIL,|LARD|CANDIES|YEAST|SNACKS|BACKFAT|SHORTENING|COOKIES|BASIL,DRIED|SYRUP"
df = df[df.Shrt_Desc.str.contains(Searchstring) == False]
#dffout = dff[dff.Shrt_Desc.str.contains(Searchstring) == True]
#foodgroups = ['Bread','Candy', 'Dessert','Dressing','Fruit','Meat','Pasta','Pizza','Spread','Vegetable','fruit'}
# This file can be used to calculate nutrient values for food portions from the
#values provided per 100 g of food. The following formula is used to calculate the nutrient
#content per household measure:
#N = (V*W)/100
#where:
#N = nutrient value per household measure,
#V = nutrient value per 100 g (Nutr_Val in the Nutrient Data file), and
#W = g weight of portion (Gm_Wgt in the Weight file).
df['protein_rda'] = df['Protein_g']/dv['DV']['Protein'] * (df['GmWt_1']/100)
df['Calories_rda'] = df['Energ_Kcal']/dv['DV']['Caloriesm'] * (df['GmWt_1']/100)
df['Calories'] = df['Energ_Kcal'] * (df['GmWt_1']/100)
#df['foodtext'] = df.Shrt_Desc
df['Totfat_rda'] = df['Lipid_Tot_g']/dv['DV']['Total Fat'] * (df['GmWt_1']/100)
df['Satfat_rda'] = df['FA_Sat_g']/dv['DV']['Saturated Fat'] * (df['GmWt_1']/100)
df['Colesterol_rda'] = df['Cholestrl_mg']/dv['DV']['Cholesterol'] * (df['GmWt_1']/100)
df['Potassium_rda'] = df['Potassium_mg']/dv['DV']['Potassium'] * (df['GmWt_1']/100)
df['Sodium_rda'] = df['Sodium_mg']/dv['DV']['Sodium'] * (df['GmWt_1']/100)
#df['TotCarbs_rda'] = df['Lipid_Tot_g']/dv['DV']['Total Carbohydrate'] * (df['GmWt_1']/100)
df['Dietfiber_rda'] = df['Fiber_TD_g']/dv['DV']['Dietary Fiber'] * (df['GmWt_1']/100)
df['VitaminA_rda'] = df['Vit_A_IU']/dv['DV']['Vitamin A'] * (df['GmWt_1']/100)
df['VitaminC_rda'] = df['Vit_C_mg']/dv['DV']['Vitamin C'] * (df['GmWt_1']/100)
df['Calcium_rda'] = df['Calcium_mg']/dv['DV']['Calcium'] * (df['GmWt_1']/100)
df['Iron_rda'] = df['Iron_mg']/dv['DV']['Iron'] * (df['GmWt_1']/100)
df['VitaminD_rda'] = df['Vit_D_pg']/dv['DV']['Vitamin D'] * (df['GmWt_1']/100)
df['VitaminE_rda'] = df['Vit_E_mg']/dv['DV']['Vitamin E'] * (df['GmWt_1']/100)
df['VitaminK_rda'] = df['Vit_K_pg']/dv['DV']['Vitamin K'] * (df['GmWt_1']/100)
df['Thiamin_rda'] = df['Thiamin_mg']/dv['DV']['Thiamin'] * (df['GmWt_1']/100)
df['Riboflavin_rda'] = df['Riboflavin_mg']/dv['DV']['Riboflavin'] * (df['GmWt_1']/100)
df['Niacin_rda'] = df['Niacin_mg']/dv['DV']['Niacin'] * (df['GmWt_1']/100)
df['VitaminB6_rda'] = df['Vit_B6_mg']/dv['DV']['Vitamin B6'] * (df['GmWt_1']/100)
df['Folate_rda'] = df['Folate_Tot_pg']/dv['DV']['Folate'] * (df['GmWt_1']/100)
df['VitaminB12_rda'] = df['Vit_B12_pg']/dv['DV']['Vitamin B12'] * (df['GmWt_1']/100)
df['PantothenicAcid_rda'] = df['Panto_Acid_mg']/dv['DV']['Pantothenic acid'] * (df['GmWt_1']/100)
df['Phosphorus_rda'] = df['Phosphorus_mg']/dv['DV']['Phosphorus'] * (df['GmWt_1']/100)
df['Magnesium_rda'] = df['Magnesium_mg']/dv['DV']['Magnesium'] * (df['GmWt_1']/100)
df['Zinc_rda'] = df['Zinc_mg']/dv['DV']['Zinc'] * (df['GmWt_1']/100)
df['Selenium_rda'] = df['Selenium_pg']/dv['DV']['Selenium'] * (df['GmWt_1']/100)
df['Copper_rda'] = df['Copper_mg']/dv['DV']['Copper'] * (df['GmWt_1']/100)
df['Manganese_rda'] = df['Manganese_mg']/dv['DV']['Manganese'] * (df['GmWt_1']/100)
df['Molybdenum_rda'] = df['Energ_Kcal']/dv['DV']['Molybdenum'] * (df['GmWt_1']/100)
##df["Quantity"] = df["GmWt_1"].map(str) + " @ " + df["GmWt_Desc1"]
df["Quantity"] = df["GmWt_Desc1"]
df['Healthy'] = df['protein_rda']+ df['Dietfiber_rda']+ df['Thiamin_rda'] + df['Riboflavin_rda'] + df['Niacin_rda'] + df['PantothenicAcid_rda'] + df['Phosphorus_rda'] + df['Magnesium_rda'] + df['Selenium_rda'] + df['Copper_rda'] + df['Manganese_rda'] + df['Molybdenum_rda'] + (df['VitaminA_rda'] + df['VitaminB6_rda'] + df['VitaminC_rda'] + df['Calcium_rda'] + df['Iron_rda'] + df['VitaminD_rda'] + df['VitaminE_rda'] + df['VitaminK_rda'] + df['Folate_rda'] + df['VitaminB12_rda'] + df['Zinc_rda'])*2 - (10*df['Sodium_rda'])
#df['Calories_rda']+ df['Totfat_rda']+df['Satfat_rda']+df['Colesterol_rda'] +df['Sodium_rda'] +df['TotCarbs_rda']+
#foodgrp_key= list(set(df['foodgroup']))
#grp_val = list(map(int,range(len(foodgrp_key))))
#grpdict = dict(zip(foodgrp_key,grp_val))
#df.head()
#Get rid of the NaNs
dffin = df[np.isfinite(df['Healthy'])]
#FiFORMULAstrings in dffin and dFORMULA
#Searchstring = "SHAKE|BEVERAGES|LIQUOR|FORMULA|BABY|SALTED|UNSWTND|PDR|FREEZE|GENERAL MILLS|FRUIT-FLAV|CEREALS|SNACKFO|OIL,|LARD|CANDIES|YEAST|SNACKS|BACKFAT|SHORTENING|COOKIES|BASIL,DRIED|SYRUP"
#dffin = dff[dff.Shrt_Desc.str.contains(Searchstring) == False]
#dffout = dff[dff.Shrt_Desc.str.contains(Searchstring) == True]
In [3]:
# Create a dictionary out of the dataframe so we can select them later
#dffin.set_index("Shrt_Desc", drop=True, inplace=True)
#dictionary = df.to_dict(orient="index")
In [28]:
#keep only the top 500 healthy items
dffin = dffin.sort_values('Healthy', ascending=False)
dffin = dffin.head(500)
tinydffin = dffin.head(10)
## Modifying Max calories to fit my whims
## will loop in nutritionist later
Maxgoodcal = 400
Redboxhigh = 600
Yellowboxlow = 400
# Setup the plot
high_box = BoxAnnotation( left=Redboxhigh, fill_alpha=0.1, fill_color='red')
mid_box = BoxAnnotation( left=Yellowboxlow, right=Redboxhigh, fill_alpha=0.1, fill_color='yellow')
low_box = BoxAnnotation( right=Yellowboxlow, fill_alpha=0.1, fill_color='green')
x= dffin.Calories.values.tolist()
h= dffin.Healthy.values.tolist()
y= dffin.Healthy.values.tolist()
n=dffin.Shrt_Desc.values.tolist()
s=dffin.Sodium_mg.values.tolist()
k=dffin.Vit_K_pg.values.tolist()
phos=dffin.Phosphorus_mg.values.tolist()
por=dffin.Quantity.values.tolist()
pro=dffin.Protein_g.values.tolist()
pot=dffin.Potassium_mg.values.tolist()
ylim = max(h)+50
ds1 = ColumnDataSource(data=dict(x=x,y=y,n=n,s=s,h=h,pro=pro,phos=phos, por=por,k=k, pot=pot))
###########Testing
x= tinydffin.Calories.values.tolist()
h= tinydffin.Healthy.values.tolist()
y= tinydffin.Healthy.values.tolist()
n=tinydffin.Shrt_Desc.values.tolist()
s=tinydffin.Sodium_mg.values.tolist()
k=tinydffin.Vit_K_pg.values.tolist()
phos=tinydffin.Phosphorus_mg.values.tolist()
por=tinydffin.Quantity.values.tolist()
pro=tinydffin.Protein_g.values.tolist()
pot=tinydffin.Potassium_mg.values.tolist()
tinyds1= ColumnDataSource(data=dict(x=x,y=y,n=n,s=s,h=h,pro=pro,phos=phos, por=por,k=k, pot=pot))
#################
TOOLS="resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,box_select,poly_select,lasso_select,hover"
plot = figure(title= "Nutrition Filter Test",tools = TOOLS)
plot.scatter('x', 'y', source=tinyds1, radius= dffin.Healthy ,line_color=None)
plot.add_layout(low_box)
plot.add_layout(mid_box)
plot.add_layout(high_box)
#plot.title.text = "Nutrition Filter"
#p.xgrid[0].grid_line_color=None
#p.ygrid[0].grid_line_alpha=0.5
plot.xaxis.axis_label = 'Calories(lower tends to be better)'
plot.yaxis.axis_label = 'Healthy (Higher is Better)'
############## more test
text_input = TextInput(title="title", value='my sine wave')
plot.title.text = text_input.value
text_input.callback = CustomJS(args=dict(source=text_input.value), code="""
text_input = TextInput(title="title", value='something else')
plot.title.text = text_input.value
source.trigger('change');
""")
#title.callback = CustomJS(update_title('value'))
################ End Test
ds1.callback = CustomJS(args=dict(source=tinyds1), code="""
var data = source.data;
var S = salt.value;
var Prot = protein.value;
var Phos = phosphorus.value;
var K = vit_k.value;
var Pot = pot.value;
h = data['h']
x = data['x']
y = data['y']
s = data['s']
k = data['k']
pot = data['pot']
pro = data['pro']
phos = data['phos']
por = data['por']
for (i = 0; i < x.length; i++) {
y[i] = h[i]-((s[i]-100)*S)- ((pro[i]- 10)*Prot *3) - ((phos[i]- 100)*Phos *2) - ((k[i] - 200)* K *1 ) - ((pot[i] - 200)* Pot *1 );
if (y[i] < 0) {
y[i] = 0
}
}
source.trigger('change');
""")
salt_slider = Slider(start=0, end=1, value=0, step=.005, title="Salt", callback=ds1.callback)
ds1.callback.args["salt"] = salt_slider
protein_slider = Slider(start=0, end=1, value=0, step=.005, title="Protein", callback=ds1.callback)
ds1.callback.args["protein"] = protein_slider
phos_slider = Slider(start=0, end=1, value=0, step=.005, title="Phosphorus", callback=ds1.callback)
ds1.callback.args["phosphorus"] = phos_slider
k_slider = Slider(start=0, end=1, value=0, step=.005, title="Vitamin K", callback=ds1.callback)
ds1.callback.args["vit_k"] = k_slider
pot_slider = Slider(start=0, end=1, value=0, step=.005, title="Potassium", callback=ds1.callback)
ds1.callback.args["pot"] = pot_slider
hover = plot.select(dict(type=HoverTool))
hover.tooltips = OrderedDict([('Food', '@n'),('Nutrition', '@y'),('Sodium', '@s'),('Phosphorus', '@phos'),('Calories', '@x'),('Protein', '@pro'),('Potassium', '@pot'),('Vitamin K', '@k'),('Quantity','@por')])
layout = row(
plot,
widgetbox(text_input, salt_slider,protein_slider, phos_slider, k_slider, pot_slider),
)
output_file("ffslidertext.html", title="Food Filters")
show(layout)
In [ ]:
#df["Quantity"] = df["GmWt_1"].map(str) + " @ " + df["GmWt_Desc1"]
#dffin['Healthy'][dffin['Calories' > Maxgoodcal].map()
In [ ]:
df["Quantity"] = df["GmWt_1"].map(str)
In [ ]:
df.GmWt_Desc1.head()
In [ ]:
df.Quantity.head(25)
In [ ]:
dff['Phosphorus_mg']
In [ ]:
#print("rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")")
#df.columns
#df.Healthy
#df.head().totalcal
#for i in df["foodgroup"]:
# df["grpnumbers"] = grphash[df["foodgroup"][i]]
#grpdict
#grpdict["Burger"]
#grpdict
In [ ]:
In [ ]:
#len(dffin)
In [ ]:
In [ ]:
x.length
In [ ]:
In [ ]:
#df.Shrt_Desc.head()
#df['foodgroup'] = df[df['Shrt_Desc'].str.contains("BUTTER|MILK")==TRUE
#df.foodgroup.head()
#converter = {}
#i=1
#for item in df["foodgroup"]:
# if item not in converter:
#converter[item] =i
# converter[item]= "rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")"
# i += 1
#df['foodgrpnum'] = [converter[i] for i in df["foodgroup"]]
#print("rgb("+ str(random.randint(0,255)) + "," + str(random.randint(0,255)) + "," + str(random.randint(0,255)) + ")")
#df.foodgrpnum
In [ ]:
#TOOLS="resize,crosshair,pan,wheel_zoom,box_zoom,reset,tap,previewsave,box_select,poly_select,lasso_select"
#output_file("color_scatter.html", title="color_scatter.py example")
#p = figure(tools=TOOLS)
#p.scatter(x, y, radius=radii, fill_color=colors, fill_alpha=0.6, line_color=None)
#x=dffin.Healthy.values.tolist()
#y = dffin.Calories.values.tolist()
#p.annulus(x, y, inner_radius=0, outer_radius=2,color="orange", alpha=0.6)
#p.annulus(x=dffin.Calories_rda.tolist(), y=dffin.Healthy, inner_radius=0.1, outer_radius=0.25, color="orange", alpha=0.6)
#p.scatter(x,y, radius= 1 , fill_alpha=0.6, line_color=None)
#show(p)
In [ ]:
# plot({
# 'data': [
# Scatter(x=dffin['Calories_rda'],
# y=dffin['Healthy'],
# text=dffin.foodtext ,
## #marker=Marker(size=df['calperserv'], sizemode='area'), #, sizeref=131868,),
# marker = dict(size= dffin.Healthy ,
# line= dict(width=1),
# color= "rgb(178,16,243)" ,
# opacity= 0.8
# ),
# #name= y[i],
# #mode='markers'
## mode="markers"
# )
# ],
# 'layout': Layout(title='Nutrition and Calorie Count',xaxis=XAxis(title='Calories per Serving'), yaxis=YAxis(title='Nutrition 1-10'))
#}, show_link=False)