In [2]:
# Read the calorie table: every line must split into exactly two
# whitespace-separated fields; the second field is parsed as a float.
with open('./food101/calories.txt') as f:
    calories = []
    for record in f:
        _first, value_text = record.split()
        calories.append(float(value_text))

In [9]:
import matplotlib.pyplot as plt
import numpy as np
# Histogram of the calorie values using 100 bins, drawn on an explicit Axes.
fig, ax = plt.subplots()
ax.hist(calories, bins=100)
plt.show()



In [11]:
# Summary statistics of the calorie values; the trailing tuple is the cell's
# displayed output (mean, standard deviation).
calories_mean = np.mean(calories)
calories_std = np.std(calories)
calories_mean, calories_std


Out[11]:
(237.05069306930693, 164.14514985917779)

In [12]:
# Rebind `calories` from the parsed Python list to a NumPy array; the cells
# below rely on elementwise arithmetic and boolean comparison against it.
calories = np.array(calories)

In [17]:
# Error of using the dataset mean as the estimate for every entry.
mean_calories = np.mean(calories)
residuals = calories - mean_calories

# Mean of the squared residuals.
mse_of_mean = np.mean(residuals ** 2)
print('mse of mean', mse_of_mean)

# Mean of the absolute residuals, each divided by the entry's true value.
avg_percent = np.mean(np.abs(residuals) / calories)
print('avg_percent', avg_percent)


mse of mean 26943.6302223
avg_percent 0.893165039281

In [31]:
# Draw one value per entry from a normal distribution that has the data's
# mean and standard deviation, then report the mean absolute error of those
# draws relative to the true values.
#
# A fixed-seed local generator is used so the printed number is the same on
# every "Restart & Run All" and NumPy's global random state is left untouched.
# (Re-running will therefore print a different value than an output recorded
# from the earlier unseeded draw.)
rng = np.random.RandomState(0)
sampled = rng.normal(np.mean(calories), np.std(calories), len(calories))
avg_percent = np.mean(np.abs(calories - sampled) / calories)
print('avg_percent', avg_percent)


avg_percent 1.25104638552

In [33]:
# Index of the first entry whose value exceeds 1000: argmax over a boolean
# mask returns the position of the first True.
# NOTE: if no entry exceeded 1000 the mask would be all False and argmax
# would return 0, which is indistinguishable from a match at index 0.
over_1000_mask = calories > 1000
over_1000_mask.argmax()


Out[33]:
31000

In [36]:
import scipy.misc as sm
# Display the image file for one entry and print that entry's calorie value.
#
# plt.imread is used instead of scipy.misc.imread: the SciPy function was
# deprecated in SciPy 1.0 and removed in SciPy 1.2, so the original call
# fails on current installations. matplotlib is already imported as `plt`.
sample_index = 31000  # same index is used for the file name and the lookup
im = plt.imread('./food101/{}.jpg'.format(sample_index))
plt.imshow(im)
plt.show()
print(calories[sample_index])


1388.0