In [16]:
import json
import os

import requests as r
from scipy.spatial.distance import cosine, euclidean, jaccard
from sklearn.feature_extraction.text import CountVectorizer
In [17]:
# API key sent with every USDA NDB request made by this notebook.
# Read it from the USDA_API_KEY environment variable so a personal key never
# has to be committed with the notebook; fall back to the public 'DEMO_KEY'
# so the notebook still runs unchanged when the variable is not set.
api_key = os.environ.get('USDA_API_KEY', 'DEMO_KEY')
In [18]:
def get_product_list(q='', max_records=1000, timeout=30):
    """Search the USDA NDB food database and return the matching products.

    Args:
        q: Free-text search terms, e.g. ``'carrots raw'``.
        max_records: Value sent as the ``max`` query parameter (upper bound on
            the number of records requested).
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``(ndbno, name)`` tuples, one per item in the response.
        The list is empty when the response has no ``list``/``item`` section.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    # Example request:
    # https://api.nal.usda.gov/ndb/search/?format=json&q=butter&sort=n&max=25&offset=0&api_key=DEMO_KEY
    url = 'https://api.nal.usda.gov/ndb/search/'
    params = {
        'q': q,
        'format': 'json',
        'sort': 'n',
        'max': max_records,
        'offset': 0,
        'api_key': api_key,
    }
    # Let requests build the query string so that spaces, '&', '#', ... inside
    # the search terms are URL-encoded instead of corrupting the URL.
    resp = r.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    data = resp.json()

    # NOTE(review): a payload without 'list' is treated as "no matches" here;
    # presumably that is how the API reports zero results - confirm.
    items = data.get('list', {}).get('item', [])
    return [(x['ndbno'], x['name']) for x in items]
In [21]:
# Look up every product whose name matches the free-text query.
lst = get_product_list('carrots raw')

# Show each (ndbno, name) pair on its own line.
for product in lst:
    print(product)

# Keep only the product names; they are the text corpus for the vectorizer.
txt_data = [name for _ndbno, name in lst]
In [26]:
# Build a bag-of-words vocabulary from the product names and encode the
# search phrase with that same vocabulary.
vectorizer = CountVectorizer(min_df=1)
X = vectorizer.fit_transform(txt_data)
req_X = vectorizer.transform(['carrots raw']).toarray()

# scipy's distance functions expect 1-D vectors, and Jaccard is defined on
# boolean (term present / absent) data, so flatten the (1, n_terms) row and
# drop the counts before comparing.
req_vec = req_X.ravel().astype(bool)

# Rebuild the list as (ndbno, name, jaccard_distance). Only the first two
# fields of each entry are read, so re-running the cell gives the same result.
lst = [
    (entry[0], entry[1], jaccard(row.toarray().ravel().astype(bool), req_vec))
    for entry, row in zip(lst, X)
]
for item in lst:
    print(item)

# Last expression of the cell: the product closest to the query
# (smallest Jaccard distance).
min(lst, key=lambda x: x[2])
Out[26]:
In [84]:
def get_product_detail(product_id, timeout=30):
    """Fetch one food report from the USDA NDB API and summarise its nutrients.

    Args:
        product_id: The product's ``ndbno`` identifier, as returned by the
            search endpoint.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict with ``'id'`` and ``'name'``, plus one ``(value, unit)`` tuple
        for each of ``'energy'``, ``'protein'``, ``'fat'``, ``'carbohydrate'``,
        ``'fiber'`` and ``'sugar'`` that is present in the report. Energy is
        converted with ``int``, the others with ``float``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the payload has no ``report``/``food`` section.
    """
    # Example request:
    # https://api.nal.usda.gov/ndb/reports/?ndbno=01009&type=f&format=json&api_key=DEMO_KEY
    url = 'https://api.nal.usda.gov/ndb/reports/'
    params = {
        'ndbno': product_id,
        'format': 'json',
        'type': 'f',
        'api_key': api_key,
    }
    # Let requests build (and URL-encode) the query string.
    resp = r.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    item = resp.json()['report']['food']

    # nutrient_id -> (key in the result dict, converter for the raw value)
    wanted = {
        '208': ('energy', int),
        '203': ('protein', float),
        '204': ('fat', float),
        '205': ('carbohydrate', float),
        '291': ('fiber', float),
        '269': ('sugar', float),
    }

    result = {'id': item['ndbno'], 'name': item['name']}
    for n_item in item['nutrients']:
        # str() so the lookup also matches if the id arrives as a JSON number.
        target = wanted.get(str(n_item['nutrient_id']))
        if target is None:
            continue
        key, convert = target
        result[key] = (convert(n_item['value']), n_item['unit'])
    return result
In [85]:
# Fetch the nutrient summary for one product by its ndbno; as the last
# expression of the cell, the returned dict is displayed as the cell output.
get_product_detail(product_id='45051561')
In [ ]: