In [ ]:
import pandas as pd
import numpy as np
import sqlalchemy
import re
from elasticsearch import Elasticsearch
from elasticsearch import helpers as eshelper
from tqdm import tqdm
In [ ]:
db = sqlalchemy.create_engine('postgresql://postgres@localhost:5432/postgres', client_encoding='utf8')
In [ ]:
categories = pd.read_sql_table('categories', db)
categories.head()
In [ ]:
categorisations = pd.read_sql_table('categorisations', db)
categorisations.head()
In [ ]:
images = pd.read_sql_table('images', db)
images.head()
In [ ]:
nutrients = pd.read_sql_table('nutrients', db).fillna(0)
nutrients.head()
In [ ]:
product_nutrients = pd.read_sql_table('product_nutrients', db).fillna(0)
product_nutrients.head()
In [ ]:
products = pd.read_sql_table('products', db).fillna(0)
products.head()
In [ ]:
re_brand = re.compile('([^:]*):(.+)')
In [ ]:
entries = []
for i, product in tqdm(products.iterrows()):
name = product.name_translations.get('fr')
if name:
nuts = []
mapping = product_nutrients[product_nutrients.product_id == product.id]
for i, m in mapping.iterrows():
nut = nutrients[nutrients.id == m.nutrient_id]
nuts.append({
'name': nut.name_translations.values[0]['fr'],
'unit': nut.unit.values[0],
'rdi': np.nan_to_num(nut.rdi.values[0]),
'per_day': np.nan_to_num(m.per_day),
'per_hundred': np.nan_to_num(m.per_hundred),
'per_portion': np.nan_to_num(m.per_portion),
})
if len(nuts):
brand_existence = re_brand.search(name)
if brand_existence:
brand, name_cleaned = re_brand.search(name).groups()
else:
brand = ''
name_cleaned = name
fmt = {
'_index': 'products1',
'_type': 'openfood',
'_id': product.id,
'_source': {
'name': name_cleaned,
'brand': brand,
'unit': product.unit,
'quantity': product.quantity,
'unit_portion': product.portion_quantity,
'unit_quantity': product.portion_unit,
'nutriments': nuts
}
}
entries.append(fmt)
len(entries)
In [ ]:
entries[0]
In [ ]:
client = Elasticsearch(hosts='http://')
In [ ]:
eshelper.bulk(client, entries)
In [ ]: