In [26]:
import json
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
In [27]:
# Launch a Firefox browser session, then point it at the page stored in
# `url_eva`; the cells below read restaurant entries from that session.
driver = webdriver.Firefox()
url_eva = "http://www.evavzw.be/resto"
driver.get(url_eva)
In [ ]:
# Walk through the paginated listing and collect one tuple per restaurant.
#
# A complete entry becomes the 5-tuple (name, street, zipcode, city, tags).
# An entry with fewer than three lines of text becomes the 3-tuple
# (name, None, None); its length differs from a complete record, so a
# consumer can detect incomplete entries with a simple length check.
restaurants = []
number_of_pages = 83   # upper bound on the number of result pages to visit
page_load_delay = 3    # seconds to wait after clicking through to the next page

try:
    for i in range(number_of_pages):
        print(i)

        # Scrape the page currently shown *before* navigating any further, so
        # the final page is collected as well.
        for s in driver.find_elements(By.CLASS_NAME, 'node-restaurant'):
            info = s.text.split('\n')
            if len(info) > 2:
                # Third line: first four characters are the zip code, the rest
                # is the city. Strip the remainder so separator whitespace
                # (presumably a space after the zip code) is not kept.
                restaurant = (info[0], info[1], info[2][:4], info[2][4:].strip(), info[3:])
            else:
                # str.split always yields at least one item, so info[0] exists.
                restaurant = (info[0], None, None)
            restaurants.append(restaurant)

        # Nothing left to visit once the page budget is used up.
        if i == number_of_pages - 1:
            break

        # find_elements returns an empty list instead of raising when the
        # "next" pager item is missing, i.e. when this is the last page.
        next_links = driver.find_elements(By.CLASS_NAME, 'pager-next')
        if not next_links:
            break
        next_links[0].find_element(By.TAG_NAME, 'a').click()
        time.sleep(page_load_delay)
finally:
    # Always end the browser session, even when scraping fails part-way.
    # quit() also shuts down the driver process, not just the window.
    driver.quit()

print(len(restaurants))
In [29]:
# Display the first scraped record as a quick look at the tuple layout.
restaurants[0]
Out[29]:
In [45]:
# Turn the scraped tuples into dictionaries and gather every tag encountered.
field_names = ('name', 'street', 'zipcode', 'city', 'tags')
restaurant_objects = []
tags = []

for record in restaurants:
    # Anything that does not carry exactly five fields is reported and skipped.
    if len(record) != 5:
        print('Bad record: ' + str(record))
        continue

    # Pair each positional field with its key, in the fixed order above.
    entry = dict(zip(field_names, record))
    restaurant_objects.append(entry)

    record_tags = entry['tags']
    if record_tags is not None:
        tags.extend(record_tags)

# Reduce the collected tags to the distinct values and show them.
unique_tags = set(tags)
print(unique_tags)
In [47]:
# Write the restaurant dictionaries to disk as indented JSON with sorted keys.
with open('EVA_restodata.json', 'w') as resto_file:
    json.dump(
        restaurant_objects,
        resto_file,
        indent=4,
        sort_keys=True,
        separators=(',', ': '),
    )
In [48]:
# Wrap every distinct tag in its own {"tag": ...} dictionary.
# Iterating the set directly gives an order that can change from one kernel
# session to the next (string hashing is randomised), so sort the tags first
# to keep the list, and any file written from it, reproducible.
tag_objects = [{"tag": t} for t in sorted(unique_tags)]
print(tag_objects)
In [49]:
# Write the tag dictionaries to disk as indented JSON with sorted keys.
with open('EVA_tags.json', 'w') as tags_file:
    json.dump(
        tag_objects,
        tags_file,
        indent=4,
        sort_keys=True,
        separators=(',', ': '),
    )
In [ ]: