In [1]:
import csv
import gzip
import json
import os
import shutil
import urllib.request

In [ ]:
# Download the geo_sirene archive and decompress it next to the notebook.

# NOTE(review): the download URL is blank in this notebook and must be filled
# in before this cell can run.
SIRENE_URL = ""
SIRENE_ARCHIVE = "geo_sirene.csv.gz"
SIRENE_CSV = "geo_sirene.csv"

if not SIRENE_URL:
    # urlretrieve("") fails with a cryptic "unknown url type" ValueError;
    # raise the same exception type with an actionable message instead.
    raise ValueError("SIRENE_URL is empty: set it to the geo_sirene.csv.gz download URL")

urllib.request.urlretrieve(SIRENE_URL, SIRENE_ARCHIVE)

# Decompress in Python instead of shelling out to `gunzip`: os.system() ignores
# the command's exit status, so a failed decompression went unnoticed, and the
# external binary is not available on every platform. Any error here raises.
with gzip.open(SIRENE_ARCHIVE, "rb") as compressed, open(SIRENE_CSV, "wb") as extracted:
    shutil.copyfileobj(compressed, extracted)

# `gunzip` removes the archive once it has been decompressed; do the same.
os.remove(SIRENE_ARCHIVE)

# Alternative: work on a small area only
#urllib.request.urlretrieve ("", "geo-sirene_75112.csv.7z")
#os.system( '7z x geo-sirene_75112.csv.7z' )

Out[ ]:

In [ ]:
# Extract the type of POI we are interested in, keeping a single row per
# (longitude, latitude) pair.

# Value matched against the APET700 column.
# NOTE(review): presumably the bakery activity code, given the bakery.json
# export further down -- confirm.
APE_CODE = "1071C"

output_without_dup = {}

# newline="" is what the csv module expects for files it reads, so that line
# endings inside quoted fields are handled by the csv parser itself.
with open("geo_sirene.csv", "r", newline="") as source:
    for row in csv.DictReader(source):
        if row['APET700'] != APE_CODE:
            continue

        longitude, latitude = row['longitude'], row['latitude']
        if not (longitude and latitude):
            # Rows without both coordinates are ignored.
            continue

        # Key on the pair itself rather than on the concatenated strings:
        # without a separator, "2.3" + "48.85" and "2.34" + "8.85" produce the
        # same key and one of the two rows would be dropped by mistake.
        # When several rows share the same coordinates, the last one read wins.
        output_without_dup[(longitude, latitude)] = row

print(len(output_without_dup))

In [ ]:
# Export the deduplicated rows (dictionary values only) as an indented JSON array.
bakery_records = list(output_without_dup.values())
with open('bakery.json', 'w') as bakery_file:
    json.dump(bakery_records, bakery_file, indent=4)

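Finally, run the following command from a shell (it is not Python code), using the `bakery.json` file written above:

```
conflate -c result.json -i bakery.json --osm osm_bakery.osm
```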