Geocoding is the process of turning an address -- '3020 W Colorado Ave, Colorado Springs, CO 80904' -- into a latitude/longitude coordinate pair. You'd need to do this if you wanted to do some geospatial analysis or make an interactive map or something.
We're going to use the geopy library to geocode a CSV of payday lenders in Illinois. The data file is at data/payday.csv.
Once we have the coordinates, we're going to tack them on as two new columns, `lat` and `lng`, and write everything out to a new file, payday-geocoded.csv.
In [ ]:
# import the Google geocoder from geopy
# import Python's csv and time libraries
from geopy.geocoders import GoogleV3
import csv
import time
# Make a geolocator object
# Set the `timeout` keyword argument to 5 (seconds)
geolocator = GoogleV3(timeout=5)

# only geocode the first few rows --
# that way we don't bombard google and get cut off
MAX_ROWS = 5

# how long to pause (in seconds) between requests
PAUSE_SECONDS = 2

# in a `with` block, open the file to read from and the file to write to
# (the csv module wants its files opened with `newline=''` so it can handle
# line endings itself -- otherwise you can end up with blank rows in the output)
with open('data/payday.csv', 'r', newline='') as infile, \
        open('payday-geocoded.csv', 'w', newline='') as outfile:

    # make a DictReader object
    reader = csv.DictReader(infile)

    # define the headers: the original columns plus the two new ones
    # (`fieldnames` is None if the input file is empty, hence the `or []`)
    headers = list(reader.fieldnames or []) + ['lat', 'lng']

    # make a DictWriter object
    writer = csv.DictWriter(outfile, fieldnames=headers)

    # write headers
    writer.writeheader()

    # loop over address data
    # i'm adding an `enumerate()` function to keep track of where we're at
    # so we can kill the loop after MAX_ROWS iterations
    for i, row in enumerate(reader):

        # once we hit the limit, break out of the loop
        if i >= MAX_ROWS:
            break

        # glue the two street-address columns together, skipping whichever
        # one is blank so we don't end up with stray double spaces
        # (a missing cell comes back as None, hence the `or ''`)
        street = ' '.join(
            part for part in (
                (row['STADDR'] or '').strip(),
                (row['STADDR2'] or '').strip(),
            ) if part
        )

        # Put the address in a Google-recognizable string: ADDRESS, CITY, STATE ZIP
        addr = '{}, {}, {} {}'.format(
            street,
            row['CITY'],
            row['STATE'],
            row['ZIP']
        )

        # Geocode that string
        location = geolocator.geocode(addr)

        # the geocoder hands back None when it can't find a match --
        # in that case, leave the coordinates blank instead of crashing
        if location is None:
            lat, lng = '', ''
            print(addr, 'NOT FOUND')
        else:
            lat, lng = location.latitude, location.longitude
            # print the address and results
            print(addr, lat, lng)

        # write out a new dict: the original row plus the coordinates
        writer.writerow({**row, 'lat': lat, 'lng': lng})

        # Before we do all of this with the next row, pause for two seconds.
        time.sleep(PAUSE_SECONDS)

# let us know when we're done
print('All done!')
In [ ]: