# In[1]:
from ftplib import FTP
import os.path
# In[2]:
# Open a session on the EPA FTP server (login() with no arguments uses the
# library's default credentials) and walk down, one level at a time, to
# dmdnload/emissions/hourly/monthly.
ftp = FTP('ftp.epa.gov', timeout=60)
ftp.login()
for segment in ('dmdnload', 'emissions', 'hourly', 'monthly'):
    ftp.cwd(segment)

# List the entry names in that directory; later cells descend into
# sub-directories named after a year, so these are presumably the years
# available on the server.
years = ftp.nlst()
print(years)

# Remember the server-side path of this directory so later cells can return
# to it regardless of where the session has wandered in the meantime.
parent_directory = ftp.pwd()
# In[3]:
def zip_fetch(ftp, entry):
    """Download one file from the FTP server's current directory into data/.

    Args:
        ftp: Connected FTP session (anything exposing ``retrbinary``) whose
            working directory contains ``entry``.
        entry: Name of the remote file; also used as the local file name
            under ``data/``.

    Raises:
        Whatever ``open`` or ``ftp.retrbinary`` raises. On failure the
        partially written local file is removed before the error propagates,
        so ``data/`` never holds a truncated archive.
    """
    print(entry)
    target = 'data/' + entry
    try:
        # The context manager closes the file even when the transfer fails;
        # the previous open()/close() pair leaked the handle in that case.
        with open(target, 'wb') as outfile:
            ftp.retrbinary('RETR ' + entry, outfile.write)
    except BaseException:
        # Also covers Ctrl-C in the middle of a transfer. The error is always
        # re-raised, so nothing is swallowed here.
        if os.path.exists(target):
            os.remove(target)
        raise
# Lower-case two-letter codes of the states this notebook downloads data for.
states = [
    'wa', 'or', 'ca', 'id', 'nv', 'ut',
    'az', 'nm', 'co', 'wy', 'mt', 'tx',
]


def inWest(string):
    """Return True when characters 4-5 (zero-based) of ``string`` are in ``states``.

    The comparison is case-sensitive, and a string shorter than six
    characters yields a shorter (or empty) slice that simply does not match.
    Presumably the names look like ``YYYYssMM.zip`` with ``ss`` the state
    code -- confirm against the server listing.
    """
    state_code = string[4:6]
    return state_code in states
# In[5]:
# Resume-aware download: fetch every not-yet-downloaded file that inWest()
# accepts, for each year directory from 2001 through 2017.
ftp.cwd(parent_directory)

# exist_ok avoids the check-then-create race of exists() + makedirs().
os.makedirs('data', exist_ok=True)

# successes.txt holds one already-downloaded file name per line. On a first
# run it does not exist yet, which just means nothing has been fetched.
visited = []
try:
    with open("successes.txt", "r") as file:
        for line in file:
            # rstrip instead of line[:-1]: a final line without a trailing
            # newline must not lose its last character.
            name = line.rstrip('\n')
            if name:
                visited.append(name)
except FileNotFoundError:
    pass

for year in range(2001, 2018):
    ftp.cwd(str(year))
    files = []
    ftp.retrlines('NLST', files.append)
    for entry in files:
        if entry in visited or not inWest(entry):
            continue
        try:
            zip_fetch(ftp, entry)
            # Record the success straight away so an interrupted run can
            # pick up where it left off.
            with open("successes.txt", 'a') as outfile:
                outfile.write(entry + '\n')
        except Exception as err:
            # Best effort: report the failure and carry on with the next
            # file. Unlike a bare except, this lets Ctrl-C stop the run.
            print('failed to fetch', entry, '-', repr(err))
    ftp.cwd('..')
# In[ ]:
# In[ ]:
# In[ ]: