In [1]:
from ftplib import FTP
import os.path
In [2]:
# Open a session on the EPA FTP server with a 60-second socket timeout.
# login() is called without credentials.
ftp = FTP('ftp.epa.gov', timeout=60)
ftp.login()

# Walk down to the monthly hourly-emissions directory one level at a time.
for segment in ('dmdnload', 'emissions', 'hourly', 'monthly'):
    ftp.cwd(segment)

# Collect the names listed in this directory (presumably one sub-directory
# per year -- later cells cwd into str(year) from here).
years = []
ftp.retrlines('NLST', years.append)
print(years)

# Remember this location so later cells can return to it with ftp.cwd().
parent_directory = ftp.pwd()
In [3]:
def zip_fetch(ftp, entry):
    """Download one remote file into the local ``data/`` directory.

    The file is fetched in binary mode from the FTP session's current
    working directory and written to ``data/<entry>``.

    Args:
        ftp: Object exposing ``retrbinary(cmd, callback)`` (an
            ``ftplib.FTP`` session in this notebook).
        entry: Name of the remote file; also used as the local file name.

    Returns:
        None.

    Raises:
        Whatever ``open`` or ``ftp.retrbinary`` raises.  On any failure the
        partially written local file is removed before the exception is
        re-raised, so an interrupted transfer never leaves a truncated
        archive behind.
    """
    print(entry)
    target = 'data/' + entry
    try:
        # The context manager closes the handle even if the transfer fails.
        with open(target, 'wb') as outfile:
            ftp.retrbinary('RETR ' + entry, outfile.write)
    except BaseException:
        # Covers KeyboardInterrupt as well: a half-written file must not
        # survive, whatever the reason the transfer stopped.
        if os.path.exists(target):
            os.remove(target)
        raise
In [ ]:
# Resumable bulk download: fetch every file listed under each year directory
# (2001 through 2016) into data/, skipping files that a previous run already
# recorded in successes.txt.
ftp.cwd(parent_directory)
if not os.path.exists('data'):
    os.makedirs('data')

# Names fetched by earlier runs, one per line.  The log does not exist before
# the first run, so a missing file simply means nothing has been fetched yet.
visited = []
if os.path.exists("successes.txt"):
    with open("successes.txt", "r") as log:
        for line in log:
            # Strip only the newline; slicing off the last character would
            # corrupt a final line that lacks a trailing newline.
            visited.append(line.rstrip('\n'))
# Set copy for O(1) membership tests; `visited` itself stays a list.
already_fetched = set(visited)

for year in range(2001, 2017):
    ftp.cwd(str(year))
    files = []
    ftp.retrlines('NLST', files.append)
    for entry in files:
        if entry in already_fetched:
            continue
        try:
            zip_fetch(ftp, entry)
        except Exception as exc:
            # Best effort: report the failure and move on to the next file.
            # The entry is not logged, so a later run will retry it.
            # KeyboardInterrupt is deliberately not caught here.
            print('failed to fetch %s: %r' % (entry, exc))
        else:
            # Record each success immediately so an interrupted run can
            # resume where it stopped.
            with open("successes.txt", 'a') as outfile:
                outfile.write(entry + '\n')
    ftp.cwd('..')
In [ ]:
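# NOTE: disabled experiment, kept for reference.  It starts one Process per
# file and joins it immediately, so downloads would still run one at a time.
# `Process` is not imported in the cells shown here (presumably
# multiprocessing.Process -- confirm before re-enabling).
# ftp.cwd(parent_directory)
# if not os.path.exists('data'):
#     os.makedirs('data')
# for year in years:
#     ftp.cwd(year)
#     files = []
#     ftp.retrlines('NLST', files.append)
#     for entry in files:
#         p = Process(target=zip_fetch, args=(ftp, entry))
#         # zip_fetch(ftp, entry)
#         p.start()
#         p.join()
#     ftp.cwd('..')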
In [ ]:
from multiprocessing.dummy import Pool as ThreadPool
In [ ]: