In [1]:
from ftplib import FTP
import os.path

In [2]:
# Open an anonymous session on ftp.epa.gov and descend, one directory at a
# time, to the folder whose entries are listed below.
ftp = FTP('ftp.epa.gov', timeout=60)
ftp.login()
for segment in ('dmdnload', 'emissions', 'hourly', 'monthly'):
    ftp.cwd(segment)

# NLST returns the bare names in the current directory.
years = ftp.nlst()
print(years)

# Remember where the listing was taken so later cells can return here.
parent_directory = ftp.pwd()


['1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']

In [3]:
def zip_fetch(ftp, entry, dest_dir='data'):
    """Download one file from the FTP session's current directory.

    The file is retrieved in binary mode with ``RETR <entry>`` and written to
    ``<dest_dir>/<entry>``. The entry name is printed first as a progress
    indicator.

    Args:
        ftp: Object exposing ``retrbinary(cmd, callback)`` (e.g. a logged-in
            ``ftplib.FTP`` already positioned in the right directory).
        entry: Name of the remote file; also used as the local file name.
        dest_dir: Existing local directory to write into. Defaults to
            ``'data'``, the location used by the original implementation.

    Returns:
        None.

    Raises:
        Whatever ``open`` or ``ftp.retrbinary`` raises. When the transfer
        fails, the partially written local file is removed before the
        exception is re-raised, so a truncated archive is never left behind.
    """
    print(entry)
    target = os.path.join(dest_dir, entry)
    # The context manager guarantees the handle is closed on every path.
    with open(target, 'wb') as outfile:
        try:
            ftp.retrbinary('RETR ' + entry, outfile.write)
        except BaseException:
            # Includes KeyboardInterrupt: an interrupted download must not
            # leave a truncated file that looks like a finished one.
            # Close first so the removal also works on platforms that refuse
            # to delete open files; the later close by `with` is a no-op.
            outfile.close()
            os.remove(target)
            raise
    return

In [ ]:
# Bulk-download every entry listed under each year directory, skipping names
# already recorded in successes.txt so that an interrupted run can be resumed.
ftp.cwd(parent_directory)
if not os.path.exists('data'):
    os.makedirs('data')

# Names recorded by earlier runs. The log does not exist before the first
# run, so a missing file simply means nothing has been downloaded yet.
visited = set()
if os.path.exists("successes.txt"):
    with open("successes.txt", "r") as file:
        # rstrip (rather than slicing off the last character) keeps the final
        # name intact even when the file does not end with a newline.
        visited = {line.rstrip('\r\n') for line in file}

# NOTE(review): only 2001-2016 are fetched, although the server listing shown
# earlier in the notebook contains other years too -- confirm this is intended.
for year in range(2001, 2017):
    ftp.cwd(str(year))
    files = []
    ftp.retrlines('NLST', files.append)
    for entry in files:
        if entry in visited:
            continue
        try:
            zip_fetch(ftp, entry)
            # Record the success immediately so a crash later in the run does
            # not cause this file to be fetched again.
            with open("successes.txt", 'a') as outfile:
                outfile.write(entry + '\n')
        except Exception as exc:
            # Best effort: report the failure and carry on with the next
            # entry. KeyboardInterrupt is not an Exception subclass, so the
            # run can still be stopped by interrupting the kernel.
            print('FAILED ' + entry + ': ' + repr(exc))
    ftp.cwd('..')


1997al01.zip
1997al02.zip
1997al03.zip
1997al04.zip
1997al05.zip
1997al06.zip
1997al07.zip
1997al08.zip
1997al09.zip
1997al10.zip
1997al11.zip
1997al12.zip
1997ar01.zip
1997ar02.zip
1997ar03.zip
1997ar04.zip
1997ar05.zip
1997ar06.zip
1997ar07.zip
1997ar08.zip
1997ar09.zip
1997ar10.zip
1997ar11.zip
1997ar12.zip
1997az01.zip
1997az02.zip
1997az03.zip
1997az04.zip
1997az05.zip
1997az06.zip
1997az07.zip
1997az08.zip
1997az09.zip
1997az10.zip
1997az11.zip
1997az12.zip
1997ca01.zip
1997ca02.zip
1997ca03.zip
1997ca04.zip
1997ca05.zip
1997ca06.zip
1997ca07.zip
1997ca08.zip
1997ca09.zip
1997ca10.zip
1997ca11.zip
1997ca12.zip
1997co01.zip
1997co02.zip
1997co03.zip
1997co04.zip
1997co05.zip
1997co06.zip
1997co07.zip
1997co08.zip
1997co09.zip
1997co10.zip
1997co11.zip
1997co12.zip
1997ct01.zip
1997ct02.zip
1997ct03.zip
1997ct04.zip
1997ct05.zip
1997ct06.zip
1997ct07.zip
1997ct08.zip
1997ct09.zip
1997ct10.zip
1997ct11.zip
1997ct12.zip
1997dc01.zip
1997dc02.zip
1997dc03.zip
1997dc04.zip
1997dc05.zip
1997dc06.zip
1997dc07.zip
1997dc08.zip
1997dc09.zip
1997dc10.zip
1997dc11.zip
1997dc12.zip
1997de01.zip
1997de02.zip
1997de03.zip
1997de04.zip
1997de05.zip
1997de06.zip
1997de07.zip
1997de08.zip
1997de09.zip
1997de10.zip
1997de11.zip
1997de12.zip
1997fl01.zip
1997fl02.zip
1997fl03.zip
1997fl04.zip
1997fl05.zip
1997fl06.zip
1997fl07.zip
1997fl08.zip
1997fl09.zip
1997fl10.zip
1997fl11.zip
1997fl12.zip
1997ga01.zip
1997ga02.zip
1997ga03.zip
1997ga04.zip
1997ga05.zip
1997ga06.zip
1997ga07.zip
1997ga08.zip
1997ga09.zip
1997ga10.zip
1997ga11.zip
1997ga12.zip
1997ia01.zip
1997ia02.zip
1997ia03.zip
1997ia04.zip
1997ia05.zip
1997ia06.zip
1997ia07.zip
1997ia08.zip
1997ia09.zip
1997ia10.zip
1997ia11.zip
1997ia12.zip
1997id01.zip
1997id02.zip
1997id03.zip
1997id04.zip
1997id05.zip
1997id06.zip
1997id07.zip
1997id08.zip
1997id09.zip
1997id10.zip
1997id11.zip
1997id12.zip
1997il01.zip
1997il02.zip
1997il03.zip
1997il04.zip
1997il05.zip
1997il06.zip
1997il07.zip
1997il08.zip
1997il09.zip
1997il10.zip
1997il11.zip
1997il12.zip
1997in01.zip
1997in02.zip
1997in03.zip
1997in04.zip
1997in05.zip
1997in06.zip
1997in07.zip
1997in08.zip
1997in09.zip
1997in10.zip
1997in11.zip
1997in12.zip
1997ks01.zip
1997ks02.zip
1997ks03.zip
1997ks04.zip
1997ks05.zip
1997ks06.zip
1997ks07.zip
1997ks08.zip
1997ks09.zip
1997ks10.zip
1997ks11.zip
1997ks12.zip
1997ky01.zip
1997ky02.zip
1997ky03.zip
1997ky04.zip
1997ky05.zip
1997ky06.zip
1997ky07.zip
1997ky08.zip
1997ky09.zip
1997ky10.zip
1997ky11.zip
1997ky12.zip
1997la01.zip
1997la02.zip
1997la03.zip
1997la04.zip
1997la05.zip
1997la06.zip
1997la07.zip
1997la08.zip
1997la09.zip
1997la10.zip
1997la11.zip
1997la12.zip
1997ma01.zip
1997ma02.zip
1997ma03.zip
1997ma04.zip
1997ma05.zip
1997ma06.zip
1997ma07.zip
1997ma08.zip
1997ma09.zip
1997ma10.zip
1997ma11.zip
1997ma12.zip
1997md01.zip
1997md02.zip
1997md03.zip
1997md04.zip
1997md05.zip
1997md06.zip
1997md07.zip
1997md08.zip
1997md09.zip
1997md10.zip
1997md11.zip
1997md12.zip
1997me01.zip
1997me02.zip
1997me03.zip
1997me04.zip
1997me05.zip
1997me06.zip
1997me07.zip
1997me08.zip
1997me09.zip
1997me10.zip
1997me11.zip
1997me12.zip
1997mi01.zip
1997mi02.zip
1997mi03.zip
1997mi04.zip
1997mi05.zip
1997mi06.zip
1997mi07.zip
1997mi08.zip
1997mi09.zip
1997mi10.zip
1997mi11.zip
1997mi12.zip
1997mn01.zip
1997mn02.zip
1997mn03.zip
1997mn04.zip
1997mn05.zip
1997mn06.zip
1997mn07.zip
1997mn08.zip
1997mn09.zip
1997mn10.zip
1997mn11.zip
1997mn12.zip
1997mo01.zip
1997mo02.zip
1997mo03.zip
1997mo04.zip
1997mo05.zip
1997mo06.zip
1997mo07.zip
1997mo08.zip
1997mo09.zip
1997mo10.zip
1997mo11.zip
1997mo12.zip
1997ms01.zip
1997ms02.zip
1997ms03.zip
1997ms04.zip
1997ms05.zip
1997ms06.zip
1997ms07.zip
1997ms08.zip
1997ms09.zip
1997ms10.zip
1997ms11.zip
1997ms12.zip
1997mt01.zip
1997mt02.zip
1997mt03.zip
1997mt04.zip
1997mt05.zip
1997mt06.zip
1997mt07.zip
1997mt08.zip
1997mt09.zip
1997mt10.zip
1997mt11.zip
1997mt12.zip
1997nc01.zip
1997nc02.zip
1997nc03.zip
1997nc04.zip
1997nc05.zip
1997nc06.zip
1997nc07.zip
1997nc08.zip
1997nc09.zip
1997nc10.zip
1997nc11.zip

In [ ]:
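# Disabled experiment: one Process per file. `Process` is not imported in the
# cells shown, and calling join() right after start() still runs the downloads
# one at a time.
# ftp.cwd(parent_directory)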
# if not os.path.exists('data'):
#     os.makedirs('data')
# for year in years:
#     ftp.cwd(year)
#     files = []
#     ftp.retrlines('NLST', files.append)
#     for entry in files:
#         p = Process(target=zip_fetch, args=(ftp, entry))
# #         zip_fetch(ftp, entry)
#         p.start()
#         p.join()
#     ftp.cwd('..')

In [ ]:
from multiprocessing.dummy import Pool as ThreadPool

In [ ]: