In [1]:
import sqlite3
import pandas
con = sqlite3.connect('smaller.db')
max_date = pandas.read_sql('select max(op_date) from data', con)

In [33]:
for i, row in max_date.iterrows():
    date = str(row[0])
max_month = date[0:2]
max_year = date[-4:]
print("Max date found: " + max_month + " " + max_year)


Max date found: 12 2016

In [38]:
from ftplib import FTP
import os.path

ftp = FTP('ftp.epa.gov', timeout=60)
ftp.login()
ftp.cwd('dmdnload')
ftp.cwd('emissions')
ftp.cwd('hourly')
ftp.cwd('monthly')
years = []
ftp.retrlines('NLST', years.append)
print(years)
parent_directory = ftp.pwd()

def zip_fetch(ftp, entry):
    print(entry)
    outfile = open('data/' + entry, 'wb')
    ftp.retrbinary('RETR ' + entry, outfile.write)
    outfile.close()
    return
states = ['wa', 'or', 'ca', 'id', 'nv', 'ut', 'az', 'nm', 'co', 'wy', 'mt', 'tx']
def inWest(string):
    return string[4:6] in states

if not os.path.exists('data'):
    os.makedirs('data')

for year in years:
    if (int(year) < int(max_year)): continue
    ftp.cwd(str(year))
    files = []
    ftp.retrlines('NLST', files.append)
    for entry in files:
        if (int(year) == int(max_year) and int(entry[6:8]) <= int(max_month)) or not inWest(entry):
            continue
        try:
            zip_fetch(ftp, entry)
            with open("successes.txt", 'a+') as outfile:
                outfile.writelines(entry + '\n')
        except:
            pass            
    ftp.cwd('..')


['1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']
2017az01.zip
2017az01.zip
2017az02.zip
2017az02.zip
2017az03.zip
2017az03.zip
2017ca01.zip
2017ca01.zip
2017ca02.zip
2017ca02.zip
2017ca03.zip
2017ca03.zip
2017co01.zip
2017co01.zip
2017co02.zip
2017co02.zip
2017co03.zip
2017co03.zip
2017id01.zip
2017id01.zip
2017id02.zip
2017id02.zip
2017id03.zip
2017id03.zip
2017mt01.zip
2017mt01.zip
2017mt02.zip
2017mt02.zip
2017mt03.zip
2017mt03.zip
2017nm01.zip
2017nm01.zip
2017nm02.zip
2017nm02.zip
2017nm03.zip
2017nm03.zip
2017nv01.zip
2017nv01.zip
2017nv02.zip
2017nv02.zip
2017nv03.zip
2017nv03.zip
2017or01.zip
2017or01.zip
2017or02.zip
2017or02.zip
2017or03.zip
2017or03.zip
2017tx01.zip
2017tx01.zip
2017tx02.zip
2017tx02.zip
2017tx03.zip
2017tx03.zip
2017ut01.zip
2017ut01.zip
2017ut02.zip
2017ut02.zip
2017ut03.zip
2017ut03.zip
2017wa01.zip
2017wa01.zip
2017wa02.zip
2017wa02.zip
2017wa03.zip
2017wa03.zip
2017wy01.zip
2017wy01.zip
2017wy02.zip
2017wy02.zip
2017wy03.zip
2017wy03.zip

In [36]:
print int(entry[6:8])


10

In [31]:
year = '2016'
max_month = '01'
entry = '1995al02.zip'

In [32]:
print (int(year) == int(max_year) and int(entry[-6:-4]) <= int(max_month))


False

In [39]:
for year in years:
    if (int(year) < int(max_year)):
        years.remove(year)

In [40]:
years


Out[40]:
['1996',
 '1998',
 '2000',
 '2002',
 '2004',
 '2006',
 '2008',
 '2010',
 '2012',
 '2014',
 '2016',
 '2017']

In [ ]: