ftplib and zipfile packages

This notebook demonstrates how the concepts learned in 03-Fetching-files-with-ftplib.ipynb can be incorporated into a loop to download files for multiple states. The states to process are set in the first code box, and the second code box specifies which StreamCat files to download.
In [ ]:
#State abbreviation(s) to process; list more than one to download several states
states = ["RI"]
In [ ]:
#File name prefixes of the StreamCat datasets to download
filesToGet = (
    "ForestLossByYear0013_",
    "ForestLossByYear0013RipBuf100_",
    "NRSA_PredictedBioCondition_",
)
In [ ]:
#import libraries
import sys, os
import ftplib
import zipfile
In [ ]:
#Host name of the ftp server and the remote directory to list and download from
ftpURL = "newftp.epa.gov"
ftpDir = "/EPADataCommons/ORD/NHDPlusLandscapeAttributes/StreamCat/States/"
In [ ]:
#Name of the base output folder; make it only when it is not already there
outFolder = "StreamCat"
if not os.path.exists(outFolder):
    os.mkdir(outFolder)
In [ ]:
#Create one subfolder per state inside the output folder, unless it already exists
for state in states:
    stateFolder = os.sep.join((outFolder, state))
    if os.path.exists(stateFolder):
        continue
    os.mkdir(stateFolder)
In [ ]:
#Connect to the ftp server, then log in as the anonymous user
ftp = ftplib.FTP(host=ftpURL)
ftp.login(user="anonymous", passwd="user@duke.edu")
In [ ]:
#Make the remote data folder the current working directory on the server
ftp.cwd(dirname=ftpDir)
In [ ]:
#Get a list of files on the ftp server
# `files` starts empty so it is still defined when the server reports no files
files = []
try:
    files = ftp.nlst()
except ftplib.error_perm as resp:
    #A "550 No files found" reply is treated as an empty directory;
    # any other permanent error is re-raised unchanged
    if str(resp) == "550 No files found":
        print("No files in this directory")
    else:
        raise
This is where the bulk of the work happens. We loop through each state the user specifies and, for each state, loop through all the files in the ftp directory to see whether the filename contains the state abbreviation. If it does, we then check whether the file matches any of the filenames in the filesToGet list; if so, we download the file to the local machine and unzip it.
In [ ]:
#Loop through the states list
for state in states:
    #Loop through the file list returned from the server
    for f in files:
        #Proceed only if the state string appears in the filename
        if state not in f:
            continue
        #Proceed only if the file is in the list of files we want; the last
        # 6 characters are dropped before comparing (e.g. "RI.zip")
        if str(f)[:-6] not in filesToGet:
            continue
        #Create the output zip filename by concatenating path elements
        outFN = os.path.join(outFolder, state, f)
        #Skip if the file has already been downloaded
        if os.path.exists(outFN):
            print("{} exists; skipping".format(outFN))
            continue
        #Proceed to download and unzip the file...
        print("downloading {}".format(f))
        #...write the bytes received from the server to a single binary
        #   file handle that is closed as soon as the transfer ends
        downloaded = False
        try:
            with open(outFN, 'wb') as outFileObj:
                ftp.retrbinary("RETR " + f, outFileObj.write)
            downloaded = True
        finally:
            #...remove a partial file after a failed transfer; otherwise the
            #   "exists; skipping" check above would treat it as complete
            if not downloaded and os.path.exists(outFN):
                os.remove(outFN)
        #...unzip the archive into the same folder as the zip file
        if zipfile.is_zipfile(outFN):
            with zipfile.ZipFile(outFN) as zipObj:
                zipObj.extractall(os.path.dirname(outFN))
In [ ]:
#Finished with the server: close the ftp connection
ftp.close()