In [ ]:
import os
from glob import glob
import wget
import pandas as pd
import requests
from tqdm import *
In [ ]:
def make_url(year, month, kind):
    """Return the download link for the file of a given year, month and kind.

    All three arguments are strings: ``year`` as 'YYYY', ``month`` as 'MM'
    and ``kind`` as the file extension (e.g. '.asc.gz' or '.grd.gz').
    """
    root = "http://temis.nl/airpollution/no2col/data/omi/data_v2/"
    prefix = "no2_"
    # Remote layout: <root><year>/<month>/no2_<year><month><kind>
    filename = prefix + year + month + kind
    return root + year + '/' + month + '/' + filename
def make_target(year, month, kind):
    """Return the local path (inside ``Data/``) for a year/month/kind file.

    The result has the form ``Data/<year><month><kind>``; all arguments
    are strings.
    """
    return "Data/" + year + month + kind
def download_file(year, month, kind):
    """
    Download a single monthly file into the local ``Data/`` folder.

    The ``Data/`` folder is created if it does not exist yet.

    Must specify:
    year = string of YYYY
    month = string of MM (must be two digit, e.g. '01')
    kind = string of file extension type (e.g. '.asc.gz' or '.grd.gz')

    Raises IOError if the download fails for any reason; the underlying
    error is attached as the exception's cause.

    e.g.
    >>> download_file('2016', '01', '.asc.gz')
    """
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs("Data", exist_ok=True)
    url = make_url(year, month, kind)
    target = make_target(year, month, kind)
    try:
        wget.download(url, out=target)
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit still propagate, and chain the
        # original error so the real failure reason is not lost.
        raise IOError("File not found") from err
In [ ]:
# Example of downloading a single year/month file (May 2016, '.grd.gz' kind).
# Running this cell creates the Data/ folder if needed and attempts the download.
download_file('2016', '05', kind='.grd.gz')
If there is no Data/ folder in the local folder, one will be made. If the [year][month].asc.gz file doesn't exist on the server, nothing will be downloaded and an error will be raised instead. (This is so you can use a try: except: syntax to run a loop.)
To download multiple files you will need to iterate over a range of date-time objects. Each element of the list can then be used to get a month / year pair, used as inputs to the downloader.
In [ ]:
def download_batch(start, end, kind):
    """
    Download every monthly file between a start and an end date.

    A local Data folder will be created if none exists.
    All files present in temis.nl/airpollution/no2col/data/omi/data_v2/
    for the requested months will be downloaded there. Months whose file
    could not be downloaded are reported on stdout after the loop.
    Afterwards, leftover partial downloads (``Data/*.tmp``) and duplicated
    files (names containing ``(?)``) are deleted.

    (Even though dates are given to days, the time steps are monthly.)
    The range is built with pandas month-end frequency, so a month is only
    included when its last day lies within [start, end].

    start = date string 'YYYYMMDD'
    end = date string 'YYYYMMDD'
    kind = file type: '.asc.gz' or '.grd.gz'

    e.g.
    >>> download_batch(start='20041001', end='20161001', kind='.grd.gz')
    """
    dates = pd.date_range(start=start, end=end, freq='M')
    missing = []
    for date in tqdm(dates):
        month = "{0:02d}".format(date.month)
        year = str(date.year)
        try:
            download_file(year, month, kind)
        except IOError:
            # download_file signals a failed download with IOError; anything
            # else (e.g. KeyboardInterrupt, a TypeError from bad arguments)
            # is not a "missing file" and is allowed to propagate.
            missing.append((year, month))

    # Print info on missing files, if any exist
    for pair in missing:
        print("{0}: No corresponding file found".format(pair))

    # Clean up any partial files
    for badfile in glob('Data/*.tmp'):
        os.remove(badfile)

    # Clean up any duplicated files (os.remove returns None, so there is
    # nothing meaningful to print here)
    for duplicate in glob('Data/*(?)*'):
        os.remove(duplicate)
In [ ]:
# Example of a batch download: request the monthly '.grd.gz' files
# for the range 2016-01-01 to 2016-10-01.
download_batch(start='20160101', end='20161001', kind='.grd.gz')