In [ ]:
import pandas as pd
import numpy as np
import datetime as dt
import time
import random
from tqdm import tqdm
import urllib2
import requests
from geopy.geocoders import Nominatim
import json 
import ConfigParser
import os.path

Variable config

api_key

The key used to authenticate with the DarkSky API; get your own at darksky.net/dev.

latitude/longitude

The geographical coordinates Geopy derives from the specified address.

base_url

The base URL used for GET requests to the DarkSky API, containing your api_key and location information.
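
For reference, synchronization.cfg is a standard ConfigParser INI file. A minimal sketch (the api_key value is a placeholder, and the base_url shown assumes DarkSky's Time Machine endpoint prefix):

[Darksky]
api_key = 0123456789abcdef0123456789abcdef
base_url = https://api.darksky.net/forecast/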


In [ ]:
config = ConfigParser.RawConfigParser()
config.read('synchronization.cfg')
api_key = config.get('Darksky', 'api_key')

geolocator = Nominatim(user_agent='weather-notebook')  # newer geopy releases require an explicit user_agent
location = geolocator.geocode('Muntstraat 10 Leuven')  # returns None if the address cannot be resolved
latitude = location.latitude
longitude = location.longitude

base_url = config.get('Darksky', 'base_url') + api_key \
        + '/' + str(latitude) + ',' + str(longitude) + ','

URL builder

This function builds a list of URLs, one per day between start_date and end_date, that can later be iterated over to fetch a large amount of weather data. Days for which a JSON file already exists locally are skipped.


In [ ]:
def url_builder(start_date, end_date=None):
    # A default of dt.datetime.now() would be evaluated only once, at definition
    # time, so the current time is resolved inside the function instead.
    if end_date is None:
        end_date = dt.datetime.now()
    url_list = []
    delta = end_date - start_date
    for counter in range(delta.days):
        # Unix timestamp for midnight of each day in the range
        timestamp = str(int(time.mktime((start_date + dt.timedelta(days=counter)).timetuple())))
        if os.path.isfile('local_data/full_data_' + timestamp + '.json'):
            continue  # this day was already fetched and stored locally
        url_list.append(base_url + timestamp)
    return url_list

In [ ]:
url_list = url_builder(dt.datetime(2017,6,12))
len(url_list)
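
Each generated URL follows DarkSky's Time Machine format, https://api.darksky.net/forecast/[key]/[latitude],[longitude],[time]. Inspecting the first entry is a quick sanity check, assuming at least one day still needs fetching:


In [ ]:
url_list[0]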

Fahrenheit to Celsius

DarkSky returns temperatures in Fahrenheit by default, so this helper converts them to Celsius.


In [ ]:
def f_t_c(fahrenheit):
    return (fahrenheit - 32) * 5.0 / 9.0
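
A quick sanity check against two well-known reference points:


In [ ]:
print(f_t_c(32))   # freezing point of water: 0.0
print(f_t_c(212))  # boiling point of water: 100.0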

Fetch JSON data from URL

The JSON responses are stored locally, so all subsequent code can work from the local files and doesn't require any remote API calls.


In [ ]:
def fetch_and_store_json(url):
    # Keep retrying until a response arrives; assumes the local_data/ directory exists
    request = None
    while request is None:
        try:
            request = requests.get(url=url, timeout=10)
        except requests.exceptions.ReadTimeout:
            print("Read timeout, retrying")
    content = json.loads(request.content)
    timestamp = url.split(',')[2]  # the trailing Unix timestamp in the Time Machine URL
    with open('local_data/full_data_' + timestamp + '.json', 'w') as storage:
        #storage.write(json.dumps(content))  # for a BigQuery-ready JSON file
        storage.write(json.dumps(content, separators=(',', ': '), indent=5))  # for clean indentation

In [ ]:
for url in tqdm(url_list):
    fetch_and_store_json(url)
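
Once the loop finishes, any stored day can be inspected without touching the API again. A minimal sketch, assuming a file for the hypothetical timestamp 1497218400 exists and that the response contains DarkSky's usual hourly block:


In [ ]:
with open('local_data/full_data_1497218400.json') as f:  # hypothetical timestamp
    day = json.load(f)
day['hourly']['data'][0]  # first hourly observation of that day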
