Weather Data Collection

This notebook uses the Weather Underground (wunderground) API.

Usage

Use the get_weather_data(api,city,state,start_date,end_date) function to retrieve the weather observations for every day in a date range (pass None for end_date to query a single day).


In [1]:
import json
import urllib2
import numpy as np
import pandas as pd
import collections 
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline


/usr/local/lib/python2.7/dist-packages/pandas/io/excel.py:626: UserWarning: Installed openpyxl is not supported at this time. Use >=1.6.1 and <2.0.0.
  .format(openpyxl_compat.start_ver, openpyxl_compat.stop_ver))

In [2]:
from datetime import datetime, timedelta, date
#this function returns a json object
#Pass in the city, state, and desired date as strings, the date format is YYYYMMDD
def get_weather_data(api,city,state,start_date,end_date):  
    if(start_date is not None and end_date is not None):

        #format our date structure to pass to our http request
        date_format = "%Y%m%d"
        a = datetime.strptime(start_date, date_format)
        b = datetime.strptime(end_date, date_format)
        #get number of days from start_date to end_date
        delta = b - a
        num_days = delta.days
        objects_list = []
    
        #create new variable that will create query's for the api
        for year in range(0,num_days + 1):
            #count from start_date to end_date
            dates = a + timedelta(days=year)
            #format our str with our date_format
            formatted_dates = datetime.strftime(dates, date_format)
            #create query which will iterate through desired weather period
            query = 'http://api.wunderground.com/api/'+ api +'/history_'+formatted_dates+'/q/'+state+'/'+city+'.json'
            #iterate through the number of days and query the api. dump json results every time 
            f = urllib2.urlopen(query)
            #read query as a json string
            json_string = f.read()
            #parse/load json string
            parsed_json = json.loads(json_string)
            #Iterate through each json object and append it to an ordered dictionary
            for i in parsed_json['history']['observations']:        
                d = collections.OrderedDict()
                d['date'] = i['date']['mon'] + '/' + i['date']['mday'] + '/' + i['date']['year']
                d['time'] = i['date']['pretty'][0:8]
                d['temp'] = i['tempi']
                d['conds'] = i['conds']
                d['wdire'] = i['wdire']
                d['wdird'] = i['wdird']
                d['hail'] = i['hail']
                d['thunder'] = i['thunder']
                d['pressurei'] = i['pressurei']
                d['snow'] = i['snow']
                d['pressurem'] = i['pressurem']
                d['fog'] = i['fog']
                d['tornado'] = i['tornado']
                d['hum'] = i['hum']
                d['tempi'] = i['tempi']
                d['tempm'] = i['tempm']
                d['dewptm'] = i['dewptm']
                d['dewpti'] = i['dewpti']
                d['rain'] = i['rain']
                d['visim'] = i['visi']
                d['wspdi'] = i['wspdi']
                d['wspdm'] = i['wspdm']
                objects_list.append(d)
                #dump the dictionary into a json object
                j = json.dumps(objects_list)
        #append our json object to a list for every day and return its data
    #    print j
        return j
    #If we just need the data for ONE day (pass None for end_date):
    if(end_date is None):
        f = urllib2.urlopen('http://api.wunderground.com/api/API_KEY/history_'+start_date+'/q/'+state+'/'+city+'.json')
        json_string = f.read()
        parsed_json = json.loads(json_string)
    
        objects_list = []
        for i in parsed_json['history']['observations']:        
            d = collections.OrderedDict()
            d['date'] = i['date']['mon'] + '/' + i['date']['mday'] + '/' + i['date']['year']
            d['time'] = i['date']['pretty'][0:8]
            d['temp'] = i['tempi']
            d['conds'] = i['conds']
            d['wdire'] = i['wdire']
            d['wdird'] = i['wdird']
            d['hail'] = i['hail']
            d['thunder'] = i['thunder']
            d['pressurei'] = i['pressurei']
            d['snow'] = i['snow']
            d['pressurem'] = i['pressurem']
            d['fog'] = i['fog']
            d['tornado'] = i['tornado']
            d['hum'] = i['hum']
            d['tempi'] = i['tempi']
            d['tempm'] = i['tempm']
            d['dewptm'] = i['dewptm']
            d['dewpti'] = i['dewpti']
            d['rain'] = i['rain']
            d['visim'] = i['visi']
            d['wspdi'] = i['wspdi']
            d['wspdm'] = i['wspdm']
            objects_list.append(d)
        
        j = json.dumps(objects_list)
        return j

Usage example:


In [8]:
#Pass in the city, state, and desired start and end date as strings. The date format is YYYYMMDD.
#get_weather_data(city,state,start_date, end_date)
#Use None for end_date if you just need to query one day. 
query_results = get_weather_data('API_KEY','78739','TX', '20110101', '20110110')
#create dataframe with our query results
df = pd.read_json(query_results)

In [9]:
# Display the DataFrame built from the query results (last expression of the cell).
df


Out[9]:
conds date dewpti dewptm fog hail hum pressurei pressurem rain ... tempi tempm thunder time tornado visim wdird wdire wspdi wspdm
0 Overcast 2011-01-01 12.9 -10.6 0 0 23 29.96 1014.4 0 ... 50.0 10.0 0 12:51 AM 0 10.0 10 North 5.8 9.3
1 Overcast 2011-01-01 12.9 -10.6 0 0 23 30.03 1016.7 0 ... 50.0 10.0 0 1:51 AM 0 10.0 320 NW 4.6 7.4
2 Overcast 2011-01-01 14.0 -10.0 0 0 24 30.02 1016.4 0 ... 50.0 10.0 0 2:51 AM 0 10.0 360 North 6.9 11.1
3 Overcast 2011-01-01 14.0 -10.0 0 0 24 30.04 1017.1 0 ... 50.0 10.0 0 3:51 AM 0 10.0 350 North 3.5 5.6
4 Overcast 2011-01-01 14.0 -10.0 0 0 24 30.05 1017.5 0 ... 50.0 10.0 0 4:51 AM 0 10.0 350 North 4.6 7.4
5 Overcast 2011-01-01 14.0 -10.0 0 0 25 30.07 1018.2 0 ... 48.9 9.4 0 5:51 AM 0 10.0 0 Variable 3.5 5.6
6 Overcast 2011-01-01 16.0 -8.9 0 0 28 30.11 1019.6 0 ... 48.0 8.9 0 6:51 AM 0 10.0 360 North 3.5 5.6
7 Overcast 2011-01-01 16.0 -8.9 0 0 28 30.14 1020.4 0 ... 48.0 8.9 0 7:51 AM 0 10.0 360 North 4.6 7.4
8 Clear 2011-01-01 16.0 -8.9 0 0 26 30.18 1021.8 0 ... 50.0 10.0 0 8:51 AM 0 10.0 10 North 6.9 11.1
9 Clear 2011-01-01 15.1 -9.4 0 0 24 30.20 1022.7 0 ... 51.1 10.6 0 9:51 AM 0 10.0 20 NNE 8.1 13.0
10 Clear 2011-01-01 17.1 -8.3 0 0 23 30.22 1023.3 0 ... 55.0 12.8 0 10:51 AM 0 10.0 0 Variable 6.9 11.1
11 Clear 2011-01-01 19.9 -6.7 0 0 25 30.22 1023.3 0 ... 55.9 13.3 0 11:51 AM 0 10.0 0 Variable 5.8 9.3
12 Clear 2011-01-01 23.0 -5.0 0 0 27 30.19 1022.4 0 ... 57.0 13.9 0 12:51 PM 0 10.0 0 Variable 6.9 11.1
13 Clear 2011-01-01 25.0 -3.9 0 0 29 30.18 1021.9 0 ... 57.0 13.9 0 1:51 PM 0 10.0 10 North 8.1 13.0
14 Clear 2011-01-01 26.1 -3.3 0 0 30 30.18 1021.9 0 ... 57.0 13.9 0 2:51 PM 0 10.0 340 NNW 6.9 11.1
15 Clear 2011-01-01 26.1 -3.3 0 0 32 30.19 1022.1 0 ... 55.9 13.3 0 3:51 PM 0 10.0 0 Variable 5.8 9.3
16 Clear 2011-01-01 25.0 -3.9 0 0 33 30.19 1022.4 0 ... 54.0 12.2 0 4:51 PM 0 10.0 10 North 4.6 7.4
17 Clear 2011-01-01 23.0 -5.0 0 0 35 30.22 1023.2 0 ... 50.0 10.0 0 5:51 PM 0 10.0 10 North 4.6 7.4
18 Clear 2011-01-01 23.0 -5.0 0 0 37 30.25 1024.1 0 ... 48.0 8.9 0 6:51 PM 0 10.0 0 Variable 3.5 5.6
19 Clear 2011-01-01 23.0 -5.0 0 0 40 30.28 1025.2 0 ... 46.0 7.8 0 7:51 PM 0 10.0 30 NNE 6.9 11.1
20 Clear 2011-01-01 21.9 -5.6 0 0 43 30.29 1025.7 0 ... 43.0 6.1 0 8:51 PM 0 10.0 0 Variable 3.5 5.6
21 Clear 2011-01-01 21.9 -5.6 0 0 47 30.31 1026.4 0 ... 41.0 5.0 0 9:51 PM 0 10.0 0 Variable 4.6 7.4
22 Clear 2011-01-01 19.0 -7.2 0 0 43 30.32 1026.8 0 ... 39.9 4.4 0 10:51 PM 0 10.0 0 North 0.0 0.0
23 Clear 2011-01-01 18.0 -7.8 0 0 43 30.33 1027.0 0 ... 39.0 3.9 0 11:51 PM 0 10.0 0 North 0.0 0.0
24 Clear 2011-01-02 18.0 -7.8 0 0 46 30.34 1027.3 0 ... 37.0 2.8 0 12:51 AM 0 10.0 0 North 0.0 0.0
25 Clear 2011-01-02 19.0 -7.2 0 0 52 30.35 1027.7 0 ... 35.1 1.7 0 1:51 AM 0 10.0 0 North 0.0 0.0
26 Clear 2011-01-02 18.0 -7.8 0 0 52 30.36 1028.0 0 ... 34.0 1.1 0 2:51 AM 0 10.0 0 North 0.0 0.0
27 Clear 2011-01-02 19.0 -7.2 0 0 56 30.37 1028.3 0 ... 33.1 0.6 0 3:51 AM 0 10.0 0 North 0.0 0.0
28 Clear 2011-01-02 18.0 -7.8 0 0 54 30.37 1028.2 0 ... 33.1 0.6 0 4:51 AM 0 10.0 360 North 3.5 5.6
29 Clear 2011-01-02 18.0 -7.8 0 0 54 30.38 1028.8 0 ... 33.1 0.6 0 5:51 AM 0 10.0 0 North 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
296 Overcast 2011-01-10 35.6 2.0 0 0 93 29.99 1015.5 0 ... 37.4 3.0 0 5:58 AM 0 2.0 0 North 0.0 0.0
297 Overcast 2011-01-10 35.1 1.7 0 0 93 30.03 1016.8 0 ... 37.0 2.8 0 6:51 AM 0 1.8 0 North 0.0 0.0
298 Overcast 2011-01-10 35.6 2.0 0 0 93 30.02 1016.5 0 ... 37.4 3.0 0 7:17 AM 0 2.0 0 North 0.0 0.0
299 Overcast 2011-01-10 35.1 1.7 0 0 93 30.06 1018.0 0 ... 37.0 2.8 0 7:51 AM 0 2.0 0 North 0.0 0.0
300 Overcast 2011-01-10 35.6 2.0 0 0 93 30.05 1017.5 0 ... 37.4 3.0 0 8:05 AM 0 1.2 0 Variable 3.5 5.6
301 Overcast 2011-01-10 36.0 2.2 0 0 96 30.11 1019.4 0 ... 37.0 2.8 0 8:51 AM 0 2.0 0 North 0.0 0.0
302 Overcast 2011-01-10 35.6 2.0 0 0 93 30.09 1018.8 0 ... 37.4 3.0 0 9:09 AM 0 1.8 0 North 0.0 0.0
303 Overcast 2011-01-10 35.6 2.0 0 0 93 30.10 1019.2 0 ... 37.4 3.0 0 9:20 AM 0 2.0 0 North 0.0 0.0
304 Overcast 2011-01-10 36.0 2.2 0 0 93 30.14 1020.6 0 ... 37.9 3.3 0 9:51 AM 0 2.0 0 North 0.0 0.0
305 Overcast 2011-01-10 35.6 2.0 0 0 93 30.12 1019.9 0 ... 37.4 3.0 0 10:14 AM 0 1.5 120 ESE 4.6 7.4
306 Overcast 2011-01-10 36.0 2.2 0 0 93 30.14 1020.5 0 ... 37.9 3.3 0 10:51 AM 0 1.5 80 East 3.5 5.6
307 Overcast 2011-01-10 36.0 2.2 0 0 93 30.11 1019.6 0 ... 37.9 3.3 0 11:51 AM 0 2.0 70 ENE 4.6 7.4
308 Overcast 2011-01-10 37.4 3.0 0 0 93 30.08 1018.5 0 ... 39.2 4.0 0 12:13 PM 0 1.8 80 East 4.6 7.4
309 Overcast 2011-01-10 37.4 3.0 0 0 93 30.07 1018.2 0 ... 39.2 4.0 0 12:38 PM 0 2.5 0 Variable 3.5 5.6
310 Overcast 2011-01-10 37.0 2.8 0 0 93 30.09 1019.0 0 ... 39.0 3.9 0 12:51 PM 0 1.8 0 North 0.0 0.0
311 Overcast 2011-01-10 37.4 3.0 0 0 93 30.06 1017.8 0 ... 39.2 4.0 0 1:32 PM 0 2.5 0 North 0.0 0.0
312 Overcast 2011-01-10 37.0 2.8 0 0 93 30.08 1018.5 0 ... 39.0 3.9 0 1:51 PM 0 3.0 60 ENE 3.5 5.6
313 Overcast 2011-01-10 36.0 2.2 0 0 89 30.09 1018.9 0 ... 39.0 3.9 0 2:51 PM 0 7.0 0 Variable 3.5 5.6
314 Overcast 2011-01-10 36.0 2.2 0 0 89 30.12 1019.7 0 ... 39.0 3.9 0 3:51 PM 0 6.0 0 Variable 4.6 7.4
315 Overcast 2011-01-10 36.0 2.2 0 0 93 30.14 1020.7 0 ... 37.9 3.3 0 4:51 PM 0 5.0 0 Variable 3.5 5.6
316 Overcast 2011-01-10 35.1 1.7 0 0 89 30.18 1022.0 0 ... 37.9 3.3 0 5:51 PM 0 7.0 20 NNE 8.1 13.0
317 Overcast 2011-01-10 35.6 2.0 0 0 93 30.19 1022.2 0 ... 37.4 3.0 0 6:43 PM 0 2.5 30 NNE 4.6 7.4
318 Overcast 2011-01-10 35.1 1.7 0 0 93 30.22 1023.4 0 ... 37.0 2.8 0 6:51 PM 0 4.0 0 Variable 4.6 7.4
319 Overcast 2011-01-10 35.1 1.7 0 0 89 30.25 1024.2 0 ... 37.9 3.3 0 7:51 PM 0 7.0 10 North 4.6 7.4
320 Overcast 2011-01-10 35.1 1.7 0 0 86 30.28 1025.2 0 ... 39.0 3.9 0 8:51 PM 0 8.0 10 North 5.8 9.3
321 Overcast 2011-01-10 33.8 1.0 0 0 81 30.26 1024.6 0 ... 39.2 4.0 0 9:02 PM 0 10.0 10 North 6.9 11.1
322 Overcast 2011-01-10 35.1 1.7 0 0 86 30.32 1026.6 0 ... 39.0 3.9 0 9:51 PM 0 10.0 360 North 9.2 14.8
323 Overcast 2011-01-10 34.0 1.1 0 0 82 30.35 1027.6 0 ... 39.0 3.9 0 10:51 PM 0 10.0 360 North 9.2 14.8
324 Overcast 2011-01-10 33.8 1.0 0 0 81 30.33 1027.0 0 ... 39.2 4.0 0 11:22 PM 0 10.0 10 North 10.4 16.7
325 Overcast 2011-01-10 33.1 0.6 0 0 79 30.37 1028.4 0 ... 39.0 3.9 0 11:51 PM 0 10.0 10 North 4.6 7.4

326 rows × 22 columns


In [23]:
#save file:
# Serialise `t` as JSON into January.json in the current working directory.
# NOTE(review): `t` is not assigned in any visible cell of this notebook -- confirm
# which object was meant to be saved before relying on this cell.
with open('January.json', 'w') as outfile:
  json.dump(t, outfile)

Monthly temperatures

January


In [180]:
#Query
# Fetch every January 2013 observation for Austin, TX.
# 'API_KEY' is a placeholder: substitute your own key at run time and never
# commit a real key to the notebook.
january = get_weather_data(
    api='API_KEY',
    city='Austin',
    state='TX',
    start_date='20130101',
    end_date='20130131',
)

In [228]:
# Parse the JSON string returned by get_weather_data into a DataFrame.
jan = pd.read_json(january)
#Find outlier: readings below 1 are treated as bad data.
is_outlier = jan['temp'] < 1
outlier = jan.loc[is_outlier, 'temp']
print('outlier:  %s' % (outlier,))
# Drop whatever was flagged (nothing, if the month has no bad readings).
jan = jan[~is_outlier]

# One x tick per day, spaced evenly over the row positions. This is only an
# approximation of the day boundaries, since days can have different numbers
# of observations.
num_days = 31
step = max(1, len(jan) // num_days)
# Keep at most one tick per day so the ticks and labels always match in number.
ticks = list(range(0, len(jan), step))[:num_days]

fig, ax = plt.subplots(figsize=(20, 3))
ax.set_xticks(ticks)
ax.set_xticklabels(list(range(1, len(ticks) + 1)))
#Plot:
ax.plot(jan['temp'])
fig.savefig('January_Weather.pdf')


outlier:  Series([], name: temp, dtype: float64)
<matplotlib.figure.Figure at 0x10ddbb390>

April


In [183]:
#Query
# Fetch every April 2013 observation for Austin, TX.
april = get_weather_data(
    api='API_KEY',
    city='Austin',
    state='TX',
    start_date='20130401',
    end_date='20130430',
)

In [227]:
# Parse the JSON string returned by get_weather_data into a DataFrame.
apr = pd.read_json(april)
#Find outlier: readings below 1 are treated as bad data (this month contains
# -9999 values).
is_outlier = apr['temp'] < 1
outlier = apr.loc[is_outlier, 'temp']
print('outlier:  %s' % (outlier,))
#Get rid of outlier!!!: drop by mask rather than by hard-coded row position, so
# the cell stays correct if the query returns a different number of rows.
apr = apr[~is_outlier]

# One x tick per day, spaced evenly over the row positions. This is only an
# approximation of the day boundaries, since days can have different numbers
# of observations.
num_days = 30
step = max(1, len(apr) // num_days)
# Keep at most one tick per day so the ticks and labels always match in number.
ticks = list(range(0, len(apr), step))[:num_days]

fig, ax = plt.subplots(figsize=(20, 3))
ax.set_xticks(ticks)
ax.set_xticklabels(list(range(1, len(ticks) + 1)))
#Plot:
ax.plot(apr['temp'])
fig.savefig('April_Weather.pdf')


outlier:  49   -9999
50   -9999
Name: temp, dtype: float64
<matplotlib.figure.Figure at 0x113209310>

July


In [206]:
#Query
# Fetch every July 2013 observation for Austin, TX.
july = get_weather_data(
    api='API_KEY',
    city='Austin',
    state='TX',
    start_date='20130701',
    end_date='20130731',
)

In [234]:
# Parse the JSON string returned by get_weather_data into a DataFrame.
jly = pd.read_json(july)
#Find outlier: readings below 1 are treated as bad data (this month contains
# a -9999 value).
is_outlier = jly['temp'] < 1
outlier = jly.loc[is_outlier, 'temp']
print('outlier:  %s' % (outlier,))
#Get rid of outlier!!!: drop by mask rather than by hard-coded row position, so
# the cell stays correct if the query returns a different number of rows.
jly = jly[~is_outlier]

# One x tick per day, spaced evenly over the row positions. This is only an
# approximation of the day boundaries, since days can have different numbers
# of observations.
num_days = 31
step = max(1, len(jly) // num_days)
# Keep at most one tick per day so the ticks and labels always match in number.
ticks = list(range(0, len(jly), step))[:num_days]

fig, ax = plt.subplots(figsize=(20, 3))
ax.set_xticks(ticks)
ax.set_xticklabels(list(range(1, len(ticks) + 1)))
#Plot: the July frame (the cell previously plotted the April data by mistake).
ax.plot(jly['temp'])
# savefig does not create directories: month_temp_graphs/ must already exist.
fig.savefig('month_temp_graphs/July_Weather.png')


outlier:  586   -9999
Name: temp, dtype: float64
<matplotlib.figure.Figure at 0x10bd75b90>

October


In [161]:
# Fetch every October 2013 observation for Austin, TX.
october = get_weather_data(
    api='API_KEY',
    city='Austin',
    state='TX',
    start_date='20131001',
    end_date='20131031',
)

In [3]:
# Parse the JSON string returned by get_weather_data into a DataFrame.
# `october` comes from the query cell above, which must have been run in the
# current kernel session (otherwise this line raises NameError).
octo = pd.read_json(october)
#Find outlier: readings below 1 are treated as bad data.
is_outlier = octo['temp'] < 1
outlier = octo.loc[is_outlier, 'temp']
print('outlier:  %s' % (outlier,))
#Get rid of outlier!!!: drop only the rows actually flagged, instead of a
# hard-coded row position that may point at a valid reading.
octo = octo[~is_outlier]

# One x tick per day, spaced evenly over the row positions. This is only an
# approximation of the day boundaries, since days can have different numbers
# of observations.
num_days = 31
step = max(1, len(octo) // num_days)
# Keep at most one tick per day so the ticks and labels always match in number.
ticks = list(range(0, len(octo), step))[:num_days]

fig, ax = plt.subplots(figsize=(20, 3))
ax.set_xticks(ticks)
ax.set_xticklabels(list(range(1, len(ticks) + 1)))
#Plot:
ax.plot(octo['temp'])
# savefig does not create directories: month_temp_graphs/ must already exist.
fig.savefig('month_temp_graphs/October_Weather.png')


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-4ec853e66a26> in <module>()
----> 1 octo = pd.read_json(october)
      2 #Find outlier:
      3 outlier = octo['temp'].ix[octo['temp'] < 1]
      4 print 'outlier: ', outlier
      5 #Get rid of outlier!!!:

NameError: name 'october' is not defined

In [ ]: