In [1]:
import json
import urllib2
import numpy as np
import pandas as pd
import collections
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
In [2]:
from datetime import datetime, timedelta, date
#this function returns a json object
#Pass in the city, state, and desired date as strings, the date format is YYYYMMDD
def _observation_to_dict(i):
    """Flatten one raw wunderground observation record into an OrderedDict.

    Keys mirror the original notebook's column set.  Note (preserved for
    backward compatibility with the downstream DataFrame code):
      * both 'temp' and 'tempi' carry the imperial temperature reading;
      * 'visim' is filled from the raw 'visi' field.
    """
    d = collections.OrderedDict()
    d['date'] = i['date']['mon'] + '/' + i['date']['mday'] + '/' + i['date']['year']
    # 'pretty' is a human-readable timestamp; the first 8 chars are the clock time.
    d['time'] = i['date']['pretty'][0:8]
    d['temp'] = i['tempi']
    d['conds'] = i['conds']
    d['wdire'] = i['wdire']
    d['wdird'] = i['wdird']
    d['hail'] = i['hail']
    d['thunder'] = i['thunder']
    d['pressurei'] = i['pressurei']
    d['snow'] = i['snow']
    d['pressurem'] = i['pressurem']
    d['fog'] = i['fog']
    d['tornado'] = i['tornado']
    d['hum'] = i['hum']
    d['tempi'] = i['tempi']
    d['tempm'] = i['tempm']
    d['dewptm'] = i['dewptm']
    d['dewpti'] = i['dewpti']
    d['rain'] = i['rain']
    d['visim'] = i['visi']
    d['wspdi'] = i['wspdi']
    d['wspdm'] = i['wspdm']
    return d


def get_weather_data(api, city, state, start_date, end_date):
    """Fetch hourly weather-history observations from the wunderground API.

    api        -- wunderground API key (string)
    city       -- city name (or ZIP code) used in the /q/ query path
    state      -- two-letter state abbreviation
    start_date -- first day to fetch, formatted YYYYMMDD
    end_date   -- last day to fetch (YYYYMMDD), or None to query one day

    Returns a JSON string encoding a list of flat observation dicts, one per
    hourly observation, in chronological order.  Returns None when start_date
    is None while end_date is not (original behavior, preserved).

    Note: issues one HTTP request per day in the range.
    """
    date_format = "%Y%m%d"
    # Multi-day query: fetch each day from start_date to end_date inclusive.
    if start_date is not None and end_date is not None:
        a = datetime.strptime(start_date, date_format)
        b = datetime.strptime(end_date, date_format)
        num_days = (b - a).days
        objects_list = []
        for day in range(0, num_days + 1):
            formatted_date = datetime.strftime(a + timedelta(days=day), date_format)
            query = ('http://api.wunderground.com/api/' + api + '/history_' +
                     formatted_date + '/q/' + state + '/' + city + '.json')
            f = urllib2.urlopen(query)
            try:
                parsed_json = json.loads(f.read())
            finally:
                # BUG FIX: the original never closed the HTTP response.
                f.close()
            for i in parsed_json['history']['observations']:
                objects_list.append(_observation_to_dict(i))
        return json.dumps(objects_list)
    # Single-day query: caller passes None for end_date.
    if end_date is None:
        # BUG FIX: the original hardcoded the literal string 'API_KEY' into
        # this URL instead of using the `api` parameter, so single-day
        # queries always failed regardless of the key passed in.
        f = urllib2.urlopen('http://api.wunderground.com/api/' + api + '/history_' +
                            start_date + '/q/' + state + '/' + city + '.json')
        try:
            parsed_json = json.loads(f.read())
        finally:
            f.close()
        objects_list = [_observation_to_dict(i)
                        for i in parsed_json['history']['observations']]
        return json.dumps(objects_list)
In [8]:
# Pass in the API key, city, state, and desired start/end dates as strings (YYYYMMDD).
# get_weather_data(api, city, state, start_date, end_date)
# Use None for end_date if you just need to query one day.
# NOTE(review): '78739' is a ZIP code passed in the city slot — the wunderground
# /q/<state>/<query> endpoint appears to accept it, but confirm this is intentional.
query_results = get_weather_data('API_KEY','78739','TX', '20110101', '20110110')
# Create a DataFrame from the JSON string returned by the query.
df = pd.read_json(query_results)
In [9]:
# Display the observations DataFrame from the 2011-01-01..2011-01-10 query above.
df
Out[9]:
In [23]:
#save file:
# FIXME(review): `t` is not defined anywhere in this notebook — this cell only
# ran via stale kernel state (note the out-of-order execution counts, In[23]
# here vs. In[180] for the January query below).  Presumably it should dump
# `query_results` or one of the monthly query strings; confirm and replace.
with open('January.json', 'w') as outfile:
    json.dump(t, outfile)
In [180]:
# Query January 2013 hourly observations for Austin, TX.
# SECURITY FIX: the original cell embedded a real wunderground API key
# ('1d83c5de274645d4') in the notebook; replaced with the 'API_KEY'
# placeholder used by every other query cell.  Rotate the leaked key.
january = get_weather_data('API_KEY','Austin','TX', '20130101', '20130131')
In [228]:
jan = pd.read_json(january)
#Find outlier:
outlier = jan['temp'].ix[jan['temp'] < 1]
print 'outlier: ', outlier
#Get rid of outlier!!!:
#jan = jan.drop(jan.index[[354]])
l = []
l = list(range(0,len(jan['temp']),len(jan['temp'])/31))
#print len(l)
#print l
plt.clf()
ax = plt.figure(figsize=(20,3)).add_subplot(111)
ax.set_xticks(l)
ax.set_xticklabels(range(1,32))
#Plot:
ax.plot(jan['temp'])
plt.savefig('January_Weather.pdf')
In [183]:
#Query April 2013 hourly observations for Austin, TX (placeholder API key).
april = get_weather_data('API_KEY','Austin','TX', '20130401', '20130430')
In [227]:
apr = pd.read_json(april)
#Find outlier:
outlier = apr['temp'].ix[apr['temp'] < 1]
print 'outlier: ', outlier
#Get rid of outlier!!!:
apr = apr.drop(apr.index[[50]])
apr = apr.drop(apr.index[[49]])
#print apr['temp'][48:52]
l = []
l = list(range(0,len(apr['temp']),len(apr['temp'])/30))
#print len(l)
#print l
plt.clf()
ax = plt.figure(figsize=(20,3)).add_subplot(111)
ax.set_xticks(l)
ax.set_xticklabels(range(1,31))
#Plot:
ax.plot(apr['temp'])
plt.savefig('April_Weather.pdf')
In [206]:
#Query July 2013 hourly observations for Austin, TX (placeholder API key).
july = get_weather_data('API_KEY','Austin','TX', '20130701', '20130731')
In [234]:
jly = pd.read_json(july)
#Find outlier:
outlier = jly['temp'].ix[jly['temp'] < 1]
print 'outlier: ', outlier
#Get rid of outlier!!!:
jly = jly.drop(jly.index[[586]])
l = []
l = list(range(0,len(jly['temp']),len(jly['temp'])/31))
#print len(l)
#print l
plt.clf()
ax = plt.figure(figsize=(20,3)).add_subplot(111)
ax.set_xticks(l)
ax.set_xticklabels(range(1,32))
#Plot:
ax.plot(apr['temp'])
plt.savefig('month_temp_graphs/July_Weather.png')
In [161]:
# Query October 2013 hourly observations for Austin, TX (placeholder API key).
october = get_weather_data('API_KEY','Austin','TX', '20131001', '20131031')
In [3]:
octo = pd.read_json(october)
#Find outlier:
outlier = octo['temp'].ix[octo['temp'] < 1]
print 'outlier: ', outlier
#Get rid of outlier!!!:
octo = octo.drop(octo.index[[354]])
l = []
l = list(range(0,len(octo['temp']),len(octo['temp'])/31))
#print len(l)
#print l
plt.clf()
ax = plt.figure(figsize=(20,3)).add_subplot(111)
ax.set_xticks(l)
ax.set_xticklabels(range(1,32))
#Plot:
ax.plot(octo['temp'])
plt.savefig('month_temp_graphs/October_Weather.png')
In [ ]: