In [1]:
%pylab inline
import pandas as pd
In [2]:
# Load the raw hourly observations; the DATE column is parsed while reading.
weather = pd.read_csv('hourly_weather.csv', parse_dates=['DATE'])

# Lower-case every column label so the columns can be referenced uniformly.
weather.columns = [label.lower() for label in weather.columns]

# Make sure `date` really is datetime-typed (a no-op when parse_dates already
# succeeded, a conversion when it fell back to plain strings).
weather['date'] = pd.to_datetime(weather['date'])

# Keep only the timestamp, wet-bulb temperature and precipitation columns.
# The explicit .copy() makes the subset an independent frame, so the column
# assignments below modify `weather` itself rather than a view of the raw data
# (which would raise SettingWithCopyWarning).
cols = ['date', 'hourlywetbulbtempf', 'hourlyprecip']
weather = weather[cols].copy()
weather = weather.rename(columns={'hourlywetbulbtempf': 'temp'})

# Forward-fill gaps in the temperature readings. The result is assigned back
# to the column: an in-place fill on `weather.temp` acts on a temporary Series
# and is not guaranteed to reach the frame.
weather['temp'] = weather['temp'].ffill()

# Precipitation: missing readings become 0 and the 'T' marker is replaced by 0
# as well. The column is then stored as text because later processing splits
# each value on a letter suffix.
precip_filled = weather['hourlyprecip'].fillna(0)
weather['hourlyprecip'] = precip_filled.mask(precip_filled == 'T', 0).astype(str)
#create 'hour' column and truncate datetime to %Y-%m-%d
def split_off_times(df):
    """Split the ``date`` timestamps into a calendar day and an hour of day.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime-typed ``date`` column (the ``.dt`` accessor is
        applied to it).

    Returns
    -------
    pandas.DataFrame
        A new frame in which ``date`` has been floored to the start of its
        day and an ``hour`` column holds the hour component of the original
        timestamp. The frame passed in is not modified, so the function is
        safe to call on a slice of another frame and safe to re-run.
    """
    timestamps = df['date']
    # Both derived columns are computed from the untouched timestamps before
    # `date` is replaced, so the hour is never taken from a floored value.
    return df.assign(hour=timestamps.dt.hour, date=timestamps.dt.floor('d'))
# Separate each timestamp into a calendar date and an hour-of-day column.
weather = split_off_times(weather)

# Derive two columns from the precipitation text:
#   precip - the numeric part that precedes any 's' marker, as a float
#   snow   - 1 when the text contains an 's', otherwise 0
# NOTE(review): this notebook treats the 's' suffix as a snow indicator;
# confirm that meaning against the data provider's documentation.
# Testing for the marker directly (instead of inspecting the second column of
# an expanded split) also works when no value contains an 's' at all, and does
# not depend on whether missing pieces come back as None or NaN.
df = pd.DataFrame({
    'precip': weather['hourlyprecip'].str.split('s').str[0].astype(float),
    'snow': weather['hourlyprecip'].str.contains('s', regex=False, na=False).astype(int),
})

# Attach the parsed columns and discard the raw precipitation text.
weather = pd.concat([weather, df], axis=1).drop('hourlyprecip', axis=1)

# Ensure one row per (date, hour): average temperature and precipitation, and
# flag the hour as snow if any observation within it was flagged.
aggregator = {'temp': 'mean', 'precip': 'mean', 'snow': 'max'}
weather = weather.groupby(['date', 'hour']).agg(aggregator).reset_index()
In [ ]:
# Write the aggregated hourly weather table to weather.csv (path is relative
# to the notebook's working directory).
# NOTE(review): no `index` argument is given, so the frame's index is written
# as an extra leading column - confirm whether downstream readers expect it.
weather.to_csv('weather.csv')
In [11]:
# Spot-check ten rows (positions 100 through 109) of the aggregated table.
weather.iloc[100:110]
Out[11]:
In [ ]:
# (rows, columns) of the subset of hours whose precip value is strictly positive.
weather.loc[weather['precip'] > 0.0].shape
In [15]:
import urllib
from contextlib import closing

from bs4 import BeautifulSoup

try:  # Python 3 keeps urlopen in urllib.request
    from urllib.request import urlopen
except ImportError:  # Python 2 exposes it directly on the urllib module
    urlopen = urllib.urlopen

# Scrape the live weather data site to get the current temperature.
url = 'http://w1.weather.gov/data/obhistory/KNYC.html'

# closing() releases the connection on both Python 2 and 3 (the Python 2
# response object is not a context manager by itself).
with closing(urlopen(url)) as response:
    page = response.read()

# Name the parser explicitly so the parse tree does not depend on which
# optional parser libraries happen to be installed on this machine.
soup = BeautifulSoup(page, 'html.parser')

# Position of the table row / cell that holds the reading.
# NOTE(review): these indices are tied to the page's current layout
# (presumably the most recent observation row and its temperature cell) -
# confirm against the live page if the value looks wrong.
OBSERVATION_ROW = 7
TEMPERATURE_CELL = 6

rows = soup.find_all('tr')
if len(rows) <= OBSERVATION_ROW:
    # Fail loudly instead of silently reporting a temperature of 0.
    raise ValueError(
        'expected at least %d table rows at %s, found %d'
        % (OBSERVATION_ROW + 1, url, len(rows))
    )

tr = rows[OBSERVATION_ROW]
tds = tr.find_all('td')
live_temp = float(tds[TEMPERATURE_CELL].text)
In [16]:
# Show the temperature value parsed from the scraped page in the cell above.
live_temp
Out[16]:
In [ ]: