In [1]:
%pylab inline
import pandas as pd


Populating the interactive namespace from numpy and matplotlib

In [2]:
weather = pd.read_csv('hourly_weather.csv', parse_dates = ['DATE'])

# change labels to lowercase
labels = []
for label in weather.columns:
    labels.append(label.lower())

weather.columns = labels

# convert date to datetime format and fill in null values
weather.date = pd.to_datetime(weather.date)
apply_func = lambda x: 0 if x=='T' else x
weather.hourlyprecip = weather.hourlyprecip.fillna(0).apply(apply_func)

# select out temperature and precipitation columns
cols = ['date', 'hourlywetbulbtempf', 'hourlyprecip']
weather = weather[cols]
weather.head()
weather.rename(columns = {'hourlywetbulbtempf': 'temp'}, inplace = True)
weather.temp.fillna(method = 'ffill', inplace = True)

weather.hourlyprecip = weather.hourlyprecip.astype(str)

#create 'hour' column and truncate datetime to %Y-%m-%d
def split_off_times(df):
    df['hour'] = df.date.dt.hour
    df['date'] = df.date.dt.floor('d')
    return df

weather = split_off_times(weather)

# create a dataframe that contains snow information and 
# concatenate to original dataframe
df = weather.hourlyprecip.str.split('s', expand = True)
df[1] = df[1].apply(lambda x: 0 if x==None else 1)
df[0] = df.astype(float)
df = df.rename(columns = {0: 'precip', 1: 'snow'})
weather = pd.concat([weather, df], axis = 1)
weather.snow = weather.snow.astype(int)
weather.drop('hourlyprecip', axis = 1, inplace = True)

# ensure unique hourly intervals by grouping by hour
aggregator = {'temp': 'mean', 'precip':'mean', 'snow': 'max'}
weather = weather.groupby(['date','hour']).agg(aggregator).reset_index()


/home/psamtik071/anaconda2/lib/python2.7/site-packages/IPython/core/interactiveshell.py:2717: DtypeWarning: Columns (10,11,14,15,20,23,25,40,67,84,86) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)

In [ ]:
weather.to_csv('weather.csv')

In [11]:
weather.iloc[100:110,:]


Out[11]:
date hour precip snow temp
100 2015-03-05 4 0.020000 0 31.000000
101 2015-03-05 5 0.010000 0 31.000000
102 2015-03-05 6 0.008000 0 30.400000
103 2015-03-05 7 0.040000 0 31.000000
104 2015-03-05 8 0.040000 0 29.333333
105 2015-03-05 9 0.047500 0 25.500000
106 2015-03-05 10 0.065000 0 25.000000
107 2015-03-05 11 0.027500 0 23.250000
108 2015-03-05 12 0.040000 0 20.000000
109 2015-03-05 13 0.026667 0 19.000000

In [ ]:
weather[weather.precip > 0.].shape

In [15]:
from bs4 import BeautifulSoup
import urllib

# scrape live weather data site to get current temperature
url = 'http://w1.weather.gov/data/obhistory/KNYC.html'

page = urllib.urlopen(url).read()
soup = BeautifulSoup(page)

live_temp = 0

for tr in soup.find_all('tr')[7:8]:
    tds = tr.find_all('td')
    live_temp += float(tds[6].text)
#     listy.append(tds[3].text)

In [16]:
live_temp


Out[16]:
78.0

In [ ]: