In [1]:
import sys
import re
import time
import datetime
# Requires for ipython to pick up on twitter?
sys.path.append('/Library/Python/2.7/site-packages/')
import twitter
import pandas as pd
import func
# import pyowm # Historical API is paid

# inline plot
%matplotlib inline


/Users/albarron/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [2]:
#%load 'data/raw-twt2016-01-26-14/21/09.csv'
df = pd.read_csv("data/formated_twts.csv",sep=',',error_bad_lines=False)

In [3]:
df.head()


Out[3]:
Unnamed: 0 created_at favorite_count hashtags id in_reply_to_screen_name retweet_count text day_of_week day_of_month month time_of_day topic_train t_bullet t_limited t_northbound timestamp train_id tweet_id
0 0 2016-01-25 23:42:14 1 NB155, Caltrain 691768068385718275 NaN 2 #NB155 is 22 mins late. #Caltrain 0 25 1 error '155' 0 0 1 2016-01-25 23:42:14 155 691768068385718275
1 1 2016-01-22 22:48:57 NaN NB, Caltrain 690667494906814464 NaN 2 #NB 151 is running 10 mins behind #Caltrain 4 22 1 error '151' 0 0 1 2016-01-22 22:48:57 151 690667494906814464
2 2 2016-01-20 22:22:55 1 NB151, Caltrain 689936168893329408 NaN 2 #NB151 is 10 mins down at San Mateo. #Caltrain 2 20 1 error '151' 0 0 1 2016-01-20 22:22:55 151 689936168893329408
3 3 2016-01-19 18:08:25 1 SB138, Caltrain 689509733640732672 NaN 2 #SB138 will board on the northbound platform a... 1 19 1 rush_evening '138' 0 0 0 2016-01-19 18:08:25 138 689509733640732672
4 4 2016-01-19 16:32:04 2 SB216, NB225, NB329, Caltrain 689485484838416388 NaN 4 #SB216 delayed 13 minutes at SAT\n#NB225 delay... 1 19 1 rush_evening '216', '225', '329' 0 1 0 2016-01-19 16:32:04 216 689485484838416388

In [4]:
# keys = pd.read_csv('keys.csv') # hidden from github
# openweather=keys.iloc[4].string
# owm = pyowm.OWM(openweather)

In [5]:
# owm.weather_history_at_place('5380748','2015-10-14 01:17:00+00','2015-10-14 01:19:00+00')

In [6]:
### Unfortunately pydap only goes up to 2014 :/
### http://nomads.ncdc.noaa.gov/dods/NCEP_NARR_DAILY
# from pydap.client import open_url
# url = 'http://nomads.ncdc.noaa.gov/dods/NCEP_NARR_DAILY/197901/197901/narr-a_221_197901dd_hh00_000'
# modelconn = open_url(url)
# tmp2m = modelconn['tmp2m']
# # Set location to Palo Alto lat/lon
# tmp2m.lat = 37.441879
# tmp2m.lon = -122.143021

In [4]:
keys = pd.read_csv('keys.csv') # hidden from github
forecaseiokey=keys.iloc[5].string

In [5]:
import forecastio
api_key = forecaseiokey
lat = 37.441879
lng = -122.143021
#forecast = forecastio.load_forecast(api_key, lat, lng)

In [9]:
zip(range(5),[0]*5)


Out[9]:
[(0, 0), (1, 0), (2, 0), (3, 0), (4, 0)]

In [6]:
def get_avg_temp(lat,lon,timestamp):
    time = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
    forecast = forecastio.load_forecast(api_key, lat, lng,time=time)

    iter = [0]*6
    temp = precip = vis = windspeed = humidity = cloudcover = 0
    for t in forecast.hourly().data:
        try:
            temp += t.d['apparentTemperature']
            iter[0]+=1
        except:
            print "No temp"
        try:
            precip += t.d['precipIntensity']
            iter[1]+=1
        except:
            print "no precip"
        try:
            vis += t.d['visibility']
            iter[2]+=1
        except:
            print "no Vis"
        try:
            windspeed += t.d['windSpeed']
            iter[3]+=1
        except:
             print "no Wind"
        try:
            humidity += t.d['humidity']
            iter[4]+=1
        except:
             print "no Humid"
        try:
            cloudcover += t.d['cloudCover']
            iter[5]+=1
        except:
            print "no Cloud"

#     return pd.Series(
#         'temp': temp/float(iter),
#         'precipiation': precip/float(iter),
#         'visability': vis/float(iter),
#         'windspeed': windspeed/float(iter),
#         'humidity': humidity/float(iter),
#         'cloudcover': cloudcover/float(iter)
#     )

    print iter

    return pd.Series([
        temp/float(iter[0]),
        precip/float(iter[1]),
        vis/float(iter[2]),
        windspeed/float(iter[3]),
        humidity/float(iter[4]),
        cloudcover/float(iter[5])
    ])

In [11]:
get_avg_temp(lat,lng,"2015-10-15 01:18:39")


[24, 24, 24, 24, 24, 24]
Out[11]:
0    66.583750
1     0.000000
2    10.000000
3     4.980417
4     0.772500
5     0.037500
dtype: float64

In [13]:
test = df['created_at'].apply(lambda x:get_avg_temp(lat,lng,x))


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-5cc6fc1d850d> in <module>()
----> 1 test = df['created_at'].apply(lambda x:get_avg_temp(lat,lng,x))

/Users/albarron/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in apply(self, func, convert_dtype, args, **kwds)
   2167             values = lib.map_infer(values, lib.Timestamp)
   2168 
-> 2169         mapped = lib.map_infer(values, f, convert=convert_dtype)
   2170         if len(mapped) and isinstance(mapped[0], Series):
   2171             from pandas.core.frame import DataFrame

pandas/src/inference.pyx in pandas.lib.map_infer (pandas/lib.c:62578)()

<ipython-input-13-5cc6fc1d850d> in <lambda>(x)
----> 1 test = df['created_at'].apply(lambda x:get_avg_temp(lat,lng,x))

<ipython-input-10-d39366524f82> in get_avg_temp(lat, lon, timestamp)
      1 def get_avg_temp(lat,lon,timestamp):
----> 2     time = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
      3     forecast = forecastio.load_forecast(api_key, lat, lng,time=time)
      4 
      5     iter = [0]*6

/Users/albarron/anaconda/lib/python2.7/_strptime.pyc in _strptime(data_string, format)
    323     if not found:
    324         raise ValueError("time data %r does not match format %r" %
--> 325                          (data_string, format))
    326     if len(data_string) != found.end():
    327         raise ValueError("unconverted data remains: %s" %

ValueError: time data 'Tue Jan 26 20:32:15 +0000 2016' does not match format '%Y-%m-%d %H:%M:%S'

In [ ]:
test

In [ ]:
test.plot()

In [ ]:
merged = pd.concat([df['id','created_at'],test],axis=1)

In [ ]:


In [ ]:
merged = merged.rename(columns={0:'temp',
 1:'precipiation',
 2:'visability',
 3:'windspeed',
 4:'humidity',
 5:'cloudcover'})

In [ ]:
del merged['Unnamed: 0']

In [ ]:
merged.head()

In [ ]:
filename = "./data/weather-add-twt{date}.csv".format(date=datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S"))
merged.to_csv(filename, sep='\t', encoding='utf-8')

In [ ]: