In [1]:
import sys
import re
import time
import datetime
# Requires for ipython to pick up on twitter?
sys.path.append('/Library/Python/2.7/site-packages/')
import twitter
import pandas as pd
import func
# import pyowm # Historical API is paid

# inline plot
%matplotlib inline


/Users/albarron/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

In [2]:
#%load 'data/raw-twt2016-01-26-14/21/09.csv'
df = pd.read_csv("data/formated_twts.csv",sep=',',error_bad_lines=False)

In [3]:
df.head()


Out[3]:
Unnamed: 0 created_at favorite_count favorited hashtags id in_reply_to_screen_name in_reply_to_status_id in_reply_to_user_id lang ... place retweet_count retweeted retweeted_status source text truncated urls user user_mentions
0 0 Tue Jan 26 20:32:15 +0000 2016 6 False [SanFrancisco] 692082643022680064 NaN NaN NaN en ... NaN 7 False NaN <a href="https://about.twitter.com/products/tw... NOTICE: Ped &amp; Bike detours in place for Ma... False {u'https://t.co/hcYGYF5L5S': u'https://www.sfm... {u'id': 456808166, u'verified': True, u'profil... NaN
1 1 Tue Jan 26 19:41:32 +0000 2016 NaN False NaN 692069881559134208 therealwall 6.920673e+17 46136761 en ... NaN NaN False NaN <a href="https://about.twitter.com/products/tw... @therealwall After the end of the concert we w... False {u'https://t.co/3f9VEAaGTY': u'http://www.calt... {u'id': 456808166, u'verified': True, u'profil... [{u'screen_name': u'therealwall', u'id': 46136...
2 2 Tue Jan 26 19:28:52 +0000 2016 NaN False [SB50] 692066695838498816 AemalTheAFGHAN 6.920578e+17 291505788 en ... NaN NaN False NaN <a href="https://about.twitter.com/products/tw... @AemalTheAFGHAN @BKDenverSports We're glad to ... False {u'https://t.co/fgMOSXplzZ': u'http://www.calt... {u'id': 456808166, u'verified': True, u'profil... [{u'screen_name': u'AemalTheAFGHAN', u'id': 29...
3 3 Tue Jan 26 18:12:35 +0000 2016 1 False [Sorry, Headphones] 692047497238175744 4c4d 6.920436e+17 256648931 en ... NaN NaN False NaN <a href="https://about.twitter.com/products/tw... @4c4d Oh man. We love that, too. Our favorite ... False NaN {u'id': 456808166, u'verified': True, u'profil... [{u'screen_name': u'4c4d', u'id': 256648931, u...
4 4 Tue Jan 26 17:53:20 +0000 2016 7 False NaN 692042650933862401 NaN NaN NaN en ... NaN 18 False NaN <a href="https://about.twitter.com/products/tw... Pssst, hey, regular Caltrain riders: expect a ... False NaN {u'id': 456808166, u'verified': True, u'profil... NaN

5 rows × 21 columns


In [4]:
# keys = pd.read_csv('keys.csv') # hidden from github
# openweather=keys.iloc[4].string
# owm = pyowm.OWM(openweather)

In [5]:
# owm.weather_history_at_place('5380748','2015-10-14 01:17:00+00','2015-10-14 01:19:00+00')

In [6]:
### Unfortunately pydap only goes up to 2014 :/
### http://nomads.ncdc.noaa.gov/dods/NCEP_NARR_DAILY
# from pydap.client import open_url
# url = 'http://nomads.ncdc.noaa.gov/dods/NCEP_NARR_DAILY/197901/197901/narr-a_221_197901dd_hh00_000'
# modelconn = open_url(url)
# tmp2m = modelconn['tmp2m']
# # Set location to Palo Alto lat/lon
# tmp2m.lat = 37.441879
# tmp2m.lon = -122.143021

In [7]:
keys = pd.read_csv('keys.csv') # hidden from github
forecaseiokey=keys.iloc[5].string

In [8]:
import forecastio
api_key = forecaseiokey
lat = 37.441879
lng = -122.143021
#forecast = forecastio.load_forecast(api_key, lat, lng)

In [9]:
zip(range(5),[0]*5)


Out[9]:
[(0, 0), (1, 0), (2, 0), (3, 0), (4, 0)]

In [10]:
def get_avg_temp(lat,lon,timestamp):
    time = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
    forecast = forecastio.load_forecast(api_key, lat, lng,time=time)

    iter = [0]*6
    temp = precip = vis = windspeed = humidity = cloudcover = 0
    for t in forecast.hourly().data:
        try:
            temp += t.d['apparentTemperature']
            iter[0]+=1
        except:
            print "No temp"
        try:
            precip += t.d['precipIntensity']
            iter[1]+=1
        except:
            print "no precip"
        try:
            vis += t.d['visibility']
            iter[2]+=1
        except:
            print "no Vis"
        try:
            windspeed += t.d['windSpeed']
            iter[3]+=1
        except:
             print "no Wind"
        try:
            humidity += t.d['humidity']
            iter[4]+=1
        except:
             print "no Humid"
        try:
            cloudcover += t.d['cloudCover']
            iter[5]+=1
        except:
            print "no Cloud"

#     return pd.Series(
#         'temp': temp/float(iter),
#         'precipiation': precip/float(iter),
#         'visability': vis/float(iter),
#         'windspeed': windspeed/float(iter),
#         'humidity': humidity/float(iter),
#         'cloudcover': cloudcover/float(iter)
#     )

    print iter

    return pd.Series([
        temp/float(iter[0]),
        precip/float(iter[1]),
        vis/float(iter[2]),
        windspeed/float(iter[3]),
        humidity/float(iter[4]),
        cloudcover/float(iter[5])
    ])

In [11]:
get_avg_temp(lat,lng,"2015-10-15 01:18:39")


[24, 24, 24, 24, 24, 24]
Out[11]:
0    66.583750
1     0.000000
2    10.000000
3     4.980417
4     0.772500
5     0.037500
dtype: float64

In [13]:
test = df['created_at'].apply(lambda x:get_avg_temp(lat,lng,x))


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-5cc6fc1d850d> in <module>()
----> 1 test = df['created_at'].apply(lambda x:get_avg_temp(lat,lng,x))

/Users/albarron/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in apply(self, func, convert_dtype, args, **kwds)
   2167             values = lib.map_infer(values, lib.Timestamp)
   2168 
-> 2169         mapped = lib.map_infer(values, f, convert=convert_dtype)
   2170         if len(mapped) and isinstance(mapped[0], Series):
   2171             from pandas.core.frame import DataFrame

pandas/src/inference.pyx in pandas.lib.map_infer (pandas/lib.c:62578)()

<ipython-input-13-5cc6fc1d850d> in <lambda>(x)
----> 1 test = df['created_at'].apply(lambda x:get_avg_temp(lat,lng,x))

<ipython-input-10-d39366524f82> in get_avg_temp(lat, lon, timestamp)
      1 def get_avg_temp(lat,lon,timestamp):
----> 2     time = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S")
      3     forecast = forecastio.load_forecast(api_key, lat, lng,time=time)
      4 
      5     iter = [0]*6

/Users/albarron/anaconda/lib/python2.7/_strptime.pyc in _strptime(data_string, format)
    323     if not found:
    324         raise ValueError("time data %r does not match format %r" %
--> 325                          (data_string, format))
    326     if len(data_string) != found.end():
    327         raise ValueError("unconverted data remains: %s" %

ValueError: time data 'Tue Jan 26 20:32:15 +0000 2016' does not match format '%Y-%m-%d %H:%M:%S'

In [ ]:
test

In [ ]:
test.plot()

In [ ]:
merged = pd.concat([df,test],axis=1)

In [ ]:


In [ ]:
merged = merged.rename(columns={0:'temp',
 1:'precipiation',
 2:'visability',
 3:'windspeed',
 4:'humidity',
 5:'cloudcover'})

In [ ]:
del merged['Unnamed: 0']

In [ ]:
merged.head()

In [ ]:
filename = "./data/weather-add-twt{date}.csv".format(date=datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S"))
merged.to_csv(filename, sep='\t', encoding='utf-8')

In [ ]: