In [1]:
import sys
import re
import time
import datetime
# Required for IPython to pick up the twitter package?
sys.path.append('/Library/Python/2.7/site-packages/')
import twitter
import pandas as pd
import func
# import pyowm # Historical API is paid
# inline plot
%matplotlib inline
In [2]:
#%load 'data/raw-twt2016-01-26-14/21/09.csv'
# Load the three input tables. error_bad_lines=False makes the parser skip
# malformed rows instead of raising.
# NOTE(review): error_bad_lines was deprecated in pandas 1.3 and removed in 2.0
# (on_bad_lines='skip' replaces it). It is left as-is because the first cell
# points sys.path at a Python 2.7 site-packages directory.
truth = pd.read_csv(
    "data/truth_tweets.csv",
    sep=',',
    error_bad_lines=False,
)
twts = pd.read_csv(
    "data/formated_twts.csv",
    sep=',',
    error_bad_lines=False,
)
# The weather export is tab-separated, unlike the two comma-separated files above.
weather = pd.read_csv(
    "data/weather-add-twt2016-03-06-21:26:39.csv",
    sep='\t',
    error_bad_lines=False,
)
In [3]:
# Remove the 'Unnamed: 0' column (presumably an index written out by an earlier
# to_csv -- TODO confirm), then preview the first rows.
truth = truth.drop('Unnamed: 0', axis=1)
truth.head()
Out[3]:
In [4]:
# Remove the 'Unnamed: 0' column (presumably an index written out by an earlier
# to_csv -- TODO confirm), then preview the first three rows.
twts = twts.drop('Unnamed: 0', axis=1)
twts.head(3)
Out[4]:
In [5]:
# Give each frame a composite integer key built from its own train_id and id.
#
# Fix: twts['uid'] used to be computed from `truth` (copy-paste slip), so the
# tweets received truth's keys aligned by row index rather than keys derived
# from their own train_id/id. Each frame now derives uid from its own rows.
#
# NOTE(review): plain string concatenation is ambiguous -- train_id=1, id=23 and
# train_id=12, id=3 both yield 123. Joining on ['train_id', 'id'] directly would
# avoid that; the uid format is left unchanged here.
def _make_uid(row):
    """Return int(str(row.train_id) + str(row.id)) for a single DataFrame row."""
    return int(str(row.train_id) + str(row.id))


truth['uid'] = truth.apply(_make_uid, axis=1)
twts['uid'] = twts.apply(_make_uid, axis=1)
In [6]:
# Earlier join on the two key columns, replaced once both frames carried uid:
# df = twts.merge(truth,on=['id','train_id'],how='inner')
#
# Inner-join the tweets (left) with the truth table (right) on uid. Column names
# present in both frames come back with _x (left) and _y (right) suffixes.
df = pd.merge(twts, truth, how='inner', on='uid')
df.shape
Out[6]:
In [ ]:
# wt = weather[['id','temp','precipiation','visability','windspeed','humidity','cloudcover']]
In [ ]:
# wt.head(2)
In [ ]:
# df = df.merge(wt,left_on='tweet_id',right_on='id',how='outer')
# df.shape
In [8]:
# Preview the first five rows of the merged frame.
df.head(5)
Out[8]:
In [9]:
# Show the merged frame's column labels as an array.
df.columns.values
Out[9]:
In [10]:
# Drop the columns that are not carried forward: several tweet fields plus the
# suffixed (_x / _y) duplicates that the merge produced.
df = df.drop(
    [
        'created_at',
        'favorite_count',
        'hashtags',
        'in_reply_to_screen_name',
        'retweet_count',
        'text_y',
        'train_id_y',
        'id_y',
        'id_x',
    ],
    axis=1,
)
In [11]:
# Pairwise Pearson correlation between the columns of the merged frame.
df.corr(method='pearson')
Out[11]:
In [12]:
# Drop the remaining text_x column.
df = df.drop('text_x', axis=1)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Preview the last five rows of the merged frame.
df.tail(5)
In [ ]:
In [13]:
# Write the merged frame to a fixed, tab-separated, UTF-8 output file.
#
# The original called .format(date=<current timestamp>) on this literal, but the
# string has no {date} placeholder, so the timestamp was computed and discarded.
# The no-op call is removed; the output path is exactly what it was before.
# NOTE(review): if a timestamped file per run was intended, add a {date}
# placeholder to the name instead -- that would change the output path.
filename = "./data/merged_delay.csv"
# The index is written too (to_csv default), so reading this file back yields an
# extra leading 'Unnamed: 0' column.
df.to_csv(filename, sep='\t', encoding='utf-8')
In [ ]: