First, you'll want to head over to https://dev.twitter.com/apps and register an application!
1) Fill in the application details:
In [3]:
Image(filename='/home/nipun/Pictures/Twitter_registering_application.png')
Out[3]:
[screenshot: Twitter application registration form]
2) Get the keys for your app:
In [5]:
Image('/home/nipun/Pictures/Twitter_OAuth.png')
Out[5]:
[screenshot: Twitter OAuth keys and access tokens page]
Necessary Python packages:
Tweepy: https://github.com/tweepy/tweepy
$ pip install tweepy
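To confirm the install worked, a quick sanity check (the printed version string will depend on when you install):
In [ ]:
# Quick check that tweepy is importable
import tweepy
print(tweepy.__version__)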
In [ ]:
# Import the necessary classes from the tweepy library
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

# Variables that contain the user credentials to access the Twitter API
consumer_key = "################"
consumer_secret = "################"
access_token = "################"
access_token_secret = "################"

tweet_file_path = "/home/nipun/Downloads/Twitter_scrape/Twitter_scrape.txt"
tweet_file_handle = open(tweet_file_path, "a")

# A basic listener that prints received tweets to stdout
# and appends the raw JSON to a file
class StdOutListener(StreamListener):
    def on_data(self, data):
        print(data)
        tweet_file_handle.write(str(data) + "\n")
        return True

    def on_error(self, status):
        print(status)

# This handles Twitter authentication and the connection to the Twitter Streaming API
l = StdOutListener()
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
stream = Stream(auth, l)

# Filter the Twitter stream to capture tweets containing the keyword 'IPL2016'
stream.filter(track=['IPL2016'])
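The stream above runs until you interrupt it. If you want a fixed-size sample instead, one option is to count tweets in on_data and return False once you have enough, which tells tweepy to disconnect; returning False from on_error on a 420 status also backs off when Twitter rate-limits you. This is a sketch, not part of the original script: CountingListener and max_tweets are names introduced here, and it reuses auth and tweet_file_handle from the cell above.
In [ ]:
# Variant listener that stops after max_tweets tweets (illustrative sketch)
class CountingListener(StreamListener):
    def __init__(self, max_tweets=1000):
        super(CountingListener, self).__init__()
        self.count = 0
        self.max_tweets = max_tweets

    def on_data(self, data):
        tweet_file_handle.write(str(data) + "\n")
        self.count += 1
        # Returning False tells tweepy to disconnect the stream
        return self.count < self.max_tweets

    def on_error(self, status):
        print(status)
        if status == 420:  # rate limited: disconnect rather than keep retrying
            return False

stream = Stream(auth, CountingListener(max_tweets=500))
stream.filter(track=['IPL2016'])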
In [33]:
%matplotlib inline
import json
import pandas as pd
import matplotlib.pyplot as plt

tweets_data_path = '/home/nipun/Downloads/Twitter_scrape/Twitter_scrape.txt'

# Parse the collected stream: one JSON object per line, skipping malformed lines
tweets_data = []
with open(tweets_data_path, "r") as tweets_file:
    for line in tweets_file:
        try:
            tweet = json.loads(line)
            tweets_data.append(tweet)
        except ValueError:
            continue

print(len(tweets_data))
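Before building the DataFrame, it helps to inspect one parsed tweet. The keys are standard Twitter API v1.1 fields; this sketch assumes the first line of the file is an actual tweet rather than a delete or limit notice:
In [ ]:
# Inspect the top-level fields of the first collected tweet
print(sorted(tweets_data[0].keys()))
# 'user' is itself a nested object holding the profile fields we extract next
print(tweets_data[0]['user']['screen_name'])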
In [13]:
tweets_df = pd.DataFrame()
tweets_df['Username'] = [tweet['user']['name'] for tweet in tweets_data]
tweets_df['statuses_count'] = [tweet['user']['statuses_count'] for tweet in tweets_data]
tweets_df['friends_count'] = [tweet['user']['friends_count'] for tweet in tweets_data]
tweets_df['followers_count'] = [tweet['user']['followers_count'] for tweet in tweets_data]
tweets_df['text'] = [tweet['text'] for tweet in tweets_data]
tweets_df['lang'] = [tweet['lang'] for tweet in tweets_data]
tweets_df['location'] = [tweet['user']['location'] for tweet in tweets_data]
tweets_df['created_at'] = [tweet['created_at'] for tweet in tweets_data]
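A more concise alternative, not the approach used here, is pandas' json_normalize, which flattens the nested user object in one call; the flattened columns then use dotted paths like user.name. A sketch, assuming each line parsed above is a full tweet object (the function lives at pd.io.json.json_normalize in pandas older than 0.25):
In [ ]:
# Alternative: flatten the nested tweet JSON in one step
flat_df = pd.json_normalize(tweets_data)
flat_df = flat_df[['user.name', 'user.statuses_count', 'user.friends_count',
                   'user.followers_count', 'text', 'lang', 'user.location',
                   'created_at']]
flat_df.head()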
In [21]:
tweets_df.dtypes
Out[21]:
In [26]:
tweets_df['created_at'] = pd.to_datetime(tweets_df['created_at'].astype(str))
tweets_df.dtypes
Out[26]:
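Twitter returns created_at as strings like "Sun Apr 10 12:34:56 +0000 2016". pd.to_datetime can infer this, as above, but passing an explicit format is faster and stricter. A sketch; note that %z in the format string requires a reasonably recent pandas, so fall back to the inference-based call if it raises:
In [ ]:
# Explicit format for Twitter's created_at timestamps
tweets_df['created_at'] = pd.to_datetime(tweets_df['created_at'],
                                         format='%a %b %d %H:%M:%S %z %Y')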
In [27]:
tweets_df.head()
Out[27]:
In [34]:
tweets_df.to_csv('/home/nipun/Downloads/Test_Twitter/Twitter_scrape.csv', encoding='utf-8')
In [29]:
tweets_df['Username'].value_counts()
Out[29]:
In [41]:
new_tweet_df = pd.read_csv('/home/nipun/Downloads/Test_Twitter/Twitter_IPL2016.csv',header=0)
new_tweet_df['location'].value_counts()[0:20]
Out[41]:
In [40]:
# For simplicity in visualisation, we keep only those locations
# with a tweet count of at least 10
tweet_filter_df = new_tweet_df.groupby("location").filter(lambda x: len(x) >= 10)
tweet_filter_df['location'].value_counts()
Out[40]:
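groupby().filter() keeps every row whose whole group satisfies the predicate. An equivalent way to get the same result, which some readers find easier to follow, uses value_counts plus isin:
In [ ]:
# Equivalent to the groupby/filter above: keep rows whose location
# appears at least 10 times
location_counts = new_tweet_df['location'].value_counts()
frequent_locations = location_counts[location_counts >= 10].index
tweet_filter_df = new_tweet_df[new_tweet_df['location'].isin(frequent_locations)]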
In [43]:
tweet_filter_df['lang'].value_counts()
Out[43]:
In [51]:
tweet_filter_df['created_at'] = pd.to_datetime(tweet_filter_df['created_at'])
print(tweet_filter_df.dtypes)
tweet_filter_df.to_csv('/home/nipun/Downloads/Test_Twitter/Finished_Twitter_IPL2016.csv', index=False, encoding='utf-8')
In [44]:
tweets_by_lang = tweet_filter_df['lang'].value_counts()
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Languages', fontsize=15)
ax.set_ylabel('Number of tweets', fontsize=15)
ax.set_title('Top 5 languages', fontsize=15, fontweight='bold')
tweets_by_lang[:5].plot(ax=ax, kind='bar', color='cyan')
Out[44]:
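The same recipe works for any categorical column. Here is a sketch of the corresponding chart for the top locations; the colour and styling are arbitrary choices:
In [ ]:
# Same bar-chart pattern, applied to the top tweet locations
tweets_by_location = tweet_filter_df['location'].value_counts()
fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=10)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('Location', fontsize=15)
ax.set_ylabel('Number of tweets', fontsize=15)
ax.set_title('Top 5 locations', fontsize=15, fontweight='bold')
tweets_by_location[:5].plot(ax=ax, kind='bar', color='orange')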