In [1]:
import warnings
warnings.filterwarnings('ignore')
import ipywidgets as widgets
from IPython.display import display
from IPython.utils.traitlets import Unicode
def installPackages(sender):
    """Install the third-party packages used by this notebook.

    Click handler for the "Install Packages" button.

    Args:
        sender: The widget that fired the click event (unused).
    """
    import subprocess
    import sys

    # `sys.executable -m pip` installs into the environment of the running
    # kernel; a bare `!pip` resolves through PATH and may hit another Python.
    # tweepy is capped below 4.0 because this notebook relies on
    # tweepy.StreamListener, tweepy.TweepError and API.search, all of which
    # were removed or renamed in the 4.0 release.
    command = [sys.executable, "-m", "pip", "install",
               "folium", "tweepy<4", "pymongo"]
    result = subprocess.run(command,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    # Echo pip's output so it is visible from the notebook.
    print(result.stdout)
    if result.returncode != 0:
        print("pip exited with status " + str(result.returncode))
# Render a button that runs installPackages each time it is clicked.
install_btn = widgets.Button(description="Install Packages")
install_btn.on_click(installPackages)
display(install_btn)
In [2]:
from pymongo import MongoClient
from IPython.utils.traitlets import Unicode
class PasswordWidget(widgets.Textarea):
    """Text area whose front-end view name is overridden to 'PasswordView'.

    NOTE(review): no 'PasswordView' implementation is defined in this
    notebook; presumably it is registered by front-end code elsewhere --
    confirm, otherwise the widget cannot render.
    """

    # `.tag(sync=True)` attaches the same `sync` metadata as the original
    # constructor keyword, which traitlets has deprecated.
    _view_name = Unicode('PasswordView').tag(sync=True)
# Input field for the MongoDB connection string; login() reads it via uri.value.
uri = PasswordWidget(placeholder='Input connection string of your MongoDB')
# Button used to trigger the connection attempt.
login_btn = widgets.Button(description="Login")
def login(sender):
    """Connect to MongoDB using the URI typed into the `uri` widget.

    Click handler for the "Login" button. On success the module-level `db`
    is bound to the `twitterdb` database of the new client; on failure the
    error is printed and `db` is left untouched.

    Args:
        sender: The widget that fired the click event (unused).
    """
    from pymongo.errors import PyMongoError

    global db
    try:
        client = MongoClient(uri.value)
        # MongoClient connects lazily, so constructing it proves nothing.
        # A cheap `ping` forces a round trip before success is reported.
        client.admin.command('ping')
    except (ValueError, PyMongoError) as err:
        # Print the caught error itself (the original printed the
        # ValueError class), and cover pymongo's own error hierarchy too.
        print(err)
        return
    db = client.twitterdb
    print('Connection established successfully')
# Run login() on click and render the URI field together with its button.
login_btn.on_click(login)
display(uri, login_btn)
In [3]:
import tweepy
def twitterCarousel():
    """Advance to the next Twitter account and rebuild the global `api`.

    Increments the module-level `iterator` (wrapping around the number of
    configured accounts), authenticates with that account's credentials and
    rebinds the module-level `api` to a new `tweepy.API` instance.

    Credentials are read from four environment variables, each holding a
    comma-separated list with one entry per account, in the same order:

        TWITTER_CONSUMER_KEYS
        TWITTER_CONSUMER_SECRETS
        TWITTER_ACCESS_TOKENS
        TWITTER_ACCESS_TOKEN_SECRETS

    Raises:
        RuntimeError: if a variable is missing/empty or the four lists do
            not all have the same length.
    """
    import os

    global iterator
    global api

    # Secrets must not live in the notebook. The keys that used to be
    # embedded here were published with it and should be treated as
    # compromised (revoke and regenerate them).
    variables = ("TWITTER_CONSUMER_KEYS",
                 "TWITTER_CONSUMER_SECRETS",
                 "TWITTER_ACCESS_TOKENS",
                 "TWITTER_ACCESS_TOKEN_SECRETS")
    columns = []
    for variable in variables:
        raw = os.environ.get(variable, "")
        columns.append([item.strip() for item in raw.split(",") if item.strip()])

    account_count = len(columns[0])
    if account_count == 0 or any(len(column) != account_count for column in columns):
        raise RuntimeError(
            "Twitter credentials are not configured: set " + ", ".join(variables)
            + " to comma-separated lists of equal, non-zero length")
    consumer_key, consumer_secret, access_token, access_token_secret = columns

    # Wrap on the real number of accounts instead of a hard-coded 3.
    iterator = (iterator + 1) % account_count
    print("carousel use iterator "+str(iterator))

    auth = tweepy.OAuthHandler(consumer_key[iterator], consumer_secret[iterator])
    auth.set_access_token(access_token[iterator], access_token_secret[iterator])
    api = tweepy.API(auth)
    print('twitter is connected')
# Index of the credential set in use; twitterCarousel advances it before
# it connects.
iterator = 0
# Placeholder only: twitterCarousel rebinds `api` to a tweepy.API instance.
api = ""
# Establish the initial Twitter connection.
twitterCarousel()
In [4]:
import json
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that stores each received message in MongoDB."""

    def on_data(self, raw_data):
        """Parse one raw JSON message and insert it into `db.twitter_data`.

        Prints the id of the inserted document. Relies on the module-level
        `db` having been set by a successful login.
        """
        document = json.loads(raw_data)
        insert_result = db.twitter_data.insert_one(document)
        print(insert_result.inserted_id)

    def on_error(self, status_code):
        """Return False for status code 420, None for any other code."""
        if status_code != 420:
            return None
        # returning False from on_error disconnects the stream
        return False
def streamDataFromGlasgow(sender):
    """Start a background stream of tweets located in the Glasgow area.

    Click handler for the "Stream Data" button. Each received message is
    handled by a `MyStreamListener`.

    Args:
        sender: The widget that fired the click event (unused).
    """
    myStream = tweepy.Stream(auth = api.auth, listener=MyStreamListener())
    # Bounding box as [west, south, east, north] in degrees.
    glasgow_bbox = [-4.50, 55.79, -3.97, 55.93]
    # The original call was commented out, so the button created a stream
    # and never started it. `async=True` is a syntax error from Python 3.7
    # on; tweepy 3.7+ spells the keyword `is_async`.
    try:
        myStream.filter(locations=glasgow_bbox, is_async=True)
    except TypeError:
        # Older tweepy 3.x only knows `async`; pass it through a dict so
        # this file still parses on Python 3.7+.
        myStream.filter(locations=glasgow_bbox, **{"async": True})
# Render a button that runs streamDataFromGlasgow each time it is clicked.
stream_btn = widgets.Button(description="Stream Data")
stream_btn.on_click(streamDataFromGlasgow)
display(stream_btn)
In [5]:
import folium
import numpy as np
import math
def startMap(sender):
    """Plot stored tweets whose text contains "is" on a folium map.

    Click handler for the "Start map" button. Reads `db.twitter_data`,
    derives a centre point and a size from the first three corners of each
    tweet's place bounding box, and draws a circle marker (size > 1.0) or a
    pin marker (otherwise) with the sanitised tweet text as popup.

    Args:
        sender: The widget that fired the click event (unused).
    """
    # No cell of the notebook imports `re`, so the original raised
    # NameError on the first click.
    import re

    tweet_map = folium.Map(location=[(55.79+55.93)/2, (-4.50-3.97)/2])
    keyword = "is"  # renamed from `filter`, which shadowed the builtin
    regx = re.compile(".*"+keyword+".*", re.IGNORECASE)
    for post in db.twitter_data.find({"text": regx}):
        # `place` may be null or absent on a stored document; skip those
        # instead of crashing on the subscript.
        place = post.get("place") or {}
        bounding_box = place.get("bounding_box") or {}
        rings = bounding_box.get("coordinates") or []
        if not rings or len(rings[0]) < 3:
            continue
        # Corners are read as x = index 0, y = index 1, like the original.
        (x0, y0), (x1, y1), (x2, y2) = (corner[:2] for corner in rings[0][:3])

        # Lengths of two consecutive box edges, then their combined
        # (diagonal) length.
        L1 = math.hypot(x1 - x0, y1 - y0)
        L2 = math.hypot(x2 - x1, y2 - y1)
        R = math.hypot(L1, L2)
        centre = [(y2+y0)/2, (x2+x0)/2]

        # Keep only alphanumerics and spaces so the popup HTML stays
        # well-formed, and cap it at 40 characters.
        text = ''.join(e for e in post["text"] if e.isalnum() or e==' ')[:40]
        text = '<i>' + text + '</i>'
        if R > 1.0:
            folium.CircleMarker(centre, radius=R, popup=text,
                                color='#3186cc', fill_color='#3186cc').add_to(tweet_map)
        else:
            folium.Marker(centre, popup=text,
                          icon=folium.Icon(color='green',icon='info-sign')).add_to(tweet_map)
    display(tweet_map)
# Render a button that runs startMap each time it is clicked.
start_btn = widgets.Button(description="Start map")
start_btn.on_click(startMap)
display(start_btn)
In [7]:
def decodeData(status):
    """Convert a status object into a plain nested dict.

    Copies a fixed set of attributes from `status`, `status.user`,
    `status.place` and `status.place.bounding_box`. The status id is stored
    under the key "_id".

    Args:
        status: Object exposing the attributes read below; `status.place`
            must not be None.

    Returns:
        dict: The selected fields, with "user" and "place" as nested dicts.
    """
    def pick(source, names):
        # Copy the named attributes of `source` into a dict, in order.
        return {name: getattr(source, name) for name in names}

    user_doc = pick(status.user, (
        "created_at", "geo_enabled", "lang", "url", "description",
        "time_zone", "location", "screen_name", "protected",
        "statuses_count", "profile_image_url_https", "utc_offset",
        "followers_count", "id", "id_str", "name", "friends_count",
    ))

    place_doc = pick(status.place, (
        "country_code", "country", "name", "full_name", "id",
    ))
    place_doc["bounding_box"] = pick(status.place.bounding_box,
                                     ("type", "coordinates"))

    document = pick(status, (
        "retweet_count", "favorited", "in_reply_to_user_id",
        "created_at", "coordinates",
    ))
    document["user"] = user_doc
    document["retweeted"] = status.retweeted
    document["place"] = place_doc
    document["geo"] = status.geo
    document["_id"] = status.id
    document.update(pick(status, (
        "text", "lang", "in_reply_to_user_id_str", "id_str",
    )))
    return document
def getHistoricalStatuses(sender):
    """Harvest past tweets around Glasgow into `db.twitter_historical_data`.

    Click handler for the "Take Data" button. Repeatedly searches for up to
    500 English tweets within 10km of 55.85,-4.25 since 2017-10-29 and
    stores every tweet that has a place. On a tweepy error it switches
    credentials via twitterCarousel(); every third consecutive error it
    sleeps for 15 minutes instead.

    NOTE(review): a successful pass does not leave the loop, so this handler
    keeps re-querying until an exception other than TweepError escapes. The
    StopIteration handler looks unreachable because list() absorbs the
    iterator's StopIteration -- confirm whether one pass was intended.

    Args:
        sender: The widget that fired the click event (unused).
    """
    import time
    from pymongo.errors import DuplicateKeyError, PyMongoError

    global api

    def putInDataBase(tweet):
        """Insert one tweet; tweets without a place are skipped."""
        if tweet.place is None:
            return
        d = decodeData(tweet)
        try:
            res = db.twitter_historical_data.insert_one(d)
            print(res)
        except DuplicateKeyError:
            # decodeData stores the tweet id as `_id`, so a tweet that was
            # already saved is rejected by MongoDB.
            print("duplicate. Move further")
        except PyMongoError as err:
            # Still best-effort, but no longer mislabelled as a duplicate.
            # Non-database errors (KeyboardInterrupt, NameError for a
            # missing `db`) now propagate instead of being swallowed.
            print("insert failed: " + str(err))

    tweepyErrors = 0
    while True:
        try:
            tweets = list(tweepy.Cursor(api.search,
                                        geocode="55.85,-4.25,10km",
                                        since="2017-10-29",
                                        include_entities=True,
                                        lang="en").items(500))
            for tweet in tweets:
                putInDataBase(tweet)
        except tweepy.TweepError:
            tweepyErrors = tweepyErrors + 1
            if tweepyErrors > 2:
                print("tweepError. Sleep")
                time.sleep(60 * 15)
                tweepyErrors = 0
                continue
            # Try again with the next account's credentials.
            twitterCarousel()
        except StopIteration:
            print("Stopping Iteration")
            break
# Render a button that runs getHistoricalStatuses each time it is clicked.
history_btn = widgets.Button(description="Take Data")
history_btn.on_click(getHistoricalStatuses)
display(history_btn)
In [ ]: