In [ ]:
import re

# We're using the pymongo module to connect to the mLab database where the
# tweets are being stored
import pymongo

In [ ]:
# Open a connection to the hosted MongoDB instance (read-only account)
client = pymongo.MongoClient(
    "mongodb://reader:reader@ds123614.mlab.com:23614/bmlm-parking-tweets"
)

# Pick out the one database that lives on that server
db = client["bmlm-parking-tweets"]

# The tweets are kept in the "tweets" collection (db.test also exists)
collection = db["tweets"]

In [ ]:
# Make a list where each element is the "text" part of one tweet.
# Only the "text" field is requested from the server (projection), so the rest
# of each stored document is never transferred, and the query filters out
# documents that have no "text" field so a stray record cannot raise KeyError.
tweets = [
    obj["text"]
    for obj in collection.find({"text": {"$exists": True}}, {"text": 1, "_id": 0})
]

In [ ]:
# Peek at the first ten tweets
tweets[:10]

In [ ]:


In [ ]:
# Keywords that suggest a tweet is about traffic or busyness
buzzwords = [
    "busy",
    "cars",
    "traffic",
    "crash",
    "accident",
    "standstill",
    "queue",
    "lorry",
    "roadworks",
]

In [ ]:
# Get all the tweets where any of the buzzwords are present.
# Each tweet is lower-cased and broken into alphanumeric word tokens before
# comparing, so "Traffic", "traffic!" and "#traffic" all count as a mention of
# "traffic". The buzzwords go into a set so each tweet is tokenised only once.
buzzword_set = {bw.lower() for bw in buzzwords}
traffic_tweets = [
    tweet for tweet in tweets
    if not buzzword_set.isdisjoint(re.findall(r"\w+", tweet.lower()))
]

# How many do we have?
print(len(traffic_tweets))

In [ ]:
# OK... what proportion of all the tweets we have is this?
# Guard the empty case: with no tweets loaded the division would raise
# ZeroDivisionError instead of telling us anything useful.
if tweets:
    print(len(traffic_tweets) / len(tweets))
else:
    print("No tweets loaded, so there is no proportion to report")

In [ ]:


In [ ]:
# Huh. OK.

In [ ]:


In [ ]:
# Let's have a look at these tweets then...
# (bare expression, so the notebook displays the whole traffic_tweets list)
traffic_tweets

In [ ]:


In [ ]:


In [ ]:
# Bum.

In [ ]:


In [ ]:
# Let's do some sentiment analysis anyway :P
# Lots of this is borrowed from Roger's workshop: https://github.com/Bath-ML/sentiment-analysis-workshop
import nltk
# Show which nltk release is installed
print(nltk.__version__)

In [ ]:
# We're using the vader sentiment analysis library
from nltk.sentiment import vader

# The analyser loads the VADER lexicon from nltk's data directory, and that
# resource is not installed together with the nltk package. Fetch it once if it
# is missing so this cell also works on a fresh environment.
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon")

vanalyser = vader.SentimentIntensityAnalyzer()

In [ ]:
# Thin wrapper around the shared analyser
def vanalyse(sample):
    """Return the result of ``vanalyser.polarity_scores`` for ``sample``."""
    scores = vanalyser.polarity_scores(sample)
    return scores

In [ ]:
# Try this out: the notebook displays whatever vanalyse returns for this sentence
vanalyse("I hate being stuck in traffic")

In [ ]:


In [ ]:
# Pull out just the compound score, which runs along this scale:
#      -1 ............0.............1
# super-negative   neutral   super-positive
def vaderSentiment(review):
    """Return the "compound" entry of the scores ``vanalyse`` gives ``review``."""
    all_scores = vanalyse(review)
    return all_scores["compound"]

In [ ]:
# e.g. the compound score for one made-up sentence
vaderSentiment("town is stupidly busy today")

In [ ]:
# Compound sentiment score for every buzzword tweet, in the same order
list(map(vaderSentiment, traffic_tweets))