In [67]:
import pandas as pd
import psycopg2
import re
import regex
from functools import reduce
from collections import Counter
% matplotlib inline
conn_string = "host='localhost' dbname='bolero'"
con = psycopg2.connect(conn_string)
In [68]:
# Load the entire `tweet` table into a DataFrame over the open connection.
tweets = pd.read_sql(
    sql='SELECT * FROM tweet',
    con=con,
)
In [85]:
# Drop rows whose text starts with 'RT' (presumably retweets; note this also
# matches any other text beginning with those two letters -- confirm intent).
# na=False makes rows with missing text evaluate to False instead of NaN, so
# the boolean negation below cannot fail on an object-dtype mask.
tweets_clean = tweets[~tweets.text.str.startswith('RT', na=False)]

# Only real strings can be scanned by the regex; rows with null text are skipped.
decoded = tweets_clean.text.dropna()

# Character class covering four emoji code-point ranges; '+' captures each
# consecutive run of such characters as a single match.
emoji_pattern = re.compile("["
                           u"\U0001F600-\U0001F64F"  # emoticons
                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)

# Tally every individual emoji across all remaining tweets.
emojis = Counter()
for text in decoded:
    for emoji_run in emoji_pattern.findall(text):
        # \X (third-party `regex` module) matches one extended grapheme
        # cluster, so a run is split into user-perceived characters -- e.g. a
        # flag's pair of regional indicators is counted as one item.
        emojis.update(regex.findall(r'\X', emoji_run))

# Most frequent emoji first. The explicit dtype keeps the result an integer
# Series even when no emoji were found at all.
pd.Series(emojis, dtype='int64').sort_values(ascending=False)
Out[85]:
In [ ]: