In [67]:
import pandas as pd
import psycopg2
import re
import regex
from functools import reduce
from collections import Counter
% matplotlib inline
conn_string = "host='localhost' dbname='bolero'"
con = psycopg2.connect(conn_string)

In [68]:
# Load every row and column of the `tweet` table into a DataFrame through the
# notebook-wide connection `con`.
tweets = pd.read_sql(
    sql='SELECT * FROM tweet',
    con=con,
)

In [85]:
# Keep only tweets whose text does not start with 'RT' (retweets).
# `na=False` makes a missing `text` count as "not a retweet" instead of
# producing NaN, which would make the `~` negation below raise.
is_retweet = tweets.text.str.startswith('RT', na=False)
tweets_clean = tweets[~is_retweet]
# A missing text cannot be scanned by the regex, so leave those rows out of
# the series that gets searched.
decoded = tweets_clean.text.dropna()

# Matches a run of one or more consecutive code points drawn from the
# Unicode ranges listed below.
emoji_pattern = re.compile("["
        u"\U0001F600-\U0001F64F"  # emoticons
        u"\U0001F300-\U0001F5FF"  # symbols & pictographs
        u"\U0001F680-\U0001F6FF"  # transport & map symbols
        u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                           "]+", flags=re.UNICODE)

# Tally of every individual emoji found across the remaining tweets.
emojis = Counter()
for row in decoded:
    for emoji_run in emoji_pattern.findall(row):
        # A run can hold several emojis back to back. `\X` (from the `regex`
        # module) splits it into grapheme clusters, so emojis made of more
        # than one code point, such as flags and skin-tone variants, are
        # counted as a single symbol.
        emojis.update(regex.findall(r'\X', emoji_run))

# Display the counts, most frequent emoji first.
pd.Series(emojis).sort_values(ascending=False)


Out[85]:
🔥     61
👍     56
😂     37
🙄     31
🚀     25
😕     18
👌     17
😒     16
👏     16
🐍     14
👍🏻    14
😆     13
🇺🇸    12
🙃     11
😅     10
😁     10
😔      9
😉      9
😐      8
😀      8
😜      7
😄      7
🙌      6
😞      6
🎧      5
😮      5
🍁      5
😢      5
🌧      5
🌊      5
      ..
💫      1
🔢      1
💤      1
💢      1
💡      1
💔      1
🔦      1
💊      1
💃      1
🕐      1
👾      1
🕑      1
👓      1
🕒      1
🖥      1
🖱      1
🗓      1
😃      1
😇      1
😊      1
👊      1
👉      1
💳      1
👆      1
👂      1
📰      1
🐦      1
🐞      1
🐚      1
🇨🇦     1
dtype: int64

In [ ]: