In [1]:
import pandas as pd
df = pd.read_csv("output/input_with_gender.csv")
print(df.head())
Unnamed: 0 first_name Action_id \
0 1 'regin Like/1351451431565601
1 2 'Γιωργος Like/1840121602944310
2 3 'Ειρηνη Like/1790336797885801
3 4 'ۦ Like/1127220827394955
4 5 - Comment/1194130220666342_1194652720614092
TypeAction Page_name \
0 Like Bandai Namco
1 Like TEKKEN
2 Like Resident Evil
3 Like TEKKEN
4 Comment -
Feed_message User_name \
0 Is there even a way to prepare your body for t... 'regin Iyas
1 Dive into the week like Bob dives into his opp... 'Γιωργος Γρυπαρης'
2 Day 2 of Resident Evil Christmas! Alexia put ... 'Ειρηνη Στελλατου'
3 Need inspiration for your New Year’s resolutio... 'ۦ مريم
4 Ultimate Mortal Kombat 3 #FanArt: #Kitana by B... -
CommentsForItem_message Sentiment Action_created_time_qvdate \
0 - - 1/7/2017
1 - - 1/9/2017
2 - - 1/4/2017
3 - - 1/4/2017
4 Great art and a great start to 2017! 0.9255 1/6/2017
name proportion_male proportion_female gender year_min year_max
0 NaN NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN NaN
In [2]:
comments = df[df.TypeAction == "Comment"]
comments.drop(['Action_id', 'TypeAction', 'year_min', 'year_max', 'first_name'], 1,inplace=True)
comments.head()
/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
from ipykernel import kernelapp as app
Out[2]:
Unnamed: 0
Page_name
Feed_message
User_name
CommentsForItem_message
Sentiment
Action_created_time_qvdate
name
proportion_male
proportion_female
gender
4
5
-
Ultimate Mortal Kombat 3 #FanArt: #Kitana by B...
-
Great art and a great start to 2017!
0.9255
1/6/2017
NaN
NaN
NaN
NaN
5
6
Bandai Namco
Safety first. Even when you're a living rocket...
-
Worlds best super fan right here
0.8713
12/28/2016
NaN
NaN
NaN
NaN
6
7
TEKKEN
Who doesn’t remember Tekken 3? Refresh your me...
-
Thanks a lot for Sharing! <3
0.8124
1/5/2017
NaN
NaN
NaN
NaN
7
8
-
Milla Jovovich returns as the unstoppable Alic...
-
fabulous :)
0.7785
12/31/2016
NaN
NaN
NaN
NaN
9
10
TEKKEN
Who doesn’t remember Tekken 3? Refresh your me...
-
best video game soundtrack ever!
0.6345
1/5/2017
NaN
NaN
NaN
NaN
In [6]:
#page stats
comments_light = comments[["Page_name", "User_name", "Sentiment", "gender"]]
# ensure the sentiment is a number, the gender is treated as a value
# should get stats over the distribution of values for each page/user
print(comments_light.dtypes)
grouped_by_page = comments_light.groupby('Page_name')
grouped_by_user = comments_light.groupby('User_name')
print(grouped_by_page.describe(include = 'all'))
print(grouped_by_user.describe(include = 'all'))
# Let's start with the number of observations by Page name
# then try to show the proportion of sexes per page
# then correct for multiple comments for the same user...
#also try the same for the sentiment
# also the sentiment over time/ the sentiment per message...
Page_name object
User_name object
Sentiment object
gender object
dtype: object
Sentiment User_name \
Page_name
- count 227 227
unique 34 181
top - Bogdan Hubert
freq 143 10
Bandai Namco count 1987 1987
unique 508 1532
top - Jermel Daise
freq 830 13
CAPCOM count 274 274
unique 84 238
top - Marlon Silva
freq 107 8
Dead or Alive Game count 40 40
unique 1 34
top - Oscar Barojas Hernandez
freq 40 3
Dragon Ball Xenoverse 2 count 32 32
unique 2 26
top - Joshua Jsm
freq 30 2
Dragon Ball Xenoverse 2 - Community count 110 110
unique 2 99
top - Hugo Maurel
freq 109 2
Mortal Kombat count 51 51
unique 8 43
top - Soroush Atarod
freq 38 3
Resident Evil count 3720 3720
unique 546 3209
top - Jesse Andrew
freq 2225 6
Street Fighter count 622 622
unique 99 536
top - AJ Threet
freq 386 6
TEKKEN count 1213 1213
unique 94 1070
top - Quentin Debode
freq 905 5
Ubisoft count 93 93
unique 2 84
top - Bryan Potteau
freq 70 3
gender
Page_name
- count 171
unique 2
top male
freq 130
Bandai Namco count 1741
unique 2
top male
freq 1609
CAPCOM count 237
unique 2
top male
freq 222
Dead or Alive Game count 30
unique 2
top male
freq 26
Dragon Ball Xenoverse 2 count 29
unique 2
top male
freq 26
Dragon Ball Xenoverse 2 - Community count 80
unique 2
top male
freq 68
Mortal Kombat count 45
unique 2
top male
freq 40
Resident Evil count 3284
unique 2
top male
freq 2755
Street Fighter count 524
unique 2
top male
freq 471
TEKKEN count 1001
unique 2
top male
freq 841
Ubisoft count 78
unique 2
top male
freq 73
Page_name Sentiment gender
User_name
- count 26 26 0
unique 8 12 0
top - - NaN
freq 8 13 NaN
A-Dawg Mez count 1 1 0
unique 1 1 0
top CAPCOM 0.2680 NaN
freq 1 1 NaN
A.J. Peck count 1 1 0
unique 1 1 0
top Bandai Namco - NaN
freq 1 1 NaN
A.R. Knight count 1 1 0
unique 1 1 0
top Street Fighter - NaN
freq 1 1 NaN
AC Pacana count 2 2 2
unique 1 1 1
top - 0.0000 male
freq 2 2 2
AJ Escalona count 2 2 2
unique 1 1 1
top Bandai Namco - male
freq 2 2 2
AJ Threet count 6 6 6
unique 1 2 1
top Street Fighter - male
freq 6 4 6
ALe Gordo count 2 2 2
unique 1 1 1
... ... ... ...
木木水雲 top Resident Evil 0.6142 NaN
freq 1 1 NaN
李培德 count 1 1 0
unique 1 1 0
top Bandai Namco - NaN
freq 1 1 NaN
游宗勳 count 1 1 0
unique 1 1 0
top Dead or Alive Game - NaN
freq 1 1 NaN
田秋娜 count 1 1 0
unique 1 1 0
top Resident Evil - NaN
freq 1 1 NaN
趙偉綸 count 1 1 0
unique 1 1 0
top Bandai Namco - NaN
freq 1 1 NaN
강동우 count 1 1 0
unique 1 1 0
top Bandai Namco - NaN
freq 1 1 NaN
브렛사페 count 1 1 0
unique 1 1 0
top - - NaN
freq 1 1 NaN
장태인 count 1 1 0
unique 1 1 0
top Bandai Namco 0.0000 NaN
freq 1 1 NaN
[27860 rows x 3 columns]
In [ ]:
# top words / relevant words for pages...
In [ ]:
# find what are the words in the comments that provoke the best/worst emotions/most reactions...
In [ ]:
# gender stats
In [ ]:
# for a page, try over time
Content source: HelainSchoonjans/fantastic-eureka
Similar notebooks: