In [34]:
import pandas as pd
import pip

pip.main(['install', 'xlrd'])

input_file = "input/input.xlsx"

df = pd.read_excel(input_file)
likes = df[df.TypeAction == "Like"]

likes.head()


Requirement already satisfied (use --upgrade to upgrade): xlrd in /opt/conda/lib/python3.5/site-packages
You are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
Out[34]:
Action_id TypeAction Page_name Feed_message User_name CommentsForItem_message Sentiment Action_created_time_qvdate
7765 Like/6938264959 Like CAPCOM The team at Capcom Vancouver want to wish ever... - - - 2017-01-04
7766 Like/12040282146 Like - Before it comes out, i playing all the horror ... - - - 2017-01-05
7767 Like/49054165628 Like Bandai Namco Surfs up! Boruto is bringing tsunami reinforce... - - - 2017-01-04
7768 Like/154718398250 Like - 21 días para experimentar el miedo y el terror... - - - 2017-01-04
7769 Like/285330833071 Like - Before it comes out, i playing all the horror ... - - - 2017-01-05

In [36]:
print(likes.User_name.unique())
print(likes.CommentsForItem_message.unique())
print(likes.Sentiment.unique())


['-' 'Aaron Turness' 'Marcelo J. Biott' ..., 'Michael Tong'
 'JadieKit Louise Brown' 'Mauro Mazzariol']
['-']
['-']

In [37]:
# let's drop the commentsForItem and Sentiment columns, as they are empty for the likes...
likes.drop(['CommentsForItem_message', 'Sentiment'], 1,inplace=True)

# also drop the columns with no information
likes.drop(['TypeAction', 'Action_id'], 1,inplace=True)

likes.head()


/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[37]:
Page_name Feed_message User_name Action_created_time_qvdate
7765 CAPCOM The team at Capcom Vancouver want to wish ever... - 2017-01-04
7766 - Before it comes out, i playing all the horror ... - 2017-01-05
7767 Bandai Namco Surfs up! Boruto is bringing tsunami reinforce... - 2017-01-04
7768 - 21 días para experimentar el miedo y el terror... - 2017-01-04
7769 - Before it comes out, i playing all the horror ... - 2017-01-05

In [32]:
likes.describe()


Out[32]:
Page_name Feed_message User_name Action_created_time_qvdate
count 58089 50987 58089 58089
unique 12 103 57090 7
top Resident Evil Are you afraid of the dark? Because you should... - 2017-01-04 00:00:00
freq 28556 12177 30 25313
first NaN NaN NaN 2017-01-04 00:00:00
last NaN NaN NaN 2017-01-10 00:00:00