notebook.community

Edit and run



In [3]:

    
import pandas as pd

# TODO HSC: specify the dtypes at loading
df = pd.read_csv("../input/input.csv", na_values=["-"], encoding = "ISO-8859-1")

df.head()









    



/opt/conda/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2705: DtypeWarning: Columns (5) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)






    Out[3]:






  
    
      
      Action_id
      TypeAction
      Page_name
      Feed_message
      User_name
      CommentsForItem_message
      Sentiment
      Action_created_time_qvdate
    
  
  
    
      0
      Comment/1364017066952510_1364097753611108
      Comment
      Resident Evil
      [UK ONLY] To celebrate the release of Resident...
      Ronald Briand
      Muy buena :)  (Y)
      0.9880
      1/5/2017
    
    
      1
      Comment/10154797365350629_10154797373500629
      Comment
      Bandai Namco
      We continue our Tales Of look back with Tales ...
      Jon Imboden
      I may be in the minority here, but I LOVE the ...
      0.9540
      12/27/2016
    
    
      2
      Comment/1365217000165850_1365754593445424
      Comment
      Resident Evil
      Are you afraid of the dark? Because you should...
      Claudia Alejandra Olguín
      mira Roberto Zeballos,  para probar en tu PEQU...
      0.9527
      1/7/2017
    
    
      3
      Comment/1364017066952510_1364061950281355
      Comment
      Resident Evil
      [UK ONLY] To celebrate the release of Resident...
      David Lawrence
      Sarah Amadeus :O
      0.9402
      1/5/2017
    
    
      4
      Comment/1365217000165850_1225502404193085
      Comment
      Resident Evil
      Are you afraid of the dark? Because you should...
      Leandro Guilherme
      Aeee Paulo Lucas vou te dar de presente no seu...
      0.9382
      1/8/2017



In [4]:

    
print(df.dtypes)
print(df.describe())









    



Action_id                      object
TypeAction                     object
Page_name                      object
Feed_message                   object
User_name                      object
CommentsForItem_message        object
Sentiment                     float64
Action_created_time_qvdate     object
dtype: object
         Sentiment
count  3222.000000
mean     -0.032501
std       0.396417
min      -0.971000
25%            NaN
50%            NaN
75%            NaN
max       0.988000






    



/opt/conda/lib/python3.5/site-packages/numpy/lib/function_base.py:3403: RuntimeWarning: Invalid value encountered in median
  RuntimeWarning)



In [5]:

    
comments = df[df.TypeAction == "Comment"]
likes = df[df.TypeAction == "Like"]

#print(comments.head())
#print(likes.head())

print(len(df) - len(comments) - len(likes))



In [6]:

    
print(likes.User_name.unique())
print(likes.CommentsForItem_message.unique())
print(likes.Sentiment.unique())









    



[nan 'Aaron Turness' 'Marcelo J. Biott' ..., 'Michael Tong'
 'JadieKit Louise Brown' 'Mauro Mazzariol']
[nan]
[ nan]



In [7]:

    
# let's drop the commentsForItem and Sentiment columns, as they are empty for the likes...
likes.drop(['CommentsForItem_message', 'Sentiment'], 1,inplace=True)

# also drop the columns with no information
likes.drop(['TypeAction', 'Action_id'], 1,inplace=True)

likes.describe()









    



/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/opt/conda/lib/python3.5/site-packages/ipykernel/__main__.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy






    Out[7]:






  
    
      
      Page_name
      Feed_message
      User_name
      Action_created_time_qvdate
    
  
  
    
      count
      56033
      50987
      58059
      58089
    
    
      unique
      11
      103
      56315
      7
    
    
      top
      Resident Evil
      Are you afraid of the dark? Because you should...
      ???
      1/4/2017
    
    
      freq
      28556
      12177
      312
      25313



In [12]:

    
print(comments.Sentiment.unique())

comments.describe()









    



[ 0.988   0.954   0.9527 ..., -0.9493 -0.971      nan]






    



/opt/conda/lib/python3.5/site-packages/numpy/lib/function_base.py:3403: RuntimeWarning: Invalid value encountered in median
  RuntimeWarning)






    Out[12]:






  
    
      
      Sentiment
    
  
  
    
      count
      3222.000000
    
    
      mean
      -0.032501
    
    
      std
      0.396417
    
    
      min
      -0.971000
    
    
      25%
      NaN
    
    
      50%
      NaN
    
    
      75%
      NaN
    
    
      max
      0.988000



In [9]:

    
comments.to_csv("../output/comments.csv")
likes.to_csv("../output/likes.csv")

	Action_id	TypeAction	Page_name	Feed_message	User_name	CommentsForItem_message	Sentiment	Action_created_time_qvdate
0	Comment/1364017066952510_1364097753611108	Comment	Resident Evil	[UK ONLY] To celebrate the release of Resident...	Ronald Briand	Muy buena :) (Y)	0.9880	1/5/2017
1	Comment/10154797365350629_10154797373500629	Comment	Bandai Namco	We continue our Tales Of look back with Tales ...	Jon Imboden	I may be in the minority here, but I LOVE the ...	0.9540	12/27/2016
2	Comment/1365217000165850_1365754593445424	Comment	Resident Evil	Are you afraid of the dark? Because you should...	Claudia Alejandra Olguín	mira Roberto Zeballos, para probar en tu PEQU...	0.9527	1/7/2017
3	Comment/1364017066952510_1364061950281355	Comment	Resident Evil	[UK ONLY] To celebrate the release of Resident...	David Lawrence	Sarah Amadeus :O	0.9402	1/5/2017
4	Comment/1365217000165850_1225502404193085	Comment	Resident Evil	Are you afraid of the dark? Because you should...	Leandro Guilherme	Aeee Paulo Lucas vou te dar de presente no seu...	0.9382	1/8/2017

	Page_name	Feed_message	User_name	Action_created_time_qvdate
count	56033	50987	58059	58089
unique	11	103	56315	7
top	Resident Evil	Are you afraid of the dark? Because you should...	???	1/4/2017
freq	28556	12177	312	25313

	Sentiment
count	3222.000000
mean	-0.032501
std	0.396417
min	-0.971000
25%	NaN
50%	NaN
75%	NaN
max	0.988000