In [1]:
%matplotlib inline
Code for finalizing the model data
Author: Jimmy Charité
Email: jimmy.charite@gmail.com
In [2]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import spacy
import pysentiment
from textstat.textstat import textstat
from wordcloud import WordCloud
import nltk
import statsmodels.formula.api as smf
import statsmodels.api as sm
In [3]:
# Move from the notebook directory up to the project root so the relative
# './clean_data' and './plots' paths below resolve.
# Fix: os.chdir always returns None, so the old `retval=` assignment
# captured nothing useful and was misleading — call it for its side effect.
os.chdir("..")
In [4]:
def pd_tab(df, col, sort_by='count', asc=False):
    """One-way frequency table for a DataFrame column.

    Parameters
    ----------
    df : pd.DataFrame
        Source frame.
    col : str
        Column to tabulate; NaN values are counted as their own category.
    sort_by : str
        Column of the result to sort by ('count', 'percent', or `col`).
    asc : bool
        Sort ascending when True; descending (the default) otherwise.

    Returns
    -------
    pd.DataFrame with columns [col, 'count', 'percent'].
    """
    freq = df[col].value_counts(dropna=False)
    tab = freq.reset_index(name='count')
    tab.columns = [col, 'count']
    total = tab['count'].sum()
    tab['percent'] = tab['count'] / total
    # Return a sorted copy instead of mutating in place.
    return tab.sort_values(by=sort_by, ascending=asc)
In [5]:
# Load the parsed review data produced by the earlier parsing step.
raw_data=pd.read_pickle('./clean_data/raw_data_post_parse.pkl')
raw_data.head()
Out[5]:
In [6]:
# List all available columns before selecting the final feature set.
raw_data.columns
Out[6]:
In [7]:
# Final modeling columns: the outcome ('helpful'), the engineered text
# features, the lemmatized text, and every 'vec*' column (presumably the
# document-vector components produced upstream — confirm against the parser).
fin_cols=['helpful','num_sents', 'num_words', 'readability',
          'neg_senti', 'pos_senti', 'neu_senti', 'comp_senti',
          'text_lemma']
# Idiom fix: str.startswith is clearer than slicing the first three chars.
vec_cols=[s for s in raw_data.columns if s.startswith('vec')]
fin_cols.extend(vec_cols)
fin_cols
Out[7]:
In [8]:
# Keep only the final modeling columns (explicit copy avoids chained-
# assignment warnings in the transforms below).
raw_data=raw_data[fin_cols].copy()
In [9]:
# Sanity-check the trimmed frame.
raw_data.head()
Out[9]:
In [10]:
# Distribution of the raw sentence counts.
g=sns.distplot(raw_data.num_sents)
g.axes.set_ylim(0,)
g.axes.set_xlim(0,)
g.axes.set_title('Number of Sentences\n',fontsize=20)
# Fix: 'Count' mislabeled the x-axis. The x-axis of a distplot carries the
# variable's values; the y-axis is a density (distplot's default KDE-
# normalized histogram).
g.set_xlabel('Number of Sentences',fontsize=15)
g.set_ylabel('Density',fontsize=15)
Out[10]:
In [11]:
# Same distribution after a log transform (checking whether it tames the skew).
g=sns.distplot(np.log(raw_data.num_sents))
g.axes.set_ylim(0,)
g.axes.set_xlim(0,)
g.axes.set_title('Log Number of Sentences\n',fontsize=20)
# Fix: 'Count' mislabeled the x-axis — it shows the logged variable values;
# the y-axis is a density.
g.set_xlabel('Log Number of Sentences',fontsize=15)
g.set_ylabel('Density',fontsize=15)
Out[11]:
Will use the log transform because of the extreme skewness
In [12]:
# Replace the sentence count with its log to reduce the extreme right skew.
# NOTE(review): in-place overwrite — re-running this cell applies the log a
# second time, so it is not idempotent under out-of-order execution.
raw_data['num_sents']=np.log(raw_data.num_sents)
In [13]:
# Distribution of the raw word counts.
g=sns.distplot(raw_data.num_words)
g.axes.set_ylim(0,)
g.axes.set_xlim(0,)
g.axes.set_title('Number of Words\n',fontsize=20)
# Fix: 'Count' mislabeled the x-axis. The x-axis carries the variable's
# values; the y-axis is a density.
g.set_xlabel('Number of Words',fontsize=15)
g.set_ylabel('Density',fontsize=15)
Out[13]:
In [14]:
# Word-count distribution after a log transform.
g=sns.distplot(np.log(raw_data.num_words))
g.axes.set_ylim(0,)
g.axes.set_xlim(0,)
g.axes.set_title('Log Number of Words\n',fontsize=20)
# Fix: 'Count' mislabeled the x-axis — it shows the logged variable values;
# the y-axis is a density.
g.set_xlabel('Log Number of Words',fontsize=15)
g.set_ylabel('Density',fontsize=15)
Out[14]:
In [15]:
# Replace the word count with its log, mirroring the num_sents transform.
# NOTE(review): in-place overwrite — re-running this cell double-logs.
raw_data['num_words']=np.log(raw_data.num_words)
In [16]:
# Summary statistics for the readability score.
raw_data.readability.describe()
Out[16]:
In [17]:
# Distribution of the readability scores.
g=sns.distplot(raw_data.readability)
g.axes.set_ylim(0,)
g.axes.set_xlim(0,)
# Fix: corrected the 'Readbility' typo in the rendered title.
g.axes.set_title('Readability\n',fontsize=20)
# Fix: 'Count' mislabeled the x-axis — it shows the readability values;
# the y-axis is a density.
g.set_xlabel('Readability',fontsize=15)
g.set_ylabel('Density',fontsize=15)
Out[17]:
In [18]:
# Count missing readability scores before transforming the column.
raw_data.readability.isnull().sum()
Out[18]:
In retrospect, this transform seems less appropriate. However, I will shift the distribution to make it strictly positive (adding a multiple of the absolute minimum) and then take the log
In [19]:
# Shift readability to be strictly positive, then take the log.
# NOTE(review): the shift is 100 * |min|, not just |min| as the prose above
# suggests — presumably to keep every value well away from zero before the
# log; confirm this factor is intentional.
# Also non-idempotent: re-running this cell shifts and logs again.
raw_data['readability']=np.log(raw_data.readability+100*np.abs(np.min(raw_data.readability)))
In [20]:
# Re-check the readability summary after the shift-and-log transform.
raw_data.readability.describe()
Out[20]:
In [21]:
# Distribution of the transformed readability scores (zoomed x-range).
g=sns.distplot(raw_data.readability)
g.axes.set_ylim(0,)
g.axes.set_xlim(6.5,7.25)
# Fix: corrected the 'Readbility' typo in the rendered title.
g.axes.set_title('Readability\n',fontsize=20)
# Fix: 'Count' mislabeled the x-axis — it shows the (shifted, logged)
# readability values; the y-axis is a density.
g.set_xlabel('Readability',fontsize=15)
g.set_ylabel('Density',fontsize=15)
Out[21]:
This comically thin distribution will be adjusted with scaling
In [22]:
# Summary statistics for the negative-sentiment score.
raw_data.neg_senti.describe()
Out[22]:
In [23]:
# Distribution of the negative-sentiment scores.
g = sns.distplot(raw_data.neg_senti)
g.set_ylim(bottom=0)
g.set_title('Negative Sentiment\n', fontsize=20)
g.set_xlabel('Score', fontsize=15)
Out[23]:
In [24]:
# Summary statistics for the positive-sentiment score.
raw_data.pos_senti.describe()
Out[24]:
In [25]:
# Distribution of the positive-sentiment scores.
g = sns.distplot(raw_data.pos_senti)
g.set_ylim(bottom=0)
g.set_title('Positive Sentiment\n', fontsize=20)
g.set_xlabel('Score', fontsize=15)
Out[25]:
In [26]:
# Summary statistics for the neutral-sentiment score.
raw_data.neu_senti.describe()
Out[26]:
In [27]:
# Distribution of the neutral-sentiment scores.
g = sns.distplot(raw_data.neu_senti)
g.set_ylim(bottom=0)
g.set_title('Neutral Sentiment\n', fontsize=20)
g.set_xlabel('Score', fontsize=15)
Out[27]:
In [28]:
# Summary statistics for the composite sentiment score.
raw_data.comp_senti.describe()
Out[28]:
In [29]:
# Distribution of the composite sentiment scores.
g = sns.distplot(raw_data.comp_senti)
g.set_ylim(bottom=0)
g.set_title('Composite Sentiment Score\n', fontsize=20)
g.set_xlabel('Score', fontsize=15)
Out[29]:
In [30]:
# Scatter of positive vs. negative sentiment with a fitted regression line.
g = sns.regplot(x="pos_senti", y="neg_senti", data=raw_data, fit_reg=True)
g.set_title('Positive vs Negative Sentiment\n', fontsize=20)
g.set_xlabel('Positive Sentiment', fontsize=15)
g.set_ylabel('Negative Sentiment', fontsize=15)
Out[30]:
In [31]:
# Quantify the pos/neg relationship shown above with a simple OLS regression.
model_spec = 'pos_senti ~ neg_senti'
fitted = smf.ols(formula=model_spec, data=raw_data).fit()
print(fitted.summary())
I expected a larger R^2
In [32]:
# Styling for the heatmaps below: paper context, monospace font.
sns.set(context="paper", font="monospace")
In [33]:
# Correlation structure of the four sentiment scores, saved as a heatmap.
senti_cols = ['neg_senti', 'pos_senti', 'neu_senti', 'comp_senti']
corrmat = raw_data[senti_cols].corr()
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmat, vmax=1, square=True)
ax.set_title('Sentiment Correlation Matrix Heatmap\n', fontsize=20)
plt.savefig('./plots/Sentiment_Correlation_Matrix_Heatmap.png', bbox_inches='tight')
In [34]:
# Display the numeric correlation values behind the heatmap above.
corrmat
Out[34]:
As expected, the sentiments are highly correlated
In [35]:
# Correlation heatmap over every column except the raw lemmatized text.
numeric_cols = [c for c in raw_data.columns if c != 'text_lemma']
corrmat = raw_data[numeric_cols].corr()
f, ax = plt.subplots(figsize=(16, 12))
sns.heatmap(corrmat, vmax=1, square=True)
ax.set_title('Correlation Matrix Heatmap\n', fontsize=20)
plt.savefig('./plots/Correlation_Matrix_Heatmap.png', bbox_inches='tight')
In [36]:
# Display the numeric correlation values behind the heatmap above.
corrmat
Out[36]:
In [37]:
# NOTE(review): this repeats the subsetting already done in In [8]; the frame
# already contains exactly fin_cols, so this is a no-op copy here.
raw_data=raw_data[fin_cols].copy()
In [38]:
# Final look at the data before persisting.
raw_data.head()
Out[38]:
In [39]:
# Persist the finalized modeling data for the downstream modeling notebook.
raw_data.to_pickle('./clean_data/clean_data.pkl')