In [2]:
    
import pandas as pd
    
In [3]:
    
# For monitoring duration of pandas processes
from tqdm import tqdm, tqdm_pandas
# To avoid RuntimeError: Set changed size during iteration
tqdm.monitor_interval = 0
# Register `pandas.progress_apply` and `pandas.Series.map_apply` with `tqdm`
# (can use `tqdm_gui`, `tqdm_notebook`, optional kwargs, etc.)
tqdm.pandas(desc="Progress:")
# Now you can use `progress_apply` instead of `apply`
# and `progress_map` instead of `map`
# can also groupby:
# df.groupby(0).progress_apply(lambda x: x**2)
    
In [10]:
    
df00 = pd.read_pickle('../data/interim/005_important_nouns.p')
    
df00.head()
In [12]:
    
len(df00)
    
    Out[12]:
59324
In [13]:
    
df01 = df00.assign(num_of_imp_nouns = df00['imp_nns'].progress_apply(lambda imp_nouns:len(imp_nouns)))
df02 = df01.loc[df01['num_of_imp_nouns'] != 0]
len(df02)
    
    
Progress:: 100%|██████████| 59324/59324 [00:00<00:00, 1123729.33it/s]
    Out[13]:
48939
In [14]:
    
df02.head()
    
    Out[14]:
  
    
       
      asin 
      imp_nns 
      num_of_imp_nouns 
     
  
  
    
      0 
      000100039X 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      1 
      0002051850 
      [declarations, towns, smaller, threatens, desi... 
      73 
     
    
      2 
      0002113570 
      [humane, homo, ancestors, michener] 
      4 
     
    
      3 
      0002117088 
      [surgery, sorts, goodnight, virtues, translato... 
      7 
     
    
      4 
      000215725X 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
     
  
In [16]:
    
df10 = pd.read_pickle('../data/interim/002_pos_tagged_keyed_reviews.p')
    
In [17]:
    
df10.head()
    
    Out[17]:
  
    
       
      uniqueKey 
      reviewText 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT##000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
     
    
      1 
      AF7CSSGV93RXN##000100039X 
      [(first, RB), ( read, JJ), ( prophet, NNP), ( ... 
     
    
      2 
      A1NPNGWBVD9AK3##000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
     
    
      3 
      A3IS4WGMFR4X65##000100039X 
      [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... 
     
    
      4 
      AWLFVCT9128JV##000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
     
  
In [18]:
    
len(df10)
    
    Out[18]:
582711
In [21]:
    
df11 = pd.DataFrame(df10.uniqueKey.str.split('##',1).tolist(),columns = ['userId','asin'])
df11.head()
    
    Out[21]:
  
    
       
      userId 
      asin 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
     
    
      1 
      AF7CSSGV93RXN 
      000100039X 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
     
    
      3 
      A3IS4WGMFR4X65 
      000100039X 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
     
  
In [22]:
    
df_12 = pd.DataFrame(df10['reviewText'])
df_12.head()
    
    Out[22]:
  
    
       
      reviewText 
     
  
  
    
      0 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
     
    
      1 
      [(first, RB), ( read, JJ), ( prophet, NNP), ( ... 
     
    
      2 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
     
    
      3 
      [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... 
     
    
      4 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
     
  
In [19]:
    
df_13 = pd.concat([df11, df_12], axis=1)
df_13.head()
    
    Out[19]:
  
    
       
      userId 
      asin 
      reviewText 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
     
    
      1 
      AF7CSSGV93RXN 
      000100039X 
      [(first, RB), ( read, JJ), ( prophet, NNP), ( ... 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
     
    
      3 
      A3IS4WGMFR4X65 
      000100039X 
      [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
     
  
In [20]:
    
df_joined = df_13.merge(df02, left_on='asin', right_on='asin', how='inner')
df_joined[0:31]
    
    Out[20]:
  
    
       
      userId 
      asin 
      reviewText 
      imp_nns 
      num_of_imp_nouns 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      1 
      AF7CSSGV93RXN 
      000100039X 
      [(first, RB), ( read, JJ), ( prophet, NNP), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      3 
      A3IS4WGMFR4X65 
      000100039X 
      [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      5 
      AFY0BT42DDYZV 
      000100039X 
      [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      6 
      A25P6DY6ARTCGZ 
      000100039X 
      [(book, NN), ( almost, RBS), ( kahlil, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      7 
      A1SP45I55GQIIE 
      000100039X 
      [(certainly, RB), ( words, NNS), ( kahlil, NNP... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      8 
      A2E71VWXO59342 
      000100039X 
      [(prophet, NN), ( dispenses, NNS), ( ultimate,... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      9 
      A2OP1HD9RGX5OW 
      000100039X 
      [(book, NN), ( poetic, JJ), ( myth, NNP), ( wo... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      10 
      A2052JNVUPRTMT 
      000100039X 
      [(gibran, JJ), ( gets, NNS), ( right, VBD), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      11 
      AGKPTMTR3UX1R 
      000100039X 
      [(kahlil, NN), ( gibran, JJ), ( eighteen milli... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      12 
      A1HS49P9TZRGV9 
      000100039X 
      [(father, RB), ( huge, JJ), ( book, NN), ( col... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      13 
      A2ZZHMT58ZMVCZ 
      000100039X 
      [(prophet, NN), ( waited, VBD), ( twelve, CD),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      14 
      A3W43PSHRIG8KV 
      000100039X 
      [(first, RB), ( became, JJ), ( aware, NNP), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      15 
      A1TR1LU2JSZLUL 
      000100039X 
      [(book, NN), ( given, RB), ( gift, NNP), ( jou... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      16 
      ADIDQRLLR4KBQ 
      000100039X 
      [(atheist, NN), ( may, NNP), ( seem, NNP), ( s... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      17 
      A3AW2ZG0GP4SKN 
      000100039X 
      [(bought, VBN), ( book, NN), ( son, NNP), ( st... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      18 
      A2MMON52VMO7NT 
      000100039X 
      [(gibrans, NNS), ( words, NNS), ( strike, IN),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      19 
      AR72Z89LACZ8Q 
      000100039X 
      [(unusual, JJ), ( departure, NN), ( imaginary,... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      20 
      A3RTC17QVQGML7 
      000100039X 
      [(almost, RB), ( fifty-one, CD), ( years, NNS)... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      21 
      A3HCD8V6TW4DBV 
      000100039X 
      [(inspirational, JJ), ( true, NN), ( teachings... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      22 
      A281NPSIMI1C2R 
      000100039X 
      [(alive, JJ), ( like, NN), ( standing, VBG), (... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      23 
      A2X3E8EU2KBVN8 
      000100039X 
      [(seldom, RB), ( book, NNP), ( read, NNP), ( t... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      24 
      A2R64CR74I98K3 
      000100039X 
      [(usefull, JJ), ( book, NN), ( used, VBD), ( s... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      25 
      AHD101501WCN1 
      000100039X 
      [(never, RB), (quite, RB), ( make, JJ), ( mind... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      26 
      AF4QKY2R2TD3U 
      000100039X 
      [(say, VB), ( found, IN), ( truth, NNP), ( rat... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      27 
      A3SMT15X2QVUR8 
      000100039X 
      [(prophet, NN), ( almustafa, CC), ( waits, NNS... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      28 
      A3D7L0R1281COX 
      000100039X 
      [(gibrans, NNS), ( prophet, VBP), ( best, JJS)... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      29 
      A2INDDW3XYFFV1 
      000100039X 
      [(khalil, NNS), ( gibrans, NNS), ( prophet, VB... 
      [kneads, profits, preachers, territory, exile,... 
      26 
     
    
      30 
      A1CSL3TFTFOTWH 
      0002051850 
      [(found, VBN), ( book, NN), ( takes, NNS), ( p... 
      [declarations, towns, smaller, threatens, desi... 
      73 
     
  
In [26]:
    
df_joined.describe()
    
    Out[26]:
  
    
       
      num_of_imp_nouns 
     
  
  
    
      count 
      511364.000000 
     
    
      mean 
      27.590157 
     
    
      std 
      25.774587 
     
    
      min 
      4.000000 
     
    
      25% 
      10.000000 
     
    
      50% 
      19.000000 
     
    
      75% 
      36.000000 
     
    
      max 
      226.000000 
     
  
In [21]:
    
1 - 511364/582711
    
    Out[21]:
0.12243976859884231
In [22]:
    
582711-511364
    
    Out[22]:
71347
In [92]:
    
import numpy as np
matrix_m01 = df_joined.as_matrix()
len(matrix_m01)
    
    Out[92]:
511364
In [96]:
    
matrix_m02 = np.append(matrix_m01,np.zeros([len(matrix_m01),1]),1)
sample = pd.DataFrame(matrix_m02[0:10])
sample
    
    Out[96]:
  
    
       
      0 
      1 
      2 
      3 
      4 
      5 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      1 
      AF7CSSGV93RXN 
      000100039X 
      [(first, RB), ( read, JJ), ( prophet, NNP), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      3 
      A3IS4WGMFR4X65 
      000100039X 
      [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      5 
      AFY0BT42DDYZV 
      000100039X 
      [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      6 
      A25P6DY6ARTCGZ 
      000100039X 
      [(book, NN), ( almost, RBS), ( kahlil, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      7 
      A1SP45I55GQIIE 
      000100039X 
      [(certainly, RB), ( words, NNS), ( kahlil, NNP... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      8 
      A2E71VWXO59342 
      000100039X 
      [(prophet, NN), ( dispenses, NNS), ( ultimate,... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
    
      9 
      A2OP1HD9RGX5OW 
      000100039X 
      [(book, NN), ( poetic, JJ), ( myth, NNP), ( wo... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      0 
     
  
In [86]:
    
def get_pair(index, tagged_review):
    
    possible_pairs_dictionary = {}
    
    # left window
    counter = 0
    left_index = index - 1
    while((left_index!=-1) and (counter<10)):
        if tagged_review[left_index][1] in {'JJ', 'JJR', 'JJS'}:
            distance = index - left_index 
            possible_pairs_dictionary.update({tagged_review[left_index][0]:distance})
        left_index -= 1
        counter += 1
    # right window
    counter = 0
    right_index = index + 1
    while((right_index!=len(tagged_review)) and (counter<10)):
        if tagged_review[right_index][1] in {'JJ', 'JJR', 'JJS'}:
            distance = right_index - index 
            possible_pairs_dictionary.update({tagged_review[left_index][0]:distance})
        right_index += 1
        counter += 1
    
    # get shortest adj with shortest distance if multiple are found
    if(len(possible_pairs_dictionary)>1):
        return (min(possible_pairs_dictionary, key=lambda k: possible_pairs_dictionary[k]), tagged_review[index][0])
    elif(len(possible_pairs_dictionary)==1):
        return (possible_pairs_dictionary.get(0),tagged_review[index][0])
    else:
        return (None, tagged_review[index][0])
    
In [101]:
    
from tqdm import tqdm
with tqdm(total=len(matrix_m02)) as pbar:
    for i in range(len(matrix_m02)):
        pairs = []
        tagged_review = matrix_m02[i][2]
        imp_nns = matrix_m02[i][3]
        index = 0
        for(word, tag) in tagged_review:
            if tag in {'NN', 'NNS', 'NNP', 'NNPS'}:
                if word.strip() in imp_nns:
                    (adj,nn) = get_pair(index, tagged_review)
                    if adj is not None:
                        pairs.append((adj.strip(),nn.strip()))
            index += 1
        matrix_m02[i][5] = pairs
        pbar.update(1)
    
    
100%|██████████| 511364/511364 [00:36<00:00, 14185.95it/s]
In [102]:
    
sample = pd.DataFrame(matrix_m02[0:100])
sample
    
    Out[102]:
  
    
       
      0 
      1 
      2 
      3 
      4 
      5 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(birth, prophets), (book, flows)] 
     
    
      1 
      AF7CSSGV93RXN 
      000100039X 
      [(first, RB), ( read, JJ), ( prophet, NNP), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(relevant, catechism), (within, prophets), (t... 
     
    
      3 
      A3IS4WGMFR4X65 
      000100039X 
      [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(forty-eight, almustafa)] 
     
    
      5 
      AFY0BT42DDYZV 
      000100039X 
      [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(souls, profits), (wordofmouth, twentysix), (... 
     
    
      6 
      A25P6DY6ARTCGZ 
      000100039X 
      [(book, NN), ( almost, RBS), ( kahlil, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      7 
      A1SP45I55GQIIE 
      000100039X 
      [(certainly, RB), ( words, NNS), ( kahlil, NNP... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      8 
      A2E71VWXO59342 
      000100039X 
      [(prophet, NN), ( dispenses, NNS), ( ultimate,... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      9 
      A2OP1HD9RGX5OW 
      000100039X 
      [(book, NN), ( poetic, JJ), ( myth, NNP), ( wo... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      10 
      A2052JNVUPRTMT 
      000100039X 
      [(gibran, JJ), ( gets, NNS), ( right, VBD), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      11 
      AGKPTMTR3UX1R 
      000100039X 
      [(kahlil, NN), ( gibran, JJ), ( eighteen milli... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      12 
      A1HS49P9TZRGV9 
      000100039X 
      [(father, RB), ( huge, JJ), ( book, NN), ( col... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      13 
      A2ZZHMT58ZMVCZ 
      000100039X 
      [(prophet, NN), ( waited, VBD), ( twelve, CD),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(bear, departs), (others, pillars), (similar,... 
     
    
      14 
      A3W43PSHRIG8KV 
      000100039X 
      [(first, RB), ( became, JJ), ( aware, NNP), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      15 
      A1TR1LU2JSZLUL 
      000100039X 
      [(book, NN), ( given, RB), ( gift, NNP), ( jou... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      16 
      ADIDQRLLR4KBQ 
      000100039X 
      [(atheist, NN), ( may, NNP), ( seem, NNP), ( s... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(beautiful, metaphors), (live, prophets)] 
     
    
      17 
      A3AW2ZG0GP4SKN 
      000100039X 
      [(bought, VBN), ( book, NN), ( son, NNP), ( st... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      18 
      A2MMON52VMO7NT 
      000100039X 
      [(gibrans, NNS), ( words, NNS), ( strike, IN),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      19 
      AR72Z89LACZ8Q 
      000100039X 
      [(unusual, JJ), ( departure, NN), ( imaginary,... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      20 
      A3RTC17QVQGML7 
      000100039X 
      [(almost, RB), ( fifty-one, CD), ( years, NNS)... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      21 
      A3HCD8V6TW4DBV 
      000100039X 
      [(inspirational, JJ), ( true, NN), ( teachings... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      22 
      A281NPSIMI1C2R 
      000100039X 
      [(alive, JJ), ( like, NN), ( standing, VBG), (... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(pain, waves), (separate, almustafa)] 
     
    
      23 
      A2X3E8EU2KBVN8 
      000100039X 
      [(seldom, RB), ( book, NNP), ( read, NNP), ( t... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      24 
      A2R64CR74I98K3 
      000100039X 
      [(usefull, JJ), ( book, NN), ( used, VBD), ( s... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(religious, texts)] 
     
    
      25 
      AHD101501WCN1 
      000100039X 
      [(never, RB), (quite, RB), ( make, JJ), ( mind... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      26 
      AF4QKY2R2TD3U 
      000100039X 
      [(say, VB), ( found, IN), ( truth, NNP), ( rat... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(rich, metaphors)] 
     
    
      27 
      A3SMT15X2QVUR8 
      000100039X 
      [(prophet, NN), ( almustafa, CC), ( waits, NNS... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(orphalese, metaphor)] 
     
    
      28 
      A3D7L0R1281COX 
      000100039X 
      [(gibrans, NNS), ( prophet, VBP), ( best, JJS)... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      29 
      A2INDDW3XYFFV1 
      000100039X 
      [(khalil, NNS), ( gibrans, NNS), ( prophet, VB... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(home, prophets)] 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      70 
      A39BB196X0I2N1 
      0002117088 
      [(renoir, NN), ( far, NNP), ( one, NN), ( worl... 
      [surgery, sorts, goodnight, virtues, translato... 
      7 
      [] 
     
    
      71 
      A3KL4JSUOH8NVF 
      0002117088 
      [(book, NN), ( presents, NNS), ( touching, VBG... 
      [surgery, sorts, goodnight, virtues, translato... 
      7 
      [] 
     
    
      72 
      A2GHTSBU7IHIBO 
      0002117088 
      [(book, NN), ( renoir, NNP), ( whose, NNP), ( ... 
      [surgery, sorts, goodnight, virtues, translato... 
      7 
      [] 
     
    
      73 
      A3I4IFMO4Z4S7J 
      000215725X 
      [(william, JJ), ( dalrymple, NNP), ( great, NN... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [] 
     
    
      74 
      A1RAUVCWYHTQI4 
      000215725X 
      [(really, RB), ( wonderful, JJ), ( book, NN), ... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [] 
     
    
      75 
      A280GY5UVUS2QH 
      000215725X 
      [(william, JJ), ( dalrymple, JJ), ( historian,... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [(thought, fraser), (one, fraser), (nineteenth... 
     
    
      76 
      A2CBZMETQJTNEE 
      000215725X 
      [(djinn, NN), ( spirit, NN), ( invisible, JJ),... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [(unusual, pigeon), (indian, weddings)] 
     
    
      77 
      A21CL0N0DQTZXL 
      000215725X 
      [(whether, IN), ( plan, JJ), ( visit, NNP), ( ... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [] 
     
    
      78 
      A2SDYWB9LB1LYI 
      000215725X 
      [(loved, VBN), ( dalrymples, NNS), ( holy, VBP... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [(throng, drivers), (favorite, fights), (parts... 
     
    
      79 
      A38ELBK7FLXGOY 
      000215725X 
      [(quot, JJ), ( city, NN), ( djinns, NNP), ( qu... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [] 
     
    
      80 
      A2S5KWZ8HVAB5X 
      000215725X 
      [(dalrymple, NN), ( simply, NNP), ( one, NNP),... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [] 
     
    
      81 
      A2GNVZUDL15YDK 
      000215725X 
      [(book, NN), ( ninety, NN), ( informative, JJ)... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [(ancient, introduces), (present, fort), (libr... 
     
    
      82 
      A2ZU5UM8ZI8LQG 
      000215725X 
      [(first, RB), ( thing, VBG), ( incredibly, RB)... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [(diary, wifes), (diary, sights), (without, bi... 
     
    
      83 
      A1F6Q5Q0U6MS5X 
      000215725X 
      [(city, NN), ( djinns, CD), ( year, JJ), ( del... 
      [treachery, fort, emperors, 17th, uk, mundane,... 
      39 
      [(punjabi, hindu), (delhi, degenerate), (20th,... 
     
    
      84 
      AEJ31WGHJ59C 
      0002219417 
      [(every, DT), ( herman, NN), ( wouk, VBZ), ( b... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(america, remembrance), (america, continues),... 
     
    
      85 
      A3L00A15HDNQGK 
      0002219417 
      [(lets, NNS), ( start, VBP), ( clear, JJ), ( w... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(nervous, giggle)] 
     
    
      86 
      A1EJ6J6N9I7W35 
      0002219417 
      [(pair, NN), ( winds, VBZ), ( war, JJ), ( plus... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(war, remembrance), (europe, remembrance), (g... 
     
    
      87 
      A3J2CGLKCCPSBE 
      0002219417 
      [(novel, NN), ( definitely, RB), ( mustread, J... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [] 
     
    
      88 
      A15ENK5HYBO2YF 
      0002219417 
      [(im, JJ), ( bit, NN), ( young, NNP), ( world,... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(shell, remembrance)] 
     
    
      89 
      AQ9OSVFTHBAI1 
      0002219417 
      [(four, CD), ( components, NNS), ( writer, RBR... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(tell, bellow), (forget, situations), (blame,... 
     
    
      90 
      AW3VZ5O895LRK 
      0002219417 
      [(go, VB), ( historical, JJ), ( romance, NN), ... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(incredible, remembrance)] 
     
    
      91 
      A39WL61420S1T6 
      0002219417 
      [(winds, NNS), ( war, JJ), ( war, JJ), ( remem... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(war, remembrance), (like, theyd), (endlessly... 
     
    
      92 
      A2JM4TKX99S313 
      0002219417 
      [(sit, NN), ( read, JJ), ( winds, VBZ), ( war,... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(umpteenth, remembrance)] 
     
    
      93 
      A3LP5RUMWG7GQ0 
      0002219417 
      [(winds, NNS), ( war, JJ), ( decent, JJ), ( st... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [] 
     
    
      94 
      AOSDS7DNROB5K 
      0002219417 
      [(extremely, RB), ( insulting, VBG), ( no, PRP... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(clean, conversion), (line, conversion), (app... 
     
    
      95 
      A3NCKDPCAUOD4T 
      0002219417 
      [(ive, JJ), ( read, NN), ( many, NN), ( wwiire... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [] 
     
    
      96 
      A2K4RNOAD5J3WB 
      0002219417 
      [(quot, JJ), ( winds, VBZ), ( war, JJ), ( quot... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [] 
     
    
      97 
      AOKQZVWCLONRH 
      0002219417 
      [(ive, JJ), ( noticed, VBD), ( continues, NNS)... 
      [humanlevel, smaller, conversion, periods, lic... 
      32 
      [(books, continues), (american, naval), (key, ... 
     
    
      98 
      A2DNR5QVO8U9ZI 
      000222383X 
      [(patrick, JJ), ( obrian, JJ), ( naval, NN), (... 
      [treasons, construct, expansion, captains, fav... 
      11 
      [(obrian, naval), (british, naval), (british, ... 
     
    
      99 
      AYKYFFCC49HVT 
      000222383X 
      [(read, JJ), ( prior, NNP), ( three, NNP), ( n... 
      [treasons, construct, expansion, captains, fav... 
      11 
      [(nautical, naval), (overall, captains), (summ... 
     
  
100 rows × 6 columns
In [107]:
    
df20 = pd.DataFrame(matrix_m02)
df20.columns = ['userId','asin','reviewText','imp_nns','num_of_imp_nouns','pairs']
df20.head()
    
    Out[107]:
  
    
       
      userId 
      asin 
      reviewText 
      imp_nns 
      num_of_imp_nouns 
      pairs 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(birth, prophets), (book, flows)] 
     
    
      1 
      AF7CSSGV93RXN 
      000100039X 
      [(first, RB), ( read, JJ), ( prophet, NNP), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(relevant, catechism), (within, prophets), (t... 
     
    
      3 
      A3IS4WGMFR4X65 
      000100039X 
      [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [] 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(forty-eight, almustafa)] 
     
  
In [108]:
    
len(df20)
    
    Out[108]:
511364
In [110]:
    
reviews_vs_feature_opinion_pairs = df20[df20['pairs'].map(lambda pairs: len(pairs)) > 0]
len(reviews_vs_feature_opinion_pairs)
    
    Out[110]:
249871
In [112]:
    
reviews_vs_feature_opinion_pairs[0:100]
    
    Out[112]:
  
    
       
      userId 
      asin 
      reviewText 
      imp_nns 
      num_of_imp_nouns 
      pairs 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(birth, prophets), (book, flows)] 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(relevant, catechism), (within, prophets), (t... 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(forty-eight, almustafa)] 
     
    
      5 
      AFY0BT42DDYZV 
      000100039X 
      [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(souls, profits), (wordofmouth, twentysix), (... 
     
    
      13 
      A2ZZHMT58ZMVCZ 
      000100039X 
      [(prophet, NN), ( waited, VBD), ( twelve, CD),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(bear, departs), (others, pillars), (similar,... 
     
    
      16 
      ADIDQRLLR4KBQ 
      000100039X 
      [(atheist, NN), ( may, NNP), ( seem, NNP), ( s... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(beautiful, metaphors), (live, prophets)] 
     
    
      22 
      A281NPSIMI1C2R 
      000100039X 
      [(alive, JJ), ( like, NN), ( standing, VBG), (... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(pain, waves), (separate, almustafa)] 
     
    
      24 
      A2R64CR74I98K3 
      000100039X 
      [(usefull, JJ), ( book, NN), ( used, VBD), ( s... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(religious, texts)] 
     
    
      26 
      AF4QKY2R2TD3U 
      000100039X 
      [(say, VB), ( found, IN), ( truth, NNP), ( rat... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(rich, metaphors)] 
     
    
      27 
      A3SMT15X2QVUR8 
      000100039X 
      [(prophet, NN), ( almustafa, CC), ( waits, NNS... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(orphalese, metaphor)] 
     
    
      29 
      A2INDDW3XYFFV1 
      000100039X 
      [(khalil, NNS), ( gibrans, NNS), ( prophet, VB... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(home, prophets)] 
     
    
      30 
      A1CSL3TFTFOTWH 
      0002051850 
      [(found, VBN), ( book, NN), ( takes, NNS), ( p... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(independent, periods), (story, progresses), ... 
     
    
      31 
      A313LJLZT8646J 
      0002051850 
      [(bell, NN), ( tolls, VBZ), ( ernest, JJS), ( ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(consistent, dire), (nine hundred and thirty-... 
     
    
      32 
      AHCVWPLA1O4X8 
      0002051850 
      [(one, CD), ( greatest, JJS), ( modernist, NN)... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(american, spain), (fact, thee)] 
     
    
      36 
      A1K1JW1C5CUSUZ 
      0002051850 
      [(hemingways, NNS), ( magnificent, VBP), ( nov... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(political, fascism), (extensive, flashbacks)... 
     
    
      37 
      A33R4E8T9KVLOM 
      0002051850 
      [(robert, JJ), ( jordan, NNP), ( one, NN), ( e... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(read, spain), (indepth, reflects), (sadistic... 
     
    
      39 
      A3IKBHODOTYYHM 
      0002051850 
      [(novel, JJ), ( sum, NNP), ( consequential, NN... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(sold, spain)] 
     
    
      40 
      A1PN3R8DXRQ1C3 
      0002051850 
      [(spanish, JJ), ( civil, NNP), ( war, NNP), ( ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(western, spain), (many, intellectuals), (des... 
     
    
      42 
      A1RECBDKHVOJMW 
      0002051850 
      [(bell, NN), ( tolls, NNS), ( long, JJ), ( fav... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(european, spain), (red, spain), (conservativ... 
     
    
      45 
      A3SI6F1RGCTAOH 
      0002051850 
      [(last, JJ), ( time, JJ), ( read, JJ), ( hemin... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(war, shines), (sex, declarations), (novel, c... 
     
    
      48 
      A3QZCA4LTTVGAD 
      0002051850 
      [(set, VBN), ( spanish, JJ), ( civil, NNP), ( ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(republican, guerrilla), (various, focuses), ... 
     
    
      50 
      A1MC81HLJ6Z9ZQ 
      0002051850 
      [(anything, NN), ( hemingway, RB), ( ever, NNP... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(horrible, affair), (pull, coltish), (content... 
     
    
      53 
      A8IPQ1Q1O7YX5 
      0002051850 
      [(dont, NN), (think, VBP), ( ever, IN), ( take... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(enemy, guerrilla)] 
     
    
      54 
      A3Q9K57FARA2WQ 
      0002051850 
      [(say, VB), ( masterpiece, NNP), ( hasnt, NNP)... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(american, spain)] 
     
    
      56 
      A3KRRXPFEAO6V 
      0002051850 
      [(ernest, JJS), ( hemingway, RB), ( bell, NNP)... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(fascist, threatens), (latter, partners), (co... 
     
    
      58 
      A1RLYOPK16YXC1 
      0002051850 
      [(bell, NN), ( tolls, NNS), ( takes, VBZ), ( p... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(american, spain), (missions, guerrilla), (pr... 
     
    
      59 
      AMTADN8VCK6J2 
      0002051850 
      [(novel, NN), ( considered, VBD), ( one, CD), ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(story, mountains)] 
     
    
      60 
      A29SHFBU5O9BWO 
      0002051850 
      [(perhaps, RB), ( bell, NNP), ( tolled, VBD), ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(literary, greatness), (suitable, achievement... 
     
    
      64 
      A2EQ74Y24BHHIF 
      0002113570 
      [(jane, NN), ( goodall, DT), ( unique, NNP), (... 
      [humane, homo, ancestors, michener] 
      4 
      [(like, michener), (could, homo), (common, anc... 
     
    
      68 
      A2KUKUSSSAYAKH 
      0002117088 
      [(adopted, VBN), ( quot, JJ), ( renoir, NNP), ... 
      [surgery, sorts, goodnight, virtues, translato... 
      7 
      [(hip, surgery), (hip, goodnight), (come, clau... 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      138 
      A1WQR544MMF68L 
      000224053X 
      [(red, JJ), ( mars, NNS), ( first, RB), ( kim,... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(first, expidition), (insead, focuses), (stru... 
     
    
      139 
      A2YUZKPLUYQDKV 
      000224053X 
      [(readers, NNS), ( sciencefiction, VBP), ( tre... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(famous, thunder)] 
     
    
      140 
      A21Z8FUB9IZI2L 
      000224053X 
      [(colonization, NN), ( solar, JJ), ( system, N... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(gigantic, corporations)] 
     
    
      141 
      A1RSGDRA57QN6J 
      000224053X 
      [(nominally, RB), ( futurehistory, JJ), ( mart... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(initial, corporations), (martian, landscape)... 
     
    
      142 
      A1SG73VR5R809B 
      000224053X 
      [(first, RB), ( one hundred, JJ), ( colonists,... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(plausible, technologies)] 
     
    
      143 
      AWFD44Q17PCOV 
      000224053X 
      [(dont, NN), ( confused, VBD), ( technical, JJ... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(siberian, construction)] 
     
    
      144 
      A260X99UW6HA9Z 
      000224053X 
      [(robinson, NN), ( achieves, NNS), ( pitch, VB... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(science, coyote)] 
     
    
      150 
      A18YAKQM1X2MCJ 
      000224053X 
      [(robinson, NN), ( excellent, NN), ( writer, N... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(asimov, clarke)] 
     
    
      151 
      A2ZMWGU610X3ZT 
      000224053X 
      [(warning, VBG), ( books, NNS), ( hypnotically... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(prestigious, awards), (creole, jazz), (among... 
     
    
      153 
      A3D0BL5DWYN3NQ 
      000224053X 
      [(heres, NNS), ( incredibly, RB), ( realistic,... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(global, exploitation)] 
     
    
      155 
      A3C2A3D2KG1F1A 
      000224053X 
      [(really, RB), ( looking, VBG), ( forward, NNP... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(read, landscape)] 
     
    
      156 
      A3UL9V4K9OJU9F 
      000224053X 
      [(read, NN), ( lot, NN), ( science, NNP), ( fi... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(habitable, environment)] 
     
    
      165 
      A3PE884TA8529G 
      0002242052 
      [(usually, RB), ( read, JJ), ( fiction, NN), (... 
      [stretches, authorities, ludlum, drugdealers, ... 
      14 
      [(begins, pimp), (drug, pimp)] 
     
    
      166 
      A3QWNQ2WA7QZYJ 
      0002242052 
      [(tom, NN), ( clancys, NNP), ( strength, NNP),... 
      [stretches, authorities, ludlum, drugdealers, ... 
      14 
      [(basic, tech), (character, ludlum), (long, st... 
     
    
      171 
      A3B0YYFNBNDX6C 
      0002242052 
      [(without, IN), ( remorseby, NNP), ( tom, NNP)... 
      [stretches, authorities, ludlum, drugdealers, ... 
      14 
      [(several, languages), (mechanical, semitruck)... 
     
    
      175 
      A1AUBGENRIZODO 
      0002311216 
      [(novel, JJ), ( follows, VBZ), ( adventures, N... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(real, jaan), (tedious, twothirds), (various,... 
     
    
      177 
      A1AOIKQ6JNWETI 
      0002311216 
      [(first, RB), ( let, NNP), ( say, NNP), ( huge... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(world, espionage), (various, cabinet)] 
     
    
      178 
      A3A6GXILVT9JNW 
      0002311216 
      [(one thousand, NN), ( nine hundred and sevent... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(worldwide, neo), (much, hercule), (much, poi... 
     
    
      179 
      A29IPCJZUUAL32 
      0002311216 
      [(disappointed, JJ), ( book, NN), ( great, NN)... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(excellent, openings)] 
     
    
      181 
      A1T86T7FTUW38K 
      0002311216 
      [(im, NN), ( surprised, VBD), ( hasnt, NNP), (... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(ythrian, motives)] 
     
    
      183 
      AENXJM2CKEN5L 
      0002550938 
      [(sahara, NN), ( written, NNP), ( clive, NNP),... 
      [fights, mysery, pollution, cusslers, threads,... 
      6 
      [(ironclad, confederate), (eight hundred and s... 
     
    
      185 
      A2F3M93RRLFQNJ 
      0002550938 
      [(last, JJ), ( time, JJ), ( read, JJ), ( clive... 
      [fights, mysery, pollution, cusslers, threads,... 
      6 
      [(pair, confederate)] 
     
    
      187 
      A1TAWJ2PA3USVX 
      0002550938 
      [(whats, NNS), ( sillier, VBP), ( even, RB), (... 
      [fights, mysery, pollution, cusslers, threads,... 
      6 
      [(eye, cusslers)] 
     
    
      190 
      ADCZ95UF83HLM 
      000255383X 
      [(thats, NNS), ( common, VBP), ( rabble, JJ), ... 
      [ubi, shark, merge, beeper, phrases] 
      5 
      [(aspiring, beeper)] 
     
    
      191 
      A2VE83MZF98ITY 
      000255383X 
      [(wonderful, JJ), ( little, JJ), ( book, NN), ... 
      [ubi, shark, merge, beeper, phrases] 
      5 
      [(various, phrases), (innocuous, beeper), (sec... 
     
    
      194 
      A3N4J7FRI8BPT6 
      0002621509 
      [(written, VBN), ( americans, NNS), ( perspect... 
      [surgery, espionage, dolgun, paranoid, employe... 
      8 
      [(people, dolgun)] 
     
    
      196 
      A2E3GFHUDNPYDH 
      0002621509 
      [(walk, NN), ( one, NN), ( day, JJ), ( moscow,... 
      [surgery, espionage, dolgun, paranoid, employe... 
      8 
      [(shalomov, employee), (american, dolgun), (ni... 
     
    
      197 
      A3QIEISBZP4QTV 
      0002621509 
      [(enjoy, NN), ( genre, JJ), ( adventure, NN), ... 
      [surgery, espionage, dolgun, paranoid, employe... 
      8 
      [(much, dolgun)] 
     
    
      198 
      A1VIXUHSCZTJUT 
      0002621509 
      [(book, NN), ( hits, NNS), ( like, IN), ( slap... 
      [surgery, espionage, dolgun, paranoid, employe... 
      8 
      [(russia, dolgun)] 
     
    
      199 
      A17S1450Y06C86 
      0002726874 
      [(towards, NNS), ( end, VBP), ( life, JJ), ( g... 
      [macdonalds, aboot, ye, smaller, theer, uk, al... 
      63 
      [(life, fraser), (battles, lance), (imperial, ... 
     
  
100 rows × 6 columns
In [113]:
    
249871/511364
    
    Out[113]:
0.48863627474753796
In [114]:
    
reviews_vs_feature_opinion_pairs = reviews_vs_feature_opinion_pairs.assign(num_of_pairs = reviews_vs_feature_opinion_pairs['pairs'].progress_apply(lambda pairs:len(pairs)))
reviews_vs_feature_opinion_pairs.head()
    
    
Progress:: 100%|██████████| 249871/249871 [00:00<00:00, 1138209.59it/s]
    Out[114]:
  
    
       
      userId 
      asin 
      reviewText 
      imp_nns 
      num_of_imp_nouns 
      pairs 
      num_of_pairs 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(birth, prophets), (book, flows)] 
      2 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(relevant, catechism), (within, prophets), (t... 
      4 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(forty-eight, almustafa)] 
      1 
     
    
      5 
      AFY0BT42DDYZV 
      000100039X 
      [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(souls, profits), (wordofmouth, twentysix), (... 
      3 
     
    
      13 
      A2ZZHMT58ZMVCZ 
      000100039X 
      [(prophet, NN), ( waited, VBD), ( twelve, CD),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(bear, departs), (others, pillars), (similar,... 
      4 
     
  
In [118]:
    
reviews_vs_feature_opinion_pairs[0:100]
    
    Out[118]:
  
    
       
      userId 
      asin 
      reviewText 
      imp_nns 
      num_of_imp_nouns 
      pairs 
      num_of_pairs 
     
  
  
    
      0 
      A2XQ5LZHTD4AFT 
      000100039X 
      [(timeless, NN), ( classic, JJ), ( demanding, ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(birth, prophets), (book, flows)] 
      2 
     
    
      2 
      A1NPNGWBVD9AK3 
      000100039X 
      [(one, CD), ( first, NNP), ( literary, JJ), ( ... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(relevant, catechism), (within, prophets), (t... 
      4 
     
    
      4 
      AWLFVCT9128JV 
      000100039X 
      [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(forty-eight, almustafa)] 
      1 
     
    
      5 
      AFY0BT42DDYZV 
      000100039X 
      [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(souls, profits), (wordofmouth, twentysix), (... 
      3 
     
    
      13 
      A2ZZHMT58ZMVCZ 
      000100039X 
      [(prophet, NN), ( waited, VBD), ( twelve, CD),... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(bear, departs), (others, pillars), (similar,... 
      4 
     
    
      16 
      ADIDQRLLR4KBQ 
      000100039X 
      [(atheist, NN), ( may, NNP), ( seem, NNP), ( s... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(beautiful, metaphors), (live, prophets)] 
      2 
     
    
      22 
      A281NPSIMI1C2R 
      000100039X 
      [(alive, JJ), ( like, NN), ( standing, VBG), (... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(pain, waves), (separate, almustafa)] 
      2 
     
    
      24 
      A2R64CR74I98K3 
      000100039X 
      [(usefull, JJ), ( book, NN), ( used, VBD), ( s... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(religious, texts)] 
      1 
     
    
      26 
      AF4QKY2R2TD3U 
      000100039X 
      [(say, VB), ( found, IN), ( truth, NNP), ( rat... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(rich, metaphors)] 
      1 
     
    
      27 
      A3SMT15X2QVUR8 
      000100039X 
      [(prophet, NN), ( almustafa, CC), ( waits, NNS... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(orphalese, metaphor)] 
      1 
     
    
      29 
      A2INDDW3XYFFV1 
      000100039X 
      [(khalil, NNS), ( gibrans, NNS), ( prophet, VB... 
      [kneads, profits, preachers, territory, exile,... 
      26 
      [(home, prophets)] 
      1 
     
    
      30 
      A1CSL3TFTFOTWH 
      0002051850 
      [(found, VBN), ( book, NN), ( takes, NNS), ( p... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(independent, periods), (story, progresses), ... 
      6 
     
    
      31 
      A313LJLZT8646J 
      0002051850 
      [(bell, NN), ( tolls, VBZ), ( ernest, JJS), ( ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(consistent, dire), (nine hundred and thirty-... 
      9 
     
    
      32 
      AHCVWPLA1O4X8 
      0002051850 
      [(one, CD), ( greatest, JJS), ( modernist, NN)... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(american, spain), (fact, thee)] 
      2 
     
    
      36 
      A1K1JW1C5CUSUZ 
      0002051850 
      [(hemingways, NNS), ( magnificent, VBP), ( nov... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(political, fascism), (extensive, flashbacks)... 
      5 
     
    
      37 
      A33R4E8T9KVLOM 
      0002051850 
      [(robert, JJ), ( jordan, NNP), ( one, NN), ( e... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(read, spain), (indepth, reflects), (sadistic... 
      4 
     
    
      39 
      A3IKBHODOTYYHM 
      0002051850 
      [(novel, JJ), ( sum, NNP), ( consequential, NN... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(sold, spain)] 
      1 
     
    
      40 
      A1PN3R8DXRQ1C3 
      0002051850 
      [(spanish, JJ), ( civil, NNP), ( war, NNP), ( ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(western, spain), (many, intellectuals), (des... 
      4 
     
    
      42 
      A1RECBDKHVOJMW 
      0002051850 
      [(bell, NN), ( tolls, NNS), ( long, JJ), ( fav... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(european, spain), (red, spain), (conservativ... 
      16 
     
    
      45 
      A3SI6F1RGCTAOH 
      0002051850 
      [(last, JJ), ( time, JJ), ( read, JJ), ( hemin... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(war, shines), (sex, declarations), (novel, c... 
      4 
     
    
      48 
      A3QZCA4LTTVGAD 
      0002051850 
      [(set, VBN), ( spanish, JJ), ( civil, NNP), ( ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(republican, guerrilla), (various, focuses), ... 
      12 
     
    
      50 
      A1MC81HLJ6Z9ZQ 
      0002051850 
      [(anything, NN), ( hemingway, RB), ( ever, NNP... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(horrible, affair), (pull, coltish), (content... 
      5 
     
    
      53 
      A8IPQ1Q1O7YX5 
      0002051850 
      [(dont, NN), (think, VBP), ( ever, IN), ( take... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(enemy, guerrilla)] 
      1 
     
    
      54 
      A3Q9K57FARA2WQ 
      0002051850 
      [(say, VB), ( masterpiece, NNP), ( hasnt, NNP)... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(american, spain)] 
      1 
     
    
      56 
      A3KRRXPFEAO6V 
      0002051850 
      [(ernest, JJS), ( hemingway, RB), ( bell, NNP)... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(fascist, threatens), (latter, partners), (co... 
      4 
     
    
      58 
      A1RLYOPK16YXC1 
      0002051850 
      [(bell, NN), ( tolls, NNS), ( takes, VBZ), ( p... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(american, spain), (missions, guerrilla), (pr... 
      11 
     
    
      59 
      AMTADN8VCK6J2 
      0002051850 
      [(novel, NN), ( considered, VBD), ( one, CD), ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(story, mountains)] 
      1 
     
    
      60 
      A29SHFBU5O9BWO 
      0002051850 
      [(perhaps, RB), ( bell, NNP), ( tolled, VBD), ... 
      [declarations, towns, smaller, threatens, desi... 
      73 
      [(literary, greatness), (suitable, achievement... 
      6 
     
    
      64 
      A2EQ74Y24BHHIF 
      0002113570 
      [(jane, NN), ( goodall, DT), ( unique, NNP), (... 
      [humane, homo, ancestors, michener] 
      4 
      [(like, michener), (could, homo), (common, anc... 
      4 
     
    
      68 
      A2KUKUSSSAYAKH 
      0002117088 
      [(adopted, VBN), ( quot, JJ), ( renoir, NNP), ... 
      [surgery, sorts, goodnight, virtues, translato... 
      7 
      [(hip, surgery), (hip, goodnight), (come, clau... 
      7 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      138 
      A1WQR544MMF68L 
      000224053X 
      [(red, JJ), ( mars, NNS), ( first, RB), ( kim,... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(first, expidition), (insead, focuses), (stru... 
      4 
     
    
      139 
      A2YUZKPLUYQDKV 
      000224053X 
      [(readers, NNS), ( sciencefiction, VBP), ( tre... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(famous, thunder)] 
      1 
     
    
      140 
      A21Z8FUB9IZI2L 
      000224053X 
      [(colonization, NN), ( solar, JJ), ( system, N... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(gigantic, corporations)] 
      1 
     
    
      141 
      A1RSGDRA57QN6J 
      000224053X 
      [(nominally, RB), ( futurehistory, JJ), ( mart... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(initial, corporations), (martian, landscape)... 
      8 
     
    
      142 
      A1SG73VR5R809B 
      000224053X 
      [(first, RB), ( one hundred, JJ), ( colonists,... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(plausible, technologies)] 
      1 
     
    
      143 
      AWFD44Q17PCOV 
      000224053X 
      [(dont, NN), ( confused, VBD), ( technical, JJ... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(siberian, construction)] 
      1 
     
    
      144 
      A260X99UW6HA9Z 
      000224053X 
      [(robinson, NN), ( achieves, NNS), ( pitch, VB... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(science, coyote)] 
      1 
     
    
      150 
      A18YAKQM1X2MCJ 
      000224053X 
      [(robinson, NN), ( excellent, NN), ( writer, N... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(asimov, clarke)] 
      1 
     
    
      151 
      A2ZMWGU610X3ZT 
      000224053X 
      [(warning, VBG), ( books, NNS), ( hypnotically... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(prestigious, awards), (creole, jazz), (among... 
      7 
     
    
      153 
      A3D0BL5DWYN3NQ 
      000224053X 
      [(heres, NNS), ( incredibly, RB), ( realistic,... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(global, exploitation)] 
      1 
     
    
      155 
      A3C2A3D2KG1F1A 
      000224053X 
      [(really, RB), ( looking, VBG), ( forward, NNP... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(read, landscape)] 
      1 
     
    
      156 
      A3UL9V4K9OJU9F 
      000224053X 
      [(read, NN), ( lot, NN), ( science, NNP), ( fi... 
      [fundamentalists, coast, pioneer, inconsistenc... 
      81 
      [(habitable, environment)] 
      1 
     
    
      165 
      A3PE884TA8529G 
      0002242052 
      [(usually, RB), ( read, JJ), ( fiction, NN), (... 
      [stretches, authorities, ludlum, drugdealers, ... 
      14 
      [(begins, pimp), (drug, pimp)] 
      2 
     
    
      166 
      A3QWNQ2WA7QZYJ 
      0002242052 
      [(tom, NN), ( clancys, NNP), ( strength, NNP),... 
      [stretches, authorities, ludlum, drugdealers, ... 
      14 
      [(basic, tech), (character, ludlum), (long, st... 
      4 
     
    
      171 
      A3B0YYFNBNDX6C 
      0002242052 
      [(without, IN), ( remorseby, NNP), ( tom, NNP)... 
      [stretches, authorities, ludlum, drugdealers, ... 
      14 
      [(several, languages), (mechanical, semitruck)... 
      8 
     
    
      175 
      A1AUBGENRIZODO 
      0002311216 
      [(novel, JJ), ( follows, VBZ), ( adventures, N... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(real, jaan), (tedious, twothirds), (various,... 
      4 
     
    
      177 
      A1AOIKQ6JNWETI 
      0002311216 
      [(first, RB), ( let, NNP), ( say, NNP), ( huge... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(world, espionage), (various, cabinet)] 
      2 
     
    
      178 
      A3A6GXILVT9JNW 
      0002311216 
      [(one thousand, NN), ( nine hundred and sevent... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(worldwide, neo), (much, hercule), (much, poi... 
      4 
     
    
      179 
      A29IPCJZUUAL32 
      0002311216 
      [(disappointed, JJ), ( book, NN), ( great, NN)... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(excellent, openings)] 
      1 
     
    
      181 
      A1T86T7FTUW38K 
      0002311216 
      [(im, NN), ( surprised, VBD), ( hasnt, NNP), (... 
      [andersons, espionage, poirot, worldwide, open... 
      16 
      [(ythrian, motives)] 
      1 
     
    
      183 
      AENXJM2CKEN5L 
      0002550938 
      [(sahara, NN), ( written, NNP), ( clive, NNP),... 
      [fights, mysery, pollution, cusslers, threads,... 
      6 
      [(ironclad, confederate), (eight hundred and s... 
      5 
     
    
      185 
      A2F3M93RRLFQNJ 
      0002550938 
      [(last, JJ), ( time, JJ), ( read, JJ), ( clive... 
      [fights, mysery, pollution, cusslers, threads,... 
      6 
      [(pair, confederate)] 
      1 
     
    
      187 
      A1TAWJ2PA3USVX 
      0002550938 
      [(whats, NNS), ( sillier, VBP), ( even, RB), (... 
      [fights, mysery, pollution, cusslers, threads,... 
      6 
      [(eye, cusslers)] 
      1 
     
    
      190 
      ADCZ95UF83HLM 
      000255383X 
      [(thats, NNS), ( common, VBP), ( rabble, JJ), ... 
      [ubi, shark, merge, beeper, phrases] 
      5 
      [(aspiring, beeper)] 
      1 
     
    
      191 
      A2VE83MZF98ITY 
      000255383X 
      [(wonderful, JJ), ( little, JJ), ( book, NN), ... 
      [ubi, shark, merge, beeper, phrases] 
      5 
      [(various, phrases), (innocuous, beeper), (sec... 
      3 
     
    
      194 
      A3N4J7FRI8BPT6 
      0002621509 
      [(written, VBN), ( americans, NNS), ( perspect... 
      [surgery, espionage, dolgun, paranoid, employe... 
      8 
      [(people, dolgun)] 
      1 
     
    
      196 
      A2E3GFHUDNPYDH 
      0002621509 
      [(walk, NN), ( one, NN), ( day, JJ), ( moscow,... 
      [surgery, espionage, dolgun, paranoid, employe... 
      8 
      [(shalomov, employee), (american, dolgun), (ni... 
      8 
     
    
      197 
      A3QIEISBZP4QTV 
      0002621509 
      [(enjoy, NN), ( genre, JJ), ( adventure, NN), ... 
      [surgery, espionage, dolgun, paranoid, employe... 
      8 
      [(much, dolgun)] 
      1 
     
    
      198 
      A1VIXUHSCZTJUT 
      0002621509 
      [(book, NN), ( hits, NNS), ( like, IN), ( slap... 
      [surgery, espionage, dolgun, paranoid, employe... 
      8 
      [(russia, dolgun)] 
      1 
     
    
      199 
      A17S1450Y06C86 
      0002726874 
      [(towards, NNS), ( end, VBP), ( life, JJ), ( g... 
      [macdonalds, aboot, ye, smaller, theer, uk, al... 
      63 
      [(life, fraser), (battles, lance), (imperial, ... 
      5 
     
  
100 rows × 7 columns
In [123]:
    
pairs_per_book = reviews_vs_feature_opinion_pairs.groupby(['asin'])[["num_of_pairs"]].sum()
pairs_per_book = pairs_per_book.reset_index()
pairs_per_book.head()
    
    Out[123]:
  
    
       
      asin 
      num_of_pairs 
     
  
  
    
      0 
      000100039X 
      22 
     
    
      1 
      0002051850 
      92 
     
    
      2 
      0002113570 
      4 
     
    
      3 
      0002117088 
      7 
     
    
      4 
      000215725X 
      46 
     
  
In [124]:
    
len(pairs_per_book)
    
    Out[124]:
48853
In [125]:
    
48939 - 48853
    
    Out[125]:
86
In [126]:
    
import plotly 
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
print(cf.__version__)
# Configure cufflings 
cf.set_config_file(offline=False, world_readable=True, theme='pearl')
    
    
0.12.1
In [127]:
    
pairs_per_book['num_of_pairs'].iplot(kind='histogram', bins=100, xTitle='Number of Pairs', yTitle='Number of Books')
    
    
/Users/falehalrashidi/anaconda3/lib/python3.6/site-packages/plotly/plotly/plotly.py:224: UserWarning:
Woah there! Look at all those points! Due to browser limitations, the Plotly SVG drawing functions have a hard time graphing more than 500k data points for line charts, or 40k points for other types of charts. Here are some suggestions:
(1) Use the `plotly.graph_objs.Scattergl` trace object to generate a WebGl graph.
(2) Trying using the image API to return an image instead of a graph URL
(3) Use matplotlib
(4) See if you can create your visualization with fewer data points
If the visualization you're using aggregates points (e.g., box plot, histogram, etc.) you can disregard this warning.
    Out[127]:
 
In [129]:
    
# Save data
pairs_per_book.to_pickle("../data/interim/006_pairs_per_book.p")
    
In [130]:
    
reviews_vs_feature_opinion_pairs.to_pickle("../data/interim/006_pairs_per_review.p")
    
In [131]:
    
## END_OF_FILE
    
In [ ]:
    
    
Content source: VictorQuintana91/Thesis
Similar notebooks: