Feature-Opinion Pairing


In [2]:
import pandas as pd

In [3]:
# For monitoring duration of pandas processes
from tqdm import tqdm, tqdm_pandas

# To avoid RuntimeError: Set changed size during iteration
tqdm.monitor_interval = 0

# Register `pandas.progress_apply` and `pandas.Series.map_apply` with `tqdm`
# (can use `tqdm_gui`, `tqdm_notebook`, optional kwargs, etc.)
tqdm.pandas(desc="Progress:")

# Now you can use `progress_apply` instead of `apply`
# and `progress_map` instead of `map`
# can also groupby:
# df.groupby(0).progress_apply(lambda x: x**2)

Load important nouns


In [10]:
df00 = pd.read_pickle('../data/interim/005_important_nouns.p')

df00.head()


In [12]:
len(df00)


Out[12]:
59324

In [13]:
df01 = df00.assign(num_of_imp_nouns = df00['imp_nns'].progress_apply(lambda imp_nouns:len(imp_nouns)))
df02 = df01.loc[df01['num_of_imp_nouns'] != 0]
len(df02)


Progress:: 100%|██████████| 59324/59324 [00:00<00:00, 1123729.33it/s]
Out[13]:
48939

In [14]:
df02.head()


Out[14]:
asin imp_nns num_of_imp_nouns
0 000100039X [kneads, profits, preachers, territory, exile,... 26
1 0002051850 [declarations, towns, smaller, threatens, desi... 73
2 0002113570 [humane, homo, ancestors, michener] 4
3 0002117088 [surgery, sorts, goodnight, virtues, translato... 7
4 000215725X [treachery, fort, emperors, 17th, uk, mundane,... 39

Load book tagged reviews


In [16]:
df10 = pd.read_pickle('../data/interim/002_pos_tagged_keyed_reviews.p')

In [17]:
df10.head()


Out[17]:
uniqueKey reviewText
0 A2XQ5LZHTD4AFT##000100039X [(timeless, NN), ( classic, JJ), ( demanding, ...
1 AF7CSSGV93RXN##000100039X [(first, RB), ( read, JJ), ( prophet, NNP), ( ...
2 A1NPNGWBVD9AK3##000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ...
3 A3IS4WGMFR4X65##000100039X [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)...
4 AWLFVCT9128JV##000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),...

In [18]:
len(df10)


Out[18]:
582711

In [21]:
df11 = pd.DataFrame(df10.uniqueKey.str.split('##',1).tolist(),columns = ['userId','asin'])
df11.head()


Out[21]:
userId asin
0 A2XQ5LZHTD4AFT 000100039X
1 AF7CSSGV93RXN 000100039X
2 A1NPNGWBVD9AK3 000100039X
3 A3IS4WGMFR4X65 000100039X
4 AWLFVCT9128JV 000100039X

In [22]:
df_12 = pd.DataFrame(df10['reviewText'])
df_12.head()


Out[22]:
reviewText
0 [(timeless, NN), ( classic, JJ), ( demanding, ...
1 [(first, RB), ( read, JJ), ( prophet, NNP), ( ...
2 [(one, CD), ( first, NNP), ( literary, JJ), ( ...
3 [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)...
4 [(gibran, NN), ( khalil, NNP), ( gibran, NNP),...

In [19]:
df_13 = pd.concat([df11, df_12], axis=1)
df_13.head()


Out[19]:
userId asin reviewText
0 A2XQ5LZHTD4AFT 000100039X [(timeless, NN), ( classic, JJ), ( demanding, ...
1 AF7CSSGV93RXN 000100039X [(first, RB), ( read, JJ), ( prophet, NNP), ( ...
2 A1NPNGWBVD9AK3 000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ...
3 A3IS4WGMFR4X65 000100039X [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)...
4 AWLFVCT9128JV 000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),...

Join reviews with important nouns


In [20]:
df_joined = df_13.merge(df02, left_on='asin', right_on='asin', how='inner')
df_joined[0:31]


Out[20]:
userId asin reviewText imp_nns num_of_imp_nouns
0 A2XQ5LZHTD4AFT 000100039X [(timeless, NN), ( classic, JJ), ( demanding, ... [kneads, profits, preachers, territory, exile,... 26
1 AF7CSSGV93RXN 000100039X [(first, RB), ( read, JJ), ( prophet, NNP), ( ... [kneads, profits, preachers, territory, exile,... 26
2 A1NPNGWBVD9AK3 000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26
3 A3IS4WGMFR4X65 000100039X [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... [kneads, profits, preachers, territory, exile,... 26
4 AWLFVCT9128JV 000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... [kneads, profits, preachers, territory, exile,... 26
5 AFY0BT42DDYZV 000100039X [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... [kneads, profits, preachers, territory, exile,... 26
6 A25P6DY6ARTCGZ 000100039X [(book, NN), ( almost, RBS), ( kahlil, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26
7 A1SP45I55GQIIE 000100039X [(certainly, RB), ( words, NNS), ( kahlil, NNP... [kneads, profits, preachers, territory, exile,... 26
8 A2E71VWXO59342 000100039X [(prophet, NN), ( dispenses, NNS), ( ultimate,... [kneads, profits, preachers, territory, exile,... 26
9 A2OP1HD9RGX5OW 000100039X [(book, NN), ( poetic, JJ), ( myth, NNP), ( wo... [kneads, profits, preachers, territory, exile,... 26
10 A2052JNVUPRTMT 000100039X [(gibran, JJ), ( gets, NNS), ( right, VBD), ( ... [kneads, profits, preachers, territory, exile,... 26
11 AGKPTMTR3UX1R 000100039X [(kahlil, NN), ( gibran, JJ), ( eighteen milli... [kneads, profits, preachers, territory, exile,... 26
12 A1HS49P9TZRGV9 000100039X [(father, RB), ( huge, JJ), ( book, NN), ( col... [kneads, profits, preachers, territory, exile,... 26
13 A2ZZHMT58ZMVCZ 000100039X [(prophet, NN), ( waited, VBD), ( twelve, CD),... [kneads, profits, preachers, territory, exile,... 26
14 A3W43PSHRIG8KV 000100039X [(first, RB), ( became, JJ), ( aware, NNP), ( ... [kneads, profits, preachers, territory, exile,... 26
15 A1TR1LU2JSZLUL 000100039X [(book, NN), ( given, RB), ( gift, NNP), ( jou... [kneads, profits, preachers, territory, exile,... 26
16 ADIDQRLLR4KBQ 000100039X [(atheist, NN), ( may, NNP), ( seem, NNP), ( s... [kneads, profits, preachers, territory, exile,... 26
17 A3AW2ZG0GP4SKN 000100039X [(bought, VBN), ( book, NN), ( son, NNP), ( st... [kneads, profits, preachers, territory, exile,... 26
18 A2MMON52VMO7NT 000100039X [(gibrans, NNS), ( words, NNS), ( strike, IN),... [kneads, profits, preachers, territory, exile,... 26
19 AR72Z89LACZ8Q 000100039X [(unusual, JJ), ( departure, NN), ( imaginary,... [kneads, profits, preachers, territory, exile,... 26
20 A3RTC17QVQGML7 000100039X [(almost, RB), ( fifty-one, CD), ( years, NNS)... [kneads, profits, preachers, territory, exile,... 26
21 A3HCD8V6TW4DBV 000100039X [(inspirational, JJ), ( true, NN), ( teachings... [kneads, profits, preachers, territory, exile,... 26
22 A281NPSIMI1C2R 000100039X [(alive, JJ), ( like, NN), ( standing, VBG), (... [kneads, profits, preachers, territory, exile,... 26
23 A2X3E8EU2KBVN8 000100039X [(seldom, RB), ( book, NNP), ( read, NNP), ( t... [kneads, profits, preachers, territory, exile,... 26
24 A2R64CR74I98K3 000100039X [(usefull, JJ), ( book, NN), ( used, VBD), ( s... [kneads, profits, preachers, territory, exile,... 26
25 AHD101501WCN1 000100039X [(never, RB), (quite, RB), ( make, JJ), ( mind... [kneads, profits, preachers, territory, exile,... 26
26 AF4QKY2R2TD3U 000100039X [(say, VB), ( found, IN), ( truth, NNP), ( rat... [kneads, profits, preachers, territory, exile,... 26
27 A3SMT15X2QVUR8 000100039X [(prophet, NN), ( almustafa, CC), ( waits, NNS... [kneads, profits, preachers, territory, exile,... 26
28 A3D7L0R1281COX 000100039X [(gibrans, NNS), ( prophet, VBP), ( best, JJS)... [kneads, profits, preachers, territory, exile,... 26
29 A2INDDW3XYFFV1 000100039X [(khalil, NNS), ( gibrans, NNS), ( prophet, VB... [kneads, profits, preachers, territory, exile,... 26
30 A1CSL3TFTFOTWH 0002051850 [(found, VBN), ( book, NN), ( takes, NNS), ( p... [declarations, towns, smaller, threatens, desi... 73

In [26]:
df_joined.describe()


Out[26]:
num_of_imp_nouns
count 511364.000000
mean 27.590157
std 25.774587
min 4.000000
25% 10.000000
50% 19.000000
75% 36.000000
max 226.000000

In [21]:
1 - 511364/582711


Out[21]:
0.12243976859884231

In [22]:
582711-511364


Out[22]:
71347

In [92]:
import numpy as np
matrix_m01 = df_joined.as_matrix()
len(matrix_m01)


Out[92]:
511364

In [96]:
matrix_m02 = np.append(matrix_m01,np.zeros([len(matrix_m01),1]),1)
sample = pd.DataFrame(matrix_m02[0:10])
sample


Out[96]:
0 1 2 3 4 5
0 A2XQ5LZHTD4AFT 000100039X [(timeless, NN), ( classic, JJ), ( demanding, ... [kneads, profits, preachers, territory, exile,... 26 0
1 AF7CSSGV93RXN 000100039X [(first, RB), ( read, JJ), ( prophet, NNP), ( ... [kneads, profits, preachers, territory, exile,... 26 0
2 A1NPNGWBVD9AK3 000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26 0
3 A3IS4WGMFR4X65 000100039X [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... [kneads, profits, preachers, territory, exile,... 26 0
4 AWLFVCT9128JV 000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... [kneads, profits, preachers, territory, exile,... 26 0
5 AFY0BT42DDYZV 000100039X [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... [kneads, profits, preachers, territory, exile,... 26 0
6 A25P6DY6ARTCGZ 000100039X [(book, NN), ( almost, RBS), ( kahlil, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26 0
7 A1SP45I55GQIIE 000100039X [(certainly, RB), ( words, NNS), ( kahlil, NNP... [kneads, profits, preachers, territory, exile,... 26 0
8 A2E71VWXO59342 000100039X [(prophet, NN), ( dispenses, NNS), ( ultimate,... [kneads, profits, preachers, territory, exile,... 26 0
9 A2OP1HD9RGX5OW 000100039X [(book, NN), ( poetic, JJ), ( myth, NNP), ( wo... [kneads, profits, preachers, territory, exile,... 26 0

In [86]:
def get_pair(index, tagged_review):
    
    possible_pairs_dictionary = {}
    
    # left window
    counter = 0
    left_index = index - 1
    while((left_index!=-1) and (counter<10)):
        if tagged_review[left_index][1] in {'JJ', 'JJR', 'JJS'}:
            distance = index - left_index 
            possible_pairs_dictionary.update({tagged_review[left_index][0]:distance})
        left_index -= 1
        counter += 1

    # right window
    counter = 0
    right_index = index + 1
    while((right_index!=len(tagged_review)) and (counter<10)):
        if tagged_review[right_index][1] in {'JJ', 'JJR', 'JJS'}:
            distance = right_index - index 
            possible_pairs_dictionary.update({tagged_review[left_index][0]:distance})
        right_index += 1
        counter += 1
    
    # get shortest adj with shortest distance if multiple are found
    if(len(possible_pairs_dictionary)>1):
        return (min(possible_pairs_dictionary, key=lambda k: possible_pairs_dictionary[k]), tagged_review[index][0])
    elif(len(possible_pairs_dictionary)==1):
        return (possible_pairs_dictionary.get(0),tagged_review[index][0])
    else:
        return (None, tagged_review[index][0])

In [101]:
from tqdm import tqdm

with tqdm(total=len(matrix_m02)) as pbar:
    for i in range(len(matrix_m02)):
        pairs = []
        tagged_review = matrix_m02[i][2]
        imp_nns = matrix_m02[i][3]
        index = 0
        for(word, tag) in tagged_review:
            if tag in {'NN', 'NNS', 'NNP', 'NNPS'}:
                if word.strip() in imp_nns:
                    (adj,nn) = get_pair(index, tagged_review)
                    if adj is not None:
                        pairs.append((adj.strip(),nn.strip()))
            index += 1
        matrix_m02[i][5] = pairs
        pbar.update(1)


100%|██████████| 511364/511364 [00:36<00:00, 14185.95it/s]

In [102]:
sample = pd.DataFrame(matrix_m02[0:100])
sample


Out[102]:
0 1 2 3 4 5
0 A2XQ5LZHTD4AFT 000100039X [(timeless, NN), ( classic, JJ), ( demanding, ... [kneads, profits, preachers, territory, exile,... 26 [(birth, prophets), (book, flows)]
1 AF7CSSGV93RXN 000100039X [(first, RB), ( read, JJ), ( prophet, NNP), ( ... [kneads, profits, preachers, territory, exile,... 26 []
2 A1NPNGWBVD9AK3 000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26 [(relevant, catechism), (within, prophets), (t...
3 A3IS4WGMFR4X65 000100039X [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... [kneads, profits, preachers, territory, exile,... 26 []
4 AWLFVCT9128JV 000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... [kneads, profits, preachers, territory, exile,... 26 [(forty-eight, almustafa)]
5 AFY0BT42DDYZV 000100039X [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... [kneads, profits, preachers, territory, exile,... 26 [(souls, profits), (wordofmouth, twentysix), (...
6 A25P6DY6ARTCGZ 000100039X [(book, NN), ( almost, RBS), ( kahlil, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26 []
7 A1SP45I55GQIIE 000100039X [(certainly, RB), ( words, NNS), ( kahlil, NNP... [kneads, profits, preachers, territory, exile,... 26 []
8 A2E71VWXO59342 000100039X [(prophet, NN), ( dispenses, NNS), ( ultimate,... [kneads, profits, preachers, territory, exile,... 26 []
9 A2OP1HD9RGX5OW 000100039X [(book, NN), ( poetic, JJ), ( myth, NNP), ( wo... [kneads, profits, preachers, territory, exile,... 26 []
10 A2052JNVUPRTMT 000100039X [(gibran, JJ), ( gets, NNS), ( right, VBD), ( ... [kneads, profits, preachers, territory, exile,... 26 []
11 AGKPTMTR3UX1R 000100039X [(kahlil, NN), ( gibran, JJ), ( eighteen milli... [kneads, profits, preachers, territory, exile,... 26 []
12 A1HS49P9TZRGV9 000100039X [(father, RB), ( huge, JJ), ( book, NN), ( col... [kneads, profits, preachers, territory, exile,... 26 []
13 A2ZZHMT58ZMVCZ 000100039X [(prophet, NN), ( waited, VBD), ( twelve, CD),... [kneads, profits, preachers, territory, exile,... 26 [(bear, departs), (others, pillars), (similar,...
14 A3W43PSHRIG8KV 000100039X [(first, RB), ( became, JJ), ( aware, NNP), ( ... [kneads, profits, preachers, territory, exile,... 26 []
15 A1TR1LU2JSZLUL 000100039X [(book, NN), ( given, RB), ( gift, NNP), ( jou... [kneads, profits, preachers, territory, exile,... 26 []
16 ADIDQRLLR4KBQ 000100039X [(atheist, NN), ( may, NNP), ( seem, NNP), ( s... [kneads, profits, preachers, territory, exile,... 26 [(beautiful, metaphors), (live, prophets)]
17 A3AW2ZG0GP4SKN 000100039X [(bought, VBN), ( book, NN), ( son, NNP), ( st... [kneads, profits, preachers, territory, exile,... 26 []
18 A2MMON52VMO7NT 000100039X [(gibrans, NNS), ( words, NNS), ( strike, IN),... [kneads, profits, preachers, territory, exile,... 26 []
19 AR72Z89LACZ8Q 000100039X [(unusual, JJ), ( departure, NN), ( imaginary,... [kneads, profits, preachers, territory, exile,... 26 []
20 A3RTC17QVQGML7 000100039X [(almost, RB), ( fifty-one, CD), ( years, NNS)... [kneads, profits, preachers, territory, exile,... 26 []
21 A3HCD8V6TW4DBV 000100039X [(inspirational, JJ), ( true, NN), ( teachings... [kneads, profits, preachers, territory, exile,... 26 []
22 A281NPSIMI1C2R 000100039X [(alive, JJ), ( like, NN), ( standing, VBG), (... [kneads, profits, preachers, territory, exile,... 26 [(pain, waves), (separate, almustafa)]
23 A2X3E8EU2KBVN8 000100039X [(seldom, RB), ( book, NNP), ( read, NNP), ( t... [kneads, profits, preachers, territory, exile,... 26 []
24 A2R64CR74I98K3 000100039X [(usefull, JJ), ( book, NN), ( used, VBD), ( s... [kneads, profits, preachers, territory, exile,... 26 [(religious, texts)]
25 AHD101501WCN1 000100039X [(never, RB), (quite, RB), ( make, JJ), ( mind... [kneads, profits, preachers, territory, exile,... 26 []
26 AF4QKY2R2TD3U 000100039X [(say, VB), ( found, IN), ( truth, NNP), ( rat... [kneads, profits, preachers, territory, exile,... 26 [(rich, metaphors)]
27 A3SMT15X2QVUR8 000100039X [(prophet, NN), ( almustafa, CC), ( waits, NNS... [kneads, profits, preachers, territory, exile,... 26 [(orphalese, metaphor)]
28 A3D7L0R1281COX 000100039X [(gibrans, NNS), ( prophet, VBP), ( best, JJS)... [kneads, profits, preachers, territory, exile,... 26 []
29 A2INDDW3XYFFV1 000100039X [(khalil, NNS), ( gibrans, NNS), ( prophet, VB... [kneads, profits, preachers, territory, exile,... 26 [(home, prophets)]
... ... ... ... ... ... ...
70 A39BB196X0I2N1 0002117088 [(renoir, NN), ( far, NNP), ( one, NN), ( worl... [surgery, sorts, goodnight, virtues, translato... 7 []
71 A3KL4JSUOH8NVF 0002117088 [(book, NN), ( presents, NNS), ( touching, VBG... [surgery, sorts, goodnight, virtues, translato... 7 []
72 A2GHTSBU7IHIBO 0002117088 [(book, NN), ( renoir, NNP), ( whose, NNP), ( ... [surgery, sorts, goodnight, virtues, translato... 7 []
73 A3I4IFMO4Z4S7J 000215725X [(william, JJ), ( dalrymple, NNP), ( great, NN... [treachery, fort, emperors, 17th, uk, mundane,... 39 []
74 A1RAUVCWYHTQI4 000215725X [(really, RB), ( wonderful, JJ), ( book, NN), ... [treachery, fort, emperors, 17th, uk, mundane,... 39 []
75 A280GY5UVUS2QH 000215725X [(william, JJ), ( dalrymple, JJ), ( historian,... [treachery, fort, emperors, 17th, uk, mundane,... 39 [(thought, fraser), (one, fraser), (nineteenth...
76 A2CBZMETQJTNEE 000215725X [(djinn, NN), ( spirit, NN), ( invisible, JJ),... [treachery, fort, emperors, 17th, uk, mundane,... 39 [(unusual, pigeon), (indian, weddings)]
77 A21CL0N0DQTZXL 000215725X [(whether, IN), ( plan, JJ), ( visit, NNP), ( ... [treachery, fort, emperors, 17th, uk, mundane,... 39 []
78 A2SDYWB9LB1LYI 000215725X [(loved, VBN), ( dalrymples, NNS), ( holy, VBP... [treachery, fort, emperors, 17th, uk, mundane,... 39 [(throng, drivers), (favorite, fights), (parts...
79 A38ELBK7FLXGOY 000215725X [(quot, JJ), ( city, NN), ( djinns, NNP), ( qu... [treachery, fort, emperors, 17th, uk, mundane,... 39 []
80 A2S5KWZ8HVAB5X 000215725X [(dalrymple, NN), ( simply, NNP), ( one, NNP),... [treachery, fort, emperors, 17th, uk, mundane,... 39 []
81 A2GNVZUDL15YDK 000215725X [(book, NN), ( ninety, NN), ( informative, JJ)... [treachery, fort, emperors, 17th, uk, mundane,... 39 [(ancient, introduces), (present, fort), (libr...
82 A2ZU5UM8ZI8LQG 000215725X [(first, RB), ( thing, VBG), ( incredibly, RB)... [treachery, fort, emperors, 17th, uk, mundane,... 39 [(diary, wifes), (diary, sights), (without, bi...
83 A1F6Q5Q0U6MS5X 000215725X [(city, NN), ( djinns, CD), ( year, JJ), ( del... [treachery, fort, emperors, 17th, uk, mundane,... 39 [(punjabi, hindu), (delhi, degenerate), (20th,...
84 AEJ31WGHJ59C 0002219417 [(every, DT), ( herman, NN), ( wouk, VBZ), ( b... [humanlevel, smaller, conversion, periods, lic... 32 [(america, remembrance), (america, continues),...
85 A3L00A15HDNQGK 0002219417 [(lets, NNS), ( start, VBP), ( clear, JJ), ( w... [humanlevel, smaller, conversion, periods, lic... 32 [(nervous, giggle)]
86 A1EJ6J6N9I7W35 0002219417 [(pair, NN), ( winds, VBZ), ( war, JJ), ( plus... [humanlevel, smaller, conversion, periods, lic... 32 [(war, remembrance), (europe, remembrance), (g...
87 A3J2CGLKCCPSBE 0002219417 [(novel, NN), ( definitely, RB), ( mustread, J... [humanlevel, smaller, conversion, periods, lic... 32 []
88 A15ENK5HYBO2YF 0002219417 [(im, JJ), ( bit, NN), ( young, NNP), ( world,... [humanlevel, smaller, conversion, periods, lic... 32 [(shell, remembrance)]
89 AQ9OSVFTHBAI1 0002219417 [(four, CD), ( components, NNS), ( writer, RBR... [humanlevel, smaller, conversion, periods, lic... 32 [(tell, bellow), (forget, situations), (blame,...
90 AW3VZ5O895LRK 0002219417 [(go, VB), ( historical, JJ), ( romance, NN), ... [humanlevel, smaller, conversion, periods, lic... 32 [(incredible, remembrance)]
91 A39WL61420S1T6 0002219417 [(winds, NNS), ( war, JJ), ( war, JJ), ( remem... [humanlevel, smaller, conversion, periods, lic... 32 [(war, remembrance), (like, theyd), (endlessly...
92 A2JM4TKX99S313 0002219417 [(sit, NN), ( read, JJ), ( winds, VBZ), ( war,... [humanlevel, smaller, conversion, periods, lic... 32 [(umpteenth, remembrance)]
93 A3LP5RUMWG7GQ0 0002219417 [(winds, NNS), ( war, JJ), ( decent, JJ), ( st... [humanlevel, smaller, conversion, periods, lic... 32 []
94 AOSDS7DNROB5K 0002219417 [(extremely, RB), ( insulting, VBG), ( no, PRP... [humanlevel, smaller, conversion, periods, lic... 32 [(clean, conversion), (line, conversion), (app...
95 A3NCKDPCAUOD4T 0002219417 [(ive, JJ), ( read, NN), ( many, NN), ( wwiire... [humanlevel, smaller, conversion, periods, lic... 32 []
96 A2K4RNOAD5J3WB 0002219417 [(quot, JJ), ( winds, VBZ), ( war, JJ), ( quot... [humanlevel, smaller, conversion, periods, lic... 32 []
97 AOKQZVWCLONRH 0002219417 [(ive, JJ), ( noticed, VBD), ( continues, NNS)... [humanlevel, smaller, conversion, periods, lic... 32 [(books, continues), (american, naval), (key, ...
98 A2DNR5QVO8U9ZI 000222383X [(patrick, JJ), ( obrian, JJ), ( naval, NN), (... [treasons, construct, expansion, captains, fav... 11 [(obrian, naval), (british, naval), (british, ...
99 AYKYFFCC49HVT 000222383X [(read, JJ), ( prior, NNP), ( three, NNP), ( n... [treasons, construct, expansion, captains, fav... 11 [(nautical, naval), (overall, captains), (summ...

100 rows × 6 columns


In [107]:
df20 = pd.DataFrame(matrix_m02)
df20.columns = ['userId','asin','reviewText','imp_nns','num_of_imp_nouns','pairs']
df20.head()


Out[107]:
userId asin reviewText imp_nns num_of_imp_nouns pairs
0 A2XQ5LZHTD4AFT 000100039X [(timeless, NN), ( classic, JJ), ( demanding, ... [kneads, profits, preachers, territory, exile,... 26 [(birth, prophets), (book, flows)]
1 AF7CSSGV93RXN 000100039X [(first, RB), ( read, JJ), ( prophet, NNP), ( ... [kneads, profits, preachers, territory, exile,... 26 []
2 A1NPNGWBVD9AK3 000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26 [(relevant, catechism), (within, prophets), (t...
3 A3IS4WGMFR4X65 000100039X [(prophet, NN), ( kahlil, NN), ( gibrans, VBZ)... [kneads, profits, preachers, territory, exile,... 26 []
4 AWLFVCT9128JV 000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... [kneads, profits, preachers, territory, exile,... 26 [(forty-eight, almustafa)]

In [108]:
len(df20)


Out[108]:
511364

In [110]:
reviews_vs_feature_opinion_pairs = df20[df20['pairs'].map(lambda pairs: len(pairs)) > 0]
len(reviews_vs_feature_opinion_pairs)


Out[110]:
249871

In [112]:
reviews_vs_feature_opinion_pairs[0:100]


Out[112]:
userId asin reviewText imp_nns num_of_imp_nouns pairs
0 A2XQ5LZHTD4AFT 000100039X [(timeless, NN), ( classic, JJ), ( demanding, ... [kneads, profits, preachers, territory, exile,... 26 [(birth, prophets), (book, flows)]
2 A1NPNGWBVD9AK3 000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26 [(relevant, catechism), (within, prophets), (t...
4 AWLFVCT9128JV 000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... [kneads, profits, preachers, territory, exile,... 26 [(forty-eight, almustafa)]
5 AFY0BT42DDYZV 000100039X [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... [kneads, profits, preachers, territory, exile,... 26 [(souls, profits), (wordofmouth, twentysix), (...
13 A2ZZHMT58ZMVCZ 000100039X [(prophet, NN), ( waited, VBD), ( twelve, CD),... [kneads, profits, preachers, territory, exile,... 26 [(bear, departs), (others, pillars), (similar,...
16 ADIDQRLLR4KBQ 000100039X [(atheist, NN), ( may, NNP), ( seem, NNP), ( s... [kneads, profits, preachers, territory, exile,... 26 [(beautiful, metaphors), (live, prophets)]
22 A281NPSIMI1C2R 000100039X [(alive, JJ), ( like, NN), ( standing, VBG), (... [kneads, profits, preachers, territory, exile,... 26 [(pain, waves), (separate, almustafa)]
24 A2R64CR74I98K3 000100039X [(usefull, JJ), ( book, NN), ( used, VBD), ( s... [kneads, profits, preachers, territory, exile,... 26 [(religious, texts)]
26 AF4QKY2R2TD3U 000100039X [(say, VB), ( found, IN), ( truth, NNP), ( rat... [kneads, profits, preachers, territory, exile,... 26 [(rich, metaphors)]
27 A3SMT15X2QVUR8 000100039X [(prophet, NN), ( almustafa, CC), ( waits, NNS... [kneads, profits, preachers, territory, exile,... 26 [(orphalese, metaphor)]
29 A2INDDW3XYFFV1 000100039X [(khalil, NNS), ( gibrans, NNS), ( prophet, VB... [kneads, profits, preachers, territory, exile,... 26 [(home, prophets)]
30 A1CSL3TFTFOTWH 0002051850 [(found, VBN), ( book, NN), ( takes, NNS), ( p... [declarations, towns, smaller, threatens, desi... 73 [(independent, periods), (story, progresses), ...
31 A313LJLZT8646J 0002051850 [(bell, NN), ( tolls, VBZ), ( ernest, JJS), ( ... [declarations, towns, smaller, threatens, desi... 73 [(consistent, dire), (nine hundred and thirty-...
32 AHCVWPLA1O4X8 0002051850 [(one, CD), ( greatest, JJS), ( modernist, NN)... [declarations, towns, smaller, threatens, desi... 73 [(american, spain), (fact, thee)]
36 A1K1JW1C5CUSUZ 0002051850 [(hemingways, NNS), ( magnificent, VBP), ( nov... [declarations, towns, smaller, threatens, desi... 73 [(political, fascism), (extensive, flashbacks)...
37 A33R4E8T9KVLOM 0002051850 [(robert, JJ), ( jordan, NNP), ( one, NN), ( e... [declarations, towns, smaller, threatens, desi... 73 [(read, spain), (indepth, reflects), (sadistic...
39 A3IKBHODOTYYHM 0002051850 [(novel, JJ), ( sum, NNP), ( consequential, NN... [declarations, towns, smaller, threatens, desi... 73 [(sold, spain)]
40 A1PN3R8DXRQ1C3 0002051850 [(spanish, JJ), ( civil, NNP), ( war, NNP), ( ... [declarations, towns, smaller, threatens, desi... 73 [(western, spain), (many, intellectuals), (des...
42 A1RECBDKHVOJMW 0002051850 [(bell, NN), ( tolls, NNS), ( long, JJ), ( fav... [declarations, towns, smaller, threatens, desi... 73 [(european, spain), (red, spain), (conservativ...
45 A3SI6F1RGCTAOH 0002051850 [(last, JJ), ( time, JJ), ( read, JJ), ( hemin... [declarations, towns, smaller, threatens, desi... 73 [(war, shines), (sex, declarations), (novel, c...
48 A3QZCA4LTTVGAD 0002051850 [(set, VBN), ( spanish, JJ), ( civil, NNP), ( ... [declarations, towns, smaller, threatens, desi... 73 [(republican, guerrilla), (various, focuses), ...
50 A1MC81HLJ6Z9ZQ 0002051850 [(anything, NN), ( hemingway, RB), ( ever, NNP... [declarations, towns, smaller, threatens, desi... 73 [(horrible, affair), (pull, coltish), (content...
53 A8IPQ1Q1O7YX5 0002051850 [(dont, NN), (think, VBP), ( ever, IN), ( take... [declarations, towns, smaller, threatens, desi... 73 [(enemy, guerrilla)]
54 A3Q9K57FARA2WQ 0002051850 [(say, VB), ( masterpiece, NNP), ( hasnt, NNP)... [declarations, towns, smaller, threatens, desi... 73 [(american, spain)]
56 A3KRRXPFEAO6V 0002051850 [(ernest, JJS), ( hemingway, RB), ( bell, NNP)... [declarations, towns, smaller, threatens, desi... 73 [(fascist, threatens), (latter, partners), (co...
58 A1RLYOPK16YXC1 0002051850 [(bell, NN), ( tolls, NNS), ( takes, VBZ), ( p... [declarations, towns, smaller, threatens, desi... 73 [(american, spain), (missions, guerrilla), (pr...
59 AMTADN8VCK6J2 0002051850 [(novel, NN), ( considered, VBD), ( one, CD), ... [declarations, towns, smaller, threatens, desi... 73 [(story, mountains)]
60 A29SHFBU5O9BWO 0002051850 [(perhaps, RB), ( bell, NNP), ( tolled, VBD), ... [declarations, towns, smaller, threatens, desi... 73 [(literary, greatness), (suitable, achievement...
64 A2EQ74Y24BHHIF 0002113570 [(jane, NN), ( goodall, DT), ( unique, NNP), (... [humane, homo, ancestors, michener] 4 [(like, michener), (could, homo), (common, anc...
68 A2KUKUSSSAYAKH 0002117088 [(adopted, VBN), ( quot, JJ), ( renoir, NNP), ... [surgery, sorts, goodnight, virtues, translato... 7 [(hip, surgery), (hip, goodnight), (come, clau...
... ... ... ... ... ... ...
138 A1WQR544MMF68L 000224053X [(red, JJ), ( mars, NNS), ( first, RB), ( kim,... [fundamentalists, coast, pioneer, inconsistenc... 81 [(first, expidition), (insead, focuses), (stru...
139 A2YUZKPLUYQDKV 000224053X [(readers, NNS), ( sciencefiction, VBP), ( tre... [fundamentalists, coast, pioneer, inconsistenc... 81 [(famous, thunder)]
140 A21Z8FUB9IZI2L 000224053X [(colonization, NN), ( solar, JJ), ( system, N... [fundamentalists, coast, pioneer, inconsistenc... 81 [(gigantic, corporations)]
141 A1RSGDRA57QN6J 000224053X [(nominally, RB), ( futurehistory, JJ), ( mart... [fundamentalists, coast, pioneer, inconsistenc... 81 [(initial, corporations), (martian, landscape)...
142 A1SG73VR5R809B 000224053X [(first, RB), ( one hundred, JJ), ( colonists,... [fundamentalists, coast, pioneer, inconsistenc... 81 [(plausible, technologies)]
143 AWFD44Q17PCOV 000224053X [(dont, NN), ( confused, VBD), ( technical, JJ... [fundamentalists, coast, pioneer, inconsistenc... 81 [(siberian, construction)]
144 A260X99UW6HA9Z 000224053X [(robinson, NN), ( achieves, NNS), ( pitch, VB... [fundamentalists, coast, pioneer, inconsistenc... 81 [(science, coyote)]
150 A18YAKQM1X2MCJ 000224053X [(robinson, NN), ( excellent, NN), ( writer, N... [fundamentalists, coast, pioneer, inconsistenc... 81 [(asimov, clarke)]
151 A2ZMWGU610X3ZT 000224053X [(warning, VBG), ( books, NNS), ( hypnotically... [fundamentalists, coast, pioneer, inconsistenc... 81 [(prestigious, awards), (creole, jazz), (among...
153 A3D0BL5DWYN3NQ 000224053X [(heres, NNS), ( incredibly, RB), ( realistic,... [fundamentalists, coast, pioneer, inconsistenc... 81 [(global, exploitation)]
155 A3C2A3D2KG1F1A 000224053X [(really, RB), ( looking, VBG), ( forward, NNP... [fundamentalists, coast, pioneer, inconsistenc... 81 [(read, landscape)]
156 A3UL9V4K9OJU9F 000224053X [(read, NN), ( lot, NN), ( science, NNP), ( fi... [fundamentalists, coast, pioneer, inconsistenc... 81 [(habitable, environment)]
165 A3PE884TA8529G 0002242052 [(usually, RB), ( read, JJ), ( fiction, NN), (... [stretches, authorities, ludlum, drugdealers, ... 14 [(begins, pimp), (drug, pimp)]
166 A3QWNQ2WA7QZYJ 0002242052 [(tom, NN), ( clancys, NNP), ( strength, NNP),... [stretches, authorities, ludlum, drugdealers, ... 14 [(basic, tech), (character, ludlum), (long, st...
171 A3B0YYFNBNDX6C 0002242052 [(without, IN), ( remorseby, NNP), ( tom, NNP)... [stretches, authorities, ludlum, drugdealers, ... 14 [(several, languages), (mechanical, semitruck)...
175 A1AUBGENRIZODO 0002311216 [(novel, JJ), ( follows, VBZ), ( adventures, N... [andersons, espionage, poirot, worldwide, open... 16 [(real, jaan), (tedious, twothirds), (various,...
177 A1AOIKQ6JNWETI 0002311216 [(first, RB), ( let, NNP), ( say, NNP), ( huge... [andersons, espionage, poirot, worldwide, open... 16 [(world, espionage), (various, cabinet)]
178 A3A6GXILVT9JNW 0002311216 [(one thousand, NN), ( nine hundred and sevent... [andersons, espionage, poirot, worldwide, open... 16 [(worldwide, neo), (much, hercule), (much, poi...
179 A29IPCJZUUAL32 0002311216 [(disappointed, JJ), ( book, NN), ( great, NN)... [andersons, espionage, poirot, worldwide, open... 16 [(excellent, openings)]
181 A1T86T7FTUW38K 0002311216 [(im, NN), ( surprised, VBD), ( hasnt, NNP), (... [andersons, espionage, poirot, worldwide, open... 16 [(ythrian, motives)]
183 AENXJM2CKEN5L 0002550938 [(sahara, NN), ( written, NNP), ( clive, NNP),... [fights, mysery, pollution, cusslers, threads,... 6 [(ironclad, confederate), (eight hundred and s...
185 A2F3M93RRLFQNJ 0002550938 [(last, JJ), ( time, JJ), ( read, JJ), ( clive... [fights, mysery, pollution, cusslers, threads,... 6 [(pair, confederate)]
187 A1TAWJ2PA3USVX 0002550938 [(whats, NNS), ( sillier, VBP), ( even, RB), (... [fights, mysery, pollution, cusslers, threads,... 6 [(eye, cusslers)]
190 ADCZ95UF83HLM 000255383X [(thats, NNS), ( common, VBP), ( rabble, JJ), ... [ubi, shark, merge, beeper, phrases] 5 [(aspiring, beeper)]
191 A2VE83MZF98ITY 000255383X [(wonderful, JJ), ( little, JJ), ( book, NN), ... [ubi, shark, merge, beeper, phrases] 5 [(various, phrases), (innocuous, beeper), (sec...
194 A3N4J7FRI8BPT6 0002621509 [(written, VBN), ( americans, NNS), ( perspect... [surgery, espionage, dolgun, paranoid, employe... 8 [(people, dolgun)]
196 A2E3GFHUDNPYDH 0002621509 [(walk, NN), ( one, NN), ( day, JJ), ( moscow,... [surgery, espionage, dolgun, paranoid, employe... 8 [(shalomov, employee), (american, dolgun), (ni...
197 A3QIEISBZP4QTV 0002621509 [(enjoy, NN), ( genre, JJ), ( adventure, NN), ... [surgery, espionage, dolgun, paranoid, employe... 8 [(much, dolgun)]
198 A1VIXUHSCZTJUT 0002621509 [(book, NN), ( hits, NNS), ( like, IN), ( slap... [surgery, espionage, dolgun, paranoid, employe... 8 [(russia, dolgun)]
199 A17S1450Y06C86 0002726874 [(towards, NNS), ( end, VBP), ( life, JJ), ( g... [macdonalds, aboot, ye, smaller, theer, uk, al... 63 [(life, fraser), (battles, lance), (imperial, ...

100 rows × 6 columns


In [113]:
249871/511364


Out[113]:
0.48863627474753796

In [114]:
reviews_vs_feature_opinion_pairs = reviews_vs_feature_opinion_pairs.assign(num_of_pairs = reviews_vs_feature_opinion_pairs['pairs'].progress_apply(lambda pairs:len(pairs)))
reviews_vs_feature_opinion_pairs.head()


Progress:: 100%|██████████| 249871/249871 [00:00<00:00, 1138209.59it/s]
Out[114]:
userId asin reviewText imp_nns num_of_imp_nouns pairs num_of_pairs
0 A2XQ5LZHTD4AFT 000100039X [(timeless, NN), ( classic, JJ), ( demanding, ... [kneads, profits, preachers, territory, exile,... 26 [(birth, prophets), (book, flows)] 2
2 A1NPNGWBVD9AK3 000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26 [(relevant, catechism), (within, prophets), (t... 4
4 AWLFVCT9128JV 000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... [kneads, profits, preachers, territory, exile,... 26 [(forty-eight, almustafa)] 1
5 AFY0BT42DDYZV 000100039X [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... [kneads, profits, preachers, territory, exile,... 26 [(souls, profits), (wordofmouth, twentysix), (... 3
13 A2ZZHMT58ZMVCZ 000100039X [(prophet, NN), ( waited, VBD), ( twelve, CD),... [kneads, profits, preachers, territory, exile,... 26 [(bear, departs), (others, pillars), (similar,... 4

In [118]:
reviews_vs_feature_opinion_pairs[0:100]


Out[118]:
userId asin reviewText imp_nns num_of_imp_nouns pairs num_of_pairs
0 A2XQ5LZHTD4AFT 000100039X [(timeless, NN), ( classic, JJ), ( demanding, ... [kneads, profits, preachers, territory, exile,... 26 [(birth, prophets), (book, flows)] 2
2 A1NPNGWBVD9AK3 000100039X [(one, CD), ( first, NNP), ( literary, JJ), ( ... [kneads, profits, preachers, territory, exile,... 26 [(relevant, catechism), (within, prophets), (t... 4
4 AWLFVCT9128JV 000100039X [(gibran, NN), ( khalil, NNP), ( gibran, NNP),... [kneads, profits, preachers, territory, exile,... 26 [(forty-eight, almustafa)] 1
5 AFY0BT42DDYZV 000100039X [(days, NNS), ( kahlil, VBP), ( gibrans, NNS),... [kneads, profits, preachers, territory, exile,... 26 [(souls, profits), (wordofmouth, twentysix), (... 3
13 A2ZZHMT58ZMVCZ 000100039X [(prophet, NN), ( waited, VBD), ( twelve, CD),... [kneads, profits, preachers, territory, exile,... 26 [(bear, departs), (others, pillars), (similar,... 4
16 ADIDQRLLR4KBQ 000100039X [(atheist, NN), ( may, NNP), ( seem, NNP), ( s... [kneads, profits, preachers, territory, exile,... 26 [(beautiful, metaphors), (live, prophets)] 2
22 A281NPSIMI1C2R 000100039X [(alive, JJ), ( like, NN), ( standing, VBG), (... [kneads, profits, preachers, territory, exile,... 26 [(pain, waves), (separate, almustafa)] 2
24 A2R64CR74I98K3 000100039X [(usefull, JJ), ( book, NN), ( used, VBD), ( s... [kneads, profits, preachers, territory, exile,... 26 [(religious, texts)] 1
26 AF4QKY2R2TD3U 000100039X [(say, VB), ( found, IN), ( truth, NNP), ( rat... [kneads, profits, preachers, territory, exile,... 26 [(rich, metaphors)] 1
27 A3SMT15X2QVUR8 000100039X [(prophet, NN), ( almustafa, CC), ( waits, NNS... [kneads, profits, preachers, territory, exile,... 26 [(orphalese, metaphor)] 1
29 A2INDDW3XYFFV1 000100039X [(khalil, NNS), ( gibrans, NNS), ( prophet, VB... [kneads, profits, preachers, territory, exile,... 26 [(home, prophets)] 1
30 A1CSL3TFTFOTWH 0002051850 [(found, VBN), ( book, NN), ( takes, NNS), ( p... [declarations, towns, smaller, threatens, desi... 73 [(independent, periods), (story, progresses), ... 6
31 A313LJLZT8646J 0002051850 [(bell, NN), ( tolls, VBZ), ( ernest, JJS), ( ... [declarations, towns, smaller, threatens, desi... 73 [(consistent, dire), (nine hundred and thirty-... 9
32 AHCVWPLA1O4X8 0002051850 [(one, CD), ( greatest, JJS), ( modernist, NN)... [declarations, towns, smaller, threatens, desi... 73 [(american, spain), (fact, thee)] 2
36 A1K1JW1C5CUSUZ 0002051850 [(hemingways, NNS), ( magnificent, VBP), ( nov... [declarations, towns, smaller, threatens, desi... 73 [(political, fascism), (extensive, flashbacks)... 5
37 A33R4E8T9KVLOM 0002051850 [(robert, JJ), ( jordan, NNP), ( one, NN), ( e... [declarations, towns, smaller, threatens, desi... 73 [(read, spain), (indepth, reflects), (sadistic... 4
39 A3IKBHODOTYYHM 0002051850 [(novel, JJ), ( sum, NNP), ( consequential, NN... [declarations, towns, smaller, threatens, desi... 73 [(sold, spain)] 1
40 A1PN3R8DXRQ1C3 0002051850 [(spanish, JJ), ( civil, NNP), ( war, NNP), ( ... [declarations, towns, smaller, threatens, desi... 73 [(western, spain), (many, intellectuals), (des... 4
42 A1RECBDKHVOJMW 0002051850 [(bell, NN), ( tolls, NNS), ( long, JJ), ( fav... [declarations, towns, smaller, threatens, desi... 73 [(european, spain), (red, spain), (conservativ... 16
45 A3SI6F1RGCTAOH 0002051850 [(last, JJ), ( time, JJ), ( read, JJ), ( hemin... [declarations, towns, smaller, threatens, desi... 73 [(war, shines), (sex, declarations), (novel, c... 4
48 A3QZCA4LTTVGAD 0002051850 [(set, VBN), ( spanish, JJ), ( civil, NNP), ( ... [declarations, towns, smaller, threatens, desi... 73 [(republican, guerrilla), (various, focuses), ... 12
50 A1MC81HLJ6Z9ZQ 0002051850 [(anything, NN), ( hemingway, RB), ( ever, NNP... [declarations, towns, smaller, threatens, desi... 73 [(horrible, affair), (pull, coltish), (content... 5
53 A8IPQ1Q1O7YX5 0002051850 [(dont, NN), (think, VBP), ( ever, IN), ( take... [declarations, towns, smaller, threatens, desi... 73 [(enemy, guerrilla)] 1
54 A3Q9K57FARA2WQ 0002051850 [(say, VB), ( masterpiece, NNP), ( hasnt, NNP)... [declarations, towns, smaller, threatens, desi... 73 [(american, spain)] 1
56 A3KRRXPFEAO6V 0002051850 [(ernest, JJS), ( hemingway, RB), ( bell, NNP)... [declarations, towns, smaller, threatens, desi... 73 [(fascist, threatens), (latter, partners), (co... 4
58 A1RLYOPK16YXC1 0002051850 [(bell, NN), ( tolls, NNS), ( takes, VBZ), ( p... [declarations, towns, smaller, threatens, desi... 73 [(american, spain), (missions, guerrilla), (pr... 11
59 AMTADN8VCK6J2 0002051850 [(novel, NN), ( considered, VBD), ( one, CD), ... [declarations, towns, smaller, threatens, desi... 73 [(story, mountains)] 1
60 A29SHFBU5O9BWO 0002051850 [(perhaps, RB), ( bell, NNP), ( tolled, VBD), ... [declarations, towns, smaller, threatens, desi... 73 [(literary, greatness), (suitable, achievement... 6
64 A2EQ74Y24BHHIF 0002113570 [(jane, NN), ( goodall, DT), ( unique, NNP), (... [humane, homo, ancestors, michener] 4 [(like, michener), (could, homo), (common, anc... 4
68 A2KUKUSSSAYAKH 0002117088 [(adopted, VBN), ( quot, JJ), ( renoir, NNP), ... [surgery, sorts, goodnight, virtues, translato... 7 [(hip, surgery), (hip, goodnight), (come, clau... 7
... ... ... ... ... ... ... ...
138 A1WQR544MMF68L 000224053X [(red, JJ), ( mars, NNS), ( first, RB), ( kim,... [fundamentalists, coast, pioneer, inconsistenc... 81 [(first, expidition), (insead, focuses), (stru... 4
139 A2YUZKPLUYQDKV 000224053X [(readers, NNS), ( sciencefiction, VBP), ( tre... [fundamentalists, coast, pioneer, inconsistenc... 81 [(famous, thunder)] 1
140 A21Z8FUB9IZI2L 000224053X [(colonization, NN), ( solar, JJ), ( system, N... [fundamentalists, coast, pioneer, inconsistenc... 81 [(gigantic, corporations)] 1
141 A1RSGDRA57QN6J 000224053X [(nominally, RB), ( futurehistory, JJ), ( mart... [fundamentalists, coast, pioneer, inconsistenc... 81 [(initial, corporations), (martian, landscape)... 8
142 A1SG73VR5R809B 000224053X [(first, RB), ( one hundred, JJ), ( colonists,... [fundamentalists, coast, pioneer, inconsistenc... 81 [(plausible, technologies)] 1
143 AWFD44Q17PCOV 000224053X [(dont, NN), ( confused, VBD), ( technical, JJ... [fundamentalists, coast, pioneer, inconsistenc... 81 [(siberian, construction)] 1
144 A260X99UW6HA9Z 000224053X [(robinson, NN), ( achieves, NNS), ( pitch, VB... [fundamentalists, coast, pioneer, inconsistenc... 81 [(science, coyote)] 1
150 A18YAKQM1X2MCJ 000224053X [(robinson, NN), ( excellent, NN), ( writer, N... [fundamentalists, coast, pioneer, inconsistenc... 81 [(asimov, clarke)] 1
151 A2ZMWGU610X3ZT 000224053X [(warning, VBG), ( books, NNS), ( hypnotically... [fundamentalists, coast, pioneer, inconsistenc... 81 [(prestigious, awards), (creole, jazz), (among... 7
153 A3D0BL5DWYN3NQ 000224053X [(heres, NNS), ( incredibly, RB), ( realistic,... [fundamentalists, coast, pioneer, inconsistenc... 81 [(global, exploitation)] 1
155 A3C2A3D2KG1F1A 000224053X [(really, RB), ( looking, VBG), ( forward, NNP... [fundamentalists, coast, pioneer, inconsistenc... 81 [(read, landscape)] 1
156 A3UL9V4K9OJU9F 000224053X [(read, NN), ( lot, NN), ( science, NNP), ( fi... [fundamentalists, coast, pioneer, inconsistenc... 81 [(habitable, environment)] 1
165 A3PE884TA8529G 0002242052 [(usually, RB), ( read, JJ), ( fiction, NN), (... [stretches, authorities, ludlum, drugdealers, ... 14 [(begins, pimp), (drug, pimp)] 2
166 A3QWNQ2WA7QZYJ 0002242052 [(tom, NN), ( clancys, NNP), ( strength, NNP),... [stretches, authorities, ludlum, drugdealers, ... 14 [(basic, tech), (character, ludlum), (long, st... 4
171 A3B0YYFNBNDX6C 0002242052 [(without, IN), ( remorseby, NNP), ( tom, NNP)... [stretches, authorities, ludlum, drugdealers, ... 14 [(several, languages), (mechanical, semitruck)... 8
175 A1AUBGENRIZODO 0002311216 [(novel, JJ), ( follows, VBZ), ( adventures, N... [andersons, espionage, poirot, worldwide, open... 16 [(real, jaan), (tedious, twothirds), (various,... 4
177 A1AOIKQ6JNWETI 0002311216 [(first, RB), ( let, NNP), ( say, NNP), ( huge... [andersons, espionage, poirot, worldwide, open... 16 [(world, espionage), (various, cabinet)] 2
178 A3A6GXILVT9JNW 0002311216 [(one thousand, NN), ( nine hundred and sevent... [andersons, espionage, poirot, worldwide, open... 16 [(worldwide, neo), (much, hercule), (much, poi... 4
179 A29IPCJZUUAL32 0002311216 [(disappointed, JJ), ( book, NN), ( great, NN)... [andersons, espionage, poirot, worldwide, open... 16 [(excellent, openings)] 1
181 A1T86T7FTUW38K 0002311216 [(im, NN), ( surprised, VBD), ( hasnt, NNP), (... [andersons, espionage, poirot, worldwide, open... 16 [(ythrian, motives)] 1
183 AENXJM2CKEN5L 0002550938 [(sahara, NN), ( written, NNP), ( clive, NNP),... [fights, mysery, pollution, cusslers, threads,... 6 [(ironclad, confederate), (eight hundred and s... 5
185 A2F3M93RRLFQNJ 0002550938 [(last, JJ), ( time, JJ), ( read, JJ), ( clive... [fights, mysery, pollution, cusslers, threads,... 6 [(pair, confederate)] 1
187 A1TAWJ2PA3USVX 0002550938 [(whats, NNS), ( sillier, VBP), ( even, RB), (... [fights, mysery, pollution, cusslers, threads,... 6 [(eye, cusslers)] 1
190 ADCZ95UF83HLM 000255383X [(thats, NNS), ( common, VBP), ( rabble, JJ), ... [ubi, shark, merge, beeper, phrases] 5 [(aspiring, beeper)] 1
191 A2VE83MZF98ITY 000255383X [(wonderful, JJ), ( little, JJ), ( book, NN), ... [ubi, shark, merge, beeper, phrases] 5 [(various, phrases), (innocuous, beeper), (sec... 3
194 A3N4J7FRI8BPT6 0002621509 [(written, VBN), ( americans, NNS), ( perspect... [surgery, espionage, dolgun, paranoid, employe... 8 [(people, dolgun)] 1
196 A2E3GFHUDNPYDH 0002621509 [(walk, NN), ( one, NN), ( day, JJ), ( moscow,... [surgery, espionage, dolgun, paranoid, employe... 8 [(shalomov, employee), (american, dolgun), (ni... 8
197 A3QIEISBZP4QTV 0002621509 [(enjoy, NN), ( genre, JJ), ( adventure, NN), ... [surgery, espionage, dolgun, paranoid, employe... 8 [(much, dolgun)] 1
198 A1VIXUHSCZTJUT 0002621509 [(book, NN), ( hits, NNS), ( like, IN), ( slap... [surgery, espionage, dolgun, paranoid, employe... 8 [(russia, dolgun)] 1
199 A17S1450Y06C86 0002726874 [(towards, NNS), ( end, VBP), ( life, JJ), ( g... [macdonalds, aboot, ye, smaller, theer, uk, al... 63 [(life, fraser), (battles, lance), (imperial, ... 5

100 rows × 7 columns


In [123]:
pairs_per_book = reviews_vs_feature_opinion_pairs.groupby(['asin'])[["num_of_pairs"]].sum()
pairs_per_book = pairs_per_book.reset_index()
pairs_per_book.head()


Out[123]:
asin num_of_pairs
0 000100039X 22
1 0002051850 92
2 0002113570 4
3 0002117088 7
4 000215725X 46

In [124]:
len(pairs_per_book)


Out[124]:
48853

In [125]:
48939 - 48853


Out[125]:
86

In [126]:
import plotly 
import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
print(cf.__version__)
# Configure cufflings 
cf.set_config_file(offline=False, world_readable=True, theme='pearl')


0.12.1

In [127]:
pairs_per_book['num_of_pairs'].iplot(kind='histogram', bins=100, xTitle='Number of Pairs', yTitle='Number of Books')


/Users/falehalrashidi/anaconda3/lib/python3.6/site-packages/plotly/plotly/plotly.py:224: UserWarning:

Woah there! Look at all those points! Due to browser limitations, the Plotly SVG drawing functions have a hard time graphing more than 500k data points for line charts, or 40k points for other types of charts. Here are some suggestions:
(1) Use the `plotly.graph_objs.Scattergl` trace object to generate a WebGl graph.
(2) Trying using the image API to return an image instead of a graph URL
(3) Use matplotlib
(4) See if you can create your visualization with fewer data points

If the visualization you're using aggregates points (e.g., box plot, histogram, etc.) you can disregard this warning.

Out[127]:

In [129]:
# Save data
pairs_per_book.to_pickle("../data/interim/006_pairs_per_book.p")

In [130]:
reviews_vs_feature_opinion_pairs.to_pickle("../data/interim/006_pairs_per_review.p")

In [131]:
## END_OF_FILE

In [ ]: