by Andrew Trask
In [1]:
def pretty_print_review_and_label(i):
    """Print the label and a short preview of the review at index ``i``.

    Reads the module-level ``labels`` and ``reviews`` sequences and prints a
    single line: the label, a tab-delimited colon, the first 80 characters of
    the review, and a trailing ellipsis.

    Args:
        i: Index used for both ``labels`` and ``reviews``.

    Raises:
        IndexError: If ``i`` is out of range for either sequence.
    """
    print(labels[i] + "\t:\t" + reviews[i][:80] + "...")
# Load the dataset: one review per line of reviews.txt and one label per line
# of labels.txt (the rest of the script pairs them by index).
# ``with`` closes each handle even if reading raises, and rstrip('\n') strips
# only the line terminator, so a final line that lacks a trailing newline
# keeps its last character.
with open('reviews.txt', 'r') as g:  # What we know!
    reviews = [line.rstrip('\n') for line in g]
with open('labels.txt', 'r') as g:  # What we WANT to know!
    labels = [line.rstrip('\n').upper() for line in g]
In [2]:
# Notebook inspection cell: number of entries in ``reviews`` (one per line read from reviews.txt).
len(reviews)
Out[2]:
In [3]:
# Notebook inspection cell: look at the first loaded review.
reviews[0]
Out[3]:
In [4]:
# Notebook inspection cell: look at the first label (labels are upper-cased when loaded).
labels[0]
Out[4]:
In [5]:
# Show a header followed by a handful of hand-picked label/review samples.
print("labels.txt \t : \t reviews.txt\n")
for sample_index in (2137, 12816, 6267, 21934, 5297, 4998):
    pretty_print_review_and_label(sample_index)
In [8]:
# Split the reviews into two lists according to the label at the same index.
positive_reviews = []
negative_reviews = []
for index, label in enumerate(labels):
    if label == 'NEGATIVE':
        negative_reviews.append(reviews[index])
    elif label == 'POSITIVE':
        positive_reviews.append(reviews[index])
    else:
        # Any label other than the two expected values is reported, not stored.
        print("Found NEITHER")
# Report how many reviews ended up in each class.
print(len(positive_reviews))
print(len(negative_reviews))
In [9]:
# Merge each sentiment's reviews into a single space-delimited string.
positive_words, negative_words = (
    " ".join(positive_reviews),
    " ".join(negative_reviews),
)
In [10]:
# Break each merged string apart on single spaces to obtain word tokens.
positive_words, negative_words = (
    text.split(" ") for text in (positive_words, negative_words)
)
In [11]:
# Reduce each token list to its distinct words.
set_positive, set_negative = map(set, (positive_words, negative_words))
In [12]:
# Words that occur under exactly one of the two sentiments.
positive_only = set_positive.difference(set_negative)
negative_only = set_negative.difference(set_positive)
In [13]:
# Report the vocabulary size and the sentiment-exclusive word count per class.
for summary, vocabulary, exclusive in (
    ("Positive Set: {} Positive Only: {}", set_positive, positive_only),
    ("Negative Set: {} Negative Only: {}", set_negative, negative_only),
):
    print(summary.format(len(vocabulary), len(exclusive)))