notebook.community

Edit and run



In [10]:

    
import os
# os.listdir returns entries in arbitrary order; sort so that files[0] and
# any list built from `files` are the same on every run and every machine.
files = sorted(os.listdir('aclimdb/train/pos'))



In [12]:

    
# Read the first listed review as UTF-8 text.
first_file = files[0]
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('aclimdb/train/pos/{}'.format(first_file),'r',encoding='utf-8') as f:
    review = f.read()



In [13]:

    
# Display the raw text currently bound to `review`.
review









    Out[13]:





'Bromwell High is a cartoon comedy. It ran at the same time as some other programs about school life, such as "Teachers". My 35 years in the teaching profession lead me to believe that Bromwell High\'s satire is much closer to reality than is "Teachers". The scramble to survive financially, the insightful students who can see right through their pathetic teachers\' pomp, the pettiness of the whole situation, all remind me of the schools I knew and their students. When I saw the episode in which a student repeatedly tried to burn down the school, I immediately recalled ......... at .......... High. A classic line: INSPECTOR: I\'m here to sack one of your teachers. STUDENT: Welcome to Bromwell High. I expect that many adults of my age think that Bromwell High is far fetched. What a pity that it isn\'t!'



In [16]:

    
# Load every positive training review into a list.
# NOTE: the loop rebinds `review`, so once this cell has run `review` holds
# the text of the last file in `files`.
pos_train_list=[]
for file in files:
    # `with` closes the file on exit; an explicit close() is redundant.
    with open('aclimdb/train/pos/{}'.format(file),'r',encoding='utf-8') as f:
        review = f.read()
    pos_train_list.append(review)
print(len(pos_train_list))



In [17]:

    
import nltk
from nltk.corpus import sentiwordnet as swn



In [18]:

    
# senti_synsets returns a lazy iterator (displayed as a `filter` object),
# so it has to be materialised before its contents can be inspected.
swn.senti_synsets('hate')









    Out[18]:





<filter at 0x1f1ceb4f780>



In [19]:

    
# Materialise the iterator to list every SentiWordNet sense of 'hate'.
list(swn.senti_synsets('hate'))









    Out[19]:





[SentiSynset('hate.n.01'), SentiSynset('hate.v.01')]



In [20]:

    
# Passing 'v' as the second argument restricts the lookup to verb senses.
list(swn.senti_synsets('hate','v'))









    Out[20]:





[SentiSynset('hate.v.01')]



In [21]:

    
# Positive score of the first verb sense of 'hate'.
list(swn.senti_synsets('hate','v'))[0].pos_score()









    Out[21]:





0.0



In [22]:

    
# Negative score of the first verb sense of 'hate'.
list(swn.senti_synsets('hate','v'))[0].neg_score()









    Out[22]:





0.75



In [27]:

    
def word_sentiment_calculator(word, tag):
    """Return the mean SentiWordNet (positive, negative) scores for a word.

    Args:
        word: The token to look up.
        tag: A part-of-speech tag string. It is matched by substring against
            'NN', 'VB', 'JJ' and 'RB', which select the SentiWordNet
            categories 'n', 'v', 'a' and 'r' respectively.

    Returns:
        A 2-tuple ``(pos, neg)``: the positive and negative scores averaged
        over every synset found for the word in the selected category.
        ``(0, 0)`` is returned when the tag matches none of the markers or
        when no synsets are found.
    """
    # Checked in this order; a marker that matches the tag but yields no
    # synsets falls through to the next marker.
    tag_to_category = (('NN', 'n'), ('VB', 'v'), ('JJ', 'a'), ('RB', 'r'))

    for marker, category in tag_to_category:
        if marker not in tag:
            continue
        # senti_synsets returns a lazy iterator: materialise it exactly once
        # instead of querying once for the length test and again for use.
        syn_set = list(swn.senti_synsets(word, category))
        if syn_set:
            pos_score = sum(syn.pos_score() for syn in syn_set)
            neg_score = sum(syn.neg_score() for syn in syn_set)
            return (pos_score/len(syn_set), neg_score/len(syn_set))

    return (0,0)



In [ ]:



In [30]:

    
# Score 'love' with a noun tag: 'NN' selects the noun ('n') synsets.
word_sentiment_calculator('love','NN')









    Out[30]:





(0.22916666666666666, 0.0)



In [31]:

    
# Score 'love' with a verb tag: 'VB' selects the verb ('v') synsets.
word_sentiment_calculator('love','VB')









    Out[31]:





(0.625, 0.03125)



In [38]:

    
# Tokenize a short example sentence and tag each token with its part of
# speech; (word, tag) pairs like these are what the sentiment helpers consume.
sent = 'I hate you'
tokens = nltk.word_tokenize(sent)
pos_tags = nltk.pos_tag(tokens)
pos_tags









    Out[38]:





[('I', 'PRP'), ('hate', 'VBP'), ('you', 'PRP')]



In [52]:

    
def sentence_sentiment_calculator(a):
    """Sum per-word sentiment scores over a piece of text.

    The text is tokenized with ``nltk.word_tokenize``, tagged with
    ``nltk.pos_tag``, and every (word, tag) pair is scored with
    ``word_sentiment_calculator``.

    Args:
        a: The text to score.

    Returns:
        A 2-tuple ``(pos_score, neg_score)`` holding the summed positive and
        negative word scores. The totals are sums, not averages, so they grow
        with the number of scored tokens.
    """
    tokens = nltk.word_tokenize(a)
    pos_score = 0
    neg_score = 0
    for word, tag in nltk.pos_tag(tokens):
        # Score each token once and unpack both components, rather than
        # repeating the lookup for the positive and the negative part.
        word_pos, word_neg = word_sentiment_calculator(word, tag)
        pos_score += word_pos
        neg_score += word_neg
    return (pos_score, neg_score)



In [54]:

    
# Score the text currently bound to `review`.
sentence_sentiment_calculator(review)









    Out[54]:





(1.1239035087719298, 0.2609649122807018)



In [ ]: