In [32]:
import csv
import numpy as np
from watson_developer_cloud import NaturalLanguageUnderstandingV1
import watson_developer_cloud.natural_language_understanding.features.v1 as features
import json
In [33]:
# Watson Natural Language Understanding client used by the analysis loop below.
# NOTE(review): username is an empty string and no password is passed here —
# presumably the credentials were stripped before sharing; confirm before running.
natural_language_understanding = NaturalLanguageUnderstandingV1(
    username='',
    version='2017-02-27',
)
In [34]:
import pandas
# Load the article table from disk, decoding the file as ISO-8859-1.
articles = pandas.read_csv(
    'scmp_news_2.csv',
    encoding="ISO-8859-1",
)
In [35]:
# Collect the Content column into a plain Python list, one entry per row of
# `articles`. Rows are looked up by the labels 0..n-1, i.e. this relies on the
# frame carrying an integer index starting at 0 — TODO confirm if the load changes.
content = [articles.Content[row] for row in range(len(articles))]
In [36]:
# Tally document-level sentiment per top-level category.
#
# Resulting shape:
#     dict_categories[<category>]['count'][<sentiment label>] -> number of articles
#
# One analyze() request is made per article, so this cell is as slow as the
# service round-trip times the number of articles.
dict_categories = {}
for text in content:
    response = natural_language_understanding.analyze(
        text=text,
        features=[features.Sentiment(), features.Categories()],
    )

    # Only the first category returned for the article is used. Its label is
    # split on '/' and segment 1 is kept — presumably labels start with '/',
    # making segment 1 the top-level category; verify against the API output.
    top_category = response['categories'][0]['label'].split('/')[1]
    sentiment_label = response['sentiment']['document']['label']

    # setdefault/get replace the former bare `except:` that doubled as the
    # "first time this label is seen" branch and would also have hidden any
    # unrelated error (including KeyboardInterrupt) raised while counting.
    label_counts = dict_categories.setdefault(top_category, {'count': {}})['count']
    label_counts[sentiment_label] = label_counts.get(sentiment_label, 0) + 1
In [41]:
# Flatten dict_categories into three parallel lists (categories, counts,
# sentiment) in long format: every category appears three times, first in the
# 'positive' section, then 'negative', then 'neutral'.
ptive_counts = []
ntive_counts = []
neu_counts = []
ptive_categories = []
ntive_categories = []
neu_categories = []

for category_name in dict_categories:
    # A sentiment label never recorded for this category counts as 0.
    # dict.get replaces the former bare `except:` blocks, which turned *any*
    # failure into a silent 0 rather than only the missing-key case.
    label_counts = dict_categories[category_name].get('count', {})
    ptive_counts.append(label_counts.get('positive', 0))
    ntive_counts.append(label_counts.get('negative', 0))
    neu_counts.append(label_counts.get('neutral', 0))

    ptive_categories.append(category_name)
    ntive_categories.append(category_name)
    neu_categories.append(category_name)

# One sentiment tag per category in each section; all three sections have the
# same length because every category is appended to every list above.
ptive_sentiment = ['positive'] * len(ptive_categories)
ntive_sentiment = ['negative'] * len(ptive_categories)
neu_sentiment = ['neutral'] * len(ptive_categories)

# Concatenate the sections in the same order so that index i lines up across
# categories / counts / sentiment.
categories = ptive_categories + ntive_categories + neu_categories
counts = ptive_counts + ntive_counts + neu_counts
sentiment = ptive_sentiment + ntive_sentiment + neu_sentiment
In [42]:
# Export the long-format tallies as CSV: a header row, then one
# (Category, Count, Sentiment) row per entry of the parallel lists.
#
# newline='' is what the csv module documentation asks for when handing a file
# object to csv.writer (Python 3): the writer emits its own '\r\n' terminators,
# and without it platforms that translate '\n' on write produce '\r\r\n'.
with open("scmp_categories.csv", "w", newline="") as toWrite:
    writer = csv.writer(toWrite, delimiter=",")
    writer.writerow(["Category", "Count", "Sentiment"])
    # The three lists are index-aligned, so zipping them yields the rows directly.
    writer.writerows(zip(categories, counts, sentiment))
In [43]:
# Read the exported summary back in and show it as this cell's output.
# NOTE(review): this rebinds `categories` to a DataFrame, while the export cell
# indexes `categories` as a list — re-running that cell after this one would
# operate on the DataFrame instead; confirm whether a separate name is wanted.
categories = pandas.read_csv(
    'scmp_categories.csv',
    encoding="ISO-8859-1",
)
categories
Out[43]:
In [ ]: