In [32]:
import csv
import numpy as np
from watson_developer_cloud import NaturalLanguageUnderstandingV1
import watson_developer_cloud.natural_language_understanding.features.v1 as features
import json

In [33]:
# Build the Watson Natural Language Understanding client, pinned to the
# 2017-02-27 API version.
# NOTE(review): username is an empty string and no password is passed --
# presumably the real credentials were removed before sharing; confirm and
# supply them before running.
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2017-02-27',
    username='',
)

In [34]:
import pandas
# Read the articles CSV into a DataFrame, decoding the file as ISO-8859-1.
articles = pandas.read_csv(
    'scmp_news_2.csv',
    encoding="ISO-8859-1",
)

In [35]:
# Gather the `Content` value of every row of `articles`, in row order,
# into a plain Python list.
content = [articles.Content[row] for row in range(articles.shape[0])]

In [36]:
# Tally, per category, how many articles received each document-level
# sentiment label.
# Resulting structure:
#     dict_categories[<category>]['count'][<sentiment label>] -> int
dict_categories = {}
for article_text in content:
    # One request per article, asking for sentiment and categories together.
    response = natural_language_understanding.analyze(
        text=article_text,
        features=[features.Sentiment(), features.Categories()],
    )

    # Only the first category in the response is used. Its label is split on
    # '/' and element 1 becomes the bucket name (presumably labels look like
    # '/travel/...', making element 0 empty -- TODO confirm).
    top_level = response['categories'][0]['label'].split('/')[1]
    sentiment_label = response['sentiment']['document']['label']

    # setdefault/get create missing entries explicitly. The earlier version
    # relied on a bare `except:` to initialise a counter, which would also
    # have hidden any unrelated error (including KeyboardInterrupt).
    label_counts = dict_categories.setdefault(top_level, {'count': {}})['count']
    label_counts[sentiment_label] = label_counts.get(sentiment_label, 0) + 1

In [41]:
# Flatten dict_categories into three parallel lists -- categories, counts and
# sentiment -- holding one entry per (category, sentiment label) pair.
# All 'positive' entries come first, then 'negative', then 'neutral'; within
# each group the categories follow dict_categories' iteration order.
category_names = list(dict_categories)

# .get(label, 0) yields 0 for a label that was never recorded for a category.
# The earlier bare `except:` produced the same default but would also have
# swallowed any unrelated error.
ptive_counts = [dict_categories[name]['count'].get('positive', 0) for name in category_names]
ntive_counts = [dict_categories[name]['count'].get('negative', 0) for name in category_names]
neu_counts = [dict_categories[name]['count'].get('neutral', 0) for name in category_names]

# Every sentiment group lists the same categories; they are kept as separate
# list objects, as before.
ptive_categories = list(category_names)
ntive_categories = list(category_names)
neu_categories = list(category_names)

ptive_sentiment = ['positive'] * len(category_names)
ntive_sentiment = ['negative'] * len(category_names)
neu_sentiment = ['neutral'] * len(category_names)

categories = ptive_categories + ntive_categories + neu_categories
counts = ptive_counts + ntive_counts + neu_counts
sentiment = ptive_sentiment + ntive_sentiment + neu_sentiment

In [42]:
# Write the flattened (category, count, sentiment) entries to a CSV file with
# a header row.
# newline="" is what the csv module requires for files it writes to: without
# it, platforms that translate "\n" end up with "\r\r\n" line endings, which
# show up as blank rows (notably on Windows).
with open("scmp_categories.csv", "w", newline="") as toWrite:
    writer = csv.writer(toWrite, delimiter=",")
    writer.writerow(["Category","Count", "Sentiment"])
    # categories, counts and sentiment are index-aligned, so zipping them
    # yields one CSV row per entry.
    writer.writerows(zip(categories, counts, sentiment))

In [43]:
# Load scmp_categories.csv back in as a DataFrame (decoded as ISO-8859-1).
# Note: this rebinds `categories`, which previously held a plain list, so
# re-running the CSV-writing cell after this one would see the DataFrame.
categories = pandas.read_csv(
    'scmp_categories.csv',
    encoding="ISO-8859-1",
)
# Bare last expression so the notebook renders the table.
categories


Out[43]:
Category Count Sentiment
0 education 0 positive
1 law, govt and politics 7 positive
2 home and garden 4 positive
3 travel 9 positive
4 finance 1 positive
5 automotive and vehicles 3 positive
6 business and industrial 12 positive
7 pets 0 positive
8 technology and computing 3 positive
9 art and entertainment 3 positive
10 real estate 3 positive
11 science 2 positive
12 society 1 positive
13 health and fitness 0 positive
14 education 2 negative
15 law, govt and politics 11 negative
16 home and garden 4 negative
17 travel 16 negative
18 finance 2 negative
19 automotive and vehicles 0 negative
20 business and industrial 13 negative
21 pets 2 negative
22 technology and computing 3 negative
23 art and entertainment 0 negative
24 real estate 0 negative
25 science 0 negative
26 society 7 negative
27 health and fitness 1 negative
28 education 0 neutral
29 law, govt and politics 2 neutral
30 home and garden 0 neutral
31 travel 0 neutral
32 finance 0 neutral
33 automotive and vehicles 0 neutral
34 business and industrial 0 neutral
35 pets 0 neutral
36 technology and computing 0 neutral
37 art and entertainment 0 neutral
38 real estate 0 neutral
39 science 0 neutral
40 society 0 neutral
41 health and fitness 0 neutral

In [ ]: