notebook.community

Edit and run



In [32]:

    
import csv
import json
import os

import numpy as np
from watson_developer_cloud import NaturalLanguageUnderstandingV1
import watson_developer_cloud.natural_language_understanding.features.v1 as features



In [33]:

    
# Build the Watson Natural Language Understanding client used by the analysis
# cell further down. Credentials are read from the environment instead of
# being written into the notebook; when the variables are unset the call is
# equivalent to the original one (empty username, no password).
# NOTE(review): WATSON_NLU_USERNAME / WATSON_NLU_PASSWORD are names chosen in
# this notebook, not variables the SDK looks up by itself -- export them
# before starting the kernel.
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2017-02-27',
    username=os.environ.get('WATSON_NLU_USERNAME', ''),
    password=os.environ.get('WATSON_NLU_PASSWORD'),
)



In [34]:

    
import pandas

# Load the article table from disk; the file is decoded as ISO-8859-1.
articles = pandas.read_csv(
    'scmp_news_2.csv',
    encoding="ISO-8859-1",
)



In [35]:

    
# Gather the `Content` value of every row of `articles`, in row order,
# into a plain Python list.
content = [articles.Content[row] for row in range(articles.shape[0])]



In [36]:

    
# Tally document-level sentiment per category across all articles.
# Resulting structure: {category: {'count': {sentiment_label: n_articles}}}
dict_categories = {}
for text in content:
    # One service call per article, requesting sentiment and categories.
    response = natural_language_understanding.analyze(
        text=text,
        features=[features.Sentiment(), features.Categories()],
    )

    # Only the first returned category is used; its label is split on '/'
    # and element 1 becomes the bucket name.
    # NOTE(review): presumably labels start with '/', making element 0 empty
    # and element 1 the top-level category -- confirm against the API docs.
    # NOTE(review): an empty 'categories' list would raise IndexError here;
    # decide whether such articles should be skipped or bucketed separately.
    category = response['categories'][0]['label'].split('/')
    top_level = category[1]
    label = response['sentiment']['document']['label']

    # setdefault creates the nested entry the first time a category is seen;
    # .get(label, 0) starts a sentiment at zero without needing try/except.
    counts_for_category = dict_categories.setdefault(top_level, {'count': {}})['count']
    counts_for_category[label] = counts_for_category.get(label, 0) + 1



In [41]:

    
# Flatten the nested tallies in `dict_categories` into three parallel lists
# (`categories`, `counts`, `sentiment`). Each category appears three times:
# once in the positive block, once in the negative block and once in the
# neutral block, in that order.
category_names = list(dict_categories)

# A sentiment that a category never received has no key in its 'count' dict;
# dict.get reports it as 0 without resorting to a bare `except`.
ptive_counts = [dict_categories[name]['count'].get('positive', 0) for name in category_names]
ntive_counts = [dict_categories[name]['count'].get('negative', 0) for name in category_names]
neu_counts = [dict_categories[name]['count'].get('neutral', 0) for name in category_names]

# Independent copies so each name keeps referring to its own list.
ptive_categories = list(category_names)
ntive_categories = list(category_names)
neu_categories = list(category_names)

ptive_sentiment = ['positive'] * len(category_names)
ntive_sentiment = ['negative'] * len(category_names)
neu_sentiment = ['neutral'] * len(category_names)

categories = ptive_categories + ntive_categories + neu_categories
counts = ptive_counts + ntive_counts + neu_counts
sentiment = ptive_sentiment + ntive_sentiment + neu_sentiment



In [42]:

    
# Write the flattened tallies to disk as a three-column CSV, header first,
# then one row per (category, count, sentiment) triple.
# NOTE(review): on Python 3 the csv documentation recommends opening the file
# with newline='' -- confirm the interpreter version before changing this.
header = ["Category", "Count", "Sentiment"]
with open("scmp_categories.csv", "w") as csv_file:
    writer = csv.writer(csv_file, delimiter=",")
    writer.writerow(header)
    for idx, category_name in enumerate(categories):
        writer.writerow([category_name, counts[idx], sentiment[idx]])



In [43]:

    
# Read the CSV back into a DataFrame and show it as the cell output.
# This rebinds `categories`, which until now named the plain list that was
# written to the file, so the CSV-writing cell cannot be re-run after this one
# without first rebuilding the lists.
categories = pandas.read_csv(
    'scmp_categories.csv',
    encoding="ISO-8859-1",
)
categories









    Out[43]:






  
    
      
      Category
      Count
      Sentiment
    
  
  
    
      0
      education
      0
      positive
    
    
      1
      law, govt and politics
      7
      positive
    
    
      2
      home and garden
      4
      positive
    
    
      3
      travel
      9
      positive
    
    
      4
      finance
      1
      positive
    
    
      5
      automotive and vehicles
      3
      positive
    
    
      6
      business and industrial
      12
      positive
    
    
      7
      pets
      0
      positive
    
    
      8
      technology and computing
      3
      positive
    
    
      9
      art and entertainment
      3
      positive
    
    
      10
      real estate
      3
      positive
    
    
      11
      science
      2
      positive
    
    
      12
      society
      1
      positive
    
    
      13
      health and fitness
      0
      positive
    
    
      14
      education
      2
      negative
    
    
      15
      law, govt and politics
      11
      negative
    
    
      16
      home and garden
      4
      negative
    
    
      17
      travel
      16
      negative
    
    
      18
      finance
      2
      negative
    
    
      19
      automotive and vehicles
      0
      negative
    
    
      20
      business and industrial
      13
      negative
    
    
      21
      pets
      2
      negative
    
    
      22
      technology and computing
      3
      negative
    
    
      23
      art and entertainment
      0
      negative
    
    
      24
      real estate
      0
      negative
    
    
      25
      science
      0
      negative
    
    
      26
      society
      7
      negative
    
    
      27
      health and fitness
      1
      negative
    
    
      28
      education
      0
      neutral
    
    
      29
      law, govt and politics
      2
      neutral
    
    
      30
      home and garden
      0
      neutral
    
    
      31
      travel
      0
      neutral
    
    
      32
      finance
      0
      neutral
    
    
      33
      automotive and vehicles
      0
      neutral
    
    
      34
      business and industrial
      0
      neutral
    
    
      35
      pets
      0
      neutral
    
    
      36
      technology and computing
      0
      neutral
    
    
      37
      art and entertainment
      0
      neutral
    
    
      38
      real estate
      0
      neutral
    
    
      39
      science
      0
      neutral
    
    
      40
      society
      0
      neutral
    
    
      41
      health and fitness
      0
      neutral



In [ ]:

	Category	Count	Sentiment
0	education	0	positive
1	law, govt and politics	7	positive
2	home and garden	4	positive
3	travel	9	positive
4	finance	1	positive
5	automotive and vehicles	3	positive
6	business and industrial	12	positive
7	pets	0	positive
8	technology and computing	3	positive
9	art and entertainment	3	positive
10	real estate	3	positive
11	science	2	positive
12	society	1	positive
13	health and fitness	0	positive
14	education	2	negative
15	law, govt and politics	11	negative
16	home and garden	4	negative
17	travel	16	negative
18	finance	2	negative
19	automotive and vehicles	0	negative
20	business and industrial	13	negative
21	pets	2	negative
22	technology and computing	3	negative
23	art and entertainment	0	negative
24	real estate	0	negative
25	science	0	negative
26	society	7	negative
27	health and fitness	1	negative
28	education	0	neutral
29	law, govt and politics	2	neutral
30	home and garden	0	neutral
31	travel	0	neutral
32	finance	0	neutral
33	automotive and vehicles	0	neutral
34	business and industrial	0	neutral
35	pets	0	neutral
36	technology and computing	0	neutral
37	art and entertainment	0	neutral
38	real estate	0	neutral
39	science	0	neutral
40	society	0	neutral
41	health and fitness	0	neutral