In [48]:
#Load libraries
library(ggplot2)
library(dplyr)

In [108]:
#Load SPON Data
spon_sentiment_all <- na.omit(read.csv("/home/hao/workspace/6thSemester/DataScience/data/SPON_All.csv", fileEncoding="UTF-16"))
head(spon_sentiment_all)


Xyearmonthdaypositiv_absneutral_absnegativ_abspositiv_relneutral_relnegativ_rel
1 2001 1 1 4 4 12 0.020150500.018561870.05886288
2 2001 1 2 35 103 135 0.016194420.040361500.05168048
3 2001 1 3 69 120 120 0.031488330.057940160.05850294
4 2001 1 4 26 49 93 0.018678120.034445130.06457513
5 2001 1 5 45 102 133 0.021150490.046020440.05967728
6 2001 1 8 42 100 100 0.021822660.051493650.05300146

In [109]:
#Load Junge Freiheit Data
jf_sentiment_all <- na.omit(read.csv("/home/hao/workspace/6thSemester/DataScience/data/JF_ALL.csv", fileEncoding="UTF-8"))
head(jf_sentiment_all)


Xyearmonthdaypositiv_absneutral_absnegativ_abspositiv_relneutral_relnegativ_rel
1 1970 1 1 10 9 20 0.031250000.028125000.06250000
2 1997 6 13 58 102 156 0.024803970.042211350.07471271
3 1997 6 20 20 71 85 0.022346370.079329610.09497207
4 1997 6 27 12 22 54 0.014423080.026442310.06490385
5 1997 7 4 18 29 43 0.028037380.045171340.06697819
6 1997 9 5 12 47 44 0.015306120.059948980.05612245

In [110]:
#Load Bild Data
bild_sentiment_all <- na.omit(read.csv("/home/hao/workspace/6thSemester/DataScience/data/BILD_All.csv", fileEncoding="UTF-8"))
head(bild_sentiment_all)


Xyearmonthdaypositiv_absneutral_absnegativ_abspositiv_relneutral_relnegativ_rel
1 2007 12 5 11 33 49 0.024279840.082716050.11234568
2 2007 12 7 9 16 6 0.040000000.071111110.02666667
3 2007 12 10 8 33 22 0.027189540.099234360.06162465
4 2007 12 12 3 5 5 0.027777780.046296300.04247182
5 2007 12 13 16 36 44 0.032128510.072289160.08835341
6 2007 12 25 16 14 10 0.064516130.056451610.04032258

In [111]:
#Defining Method to aggregate positive_abs, neutral_abs, negative_abs to one value called polarity
calc_polarity <- function(positiv, neutral, negativ) {
    sum <- positiv + neutral + negativ
    polarity <- (positiv - negativ) / sum;
    polarity
}

In [112]:
#Group SPON Data by Year
spon_sentiment_byYear <- spon_sentiment_all %>% 
  group_by(year) %>%
  summarise(sum_positiv_abs = sum(positiv_abs), 
            sum_neutral_abs = sum(neutral_abs), 
            sum_negativ_abs = sum(negativ_abs), 
            polarity = calc_polarity(sum_positiv_abs, sum_neutral_abs, sum_negativ_abs), 
            mean_positiv_rel = mean(positiv_rel), 
            mean_neutral_rel = mean(neutral_rel), 
            mean_negativ_rel = mean(negativ_rel), 
            source = "SPON")
head(spon_sentiment_byYear)


yearsum_positiv_abssum_neutral_abssum_negativ_abspolaritymean_positiv_relmean_neutral_relmean_negativ_relsource
2001 17061 40026 45843 -0.27962690.021156690.047164680.06094070SPON
2002 19963 46281 58506 -0.30896190.020515690.045810020.06203490SPON
2003 20419 47891 58816 -0.30203890.021315700.046829800.06166580SPON
2004 21964 50029 63053 -0.30425930.021328060.045967300.06173111SPON
2005 28573 66727 86556 -0.31884020.020566200.046594780.06138549SPON
2006 35739 82787 105757 -0.31218590.020470340.047807350.06207602SPON

In [113]:
#Group JF Data by Year
jf_sentiment_byYear <- jf_sentiment_all %>% 
  group_by(year) %>%
  summarise(sum_positiv_abs = sum(positiv_abs), 
            sum_neutral_abs = sum(neutral_abs), 
            sum_negativ_abs = sum(negativ_abs), 
            polarity = calc_polarity(sum_positiv_abs, sum_neutral_abs, sum_negativ_abs), 
            mean_positiv_rel = mean(positiv_rel), 
            mean_neutral_rel = mean(neutral_rel), 
            mean_negativ_rel = mean(negativ_rel), 
            source = "JF")
#filter
jf_sentiment_byYear <- jf_sentiment_byYear[(jf_sentiment_byYear>=2001),]
head(jf_sentiment_byYear)


yearsum_positiv_abssum_neutral_abssum_negativ_abspolaritymean_positiv_relmean_neutral_relmean_negativ_relsource
2001 603 1536 1755 -0.29583980.024293570.054445370.06530460JF
2002 3046 7194 10130 -0.34776630.021553080.050308540.06907093JF
2003 33078 80859 106913 -0.33432190.020918550.051231630.06888684JF
2004 31223 76285 101556 -0.33641850.019307420.049376460.06833263JF
2005 16484 39802 53293 -0.33591290.019346920.049119050.06956803JF
2006 17769 43598 59105 -0.34311710.019419660.042995060.07025098JF

In [117]:
#Group Bild Data by Year
bild_sentiment_byYear <- bild_sentiment_all %>% 
  group_by(year) %>%
  summarise(sum_positiv_abs = sum(positiv_abs), 
            sum_neutral_abs = sum(neutral_abs), 
            sum_negativ_abs = sum(negativ_abs), 
            polarity = calc_polarity(sum_positiv_abs, sum_neutral_abs, sum_negativ_abs), 
            mean_positiv_rel = mean(positiv_rel), 
            mean_neutral_rel = mean(neutral_rel), 
            mean_negativ_rel = mean(negativ_rel),
            source = "BILD")
bild_sentiment_byYear$year <- as.numeric(as.character(bild_sentiment_byYear$year))
head(bild_sentiment_byYear)


yearsum_positiv_abssum_neutral_abssum_negativ_abspolaritymean_positiv_relmean_neutral_relmean_negativ_relsource
2007 99 221 216 -0.21828360.034587180.067591510.06364433BILD
2008 954 2224 2537 -0.27699040.026466460.056084310.06326936BILD
2009 5482 11663 12296 -0.23144590.029907950.057692470.06205002BILD
2010 10082 22059 23749 -0.24453390.027780700.058361940.06338421BILD
2011 13231 28312 32570 -0.26093940.027596030.056830450.06317456BILD
2012 13203 28881 32248 -0.25621540.027055770.057685490.06348543BILD

In [125]:
#Plotting Polarity of all three News Sites
ggplot() + 
#SPON
geom_line(data = spon_sentiment_byYear, aes(x=year, y=polarity, color = source)) + 
geom_point(data = spon_sentiment_byYear, aes(x=year, y=polarity, color = source)) +
geom_smooth(data = spon_sentiment_byYear, aes(x=year, y=polarity, color = source), method = 'loess') + 
#JF
geom_line(data = jf_sentiment_byYear, aes(x=year, y=polarity, color = source)) + 
geom_point(data = jf_sentiment_byYear, aes(x=year, y=polarity, color = source)) +
geom_smooth(data = jf_sentiment_byYear, aes(x=year, y=polarity, color = source), method = 'loess') + 
#BILD
geom_line(data = bild_sentiment_byYear, aes(x=year, y=polarity, color = source)) + 
geom_point(data = bild_sentiment_byYear, aes(x=year, y=polarity, color = source)) +
geom_smooth(data = bild_sentiment_byYear, aes(x=year, y=polarity, color = source), method = 'loess')


Warning message:
“Removed 67 rows containing non-finite values (stat_smooth).”Warning message:
“Removed 67 rows containing missing values (geom_point).”

In [ ]: