In [1]:
library(data.table)

In [2]:
## Using a table of article assessments and views, build tables
## (matrices) that shows the number of dissonant articles per
## assessment category based on sorting by popularity.
##
## The underlying assumption is that in an ideal system with a limited
## and fixed amount of resources (in other words, popularity and high quality
## artefacts does not increase the amount of resources in the system),
## popularity ranking and assessment class follow a 1-to-1 relationship.
## We can therefore sort by popularity and group articles that way
## because work will be prioritised by popularity.

## DATA ASSUMPTION: views_with_redirects from resolve-redirects.R
## is loaded into memory.

## 3: build a 2x2 matrix of assessment classes and popularity classes
## 

## Assessment classes in ascending order of quality.

In [3]:
assessment_classes = c('E', 'D', 'C', 'B', 'A');

In [4]:
quality_prediction_and_page_views <- read.table("../../results/sql_queries/entity_views_and_aggregated_revisions/entity_views_and_aggregated_revisions_and_quality_scoring_20121101.tsv", header=FALSE, sep="\t")

In [5]:
quality_prediction_and_page_views <- data.table(quality_prediction_and_page_views)

In [6]:
colnames(quality_prediction_and_page_views) <- c('entity_id','number_of_revisions', 'page_views', 'prediction')

In [7]:
summary(quality_prediction_and_page_views)


   entity_id    number_of_revisions   page_views        prediction
 Q1     :   1   Min.   :  14        Min.   :2.300e+01   D:1133    
 Q100   :   1   1st Qu.:  95        1st Qu.:6.075e+04   E:3259    
 Q1000  :   1   Median : 203        Median :5.756e+05             
 Q1001  :   1   Mean   : 246        Mean   :9.044e+06             
 Q1002  :   1   3rd Qu.: 302        3rd Qu.:2.352e+06             
 Q1003  :   1   Max.   :3152        Max.   :5.668e+09             
 (Other):4386                                                     

In [8]:
## 0: calculate number of articles in each assessment class
n_per_class = quality_prediction_and_page_views[, list(narticles=sum(.N)), by='prediction']

In [9]:
setkey(n_per_class, prediction);
## NOTE: setkey allows us to do n_per_class['GA']$narticles to get counts

In [10]:
## 1: order articles by popularity
articles_by_pop = quality_prediction_and_page_views[order(quality_prediction_and_page_views$page_views)][,list(entity_id, prediction, page_views)];

In [11]:
## 2: assign popularity assessment class based on rank
##   (buckets based on number of articles in each class)
articles_by_pop[, pop_class := ''];
articles_by_pop[, seqNum := seq_len(nrow(articles_by_pop))];


entity_idpredictionpage_viewspop_class
Q3086E 23
Q3265E 46
Q4483E 52
Q3152E 71
Q3517E 74
Q4017E 90
Q3649E 112
Q3254E 120
Q3527E 121
Q3522E 128
Q3186E 138
Q3634E 161
Q4486E 163
Q3652E 176
Q4475E 179
Q3633E 202
Q3287E 221
Q3781E 237
Q3654E 248
Q3641E 286
Q2711D 287
Q4281E 298
Q3293E 304
Q3084E 307
Q293 D 329
Q2631E 330
Q3307E 330
Q4003E 337
Q4170E 338
Q3979E 341
Q408 D 108249066
Q212 D 121262940
Q90 D 132079969
Q739 D 149356287
Q150 D 154676097
Q16 D 161566243
Q60 D 161626758
Q188 D 173805898
Q2184 E 226793302
Q515 E 233190433
Q649 D 243081566
Q1321 D 252584097
Q2736 E 270665514
Q414 D 282547432
Q96 D 307141203
Q38 D 363184639
Q328 E 378666601
Q183 D 440162350
Q29 D 500913282
Q145 D 595132607
Q159 D 653480145
Q142 D 679432828
Q1860 E 1419981073
Q356 D 2047631596
Q565 D 2052996261
Q918 E 2063217449
Q866 E 2079749157
Q355 D 2093900731
Q30 D 2277746226
Q5 D 5668008721
entity_idpredictionpage_viewspop_classseqNum
Q3086E 23 1
Q3265E 46 2
Q4483E 52 3
Q3152E 71 4
Q3517E 74 5
Q4017E 90 6
Q3649E 112 7
Q3254E 120 8
Q3527E 121 9
Q3522E 128 10
Q3186E 138 11
Q3634E 161 12
Q4486E 163 13
Q3652E 176 14
Q4475E 179 15
Q3633E 202 16
Q3287E 221 17
Q3781E 237 18
Q3654E 248 19
Q3641E 286 20
Q2711D 287 21
Q4281E 298 22
Q3293E 304 23
Q3084E 307 24
Q293 D 329 25
Q2631E 330 26
Q3307E 330 27
Q4003E 337 28
Q4170E 338 29
Q3979E 341 30
Q408 D 108249066 4363
Q212 D 121262940 4364
Q90 D 132079969 4365
Q739 D 149356287 4366
Q150 D 154676097 4367
Q16 D 161566243 4368
Q60 D 161626758 4369
Q188 D 173805898 4370
Q2184 E 226793302 4371
Q515 E 233190433 4372
Q649 D 243081566 4373
Q1321 D 252584097 4374
Q2736 E 270665514 4375
Q414 D 282547432 4376
Q96 D 307141203 4377
Q38 D 363184639 4378
Q328 E 378666601 4379
Q183 D 440162350 4380
Q29 D 500913282 4381
Q145 D 595132607 4382
Q159 D 653480145 4383
Q142 D 679432828 4384
Q1860 E 1419981073 4385
Q356 D 2047631596 4386
Q565 D 2052996261 4387
Q918 E 2063217449 4388
Q866 E 2079749157 4389
Q355 D 2093900731 4390
Q30 D 2277746226 4391
Q5 D 5668008721 4392

In [12]:
assign_pop_class = function(dataset, classes, class_n) {
  ## Based on the per-class number of articles in class_n
  ## assign popularity based on classes to dataset.
  prev_idx = 0;
  for(rating in classes) {
    start_idx = prev_idx + 1;
    end_idx = start_idx + class_n[prediction == rating]$narticles;
    print(paste('start_idx =', start_idx, ', end_idx = ', end_idx));
    dataset[seqNum >= start_idx & seqNum <= end_idx, pop_class := rating];
    prev_idx = end_idx -1;
  }
  dataset;
}

In [13]:
articles_by_pop = assign_pop_class(articles_by_pop,
  assessment_classes, n_per_class);


[1] "start_idx = 1 , end_idx =  3260"
[1] "start_idx = 3260 , end_idx =  4393"
[1] "start_idx = 4393 , end_idx =  "
[1] "start_idx =  , end_idx =  "
[1] "start_idx =  , end_idx =  "

In [14]:
create_dissonance_matrix = function(articledata, classes) {
  d_mtrx = matrix(0, nrow=length(classes), ncol=length(classes));
  rownames(d_mtrx) = classes;
  colnames(d_mtrx) = classes;

  for(real_rating in classes) {
    for(pop_rating in classes) {
      d_mtrx[real_rating, pop_rating] = length(articledata[prediction == real_rating & pop_class == pop_rating]$entity_id);
    }
  }
  d_mtrx;
}

In [15]:
## Based on direct hits to articles:
create_dissonance_matrix(articles_by_pop, assessment_classes)


EDCBA
E2731528 0 0 0
D 528605 0 0 0
C 0 0 0 0 0
B 0 0 0 0 0
A 0 0 0 0 0

In [16]:
dissonance_matrix = create_dissonance_matrix(articles_by_pop,
  assessment_classes);

In [17]:
# Total misaligned entities
(dissonance_matrix[1,1]+dissonance_matrix[2,2]+dissonance_matrix[3,3]+dissonance_matrix[4,4]+dissonance_matrix[5,5])/sum(dissonance_matrix[,])


0.759562841530055

In [18]:
# A class quality and A class views over A class quality
dissonance_matrix[5,5]/sum(dissonance_matrix[5,])


NaN

In [19]:
# A class quality and E and D class views over A class quality
(dissonance_matrix[5,1]+dissonance_matrix[5,2])/sum(dissonance_matrix[5,])


NaN

In [20]:
# A class quality and < A class views
(dissonance_matrix[5,1]+dissonance_matrix[5,2]+dissonance_matrix[5,3]+dissonance_matrix[5,4])/sum(dissonance_matrix[5,])


NaN

In [21]:
# < A class quality and A class views
(dissonance_matrix[1,5]+dissonance_matrix[2,5]+dissonance_matrix[3,5]+dissonance_matrix[4,5])/sum(dissonance_matrix[,5])


NaN

In [22]:
prediction_e_pop_class_a <- merge(articles_by_pop[prediction == 'E' & pop_class == 'A'],quality_prediction_and_page_views, by='entity_id')[, c("entity_id","page_views.x", "number_of_revisions")]

In [23]:
head(prediction_e_pop_class_a)


entity_idpage_views.xnumber_of_revisions

In [24]:
## Q: why do I get _two_ pageid columns?  Solution is to do the selection
## on the joined table, not as a select _in_ the join.

## Dissonance matrix proportions by row (..., 1) and column (..., 2)
## rounded to 1 decimal places.

In [25]:
round(100*prop.table(dissonance_matrix, 1), 1);


EDCBA
E83.816.2 0 0 0
D46.653.4 0 0 0
C NaN NaNNaN NaN NaN
B NaN NaNNaN NaN NaN
A NaN NaNNaN NaN NaN

In [26]:
round(100*prop.table(dissonance_matrix, 2), 1);


EDCBA
E83.846.6NaN NaN NaN
D16.253.4NaN NaN NaN
C 0.0 0.0NaN NaN NaN
B 0.0 0.0NaN NaN NaN
A 0.0 0.0NaN NaN NaN

In [27]:
## Let's write the stubs out to a file
write.table(merge(articles_by_pop[(prediction == 'E' | prediction == 'D' | prediction == 'C' | prediction == 'B') & pop_class == 'A'], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201211_a_class_views_less_than_a_quality.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');
merge(articles_by_pop[(prediction == 'E' | prediction == 'D' | prediction == 'C' | prediction == 'B') & pop_class == 'A'], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")]


entity_idpop_classprediction.x

In [28]:
write.table(merge(articles_by_pop[prediction == 'A' & (pop_class == 'B' | pop_class == 'C' | pop_class == 'D' | pop_class == 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201211_a_class_quality_less_than_a_views.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

In [29]:
write.table(merge(articles_by_pop[(prediction == 'A' & pop_class == 'A') | (prediction == 'B' & pop_class == 'B') | (prediction == 'C' & pop_class == 'C') | (prediction == 'D' & pop_class == 'D') | (prediction == 'E' & pop_class == 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201211_aligned.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

In [30]:
write.table(merge(articles_by_pop[(prediction == 'A' & pop_class != 'A') | (prediction == 'B' & pop_class != 'B') | (prediction == 'C' & pop_class != 'C') | (prediction == 'D' & pop_class != 'D') | (prediction == 'E' & pop_class != 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201211_misaligned.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

Dissonance Measures (was seperate file)


In [29]:
## Various ways of measuring dissonance.

## DATA ASSUMPTION: articles_by_pop from build-dissonance-table.R
## is loaded into memory.

## None/Moderate/High measure of dissonance

In [30]:
articles_by_pop[, pop_class := ordered(pop_class, assessment_classes)];


entity_idpredictionpage_viewspop_classseqNum
Q3086E 23 E 1
Q3265E 46 E 2
Q4483E 52 E 3
Q3152E 71 E 4
Q3517E 74 E 5
Q4017E 90 E 6
Q3649E 112 E 7
Q3254E 120 E 8
Q3527E 121 E 9
Q3522E 128 E 10
Q3186E 138 E 11
Q3634E 161 E 12
Q4486E 163 E 13
Q3652E 176 E 14
Q4475E 179 E 15
Q3633E 202 E 16
Q3287E 221 E 17
Q3781E 237 E 18
Q3654E 248 E 19
Q3641E 286 E 20
Q2711D 287 E 21
Q4281E 298 E 22
Q3293E 304 E 23
Q3084E 307 E 24
Q293 D 329 E 25
Q2631E 330 E 26
Q3307E 330 E 27
Q4003E 337 E 28
Q4170E 338 E 29
Q3979E 341 E 30
Q408 D 108249066D 4363
Q212 D 121262940D 4364
Q90 D 132079969D 4365
Q739 D 149356287D 4366
Q150 D 154676097D 4367
Q16 D 161566243D 4368
Q60 D 161626758D 4369
Q188 D 173805898D 4370
Q2184 E 226793302D 4371
Q515 E 233190433D 4372
Q649 D 243081566D 4373
Q1321 D 252584097D 4374
Q2736 E 270665514D 4375
Q414 D 282547432D 4376
Q96 D 307141203D 4377
Q38 D 363184639D 4378
Q328 E 378666601D 4379
Q183 D 440162350D 4380
Q29 D 500913282D 4381
Q145 D 595132607D 4382
Q159 D 653480145D 4383
Q142 D 679432828D 4384
Q1860 E 1419981073D 4385
Q356 D 2047631596D 4386
Q565 D 2052996261D 4387
Q918 E 2063217449D 4388
Q866 E 2079749157D 4389
Q355 D 2093900731D 4390
Q30 D 2277746226D 4391
Q5 D 5668008721D 4392

In [31]:
dissonance_metric = c('High negative', 'Moderate negative',
  'None', 'Moderate positive', 'High positive');

In [32]:
articles_by_pop[, dissonance := factor(NA, dissonance_metric)];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA

In [33]:
## NOTE: because pop_class is of class ordered, we can use
##       expressions like "pop_class < 'C'" as expected

In [34]:
## A: None if A, Moderate if A, High elsewhere
articles_by_pop[prediction == 'A' & pop_class <= 'C',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'A' & pop_class == 'B',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'A' & pop_class == 'A',
                dissonance := 'None'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA

In [35]:
## B: 
articles_by_pop[prediction == 'B' & pop_class <= 'D',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'B' & pop_class == 'C',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'B' & pop_class == 'B',
                dissonance := 'None'];
articles_by_pop[prediction == 'B' & pop_class == 'A',
                dissonance := 'Moderate positive'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA

In [36]:
## C: 
articles_by_pop[prediction == 'C' & pop_class == 'E',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'C' & pop_class == 'D',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'C' & pop_class == 'C',
                dissonance := 'None'];
articles_by_pop[prediction == 'C' & pop_class == 'B',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'C' & pop_class == 'A',
                dissonance := 'High positive'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086E 23 E 1 NA
Q3265E 46 E 2 NA
Q4483E 52 E 3 NA
Q3152E 71 E 4 NA
Q3517E 74 E 5 NA
Q4017E 90 E 6 NA
Q3649E 112 E 7 NA
Q3254E 120 E 8 NA
Q3527E 121 E 9 NA
Q3522E 128 E 10 NA
Q3186E 138 E 11 NA
Q3634E 161 E 12 NA
Q4486E 163 E 13 NA
Q3652E 176 E 14 NA
Q4475E 179 E 15 NA
Q3633E 202 E 16 NA
Q3287E 221 E 17 NA
Q3781E 237 E 18 NA
Q3654E 248 E 19 NA
Q3641E 286 E 20 NA
Q2711D 287 E 21 NA
Q4281E 298 E 22 NA
Q3293E 304 E 23 NA
Q3084E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631E 330 E 26 NA
Q3307E 330 E 27 NA
Q4003E 337 E 28 NA
Q4170E 338 E 29 NA
Q3979E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA

In [37]:
## D
articles_by_pop[prediction == 'D' & pop_class == 'E',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'D' & pop_class == 'D',
                dissonance := 'None'];
articles_by_pop[prediction == 'D' & pop_class == 'C',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'D' & pop_class >= 'B',
                dissonance := 'High positive'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
Q408 D 108249066D 4363 NA
Q212 D 121262940D 4364 NA
Q90 D 132079969D 4365 NA
Q739 D 149356287D 4366 NA
Q150 D 154676097D 4367 NA
Q16 D 161566243D 4368 NA
Q60 D 161626758D 4369 NA
Q188 D 173805898D 4370 NA
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 NA
Q1321 D 252584097D 4374 NA
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 NA
Q96 D 307141203D 4377 NA
Q38 D 363184639D 4378 NA
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 NA
Q29 D 500913282D 4381 NA
Q145 D 595132607D 4382 NA
Q159 D 653480145D 4383 NA
Q142 D 679432828D 4384 NA
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 NA
Q565 D 2052996261D 4387 NA
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 NA
Q30 D 2277746226D 4391 NA
Q5 D 5668008721D 4392 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
Q408 D 108249066D 4363 None
Q212 D 121262940D 4364 None
Q90 D 132079969D 4365 None
Q739 D 149356287D 4366 None
Q150 D 154676097D 4367 None
Q16 D 161566243D 4368 None
Q60 D 161626758D 4369 None
Q188 D 173805898D 4370 None
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 None
Q1321 D 252584097D 4374 None
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 None
Q96 D 307141203D 4377 None
Q38 D 363184639D 4378 None
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 None
Q29 D 500913282D 4381 None
Q145 D 595132607D 4382 None
Q159 D 653480145D 4383 None
Q142 D 679432828D 4384 None
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 None
Q565 D 2052996261D 4387 None
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 None
Q30 D 2277746226D 4391 None
Q5 D 5668008721D 4392 None
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
Q408 D 108249066D 4363 None
Q212 D 121262940D 4364 None
Q90 D 132079969D 4365 None
Q739 D 149356287D 4366 None
Q150 D 154676097D 4367 None
Q16 D 161566243D 4368 None
Q60 D 161626758D 4369 None
Q188 D 173805898D 4370 None
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 None
Q1321 D 252584097D 4374 None
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 None
Q96 D 307141203D 4377 None
Q38 D 363184639D 4378 None
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 None
Q29 D 500913282D 4381 None
Q145 D 595132607D 4382 None
Q159 D 653480145D 4383 None
Q142 D 679432828D 4384 None
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 None
Q565 D 2052996261D 4387 None
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 None
Q30 D 2277746226D 4391 None
Q5 D 5668008721D 4392 None
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
Q408 D 108249066D 4363 None
Q212 D 121262940D 4364 None
Q90 D 132079969D 4365 None
Q739 D 149356287D 4366 None
Q150 D 154676097D 4367 None
Q16 D 161566243D 4368 None
Q60 D 161626758D 4369 None
Q188 D 173805898D 4370 None
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 None
Q1321 D 252584097D 4374 None
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 None
Q96 D 307141203D 4377 None
Q38 D 363184639D 4378 None
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 None
Q29 D 500913282D 4381 None
Q145 D 595132607D 4382 None
Q159 D 653480145D 4383 None
Q142 D 679432828D 4384 None
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 None
Q565 D 2052996261D 4387 None
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 None
Q30 D 2277746226D 4391 None
Q5 D 5668008721D 4392 None

In [38]:
## E
articles_by_pop[prediction == 'E' & pop_class == 'E',
                dissonance := 'None'];
articles_by_pop[prediction == 'E' & pop_class == 'D',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'E' & pop_class >= 'C',
                dissonance := 'High positive'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086 E 23 E 1 None
Q3265 E 46 E 2 None
Q4483 E 52 E 3 None
Q3152 E 71 E 4 None
Q3517 E 74 E 5 None
Q4017 E 90 E 6 None
Q3649 E 112 E 7 None
Q3254 E 120 E 8 None
Q3527 E 121 E 9 None
Q3522 E 128 E 10 None
Q3186 E 138 E 11 None
Q3634 E 161 E 12 None
Q4486 E 163 E 13 None
Q3652 E 176 E 14 None
Q4475 E 179 E 15 None
Q3633 E 202 E 16 None
Q3287 E 221 E 17 None
Q3781 E 237 E 18 None
Q3654 E 248 E 19 None
Q3641 E 286 E 20 None
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 None
Q3293 E 304 E 23 None
Q3084 E 307 E 24 None
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 None
Q3307 E 330 E 27 None
Q4003 E 337 E 28 None
Q4170 E 338 E 29 None
Q3979 E 341 E 30 None
Q408 D 108249066D 4363 None
Q212 D 121262940D 4364 None
Q90 D 132079969D 4365 None
Q739 D 149356287D 4366 None
Q150 D 154676097D 4367 None
Q16 D 161566243D 4368 None
Q60 D 161626758D 4369 None
Q188 D 173805898D 4370 None
Q2184 E 226793302D 4371 NA
Q515 E 233190433D 4372 NA
Q649 D 243081566D 4373 None
Q1321 D 252584097D 4374 None
Q2736 E 270665514D 4375 NA
Q414 D 282547432D 4376 None
Q96 D 307141203D 4377 None
Q38 D 363184639D 4378 None
Q328 E 378666601D 4379 NA
Q183 D 440162350D 4380 None
Q29 D 500913282D 4381 None
Q145 D 595132607D 4382 None
Q159 D 653480145D 4383 None
Q142 D 679432828D 4384 None
Q1860 E 1419981073D 4385 NA
Q356 D 2047631596D 4386 None
Q565 D 2052996261D 4387 None
Q918 E 2063217449D 4388 NA
Q866 E 2079749157D 4389 NA
Q355 D 2093900731D 4390 None
Q30 D 2277746226D 4391 None
Q5 D 5668008721D 4392 None
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086 E 23 E 1 None
Q3265 E 46 E 2 None
Q4483 E 52 E 3 None
Q3152 E 71 E 4 None
Q3517 E 74 E 5 None
Q4017 E 90 E 6 None
Q3649 E 112 E 7 None
Q3254 E 120 E 8 None
Q3527 E 121 E 9 None
Q3522 E 128 E 10 None
Q3186 E 138 E 11 None
Q3634 E 161 E 12 None
Q4486 E 163 E 13 None
Q3652 E 176 E 14 None
Q4475 E 179 E 15 None
Q3633 E 202 E 16 None
Q3287 E 221 E 17 None
Q3781 E 237 E 18 None
Q3654 E 248 E 19 None
Q3641 E 286 E 20 None
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 None
Q3293 E 304 E 23 None
Q3084 E 307 E 24 None
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 None
Q3307 E 330 E 27 None
Q4003 E 337 E 28 None
Q4170 E 338 E 29 None
Q3979 E 341 E 30 None
Q408 D 108249066 D 4363 None
Q212 D 121262940 D 4364 None
Q90 D 132079969 D 4365 None
Q739 D 149356287 D 4366 None
Q150 D 154676097 D 4367 None
Q16 D 161566243 D 4368 None
Q60 D 161626758 D 4369 None
Q188 D 173805898 D 4370 None
Q2184 E 226793302 D 4371 Moderate positive
Q515 E 233190433 D 4372 Moderate positive
Q649 D 243081566 D 4373 None
Q1321 D 252584097 D 4374 None
Q2736 E 270665514 D 4375 Moderate positive
Q414 D 282547432 D 4376 None
Q96 D 307141203 D 4377 None
Q38 D 363184639 D 4378 None
Q328 E 378666601 D 4379 Moderate positive
Q183 D 440162350 D 4380 None
Q29 D 500913282 D 4381 None
Q145 D 595132607 D 4382 None
Q159 D 653480145 D 4383 None
Q142 D 679432828 D 4384 None
Q1860 E 1419981073 D 4385 Moderate positive
Q356 D 2047631596 D 4386 None
Q565 D 2052996261 D 4387 None
Q918 E 2063217449 D 4388 Moderate positive
Q866 E 2079749157 D 4389 Moderate positive
Q355 D 2093900731 D 4390 None
Q30 D 2277746226 D 4391 None
Q5 D 5668008721 D 4392 None
entity_idpredictionpage_viewspop_classseqNumdissonance
Q3086 E 23 E 1 None
Q3265 E 46 E 2 None
Q4483 E 52 E 3 None
Q3152 E 71 E 4 None
Q3517 E 74 E 5 None
Q4017 E 90 E 6 None
Q3649 E 112 E 7 None
Q3254 E 120 E 8 None
Q3527 E 121 E 9 None
Q3522 E 128 E 10 None
Q3186 E 138 E 11 None
Q3634 E 161 E 12 None
Q4486 E 163 E 13 None
Q3652 E 176 E 14 None
Q4475 E 179 E 15 None
Q3633 E 202 E 16 None
Q3287 E 221 E 17 None
Q3781 E 237 E 18 None
Q3654 E 248 E 19 None
Q3641 E 286 E 20 None
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 None
Q3293 E 304 E 23 None
Q3084 E 307 E 24 None
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 None
Q3307 E 330 E 27 None
Q4003 E 337 E 28 None
Q4170 E 338 E 29 None
Q3979 E 341 E 30 None
Q408 D 108249066 D 4363 None
Q212 D 121262940 D 4364 None
Q90 D 132079969 D 4365 None
Q739 D 149356287 D 4366 None
Q150 D 154676097 D 4367 None
Q16 D 161566243 D 4368 None
Q60 D 161626758 D 4369 None
Q188 D 173805898 D 4370 None
Q2184 E 226793302 D 4371 Moderate positive
Q515 E 233190433 D 4372 Moderate positive
Q649 D 243081566 D 4373 None
Q1321 D 252584097 D 4374 None
Q2736 E 270665514 D 4375 Moderate positive
Q414 D 282547432 D 4376 None
Q96 D 307141203 D 4377 None
Q38 D 363184639 D 4378 None
Q328 E 378666601 D 4379 Moderate positive
Q183 D 440162350 D 4380 None
Q29 D 500913282 D 4381 None
Q145 D 595132607 D 4382 None
Q159 D 653480145 D 4383 None
Q142 D 679432828 D 4384 None
Q1860 E 1419981073 D 4385 Moderate positive
Q356 D 2047631596 D 4386 None
Q565 D 2052996261 D 4387 None
Q918 E 2063217449 D 4388 Moderate positive
Q866 E 2079749157 D 4389 Moderate positive
Q355 D 2093900731 D 4390 None
Q30 D 2277746226 D 4391 None
Q5 D 5668008721 D 4392 None

In [39]:
## Build a matrix where columns are the metric and rows are classes
create_alt_diss_matrix = function(articledata, metric, classes) {
  d_mtrx = matrix(0, nrow=length(classes), ncol=length(metric));
  rownames(d_mtrx) = classes;
  colnames(d_mtrx) = metric;

  ## NOTE: R matrix values are [row,col] dimensions
  for(real_rating in classes) {
    for(diss_rating in metric) {
      d_mtrx[real_rating, diss_rating] = length(articledata[prediction == real_rating & dissonance == diss_rating]$entity_id);
    }
  }
  d_mtrx;
}

alternative_dissonance_matrix.1 = create_alt_diss_matrix(articles_by_pop,
  dissonance_metric, assessment_classes);

In [40]:
## Normalise by row
round(100*prop.table(alternative_dissonance_matrix.1, 1), 1);


High negativeModerate negativeNoneModerate positiveHigh positive
E 0 0.083.816.2 0
D 0 46.653.4 0.0 0
CNaN NaN NaN NaNNaN
BNaN NaN NaN NaNNaN
ANaN NaN NaN NaNNaN

In [41]:
## Number of dissonant views per assessment class and amount of dissonance
articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(prediction, dissonance)];


predictiondissonancedissonant_views
E None 1149832465
D Moderate negative 398024248
E Moderate positive10903782585
D None 27269934694

In [42]:
## Calculations of total number of dissonant views per dissonance
articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)];


dissonancedissonant_views
None 28419767159
Moderate negative 398024248
Moderate positive10903782585

In [43]:
articles_by_pop[,sum(as.numeric(page_views))];


39721573992

In [44]:
## Proportions
100*65938379920/545180810059;
100*125047198/545180810059;
100*6713682043/545180810059;
100*120523625541/545180810059;
100*351880075357/545180810059;


12.0947727255594
0.0229368304409811
1.23145971375505
22.107092420945
64.5437383092995

In [45]:
# 87% of views are high positive

In [46]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][1][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
71.54743

In [47]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][2][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
1.002035

In [48]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][3][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
27.45053

In [49]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][4][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
NA

In [50]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][5][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
NA