In [1]:
library(data.table)

In [2]:
## Using a table of article assessments and views, build tables
## (matrices) that shows the number of dissonant articles per
## assessment category based on sorting by popularity.
##
## The underlying assumption is that in an ideal system with a limited
## and fixed amount of resources (in other words, popularity and high quality
## artefacts does not increase the amount of resources in the system),
## popularity ranking and assessment class follow a 1-to-1 relationship.
## We can therefore sort by popularity and group articles that way
## because work will be prioritised by popularity.

## DATA ASSUMPTION: views_with_redirects from resolve-redirects.R
## is loaded into memory.

## 3: build a 2x2 matrix of assessment classes and popularity classes
## 

## Assessment classes in ascending order of quality.

In [3]:
assessment_classes = c('E', 'D', 'C', 'B', 'A');

In [4]:
quality_prediction_and_page_views <- read.table("../../results/sql_queries/entity_views_and_aggregated_revisions/entity_views_and_aggregated_revisions_and_quality_scoring_20121201.tsv", header=FALSE, sep="\t")

In [5]:
quality_prediction_and_page_views <- data.table(quality_prediction_and_page_views)

In [6]:
colnames(quality_prediction_and_page_views) <- c('entity_id','number_of_revisions', 'page_views', 'prediction')

In [7]:
summary(quality_prediction_and_page_views)


   entity_id      number_of_revisions   page_views        prediction
 Q1     :     1   Min.   :   1.00     Min.   :0.000e+00   C:  1340  
 Q100   :     1   1st Qu.:  37.00     1st Qu.:2.602e+03   D: 18507  
 Q1000  :     1   Median :  63.00     Median :1.268e+04   E:193752  
 Q10000 :     1   Mean   :  77.61     Mean   :8.650e+05             
 Q100000:     1   3rd Qu.:  98.00     3rd Qu.:1.108e+05             
 Q100001:     1   Max.   :3152.00     Max.   :1.253e+10             
 (Other):213593                                                     

In [8]:
## 0: calculate number of articles in each assessment class
n_per_class = quality_prediction_and_page_views[, list(narticles=sum(.N)), by='prediction']

In [9]:
setkey(n_per_class, prediction);
## NOTE: setkey allows us to do n_per_class['GA']$narticles to get counts

In [10]:
## 1: order articles by popularity
articles_by_pop = quality_prediction_and_page_views[order(quality_prediction_and_page_views$page_views)][,list(entity_id, prediction, page_views)];

In [11]:
## 2: assign popularity assessment class based on rank
##   (buckets based on number of articles in each class)
articles_by_pop[, pop_class := ''];
articles_by_pop[, seqNum := seq_len(nrow(articles_by_pop))];


entity_idpredictionpage_viewspop_class
Q157851E 0
Q186832E 0
Q198896E 0
Q94803 E 0
Q190349E 0
Q155668E 0
Q149278E 0
Q207182E 0
Q223614E 0
Q121091E 0
Q218579E 0
Q207016E 1
Q49471 E 1
Q130928E 1
Q49475 E 1
Q146626E 1
Q128589E 1
Q38535 E 2
Q145841E 2
Q62197 E 2
Q177407E 2
Q38401 E 2
Q199034E 2
Q49468 E 2
Q35380 E 3
Q49473 E 3
Q38711 E 3
Q49479 E 3
Q80445 E 3
Q203097E 3
Q213678 E 2045786541
Q183718 E 2045831558
Q220509 E 2045834618
Q223142 E 2045976844
Q51044 E 2045987256
Q63056 E 2046036482
Q41226 E 2046097080
Q156376 E 2046132338
Q103204 E 2046461911
Q116933 E 2047137765
Q171186 E 2047579662
Q356 D 2047631596
Q31165 E 2048330818
Q40629 E 2049755644
Q105584 E 2049926923
Q565 D 2052996261
Q209330 E 2060928966
Q14005 D 2063120071
Q918 D 2063217449
Q150248 E 2068796814
Q866 D 2079749157
Q355 D 2093900731
Q33999 E 2108672678
Q193563 E 2130725560
Q37312 E 2142913121
Q54919 D 2148531382
Q36578 D 2229315598
Q30 D 2277746226
Q5 C 5668008721
Q5296 D 12530369761
entity_idpredictionpage_viewspop_classseqNum
Q157851E 0 1
Q186832E 0 2
Q198896E 0 3
Q94803 E 0 4
Q190349E 0 5
Q155668E 0 6
Q149278E 0 7
Q207182E 0 8
Q223614E 0 9
Q121091E 0 10
Q218579E 0 11
Q207016E 1 12
Q49471 E 1 13
Q130928E 1 14
Q49475 E 1 15
Q146626E 1 16
Q128589E 1 17
Q38535 E 2 18
Q145841E 2 19
Q62197 E 2 20
Q177407E 2 21
Q38401 E 2 22
Q199034E 2 23
Q49468 E 2 24
Q35380 E 3 25
Q49473 E 3 26
Q38711 E 3 27
Q49479 E 3 28
Q80445 E 3 29
Q203097E 3 30
Q213678 E 2045786541 213570
Q183718 E 2045831558 213571
Q220509 E 2045834618 213572
Q223142 E 2045976844 213573
Q51044 E 2045987256 213574
Q63056 E 2046036482 213575
Q41226 E 2046097080 213576
Q156376 E 2046132338 213577
Q103204 E 2046461911 213578
Q116933 E 2047137765 213579
Q171186 E 2047579662 213580
Q356 D 2047631596 213581
Q31165 E 2048330818 213582
Q40629 E 2049755644 213583
Q105584 E 2049926923 213584
Q565 D 2052996261 213585
Q209330 E 2060928966 213586
Q14005 D 2063120071 213587
Q918 D 2063217449 213588
Q150248 E 2068796814 213589
Q866 D 2079749157 213590
Q355 D 2093900731 213591
Q33999 E 2108672678 213592
Q193563 E 2130725560 213593
Q37312 E 2142913121 213594
Q54919 D 2148531382 213595
Q36578 D 2229315598 213596
Q30 D 2277746226 213597
Q5 C 5668008721 213598
Q5296 D 12530369761 213599

In [12]:
assign_pop_class = function(dataset, classes, class_n) {
  ## Based on the per-class number of articles in class_n
  ## assign popularity based on classes to dataset.
  prev_idx = 0;
  for(rating in classes) {
    start_idx = prev_idx + 1;
    end_idx = start_idx + class_n[prediction == rating]$narticles;
    print(paste('start_idx =', start_idx, ', end_idx = ', end_idx));
    dataset[seqNum >= start_idx & seqNum <= end_idx, pop_class := rating];
    prev_idx = end_idx -1;
  }
  dataset;
}

In [13]:
articles_by_pop = assign_pop_class(articles_by_pop,
  assessment_classes, n_per_class);


[1] "start_idx = 1 , end_idx =  193753"
[1] "start_idx = 193753 , end_idx =  212260"
[1] "start_idx = 212260 , end_idx =  213600"
[1] "start_idx = 213600 , end_idx =  "
[1] "start_idx =  , end_idx =  "

In [14]:
create_dissonance_matrix = function(articledata, classes) {
  d_mtrx = matrix(0, nrow=length(classes), ncol=length(classes));
  rownames(d_mtrx) = classes;
  colnames(d_mtrx) = classes;

  for(real_rating in classes) {
    for(pop_rating in classes) {
      d_mtrx[real_rating, pop_rating] = length(articledata[prediction == real_rating & pop_class == pop_rating]$entity_id);
    }
  }
  d_mtrx;
}

In [15]:
## Based on direct hits to articles:
create_dissonance_matrix(articles_by_pop, assessment_classes)


EDCBA
E17769015350 712 0 0
D 14836 3052 619 0 0
C 1226 105 9 0 0
B 0 0 0 0 0
A 0 0 0 0 0

In [16]:
dissonance_matrix = create_dissonance_matrix(articles_by_pop,
  assessment_classes);

In [17]:
# Total misaligned entities
(dissonance_matrix[1,1]+dissonance_matrix[2,2]+dissonance_matrix[3,3]+dissonance_matrix[4,4]+dissonance_matrix[5,5])/sum(dissonance_matrix[,])


0.846216508504253

In [18]:
# A class quality and A class views over A class quality
dissonance_matrix[5,5]/sum(dissonance_matrix[5,])


NaN

In [19]:
# A class quality and E and D class views over A class quality
(dissonance_matrix[5,1]+dissonance_matrix[5,2])/sum(dissonance_matrix[5,])


NaN

In [20]:
# A class quality and < A class views
(dissonance_matrix[5,1]+dissonance_matrix[5,2]+dissonance_matrix[5,3]+dissonance_matrix[5,4])/sum(dissonance_matrix[5,])


NaN

In [21]:
# < A class quality and A class views
(dissonance_matrix[1,5]+dissonance_matrix[2,5]+dissonance_matrix[3,5]+dissonance_matrix[4,5])/sum(dissonance_matrix[,5])


NaN

In [22]:
prediction_e_pop_class_a <- merge(articles_by_pop[prediction == 'E' & pop_class == 'A'],quality_prediction_and_page_views, by='entity_id')[, c("entity_id","page_views.x", "number_of_revisions")]

In [23]:
head(prediction_e_pop_class_a)


entity_idpage_views.xnumber_of_revisions

In [24]:
## Q: why do I get _two_ pageid columns?  Solution is to do the selection
## on the joined table, not as a select _in_ the join.

## Dissonance matrix proportions by row (..., 1) and column (..., 2)
## rounded to 1 decimal places.

In [25]:
round(100*prop.table(dissonance_matrix, 1), 1);


EDCBA
E91.7 7.90.4 0 0
D80.216.53.3 0 0
C91.5 7.80.7 0 0
B NaN NaNNaN NaN NaN
A NaN NaNNaN NaN NaN

In [26]:
round(100*prop.table(dissonance_matrix, 2), 1);


EDCBA
E91.782.953.1NaN NaN
D 7.716.546.2NaN NaN
C 0.6 0.6 0.7NaN NaN
B 0.0 0.0 0.0NaN NaN
A 0.0 0.0 0.0NaN NaN

In [27]:
## Let's write the stubs out to a file
write.table(merge(articles_by_pop[(prediction == 'E' | prediction == 'D' | prediction == 'C' | prediction == 'B') & pop_class == 'A'], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201212_a_class_views_less_than_a_quality.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');
merge(articles_by_pop[(prediction == 'E' | prediction == 'D' | prediction == 'C' | prediction == 'B') & pop_class == 'A'], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")]


entity_idpop_classprediction.x

In [28]:
write.table(merge(articles_by_pop[prediction == 'A' & (pop_class == 'B' | pop_class == 'C' | pop_class == 'D' | pop_class == 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201212_a_class_quality_less_than_a_views.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

In [29]:
write.table(merge(articles_by_pop[(prediction == 'A' & pop_class == 'A') | (prediction == 'B' & pop_class == 'B') | (prediction == 'C' & pop_class == 'C') | (prediction == 'D' & pop_class == 'D') | (prediction == 'E' & pop_class == 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201212_aligned.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

In [30]:
write.table(merge(articles_by_pop[(prediction == 'A' & pop_class != 'A') | (prediction == 'B' & pop_class != 'B') | (prediction == 'C' & pop_class != 'C') | (prediction == 'D' & pop_class != 'D') | (prediction == 'E' & pop_class != 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
           '../../results/entity_categorization/201212_misaligned.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');

Dissonance Measures (was seperate file)


In [29]:
## Various ways of measuring dissonance.

## DATA ASSUMPTION: articles_by_pop from build-dissonance-table.R
## is loaded into memory.

## None/Moderate/High measure of dissonance

In [30]:
articles_by_pop[, pop_class := ordered(pop_class, assessment_classes)];


entity_idpredictionpage_viewspop_classseqNum
Q157851E 0 E 1
Q186832E 0 E 2
Q198896E 0 E 3
Q94803 E 0 E 4
Q190349E 0 E 5
Q155668E 0 E 6
Q149278E 0 E 7
Q207182E 0 E 8
Q223614E 0 E 9
Q121091E 0 E 10
Q218579E 0 E 11
Q207016E 1 E 12
Q49471 E 1 E 13
Q130928E 1 E 14
Q49475 E 1 E 15
Q146626E 1 E 16
Q128589E 1 E 17
Q38535 E 2 E 18
Q145841E 2 E 19
Q62197 E 2 E 20
Q177407E 2 E 21
Q38401 E 2 E 22
Q199034E 2 E 23
Q49468 E 2 E 24
Q35380 E 3 E 25
Q49473 E 3 E 26
Q38711 E 3 E 27
Q49479 E 3 E 28
Q80445 E 3 E 29
Q203097E 3 E 30
Q213678 E 2045786541C 213570
Q183718 E 2045831558C 213571
Q220509 E 2045834618C 213572
Q223142 E 2045976844C 213573
Q51044 E 2045987256C 213574
Q63056 E 2046036482C 213575
Q41226 E 2046097080C 213576
Q156376 E 2046132338C 213577
Q103204 E 2046461911C 213578
Q116933 E 2047137765C 213579
Q171186 E 2047579662C 213580
Q356 D 2047631596C 213581
Q31165 E 2048330818C 213582
Q40629 E 2049755644C 213583
Q105584 E 2049926923C 213584
Q565 D 2052996261C 213585
Q209330 E 2060928966C 213586
Q14005 D 2063120071C 213587
Q918 D 2063217449C 213588
Q150248 E 2068796814C 213589
Q866 D 2079749157C 213590
Q355 D 2093900731C 213591
Q33999 E 2108672678C 213592
Q193563 E 2130725560C 213593
Q37312 E 2142913121C 213594
Q54919 D 2148531382C 213595
Q36578 D 2229315598C 213596
Q30 D 2277746226C 213597
Q5 C 5668008721C 213598
Q5296 D 12530369761C 213599

In [31]:
dissonance_metric = c('High negative', 'Moderate negative',
  'None', 'Moderate positive', 'High positive');

In [32]:
articles_by_pop[, dissonance := factor(NA, dissonance_metric)];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA

In [33]:
## NOTE: because pop_class is of class ordered, we can use
##       expressions like "pop_class < 'C'" as expected

In [34]:
## A: None if A, Moderate if A, High elsewhere
articles_by_pop[prediction == 'A' & pop_class <= 'C',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'A' & pop_class == 'B',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'A' & pop_class == 'A',
                dissonance := 'None'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA

In [35]:
## B: 
articles_by_pop[prediction == 'B' & pop_class <= 'D',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'B' & pop_class == 'C',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'B' & pop_class == 'B',
                dissonance := 'None'];
articles_by_pop[prediction == 'B' & pop_class == 'A',
                dissonance := 'Moderate positive'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA

In [36]:
## C: 
articles_by_pop[prediction == 'C' & pop_class == 'E',
                dissonance := 'High negative'];
articles_by_pop[prediction == 'C' & pop_class == 'D',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'C' & pop_class == 'C',
                dissonance := 'None'];
articles_by_pop[prediction == 'C' & pop_class == 'B',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'C' & pop_class == 'A',
                dissonance := 'High positive'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 NA
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 None
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 None
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 None
Q5296 D 12530369761C 213599 NA

In [37]:
## D
articles_by_pop[prediction == 'D' & pop_class == 'E',
                dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'D' & pop_class == 'D',
                dissonance := 'None'];
articles_by_pop[prediction == 'D' & pop_class == 'C',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'D' & pop_class >= 'B',
                dissonance := 'High positive'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 None
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541C 213570 NA
Q183718 E 2045831558C 213571 NA
Q220509 E 2045834618C 213572 NA
Q223142 E 2045976844C 213573 NA
Q51044 E 2045987256C 213574 NA
Q63056 E 2046036482C 213575 NA
Q41226 E 2046097080C 213576 NA
Q156376 E 2046132338C 213577 NA
Q103204 E 2046461911C 213578 NA
Q116933 E 2047137765C 213579 NA
Q171186 E 2047579662C 213580 NA
Q356 D 2047631596C 213581 NA
Q31165 E 2048330818C 213582 NA
Q40629 E 2049755644C 213583 NA
Q105584 E 2049926923C 213584 NA
Q565 D 2052996261C 213585 NA
Q209330 E 2060928966C 213586 NA
Q14005 D 2063120071C 213587 NA
Q918 D 2063217449C 213588 NA
Q150248 E 2068796814C 213589 NA
Q866 D 2079749157C 213590 NA
Q355 D 2093900731C 213591 NA
Q33999 E 2108672678C 213592 NA
Q193563 E 2130725560C 213593 NA
Q37312 E 2142913121C 213594 NA
Q54919 D 2148531382C 213595 NA
Q36578 D 2229315598C 213596 NA
Q30 D 2277746226C 213597 NA
Q5 C 5668008721C 213598 None
Q5296 D 12530369761C 213599 NA
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541 C 213570 NA
Q183718 E 2045831558 C 213571 NA
Q220509 E 2045834618 C 213572 NA
Q223142 E 2045976844 C 213573 NA
Q51044 E 2045987256 C 213574 NA
Q63056 E 2046036482 C 213575 NA
Q41226 E 2046097080 C 213576 NA
Q156376 E 2046132338 C 213577 NA
Q103204 E 2046461911 C 213578 NA
Q116933 E 2047137765 C 213579 NA
Q171186 E 2047579662 C 213580 NA
Q356 D 2047631596 C 213581 Moderate positive
Q31165 E 2048330818 C 213582 NA
Q40629 E 2049755644 C 213583 NA
Q105584 E 2049926923 C 213584 NA
Q565 D 2052996261 C 213585 Moderate positive
Q209330 E 2060928966 C 213586 NA
Q14005 D 2063120071 C 213587 Moderate positive
Q918 D 2063217449 C 213588 Moderate positive
Q150248 E 2068796814 C 213589 NA
Q866 D 2079749157 C 213590 Moderate positive
Q355 D 2093900731 C 213591 Moderate positive
Q33999 E 2108672678 C 213592 NA
Q193563 E 2130725560 C 213593 NA
Q37312 E 2142913121 C 213594 NA
Q54919 D 2148531382 C 213595 Moderate positive
Q36578 D 2229315598 C 213596 Moderate positive
Q30 D 2277746226 C 213597 Moderate positive
Q5 C 5668008721 C 213598 None
Q5296 D 12530369761 C 213599 Moderate positive
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 NA
Q186832E 0 E 2 NA
Q198896E 0 E 3 NA
Q94803 E 0 E 4 NA
Q190349E 0 E 5 NA
Q155668E 0 E 6 NA
Q149278E 0 E 7 NA
Q207182E 0 E 8 NA
Q223614E 0 E 9 NA
Q121091E 0 E 10 NA
Q218579E 0 E 11 NA
Q207016E 1 E 12 NA
Q49471 E 1 E 13 NA
Q130928E 1 E 14 NA
Q49475 E 1 E 15 NA
Q146626E 1 E 16 NA
Q128589E 1 E 17 NA
Q38535 E 2 E 18 NA
Q145841E 2 E 19 NA
Q62197 E 2 E 20 NA
Q177407E 2 E 21 NA
Q38401 E 2 E 22 NA
Q199034E 2 E 23 NA
Q49468 E 2 E 24 NA
Q35380 E 3 E 25 NA
Q49473 E 3 E 26 NA
Q38711 E 3 E 27 NA
Q49479 E 3 E 28 NA
Q80445 E 3 E 29 NA
Q203097E 3 E 30 NA
Q213678 E 2045786541 C 213570 NA
Q183718 E 2045831558 C 213571 NA
Q220509 E 2045834618 C 213572 NA
Q223142 E 2045976844 C 213573 NA
Q51044 E 2045987256 C 213574 NA
Q63056 E 2046036482 C 213575 NA
Q41226 E 2046097080 C 213576 NA
Q156376 E 2046132338 C 213577 NA
Q103204 E 2046461911 C 213578 NA
Q116933 E 2047137765 C 213579 NA
Q171186 E 2047579662 C 213580 NA
Q356 D 2047631596 C 213581 Moderate positive
Q31165 E 2048330818 C 213582 NA
Q40629 E 2049755644 C 213583 NA
Q105584 E 2049926923 C 213584 NA
Q565 D 2052996261 C 213585 Moderate positive
Q209330 E 2060928966 C 213586 NA
Q14005 D 2063120071 C 213587 Moderate positive
Q918 D 2063217449 C 213588 Moderate positive
Q150248 E 2068796814 C 213589 NA
Q866 D 2079749157 C 213590 Moderate positive
Q355 D 2093900731 C 213591 Moderate positive
Q33999 E 2108672678 C 213592 NA
Q193563 E 2130725560 C 213593 NA
Q37312 E 2142913121 C 213594 NA
Q54919 D 2148531382 C 213595 Moderate positive
Q36578 D 2229315598 C 213596 Moderate positive
Q30 D 2277746226 C 213597 Moderate positive
Q5 C 5668008721 C 213598 None
Q5296 D 12530369761 C 213599 Moderate positive

In [38]:
## E
articles_by_pop[prediction == 'E' & pop_class == 'E',
                dissonance := 'None'];
articles_by_pop[prediction == 'E' & pop_class == 'D',
                dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'E' & pop_class >= 'C',
                dissonance := 'High positive'];


entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 None
Q186832E 0 E 2 None
Q198896E 0 E 3 None
Q94803 E 0 E 4 None
Q190349E 0 E 5 None
Q155668E 0 E 6 None
Q149278E 0 E 7 None
Q207182E 0 E 8 None
Q223614E 0 E 9 None
Q121091E 0 E 10 None
Q218579E 0 E 11 None
Q207016E 1 E 12 None
Q49471 E 1 E 13 None
Q130928E 1 E 14 None
Q49475 E 1 E 15 None
Q146626E 1 E 16 None
Q128589E 1 E 17 None
Q38535 E 2 E 18 None
Q145841E 2 E 19 None
Q62197 E 2 E 20 None
Q177407E 2 E 21 None
Q38401 E 2 E 22 None
Q199034E 2 E 23 None
Q49468 E 2 E 24 None
Q35380 E 3 E 25 None
Q49473 E 3 E 26 None
Q38711 E 3 E 27 None
Q49479 E 3 E 28 None
Q80445 E 3 E 29 None
Q203097E 3 E 30 None
Q213678 E 2045786541 C 213570 NA
Q183718 E 2045831558 C 213571 NA
Q220509 E 2045834618 C 213572 NA
Q223142 E 2045976844 C 213573 NA
Q51044 E 2045987256 C 213574 NA
Q63056 E 2046036482 C 213575 NA
Q41226 E 2046097080 C 213576 NA
Q156376 E 2046132338 C 213577 NA
Q103204 E 2046461911 C 213578 NA
Q116933 E 2047137765 C 213579 NA
Q171186 E 2047579662 C 213580 NA
Q356 D 2047631596 C 213581 Moderate positive
Q31165 E 2048330818 C 213582 NA
Q40629 E 2049755644 C 213583 NA
Q105584 E 2049926923 C 213584 NA
Q565 D 2052996261 C 213585 Moderate positive
Q209330 E 2060928966 C 213586 NA
Q14005 D 2063120071 C 213587 Moderate positive
Q918 D 2063217449 C 213588 Moderate positive
Q150248 E 2068796814 C 213589 NA
Q866 D 2079749157 C 213590 Moderate positive
Q355 D 2093900731 C 213591 Moderate positive
Q33999 E 2108672678 C 213592 NA
Q193563 E 2130725560 C 213593 NA
Q37312 E 2142913121 C 213594 NA
Q54919 D 2148531382 C 213595 Moderate positive
Q36578 D 2229315598 C 213596 Moderate positive
Q30 D 2277746226 C 213597 Moderate positive
Q5 C 5668008721 C 213598 None
Q5296 D 12530369761 C 213599 Moderate positive
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 None
Q186832E 0 E 2 None
Q198896E 0 E 3 None
Q94803 E 0 E 4 None
Q190349E 0 E 5 None
Q155668E 0 E 6 None
Q149278E 0 E 7 None
Q207182E 0 E 8 None
Q223614E 0 E 9 None
Q121091E 0 E 10 None
Q218579E 0 E 11 None
Q207016E 1 E 12 None
Q49471 E 1 E 13 None
Q130928E 1 E 14 None
Q49475 E 1 E 15 None
Q146626E 1 E 16 None
Q128589E 1 E 17 None
Q38535 E 2 E 18 None
Q145841E 2 E 19 None
Q62197 E 2 E 20 None
Q177407E 2 E 21 None
Q38401 E 2 E 22 None
Q199034E 2 E 23 None
Q49468 E 2 E 24 None
Q35380 E 3 E 25 None
Q49473 E 3 E 26 None
Q38711 E 3 E 27 None
Q49479 E 3 E 28 None
Q80445 E 3 E 29 None
Q203097E 3 E 30 None
Q213678 E 2045786541 C 213570 NA
Q183718 E 2045831558 C 213571 NA
Q220509 E 2045834618 C 213572 NA
Q223142 E 2045976844 C 213573 NA
Q51044 E 2045987256 C 213574 NA
Q63056 E 2046036482 C 213575 NA
Q41226 E 2046097080 C 213576 NA
Q156376 E 2046132338 C 213577 NA
Q103204 E 2046461911 C 213578 NA
Q116933 E 2047137765 C 213579 NA
Q171186 E 2047579662 C 213580 NA
Q356 D 2047631596 C 213581 Moderate positive
Q31165 E 2048330818 C 213582 NA
Q40629 E 2049755644 C 213583 NA
Q105584 E 2049926923 C 213584 NA
Q565 D 2052996261 C 213585 Moderate positive
Q209330 E 2060928966 C 213586 NA
Q14005 D 2063120071 C 213587 Moderate positive
Q918 D 2063217449 C 213588 Moderate positive
Q150248 E 2068796814 C 213589 NA
Q866 D 2079749157 C 213590 Moderate positive
Q355 D 2093900731 C 213591 Moderate positive
Q33999 E 2108672678 C 213592 NA
Q193563 E 2130725560 C 213593 NA
Q37312 E 2142913121 C 213594 NA
Q54919 D 2148531382 C 213595 Moderate positive
Q36578 D 2229315598 C 213596 Moderate positive
Q30 D 2277746226 C 213597 Moderate positive
Q5 C 5668008721 C 213598 None
Q5296 D 12530369761 C 213599 Moderate positive
entity_idpredictionpage_viewspop_classseqNumdissonance
Q157851E 0 E 1 None
Q186832E 0 E 2 None
Q198896E 0 E 3 None
Q94803 E 0 E 4 None
Q190349E 0 E 5 None
Q155668E 0 E 6 None
Q149278E 0 E 7 None
Q207182E 0 E 8 None
Q223614E 0 E 9 None
Q121091E 0 E 10 None
Q218579E 0 E 11 None
Q207016E 1 E 12 None
Q49471 E 1 E 13 None
Q130928E 1 E 14 None
Q49475 E 1 E 15 None
Q146626E 1 E 16 None
Q128589E 1 E 17 None
Q38535 E 2 E 18 None
Q145841E 2 E 19 None
Q62197 E 2 E 20 None
Q177407E 2 E 21 None
Q38401 E 2 E 22 None
Q199034E 2 E 23 None
Q49468 E 2 E 24 None
Q35380 E 3 E 25 None
Q49473 E 3 E 26 None
Q38711 E 3 E 27 None
Q49479 E 3 E 28 None
Q80445 E 3 E 29 None
Q203097E 3 E 30 None
Q213678 E 2045786541 C 213570 High positive
Q183718 E 2045831558 C 213571 High positive
Q220509 E 2045834618 C 213572 High positive
Q223142 E 2045976844 C 213573 High positive
Q51044 E 2045987256 C 213574 High positive
Q63056 E 2046036482 C 213575 High positive
Q41226 E 2046097080 C 213576 High positive
Q156376 E 2046132338 C 213577 High positive
Q103204 E 2046461911 C 213578 High positive
Q116933 E 2047137765 C 213579 High positive
Q171186 E 2047579662 C 213580 High positive
Q356 D 2047631596 C 213581 Moderate positive
Q31165 E 2048330818 C 213582 High positive
Q40629 E 2049755644 C 213583 High positive
Q105584 E 2049926923 C 213584 High positive
Q565 D 2052996261 C 213585 Moderate positive
Q209330 E 2060928966 C 213586 High positive
Q14005 D 2063120071 C 213587 Moderate positive
Q918 D 2063217449 C 213588 Moderate positive
Q150248 E 2068796814 C 213589 High positive
Q866 D 2079749157 C 213590 Moderate positive
Q355 D 2093900731 C 213591 Moderate positive
Q33999 E 2108672678 C 213592 High positive
Q193563 E 2130725560 C 213593 High positive
Q37312 E 2142913121 C 213594 High positive
Q54919 D 2148531382 C 213595 Moderate positive
Q36578 D 2229315598 C 213596 Moderate positive
Q30 D 2277746226 C 213597 Moderate positive
Q5 C 5668008721 C 213598 None
Q5296 D 12530369761 C 213599 Moderate positive

In [39]:
## Build a matrix where columns are the metric and rows are classes
create_alt_diss_matrix = function(articledata, metric, classes) {
  d_mtrx = matrix(0, nrow=length(classes), ncol=length(metric));
  rownames(d_mtrx) = classes;
  colnames(d_mtrx) = metric;

  ## NOTE: R matrix values are [row,col] dimensions
  for(real_rating in classes) {
    for(diss_rating in metric) {
      d_mtrx[real_rating, diss_rating] = length(articledata[prediction == real_rating & dissonance == diss_rating]$entity_id);
    }
  }
  d_mtrx;
}

alternative_dissonance_matrix.1 = create_alt_diss_matrix(articles_by_pop,
  dissonance_metric, assessment_classes);

In [40]:
## Normalise by row
round(100*prop.table(alternative_dissonance_matrix.1, 1), 1);


High negativeModerate negativeNoneModerate positiveHigh positive
E 0.0 0.091.77.9 0.4
D 0.080.216.53.3 0.0
C91.5 7.8 0.70.0 0.0
B NaN NaN NaNNaN NaN
A NaN NaN NaNNaN NaN

In [41]:
## Number of dissonant views per assessment class and amount of dissonance
articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(prediction, dissonance)];


predictiondissonancedissonant_views
E None 12232045225
D Moderate negative 1290793968
C High negative 290679247
E Moderate positive32048520634
D None 8668013622
C Moderate negative 174929110
E High positive 70737384857
D Moderate positive53268761162
C None 6044940092

In [42]:
## Calculations of total number of dissonant views per dissonance
articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)];


dissonancedissonant_views
None 26944998939
Moderate negative 1465723078
High negative 290679247
Moderate positive85317281796
High positive 70737384857

In [43]:
articles_by_pop[,sum(as.numeric(page_views))];


184756067917

In [44]:
## Proportions
100*65938379920/545180810059;
100*125047198/545180810059;
100*6713682043/545180810059;
100*120523625541/545180810059;
100*351880075357/545180810059;


12.0947727255594
0.0229368304409811
1.23145971375505
22.107092420945
64.5437383092995

In [45]:
# 87% of views are high positive

In [46]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][1][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
14.58409

In [47]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][2][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
0.7933288

In [48]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][3][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
0.1573314

In [49]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][4][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
46.17834

In [50]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][5][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])


dissonant_views
38.28691