In [1]:
library(data.table)
In [2]:
## Using a table of article assessments and views, build tables
## (matrices) that shows the number of dissonant articles per
## assessment category based on sorting by popularity.
##
## The underlying assumption is that in an ideal system with a limited
## and fixed amount of resources (in other words, popularity and high quality
## artefacts does not increase the amount of resources in the system),
## popularity ranking and assessment class follow a 1-to-1 relationship.
## We can therefore sort by popularity and group articles that way
## because work will be prioritised by popularity.
## DATA ASSUMPTION: views_with_redirects from resolve-redirects.R
## is loaded into memory.
## 3: build a 2x2 matrix of assessment classes and popularity classes
##
## Assessment classes in ascending order of quality.
In [3]:
assessment_classes = c('E', 'D', 'C', 'B', 'A');
In [4]:
quality_prediction_and_page_views <- read.table("../../results/sql_queries/entity_views_and_aggregated_revisions/entity_views_and_aggregated_revisions_and_quality_scoring_20121101.tsv", header=FALSE, sep="\t")
In [5]:
quality_prediction_and_page_views <- data.table(quality_prediction_and_page_views)
In [6]:
colnames(quality_prediction_and_page_views) <- c('entity_id','number_of_revisions', 'page_views', 'prediction')
In [7]:
summary(quality_prediction_and_page_views)
entity_id number_of_revisions page_views prediction
Q1 : 1 Min. : 14 Min. :2.300e+01 D:1133
Q100 : 1 1st Qu.: 95 1st Qu.:6.075e+04 E:3259
Q1000 : 1 Median : 203 Median :5.756e+05
Q1001 : 1 Mean : 246 Mean :9.044e+06
Q1002 : 1 3rd Qu.: 302 3rd Qu.:2.352e+06
Q1003 : 1 Max. :3152 Max. :5.668e+09
(Other):4386
In [8]:
## 0: calculate number of articles in each assessment class
n_per_class = quality_prediction_and_page_views[, list(narticles=sum(.N)), by='prediction']
In [9]:
setkey(n_per_class, prediction);
## NOTE: setkey allows us to do n_per_class['GA']$narticles to get counts
In [10]:
## 1: order articles by popularity
articles_by_pop = quality_prediction_and_page_views[order(quality_prediction_and_page_views$page_views)][,list(entity_id, prediction, page_views)];
In [11]:
## 2: assign popularity assessment class based on rank
## (buckets based on number of articles in each class)
articles_by_pop[, pop_class := ''];
articles_by_pop[, seqNum := seq_len(nrow(articles_by_pop))];
entity_id prediction page_views pop_class
Q3086 E 23
Q3265 E 46
Q4483 E 52
Q3152 E 71
Q3517 E 74
Q4017 E 90
Q3649 E 112
Q3254 E 120
Q3527 E 121
Q3522 E 128
Q3186 E 138
Q3634 E 161
Q4486 E 163
Q3652 E 176
Q4475 E 179
Q3633 E 202
Q3287 E 221
Q3781 E 237
Q3654 E 248
Q3641 E 286
Q2711 D 287
Q4281 E 298
Q3293 E 304
Q3084 E 307
Q293 D 329
Q2631 E 330
Q3307 E 330
Q4003 E 337
Q4170 E 338
Q3979 E 341
⋮ ⋮ ⋮ ⋮
Q408 D 108249066
Q212 D 121262940
Q90 D 132079969
Q739 D 149356287
Q150 D 154676097
Q16 D 161566243
Q60 D 161626758
Q188 D 173805898
Q2184 E 226793302
Q515 E 233190433
Q649 D 243081566
Q1321 D 252584097
Q2736 E 270665514
Q414 D 282547432
Q96 D 307141203
Q38 D 363184639
Q328 E 378666601
Q183 D 440162350
Q29 D 500913282
Q145 D 595132607
Q159 D 653480145
Q142 D 679432828
Q1860 E 1419981073
Q356 D 2047631596
Q565 D 2052996261
Q918 E 2063217449
Q866 E 2079749157
Q355 D 2093900731
Q30 D 2277746226
Q5 D 5668008721
entity_id prediction page_views pop_class seqNum
Q3086 E 23 1
Q3265 E 46 2
Q4483 E 52 3
Q3152 E 71 4
Q3517 E 74 5
Q4017 E 90 6
Q3649 E 112 7
Q3254 E 120 8
Q3527 E 121 9
Q3522 E 128 10
Q3186 E 138 11
Q3634 E 161 12
Q4486 E 163 13
Q3652 E 176 14
Q4475 E 179 15
Q3633 E 202 16
Q3287 E 221 17
Q3781 E 237 18
Q3654 E 248 19
Q3641 E 286 20
Q2711 D 287 21
Q4281 E 298 22
Q3293 E 304 23
Q3084 E 307 24
Q293 D 329 25
Q2631 E 330 26
Q3307 E 330 27
Q4003 E 337 28
Q4170 E 338 29
Q3979 E 341 30
⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 4363
Q212 D 121262940 4364
Q90 D 132079969 4365
Q739 D 149356287 4366
Q150 D 154676097 4367
Q16 D 161566243 4368
Q60 D 161626758 4369
Q188 D 173805898 4370
Q2184 E 226793302 4371
Q515 E 233190433 4372
Q649 D 243081566 4373
Q1321 D 252584097 4374
Q2736 E 270665514 4375
Q414 D 282547432 4376
Q96 D 307141203 4377
Q38 D 363184639 4378
Q328 E 378666601 4379
Q183 D 440162350 4380
Q29 D 500913282 4381
Q145 D 595132607 4382
Q159 D 653480145 4383
Q142 D 679432828 4384
Q1860 E 1419981073 4385
Q356 D 2047631596 4386
Q565 D 2052996261 4387
Q918 E 2063217449 4388
Q866 E 2079749157 4389
Q355 D 2093900731 4390
Q30 D 2277746226 4391
Q5 D 5668008721 4392
In [12]:
assign_pop_class = function(dataset, classes, class_n) {
## Based on the per-class number of articles in class_n
## assign popularity based on classes to dataset.
prev_idx = 0;
for(rating in classes) {
start_idx = prev_idx + 1;
end_idx = start_idx + class_n[prediction == rating]$narticles;
print(paste('start_idx =', start_idx, ', end_idx = ', end_idx));
dataset[seqNum >= start_idx & seqNum <= end_idx, pop_class := rating];
prev_idx = end_idx -1;
}
dataset;
}
In [13]:
articles_by_pop = assign_pop_class(articles_by_pop,
assessment_classes, n_per_class);
[1] "start_idx = 1 , end_idx = 3260"
[1] "start_idx = 3260 , end_idx = 4393"
[1] "start_idx = 4393 , end_idx = "
[1] "start_idx = , end_idx = "
[1] "start_idx = , end_idx = "
In [14]:
create_dissonance_matrix = function(articledata, classes) {
d_mtrx = matrix(0, nrow=length(classes), ncol=length(classes));
rownames(d_mtrx) = classes;
colnames(d_mtrx) = classes;
for(real_rating in classes) {
for(pop_rating in classes) {
d_mtrx[real_rating, pop_rating] = length(articledata[prediction == real_rating & pop_class == pop_rating]$entity_id);
}
}
d_mtrx;
}
In [15]:
## Based on direct hits to articles:
create_dissonance_matrix(articles_by_pop, assessment_classes)
E D C B A
E 2731 528 0 0 0
D 528 605 0 0 0
C 0 0 0 0 0
B 0 0 0 0 0
A 0 0 0 0 0
In [16]:
dissonance_matrix = create_dissonance_matrix(articles_by_pop,
assessment_classes);
In [17]:
# Total misaligned entities
(dissonance_matrix[1,1]+dissonance_matrix[2,2]+dissonance_matrix[3,3]+dissonance_matrix[4,4]+dissonance_matrix[5,5])/sum(dissonance_matrix[,])
0.759562841530055
In [18]:
# A class quality and A class views over A class quality
dissonance_matrix[5,5]/sum(dissonance_matrix[5,])
NaN
In [19]:
# A class quality and E and D class views over A class quality
(dissonance_matrix[5,1]+dissonance_matrix[5,2])/sum(dissonance_matrix[5,])
NaN
In [20]:
# A class quality and < A class views
(dissonance_matrix[5,1]+dissonance_matrix[5,2]+dissonance_matrix[5,3]+dissonance_matrix[5,4])/sum(dissonance_matrix[5,])
NaN
In [21]:
# < A class quality and A class views
(dissonance_matrix[1,5]+dissonance_matrix[2,5]+dissonance_matrix[3,5]+dissonance_matrix[4,5])/sum(dissonance_matrix[,5])
NaN
In [22]:
prediction_e_pop_class_a <- merge(articles_by_pop[prediction == 'E' & pop_class == 'A'],quality_prediction_and_page_views, by='entity_id')[, c("entity_id","page_views.x", "number_of_revisions")]
In [23]:
head(prediction_e_pop_class_a)
entity_id page_views.x number_of_revisions
In [24]:
## Q: why do I get _two_ pageid columns? Solution is to do the selection
## on the joined table, not as a select _in_ the join.
## Dissonance matrix proportions by row (..., 1) and column (..., 2)
## rounded to 1 decimal places.
In [25]:
round(100*prop.table(dissonance_matrix, 1), 1);
E D C B A
E 83.8 16.2 0 0 0
D 46.6 53.4 0 0 0
C NaN NaN NaN NaN NaN
B NaN NaN NaN NaN NaN
A NaN NaN NaN NaN NaN
In [26]:
round(100*prop.table(dissonance_matrix, 2), 1);
E D C B A
E 83.8 46.6 NaN NaN NaN
D 16.2 53.4 NaN NaN NaN
C 0.0 0.0 NaN NaN NaN
B 0.0 0.0 NaN NaN NaN
A 0.0 0.0 NaN NaN NaN
In [27]:
## Let's write the stubs out to a file
write.table(merge(articles_by_pop[(prediction == 'E' | prediction == 'D' | prediction == 'C' | prediction == 'B') & pop_class == 'A'], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
'../../results/entity_categorization/201211_a_class_views_less_than_a_quality.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');
merge(articles_by_pop[(prediction == 'E' | prediction == 'D' | prediction == 'C' | prediction == 'B') & pop_class == 'A'], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")]
entity_id pop_class prediction.x
In [28]:
write.table(merge(articles_by_pop[prediction == 'A' & (pop_class == 'B' | pop_class == 'C' | pop_class == 'D' | pop_class == 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
'../../results/entity_categorization/201211_a_class_quality_less_than_a_views.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');
In [29]:
write.table(merge(articles_by_pop[(prediction == 'A' & pop_class == 'A') | (prediction == 'B' & pop_class == 'B') | (prediction == 'C' & pop_class == 'C') | (prediction == 'D' & pop_class == 'D') | (prediction == 'E' & pop_class == 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
'../../results/entity_categorization/201211_aligned.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');
In [30]:
write.table(merge(articles_by_pop[(prediction == 'A' & pop_class != 'A') | (prediction == 'B' & pop_class != 'B') | (prediction == 'C' & pop_class != 'C') | (prediction == 'D' & pop_class != 'D') | (prediction == 'E' & pop_class != 'E')], quality_prediction_and_page_views, by='entity_id')[, c("entity_id","pop_class", "prediction.x")],
'../../results/entity_categorization/201211_misaligned.tsv', row.names=FALSE, col.names=FALSE, quote=FALSE, sep='\t');
In [29]:
## Various ways of measuring dissonance.
## DATA ASSUMPTION: articles_by_pop from build-dissonance-table.R
## is loaded into memory.
## None/Moderate/High measure of dissonance
In [30]:
articles_by_pop[, pop_class := ordered(pop_class, assessment_classes)];
entity_id prediction page_views pop_class seqNum
Q3086 E 23 E 1
Q3265 E 46 E 2
Q4483 E 52 E 3
Q3152 E 71 E 4
Q3517 E 74 E 5
Q4017 E 90 E 6
Q3649 E 112 E 7
Q3254 E 120 E 8
Q3527 E 121 E 9
Q3522 E 128 E 10
Q3186 E 138 E 11
Q3634 E 161 E 12
Q4486 E 163 E 13
Q3652 E 176 E 14
Q4475 E 179 E 15
Q3633 E 202 E 16
Q3287 E 221 E 17
Q3781 E 237 E 18
Q3654 E 248 E 19
Q3641 E 286 E 20
Q2711 D 287 E 21
Q4281 E 298 E 22
Q3293 E 304 E 23
Q3084 E 307 E 24
Q293 D 329 E 25
Q2631 E 330 E 26
Q3307 E 330 E 27
Q4003 E 337 E 28
Q4170 E 338 E 29
Q3979 E 341 E 30
⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363
Q212 D 121262940 D 4364
Q90 D 132079969 D 4365
Q739 D 149356287 D 4366
Q150 D 154676097 D 4367
Q16 D 161566243 D 4368
Q60 D 161626758 D 4369
Q188 D 173805898 D 4370
Q2184 E 226793302 D 4371
Q515 E 233190433 D 4372
Q649 D 243081566 D 4373
Q1321 D 252584097 D 4374
Q2736 E 270665514 D 4375
Q414 D 282547432 D 4376
Q96 D 307141203 D 4377
Q38 D 363184639 D 4378
Q328 E 378666601 D 4379
Q183 D 440162350 D 4380
Q29 D 500913282 D 4381
Q145 D 595132607 D 4382
Q159 D 653480145 D 4383
Q142 D 679432828 D 4384
Q1860 E 1419981073 D 4385
Q356 D 2047631596 D 4386
Q565 D 2052996261 D 4387
Q918 E 2063217449 D 4388
Q866 E 2079749157 D 4389
Q355 D 2093900731 D 4390
Q30 D 2277746226 D 4391
Q5 D 5668008721 D 4392
In [31]:
dissonance_metric = c('High negative', 'Moderate negative',
'None', 'Moderate positive', 'High positive');
In [32]:
articles_by_pop[, dissonance := factor(NA, dissonance_metric)];
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
In [33]:
## NOTE: because pop_class is of class ordered, we can use
## expressions like "pop_class < 'C'" as expected
In [34]:
## A: None if A, Moderate if A, High elsewhere
articles_by_pop[prediction == 'A' & pop_class <= 'C',
dissonance := 'High negative'];
articles_by_pop[prediction == 'A' & pop_class == 'B',
dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'A' & pop_class == 'A',
dissonance := 'None'];
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
In [35]:
## B:
articles_by_pop[prediction == 'B' & pop_class <= 'D',
dissonance := 'High negative'];
articles_by_pop[prediction == 'B' & pop_class == 'C',
dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'B' & pop_class == 'B',
dissonance := 'None'];
articles_by_pop[prediction == 'B' & pop_class == 'A',
dissonance := 'Moderate positive'];
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
In [36]:
## C:
articles_by_pop[prediction == 'C' & pop_class == 'E',
dissonance := 'High negative'];
articles_by_pop[prediction == 'C' & pop_class == 'D',
dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'C' & pop_class == 'C',
dissonance := 'None'];
articles_by_pop[prediction == 'C' & pop_class == 'B',
dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'C' & pop_class == 'A',
dissonance := 'High positive'];
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 NA
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 NA
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
In [37]:
## D
articles_by_pop[prediction == 'D' & pop_class == 'E',
dissonance := 'Moderate negative'];
articles_by_pop[prediction == 'D' & pop_class == 'D',
dissonance := 'None'];
articles_by_pop[prediction == 'D' & pop_class == 'C',
dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'D' & pop_class >= 'B',
dissonance := 'High positive'];
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 NA
Q212 D 121262940 D 4364 NA
Q90 D 132079969 D 4365 NA
Q739 D 149356287 D 4366 NA
Q150 D 154676097 D 4367 NA
Q16 D 161566243 D 4368 NA
Q60 D 161626758 D 4369 NA
Q188 D 173805898 D 4370 NA
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 NA
Q1321 D 252584097 D 4374 NA
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 NA
Q96 D 307141203 D 4377 NA
Q38 D 363184639 D 4378 NA
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 NA
Q29 D 500913282 D 4381 NA
Q145 D 595132607 D 4382 NA
Q159 D 653480145 D 4383 NA
Q142 D 679432828 D 4384 NA
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 NA
Q565 D 2052996261 D 4387 NA
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 NA
Q30 D 2277746226 D 4391 NA
Q5 D 5668008721 D 4392 NA
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 None
Q212 D 121262940 D 4364 None
Q90 D 132079969 D 4365 None
Q739 D 149356287 D 4366 None
Q150 D 154676097 D 4367 None
Q16 D 161566243 D 4368 None
Q60 D 161626758 D 4369 None
Q188 D 173805898 D 4370 None
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 None
Q1321 D 252584097 D 4374 None
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 None
Q96 D 307141203 D 4377 None
Q38 D 363184639 D 4378 None
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 None
Q29 D 500913282 D 4381 None
Q145 D 595132607 D 4382 None
Q159 D 653480145 D 4383 None
Q142 D 679432828 D 4384 None
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 None
Q565 D 2052996261 D 4387 None
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 None
Q30 D 2277746226 D 4391 None
Q5 D 5668008721 D 4392 None
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 None
Q212 D 121262940 D 4364 None
Q90 D 132079969 D 4365 None
Q739 D 149356287 D 4366 None
Q150 D 154676097 D 4367 None
Q16 D 161566243 D 4368 None
Q60 D 161626758 D 4369 None
Q188 D 173805898 D 4370 None
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 None
Q1321 D 252584097 D 4374 None
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 None
Q96 D 307141203 D 4377 None
Q38 D 363184639 D 4378 None
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 None
Q29 D 500913282 D 4381 None
Q145 D 595132607 D 4382 None
Q159 D 653480145 D 4383 None
Q142 D 679432828 D 4384 None
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 None
Q565 D 2052996261 D 4387 None
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 None
Q30 D 2277746226 D 4391 None
Q5 D 5668008721 D 4392 None
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 NA
Q3265 E 46 E 2 NA
Q4483 E 52 E 3 NA
Q3152 E 71 E 4 NA
Q3517 E 74 E 5 NA
Q4017 E 90 E 6 NA
Q3649 E 112 E 7 NA
Q3254 E 120 E 8 NA
Q3527 E 121 E 9 NA
Q3522 E 128 E 10 NA
Q3186 E 138 E 11 NA
Q3634 E 161 E 12 NA
Q4486 E 163 E 13 NA
Q3652 E 176 E 14 NA
Q4475 E 179 E 15 NA
Q3633 E 202 E 16 NA
Q3287 E 221 E 17 NA
Q3781 E 237 E 18 NA
Q3654 E 248 E 19 NA
Q3641 E 286 E 20 NA
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 NA
Q3293 E 304 E 23 NA
Q3084 E 307 E 24 NA
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 NA
Q3307 E 330 E 27 NA
Q4003 E 337 E 28 NA
Q4170 E 338 E 29 NA
Q3979 E 341 E 30 NA
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 None
Q212 D 121262940 D 4364 None
Q90 D 132079969 D 4365 None
Q739 D 149356287 D 4366 None
Q150 D 154676097 D 4367 None
Q16 D 161566243 D 4368 None
Q60 D 161626758 D 4369 None
Q188 D 173805898 D 4370 None
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 None
Q1321 D 252584097 D 4374 None
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 None
Q96 D 307141203 D 4377 None
Q38 D 363184639 D 4378 None
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 None
Q29 D 500913282 D 4381 None
Q145 D 595132607 D 4382 None
Q159 D 653480145 D 4383 None
Q142 D 679432828 D 4384 None
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 None
Q565 D 2052996261 D 4387 None
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 None
Q30 D 2277746226 D 4391 None
Q5 D 5668008721 D 4392 None
In [38]:
## E
articles_by_pop[prediction == 'E' & pop_class == 'E',
dissonance := 'None'];
articles_by_pop[prediction == 'E' & pop_class == 'D',
dissonance := 'Moderate positive'];
articles_by_pop[prediction == 'E' & pop_class >= 'C',
dissonance := 'High positive'];
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 None
Q3265 E 46 E 2 None
Q4483 E 52 E 3 None
Q3152 E 71 E 4 None
Q3517 E 74 E 5 None
Q4017 E 90 E 6 None
Q3649 E 112 E 7 None
Q3254 E 120 E 8 None
Q3527 E 121 E 9 None
Q3522 E 128 E 10 None
Q3186 E 138 E 11 None
Q3634 E 161 E 12 None
Q4486 E 163 E 13 None
Q3652 E 176 E 14 None
Q4475 E 179 E 15 None
Q3633 E 202 E 16 None
Q3287 E 221 E 17 None
Q3781 E 237 E 18 None
Q3654 E 248 E 19 None
Q3641 E 286 E 20 None
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 None
Q3293 E 304 E 23 None
Q3084 E 307 E 24 None
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 None
Q3307 E 330 E 27 None
Q4003 E 337 E 28 None
Q4170 E 338 E 29 None
Q3979 E 341 E 30 None
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 None
Q212 D 121262940 D 4364 None
Q90 D 132079969 D 4365 None
Q739 D 149356287 D 4366 None
Q150 D 154676097 D 4367 None
Q16 D 161566243 D 4368 None
Q60 D 161626758 D 4369 None
Q188 D 173805898 D 4370 None
Q2184 E 226793302 D 4371 NA
Q515 E 233190433 D 4372 NA
Q649 D 243081566 D 4373 None
Q1321 D 252584097 D 4374 None
Q2736 E 270665514 D 4375 NA
Q414 D 282547432 D 4376 None
Q96 D 307141203 D 4377 None
Q38 D 363184639 D 4378 None
Q328 E 378666601 D 4379 NA
Q183 D 440162350 D 4380 None
Q29 D 500913282 D 4381 None
Q145 D 595132607 D 4382 None
Q159 D 653480145 D 4383 None
Q142 D 679432828 D 4384 None
Q1860 E 1419981073 D 4385 NA
Q356 D 2047631596 D 4386 None
Q565 D 2052996261 D 4387 None
Q918 E 2063217449 D 4388 NA
Q866 E 2079749157 D 4389 NA
Q355 D 2093900731 D 4390 None
Q30 D 2277746226 D 4391 None
Q5 D 5668008721 D 4392 None
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 None
Q3265 E 46 E 2 None
Q4483 E 52 E 3 None
Q3152 E 71 E 4 None
Q3517 E 74 E 5 None
Q4017 E 90 E 6 None
Q3649 E 112 E 7 None
Q3254 E 120 E 8 None
Q3527 E 121 E 9 None
Q3522 E 128 E 10 None
Q3186 E 138 E 11 None
Q3634 E 161 E 12 None
Q4486 E 163 E 13 None
Q3652 E 176 E 14 None
Q4475 E 179 E 15 None
Q3633 E 202 E 16 None
Q3287 E 221 E 17 None
Q3781 E 237 E 18 None
Q3654 E 248 E 19 None
Q3641 E 286 E 20 None
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 None
Q3293 E 304 E 23 None
Q3084 E 307 E 24 None
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 None
Q3307 E 330 E 27 None
Q4003 E 337 E 28 None
Q4170 E 338 E 29 None
Q3979 E 341 E 30 None
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 None
Q212 D 121262940 D 4364 None
Q90 D 132079969 D 4365 None
Q739 D 149356287 D 4366 None
Q150 D 154676097 D 4367 None
Q16 D 161566243 D 4368 None
Q60 D 161626758 D 4369 None
Q188 D 173805898 D 4370 None
Q2184 E 226793302 D 4371 Moderate positive
Q515 E 233190433 D 4372 Moderate positive
Q649 D 243081566 D 4373 None
Q1321 D 252584097 D 4374 None
Q2736 E 270665514 D 4375 Moderate positive
Q414 D 282547432 D 4376 None
Q96 D 307141203 D 4377 None
Q38 D 363184639 D 4378 None
Q328 E 378666601 D 4379 Moderate positive
Q183 D 440162350 D 4380 None
Q29 D 500913282 D 4381 None
Q145 D 595132607 D 4382 None
Q159 D 653480145 D 4383 None
Q142 D 679432828 D 4384 None
Q1860 E 1419981073 D 4385 Moderate positive
Q356 D 2047631596 D 4386 None
Q565 D 2052996261 D 4387 None
Q918 E 2063217449 D 4388 Moderate positive
Q866 E 2079749157 D 4389 Moderate positive
Q355 D 2093900731 D 4390 None
Q30 D 2277746226 D 4391 None
Q5 D 5668008721 D 4392 None
entity_id prediction page_views pop_class seqNum dissonance
Q3086 E 23 E 1 None
Q3265 E 46 E 2 None
Q4483 E 52 E 3 None
Q3152 E 71 E 4 None
Q3517 E 74 E 5 None
Q4017 E 90 E 6 None
Q3649 E 112 E 7 None
Q3254 E 120 E 8 None
Q3527 E 121 E 9 None
Q3522 E 128 E 10 None
Q3186 E 138 E 11 None
Q3634 E 161 E 12 None
Q4486 E 163 E 13 None
Q3652 E 176 E 14 None
Q4475 E 179 E 15 None
Q3633 E 202 E 16 None
Q3287 E 221 E 17 None
Q3781 E 237 E 18 None
Q3654 E 248 E 19 None
Q3641 E 286 E 20 None
Q2711 D 287 E 21 Moderate negative
Q4281 E 298 E 22 None
Q3293 E 304 E 23 None
Q3084 E 307 E 24 None
Q293 D 329 E 25 Moderate negative
Q2631 E 330 E 26 None
Q3307 E 330 E 27 None
Q4003 E 337 E 28 None
Q4170 E 338 E 29 None
Q3979 E 341 E 30 None
⋮ ⋮ ⋮ ⋮ ⋮ ⋮
Q408 D 108249066 D 4363 None
Q212 D 121262940 D 4364 None
Q90 D 132079969 D 4365 None
Q739 D 149356287 D 4366 None
Q150 D 154676097 D 4367 None
Q16 D 161566243 D 4368 None
Q60 D 161626758 D 4369 None
Q188 D 173805898 D 4370 None
Q2184 E 226793302 D 4371 Moderate positive
Q515 E 233190433 D 4372 Moderate positive
Q649 D 243081566 D 4373 None
Q1321 D 252584097 D 4374 None
Q2736 E 270665514 D 4375 Moderate positive
Q414 D 282547432 D 4376 None
Q96 D 307141203 D 4377 None
Q38 D 363184639 D 4378 None
Q328 E 378666601 D 4379 Moderate positive
Q183 D 440162350 D 4380 None
Q29 D 500913282 D 4381 None
Q145 D 595132607 D 4382 None
Q159 D 653480145 D 4383 None
Q142 D 679432828 D 4384 None
Q1860 E 1419981073 D 4385 Moderate positive
Q356 D 2047631596 D 4386 None
Q565 D 2052996261 D 4387 None
Q918 E 2063217449 D 4388 Moderate positive
Q866 E 2079749157 D 4389 Moderate positive
Q355 D 2093900731 D 4390 None
Q30 D 2277746226 D 4391 None
Q5 D 5668008721 D 4392 None
In [39]:
## Build a matrix where columns are the metric and rows are classes
create_alt_diss_matrix = function(articledata, metric, classes) {
d_mtrx = matrix(0, nrow=length(classes), ncol=length(metric));
rownames(d_mtrx) = classes;
colnames(d_mtrx) = metric;
## NOTE: R matrix values are [row,col] dimensions
for(real_rating in classes) {
for(diss_rating in metric) {
d_mtrx[real_rating, diss_rating] = length(articledata[prediction == real_rating & dissonance == diss_rating]$entity_id);
}
}
d_mtrx;
}
alternative_dissonance_matrix.1 = create_alt_diss_matrix(articles_by_pop,
dissonance_metric, assessment_classes);
In [40]:
## Normalise by row
round(100*prop.table(alternative_dissonance_matrix.1, 1), 1);
High negative Moderate negative None Moderate positive High positive
E 0 0.0 83.8 16.2 0
D 0 46.6 53.4 0.0 0
C NaN NaN NaN NaN NaN
B NaN NaN NaN NaN NaN
A NaN NaN NaN NaN NaN
In [41]:
## Number of dissonant views per assessment class and amount of dissonance
articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(prediction, dissonance)];
prediction dissonance dissonant_views
E None 1149832465
D Moderate negative 398024248
E Moderate positive 10903782585
D None 27269934694
In [42]:
## Calculations of total number of dissonant views per dissonance
articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)];
dissonance dissonant_views
None 28419767159
Moderate negative 398024248
Moderate positive 10903782585
In [43]:
articles_by_pop[,sum(as.numeric(page_views))];
39721573992
In [44]:
## Proportions
100*65938379920/545180810059;
100*125047198/545180810059;
100*6713682043/545180810059;
100*120523625541/545180810059;
100*351880075357/545180810059;
12.0947727255594
0.0229368304409811
1.23145971375505
22.107092420945
64.5437383092995
In [45]:
# 87% of views are high positive
In [46]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][1][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])
dissonant_views
71.54743
In [47]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][2][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])
dissonant_views
1.002035
In [48]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][3][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])
dissonant_views
27.45053
In [49]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][4][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])
dissonant_views
NA
In [50]:
100*(articles_by_pop[, list(dissonant_views=sum(page_views)), by=list(dissonance)][5][,c('dissonant_views')]/articles_by_pop[,sum(as.numeric(page_views))])
dissonant_views
NA
Content source: hall1467/wikidata_usage_tracking
Similar notebooks: