In [11]:
library(dplyr)

In [12]:
# Load the tab-separated export of entity views, aggregated revisions and
# quality-scoring predictions. The file is read without a header row, so the
# columns receive R's default names until they are renamed.
quality_prediction_and_page_views <- read.table(
  file = "../results/sql_queries/entity_views_and_aggregated_revisions/entity_views_and_aggregated_revisions_and_quality_scoring_prediction_converted_20121101.tsv",
  header = FALSE,
  sep = "\t"
)

In [13]:
# Give the five columns descriptive names.
names(quality_prediction_and_page_views) <- c(
  "entity_id",
  "number_of_revisions",
  "page_views",
  "prediction",
  "ordinal_score"
)

In [14]:
# Preview the first six rows of the table.
head(quality_prediction_and_page_views, n = 6L)


entity_idnumber_of_revisionspage_viewspredictionordinal_score
Q1 1118 5925113D 2
Q100 491 17385277D 2
Q1000 596 4903348D 2
Q1001 973 16341176E 1
Q1002 143 160651E 1
Q1003 128 820789E 1

In [15]:
# Spearman rank correlation between page views and the ordinal quality score.
correlation_output <- with(
  quality_prediction_and_page_views,
  cor(page_views, ordinal_score, method = "spearman")
)


0.40610963522853

In [29]:
# Display the stored Spearman correlation coefficient.
correlation_output


0.40610963522853

In [16]:
# Fit a simple linear regression of page views on the ordinal quality score.
# The formula names the columns and the data frame is supplied via `data =`,
# so the coefficient is labelled `ordinal_score` and
# predict(model, newdata = ...) evaluates against the new data instead of
# silently reusing the original vectors.
quality_prediction_and_page_views_model <- lm(
  page_views ~ ordinal_score,
  data = quality_prediction_and_page_views
)

In [17]:
# Report the residual quantiles, coefficient table and fit statistics.
summary(object = quality_prediction_and_page_views_model)


Call:
lm(formula = quality_prediction_and_page_views$page_views ~ quality_prediction_and_page_views$ordinal_score)

Residuals:
       Min         1Q     Median         3Q        Max 
 -24419800   -3697762   -3591400   -2879327 5643588634 

Coefficients:
                                                 Estimate Std. Error t value
(Intercept)                                     -17022962    5500204  -3.095
quality_prediction_and_page_views$ordinal_score  20721525    4129651   5.018
                                                Pr(>|t|)    
(Intercept)                                      0.00198 ** 
quality_prediction_and_page_views$ordinal_score 5.43e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 119700000 on 4390 degrees of freedom
Multiple R-squared:  0.005703,	Adjusted R-squared:  0.005476 
F-statistic: 25.18 on 1 and 4390 DF,  p-value: 5.435e-07

Class A Items with the Fewest Page Views


In [18]:
# Keep only the items whose predicted quality class is "A".
class_a_quality_prediction_and_page_views <-
  quality_prediction_and_page_views %>%
  dplyr::filter(prediction == "A")

In [19]:
# Order the class "A" items from fewest to most page views.
sorted_ascend_class_a_quality_prediction_and_page_views <-
  class_a_quality_prediction_and_page_views %>%
  dplyr::arrange(page_views)

In [20]:
# Show up to ten class "A" items with the fewest page views.
head(sorted_ascend_class_a_quality_prediction_and_page_views, n = 10L)


entity_idnumber_of_revisionspage_viewspredictionordinal_score

Class E Items with the Most Page Views


In [ ]:


In [21]:
# Keep only the items whose predicted quality class is "E".
class_e_quality_prediction_and_page_views <-
  quality_prediction_and_page_views %>%
  dplyr::filter(prediction == "E")

In [22]:
# Order the class "E" items from most to fewest page views.
sorted_desc_class_e_quality_prediction_and_page_views <-
  class_e_quality_prediction_and_page_views %>%
  dplyr::arrange(desc(page_views))

In [23]:
# Show up to ten class "E" items with the most page views.
head(sorted_desc_class_e_quality_prediction_and_page_views, n = 10L)


entity_idnumber_of_revisionspage_viewspredictionordinal_score
Q866 621 2079749157E 1
Q918 438 2063217449E 1
Q1860 1050 1419981073E 1
Q328 643 378666601E 1
Q2736 702 270665514E 1
Q515 617 233190433E 1
Q2184 315 226793302E 1
Q432 562 67870906E 1
Q2807 881 64656434E 1
Q1065 882 62315605E 1

In [24]:
# Count the items predicted as class "E".
class_e_quality_prediction_and_page_views %>%
  nrow()


3259

In [25]:
# Count the items predicted as class "D".
quality_prediction_and_page_views %>%
  dplyr::filter(prediction == "D") %>%
  nrow()


1133

In [26]:
# Count the items predicted as class "C".
quality_prediction_and_page_views %>%
  dplyr::filter(prediction == "C") %>%
  nrow()


0

In [27]:
# Count the items predicted as class "B".
quality_prediction_and_page_views %>%
  dplyr::filter(prediction == "B") %>%
  nrow()


0

In [28]:
# Count the items predicted as class "A".
quality_prediction_and_page_views %>%
  dplyr::filter(prediction == "A") %>%
  nrow()


0

In [ ]: