Probability of a Defect after Defect


In [1]:
library(data.table)

data <- fread("../data/train_numeric.csv",select = c("Id", "Response"));
data <- data[,c('Id', 'Response')];
data$nextId = data$Id + 1;
IdList <- data[data$nextId %in% data$Id];


Read 1183747 rows and 2 (of 970) columns from 1.993 GB file in 00:00:30

In [2]:
data[Id %in% IdList$Id] -> current;
data[Id %in% IdList$nextId] -> nextId;

prob = table(current$Response,nextId$Response);

prob1 = prob[2,2]/(prob[2,1]+prob[2,2]);
prob2 = prob[2,1]/(prob[1,1]+prob[2,1]);

print("Percentage of defect after defect");
print(prob1);

print("Percentage of defect after no defect");
print(prob2);


[1] "Percentage of defect after defect"
[1] 0.1008182
[1] "Percentage of defect after no defect"
[1] 0.005226673