Probability of a Defect after Defect



In [1]:

    
library(data.table)

data <- fread("../data/train_numeric.csv",select = c("Id", "Response"));
data <- data[,c('Id', 'Response')];
data$nextId = data$Id + 1;
IdList <- data[data$nextId %in% data$Id];









    



Read 1183747 rows and 2 (of 970) columns from 1.993 GB file in 00:00:30



In [2]:

    
data[Id %in% IdList$Id] -> current;
data[Id %in% IdList$nextId] -> nextId;

prob = table(current$Response,nextId$Response);

prob1 = prob[2,2]/(prob[2,1]+prob[2,2]);
prob2 = prob[2,1]/(prob[1,1]+prob[2,1]);

print("Percentage of defect after defect");
print(prob1);

print("Percentage of defect after no defect");
print(prob2);









    



[1] "Percentage of defect after defect"
[1] 0.1008182
[1] "Percentage of defect after no defect"
[1] 0.005226673