Diferentemente de um extrator de características, neste arquivo focamos apenas em entender se contar somente com alguns lags anteriores é suficiente para realizar uma classificação de qualidade.
Basicamente, pegamos apenas os últimos momentos capturados pelo percurso e transformamos esses momentos finais em atributos de uma instância de um possível modelo. Essa abordagem simples é muito utilizada em regressão para a previsão dos próximos passos ou valores.
Com essa abordagem podemos simplificar e deixar por conta dos classificadores extrair ou linearmente separar as classes binárias, como é o nosso caso.
Abaixo consideramos apenas os últimos 2 lags e deixamos que o classificador, um SVM, separe cada instância.
In [12]:
library(caret)
library(kernlab)
library(pROC)
# Load the two lookup tables used by every cell below:
#   groups    - read from MovementAAL_DatasetGroup.csv; later cells filter it
#               by `dataset_ID` and iterate over `X.sequence_ID`.
#   targetAll - read from MovementAAL_target.csv; later cells take the class
#               label from its second column.
groups <- read.csv(
  file = "./MovementAAL/groups/MovementAAL_DatasetGroup.csv",
  header = TRUE,
  sep = ","
)
targetAll <- read.csv(
  file = "./MovementAAL/dataset/MovementAAL_target.csv",
  header = TRUE,
  sep = ","
)
In [2]:
# Group 1
# Build the modelling table for dataset group 1: one row per sequence, whose
# features are the values of the last (minStepsBack + 1) recorded rows of that
# sequence, plus a two-level factor `target`.
groups1 <- groups[groups$dataset_ID == 1, ]
minStepsBack <- 1

# Read every RSS sequence file that belongs to this group.
allDataGroup1 <- lapply(groups1$X.sequence_ID, function(id) {
  read.csv(
    file = paste0("./MovementAAL/dataset/MovementAAL_RSS_", id, ".csv"),
    header = TRUE,
    sep = ","
  )
})

# Look the labels up by sequence ID instead of using the ID as a row position,
# so the result stays correct even if the target file is not ordered 1..N.
targetRows <- match(groups1$X.sequence_ID, targetAll$X.sequence_ID)
stopifnot(!anyNA(targetRows))
allDataGroup1Target <- as.list(targetAll[[2]][targetRows])

# Flatten the trailing rows of each sequence into one named feature vector and
# stack the vectors in a single rbind call (no incremental growth).
wtData <- do.call(rbind, lapply(allDataGroup1, function(sequence) {
  nRows <- nrow(sequence)
  # A shorter sequence would yield fewer features and a ragged matrix.
  stopifnot(nRows > minStepsBack)
  unlist(sequence[(nRows - minStepsBack):nRows, ])
}))
wtData <- as.data.frame(wtData)

# factor() assigns labels in sorted order of the raw values, so the smaller
# raw label becomes "No" and the larger one "Yes".
target <- factor(unlist(allDataGroup1Target), labels = c("No", "Yes"))
frameDataFinal <- data.frame(cbind(target, wtData))
head(frameDataFinal)
## use only lagged data
Out[2]:
In [3]:
# Repeated hold-out evaluation for group 1: ten 70/30 partitions of
# frameDataFinal, a linear SVM fitted on each training part, accuracy measured
# on the held-out part. The mean and standard deviation are displayed at the end.
inTraining <- createDataPartition(frameDataFinal$target, p = .7, list = TRUE, times = 10)

# Loop-invariant: a single fit without resampling, with class probabilities
# enabled.
fitControl <- trainControl(method = "none", classProbs = TRUE)

# Preallocate one accuracy slot per partition.
allAccuracyGroup1 <- numeric(length(inTraining))
for (i in seq_along(inTraining)) {
  training <- frameDataFinal[inTraining[[i]], ]
  testing <- frameDataFinal[-inTraining[[i]], ]
  # NOTE(review): `family = binomial` is forwarded through `...`; it looks like
  # a leftover from a glm-style call - confirm whether it can be dropped.
  svmLinearFit <- train(target ~ ., data = training,
                        method = "svmLinear",
                        trControl = fitControl,
                        family = binomial)
  preds <- predict(svmLinearFit, newdata = testing)
  # `testing$target` holds exactly the held-out labels for this partition.
  confusion <- confusionMatrix(preds, testing$target)
  # Pick accuracy by name rather than by position in the result object.
  allAccuracyGroup1[i] <- confusion$overall[["Accuracy"]]
}
mean(allAccuracyGroup1)
sd(allAccuracyGroup1)
Out[3]:
Out[3]:
In [4]:
# Group 2
# Build the modelling table for dataset group 2: one row per sequence, whose
# features are the values of the last (minStepsBack + 1) recorded rows of that
# sequence, plus a two-level factor `target`.
groups2 <- groups[groups$dataset_ID == 2, ]
minStepsBack <- 1

# Read every RSS sequence file that belongs to this group.
allDataGroup2 <- lapply(groups2$X.sequence_ID, function(id) {
  read.csv(
    file = paste0("./MovementAAL/dataset/MovementAAL_RSS_", id, ".csv"),
    header = TRUE,
    sep = ","
  )
})

# Look the labels up by sequence ID instead of using the ID as a row position,
# so the result stays correct even if the target file is not ordered 1..N.
targetRows <- match(groups2$X.sequence_ID, targetAll$X.sequence_ID)
stopifnot(!anyNA(targetRows))
allDataGroup2Target <- as.list(targetAll[[2]][targetRows])

# Flatten the trailing rows of each sequence into one named feature vector and
# stack the vectors in a single rbind call (no incremental growth).
wtData <- do.call(rbind, lapply(allDataGroup2, function(sequence) {
  nRows <- nrow(sequence)
  # A shorter sequence would yield fewer features and a ragged matrix.
  stopifnot(nRows > minStepsBack)
  unlist(sequence[(nRows - minStepsBack):nRows, ])
}))
wtData <- as.data.frame(wtData)

# factor() assigns labels in sorted order of the raw values, so the smaller
# raw label becomes "No" and the larger one "Yes".
target <- factor(unlist(allDataGroup2Target), labels = c("No", "Yes"))
frameDataFinal <- data.frame(cbind(target, wtData))
head(frameDataFinal)
## use only lagged data
Out[4]:
In [5]:
# Repeated hold-out evaluation for group 2: ten 70/30 partitions of
# frameDataFinal, a linear SVM fitted on each training part, accuracy measured
# on the held-out part. The mean and standard deviation are displayed at the end.
inTraining <- createDataPartition(frameDataFinal$target, p = .7, list = TRUE, times = 10)

# Loop-invariant: a single fit without resampling, with class probabilities
# enabled.
fitControl <- trainControl(method = "none", classProbs = TRUE)

# Preallocate one accuracy slot per partition.
allAccuracyGroup2 <- numeric(length(inTraining))
for (i in seq_along(inTraining)) {
  training <- frameDataFinal[inTraining[[i]], ]
  testing <- frameDataFinal[-inTraining[[i]], ]
  # NOTE(review): `family = binomial` is forwarded through `...`; it looks like
  # a leftover from a glm-style call - confirm whether it can be dropped.
  svmLinearFit <- train(target ~ ., data = training,
                        method = "svmLinear",
                        trControl = fitControl,
                        family = binomial)
  preds <- predict(svmLinearFit, newdata = testing)
  # `testing$target` holds exactly the held-out labels for this partition.
  confusion <- confusionMatrix(preds, testing$target)
  # Pick accuracy by name rather than by position in the result object.
  allAccuracyGroup2[i] <- confusion$overall[["Accuracy"]]
}
mean(allAccuracyGroup2)
sd(allAccuracyGroup2)
Out[5]:
Out[5]:
In [6]:
# Group 3
# Build the modelling table for dataset group 3: one row per sequence, whose
# features are the values of the last (minStepsBack + 1) recorded rows of that
# sequence, plus a two-level factor `target`.
groups3 <- groups[groups$dataset_ID == 3, ]
minStepsBack <- 1

# Read every RSS sequence file that belongs to this group.
allDataGroup3 <- lapply(groups3$X.sequence_ID, function(id) {
  read.csv(
    file = paste0("./MovementAAL/dataset/MovementAAL_RSS_", id, ".csv"),
    header = TRUE,
    sep = ","
  )
})

# Look the labels up by sequence ID instead of using the ID as a row position,
# so the result stays correct even if the target file is not ordered 1..N.
targetRows <- match(groups3$X.sequence_ID, targetAll$X.sequence_ID)
stopifnot(!anyNA(targetRows))
allDataGroup3Target <- as.list(targetAll[[2]][targetRows])

# Flatten the trailing rows of each sequence into one named feature vector and
# stack the vectors in a single rbind call (no incremental growth).
wtData <- do.call(rbind, lapply(allDataGroup3, function(sequence) {
  nRows <- nrow(sequence)
  # A shorter sequence would yield fewer features and a ragged matrix.
  stopifnot(nRows > minStepsBack)
  unlist(sequence[(nRows - minStepsBack):nRows, ])
}))
wtData <- as.data.frame(wtData)

# factor() assigns labels in sorted order of the raw values, so the smaller
# raw label becomes "No" and the larger one "Yes".
target <- factor(unlist(allDataGroup3Target), labels = c("No", "Yes"))
frameDataFinal <- data.frame(cbind(target, wtData))
head(frameDataFinal)
## use only lagged data
Out[6]:
In [7]:
# Repeated hold-out evaluation for group 3: ten 70/30 partitions of
# frameDataFinal, a linear SVM fitted on each training part, accuracy measured
# on the held-out part. The mean and standard deviation are displayed at the end.
inTraining <- createDataPartition(frameDataFinal$target, p = .7, list = TRUE, times = 10)

# Loop-invariant: a single fit without resampling, with class probabilities
# enabled.
fitControl <- trainControl(method = "none", classProbs = TRUE)

# Preallocate one accuracy slot per partition.
allAccuracyGroup3 <- numeric(length(inTraining))
for (i in seq_along(inTraining)) {
  training <- frameDataFinal[inTraining[[i]], ]
  testing <- frameDataFinal[-inTraining[[i]], ]
  # NOTE(review): `family = binomial` is forwarded through `...`; it looks like
  # a leftover from a glm-style call - confirm whether it can be dropped.
  svmLinearFit <- train(target ~ ., data = training,
                        method = "svmLinear",
                        trControl = fitControl,
                        family = binomial)
  preds <- predict(svmLinearFit, newdata = testing)
  # `testing$target` holds exactly the held-out labels for this partition.
  confusion <- confusionMatrix(preds, testing$target)
  # Pick accuracy by name rather than by position in the result object.
  allAccuracyGroup3[i] <- confusion$overall[["Accuracy"]]
}
mean(allAccuracyGroup3)
sd(allAccuracyGroup3)
Out[7]:
Out[7]:
In [8]:
# All Groups
# Build the modelling table over every sequence listed in the target file: one
# row per sequence, whose features are the values of the last
# (minStepsBack + 1) recorded rows of that sequence, plus a two-level factor
# `target`.
targetAll <- read.csv(
  file = "./MovementAAL/dataset/MovementAAL_target.csv",
  header = TRUE,
  sep = ","
)
minStepsBack <- 1

# Read the RSS sequence file of every sequence ID, in target-file row order.
allData <- lapply(targetAll$X.sequence_ID, function(id) {
  read.csv(
    file = paste0("./MovementAAL/dataset/MovementAAL_RSS_", id, ".csv"),
    header = TRUE,
    sep = ","
  )
})

# The sequences were read in the row order of targetAll, so the labels are
# taken in that same row order rather than by using the ID as a row position.
allDataTarget <- as.list(targetAll[[2]])

# Flatten the trailing rows of each sequence into one named feature vector and
# stack the vectors in a single rbind call (no incremental growth).
wtData <- do.call(rbind, lapply(allData, function(sequence) {
  nRows <- nrow(sequence)
  # A shorter sequence would yield fewer features and a ragged matrix.
  stopifnot(nRows > minStepsBack)
  unlist(sequence[(nRows - minStepsBack):nRows, ])
}))
wtData <- as.data.frame(wtData)

# factor() assigns labels in sorted order of the raw values, so the smaller
# raw label becomes "No" and the larger one "Yes".
target <- factor(unlist(allDataTarget), labels = c("No", "Yes"))
frameDataFinal <- data.frame(cbind(target, wtData))
head(frameDataFinal)
Out[8]:
In [9]:
# Repeated hold-out evaluation over all groups: ten 70/30 partitions of
# frameDataFinal, a linear SVM fitted on each training part, accuracy measured
# on the held-out part. The mean and standard deviation are displayed at the end.
inTraining <- createDataPartition(frameDataFinal$target, p = .7, list = TRUE, times = 10)

# Loop-invariant: a single fit without resampling, with class probabilities
# enabled.
fitControl <- trainControl(method = "none", classProbs = TRUE)

# Preallocate one accuracy slot per partition.
allAccuracy <- numeric(length(inTraining))
for (i in seq_along(inTraining)) {
  training <- frameDataFinal[inTraining[[i]], ]
  testing <- frameDataFinal[-inTraining[[i]], ]
  # NOTE(review): `family = binomial` is forwarded through `...`; it looks like
  # a leftover from a glm-style call - confirm whether it can be dropped.
  svmLinearFit <- train(target ~ ., data = training,
                        method = "svmLinear",
                        trControl = fitControl,
                        family = binomial)
  preds <- predict(svmLinearFit, newdata = testing)
  # `testing$target` holds exactly the held-out labels for this partition.
  confusion <- confusionMatrix(preds, testing$target)
  # Pick accuracy by name rather than by position in the result object.
  allAccuracy[i] <- confusion$overall[["Accuracy"]]
}
mean(allAccuracy)
sd(allAccuracy)
Out[9]:
Out[9]:
In [10]:
# Confusion matrix for the all-groups table, from one 70/30 partition.
# `inTraining`, `testing` and `svmLinearFit` are left in the workspace because
# the ROC cell that follows reads them.
inTraining <- createDataPartition(
  frameDataFinal$target,
  p = .7,
  list = TRUE,
  times = 1
)
trainRows <- inTraining[[1]]
training <- frameDataFinal[trainRows, ]
testing <- frameDataFinal[-trainRows, ]

# One fit without resampling, with class probabilities enabled.
fitControl <- trainControl(method = "none", classProbs = TRUE)
svmLinearFit <- train(
  target ~ .,
  data = training,
  method = "svmLinear",
  trControl = fitControl,
  family = binomial
)

# Compare predicted classes on the held-out rows with their true labels.
preds <- predict(svmLinearFit, newdata = testing)
matrix <- confusionMatrix(preds, testing$target)
matrix
Out[10]:
In [11]:
# ROC curve and AUC
# Uses `svmLinearFit` and `testing` left in the workspace by the previous cell.
predsProb <- predict(svmLinearFit, newdata = testing, type = "prob")

# Select the positive-class probability by name instead of by column position.
outcome <- predsProb[["Yes"]]

# True labels of the same held-out rows that were scored above.
classes <- testing$target

# Fix the direction (controls "No" score lower than cases "Yes") so a
# worse-than-chance model is not silently flipped by pROC's auto-detection.
rocobj <- roc(classes, outcome, levels = c("No", "Yes"), direction = "<")
plot(rocobj)

# The header announces an AUC, so report it explicitly.
auc(rocobj)
Out[11]: