# In [1]:
# Load the hourly conversion-rate data and prepare it for anomaly detection.
library('tidyverse')
cr_hour <- read_csv('crh.csv')
# Keep only August, parse the timestamp into POSIXct, and scale the
# conversion rate (x 100000) so the expected-value column produced by the
# detector is easier to read later on.
cr_hour <- cr_hour %>%
  mutate(
    ds = as.POSIXct(timestamp),
    cr_scaled = cr * 100000
  ) %>%
  filter(month == 8) %>%
  select(ds, cr_scaled)
head(cr_hour)
# Usage reference for AnomalyDetectionTs() (copied from the package help page).
# NOTE(review): this was pasted as bare code — `x` is undefined here and the
# stray `Arguments` token would be a parse/eval error, so it is kept as a
# comment for reference instead of executable code.
# AnomalyDetectionTs(x, max_anoms = 0.1, direction = "pos", alpha = 0.05,
#   only_last = NULL, threshold = "None", e_value = FALSE,
#   longterm = FALSE, piecewise_median_period_weeks = 2, plot = FALSE,
#   y_log = FALSE, xlabel = "", ylabel = "count", title = NULL,
#   verbose = FALSE)
# In [2]:
# Run Twitter's seasonal-hybrid-ESD anomaly detection on the hourly series.
library(AnomalyDetection)
# Fixed: use `<-` for assignment (R convention) instead of `=`.
ad <- AnomalyDetectionTs(
  cr_hour,
  max_anoms = 0.02,    # allow at most 2% of points to be flagged
  direction = 'both',  # flag both positive and negative deviations
  plot = TRUE,
  e_value = TRUE,      # include the expected value for each anomaly
  xlabel = 'day',
  ylabel = 'Conversion Rate'
)
# In [3]:
# Inspect the detected anomalies (timestamps plus expected values).
ad[["anoms"]]
# In [4]:
# Shrink the notebook plot dimensions, then render the anomaly plot.
options(repr.plot.width = 7, repr.plot.height = 4)
ad[["plot"]]
# In [5]:
# Restrict detection to the most recent day only. There won't always be an
# anomaly in that window — in that case `anoms` comes back empty.
library(AnomalyDetection)  # already attached above; repeating is harmless
# Fixed: use `<-` for assignment (R convention) instead of `=`.
res <- AnomalyDetectionTs(
  cr_hour,
  max_anoms = 0.02,
  direction = 'both',
  plot = TRUE,
  e_value = TRUE,
  xlabel = 'day',
  ylabel = 'Conversion Rate',
  only_last = 'day'  # report anomalies from the final day only
)
res
# In [6]:
# Daily series: see whether anomalies are detected over a longer time period.
cr_day <- read_csv('cr.csv')
cr_day <- cr_day %>%
  # The `cr` column arrives as text with embedded tab characters; strip them
  # before converting to numeric, then scale as in the hourly data.
  mutate(d = as.POSIXct(d), cr = gsub('\t', '', cr)) %>%
  mutate(cr_scaled = as.numeric(cr) * 100000) %>%
  select(d, cr_scaled)
library(AnomalyDetection)  # already attached above; repeating is harmless
# Fixed: use `<-` for assignment (R convention) instead of `=`.
res <- AnomalyDetectionTs(
  cr_day,
  max_anoms = 0.02,
  direction = 'both',
  plot = TRUE,
  e_value = TRUE,
  xlabel = 'day',
  ylabel = 'Conversion Rate'
)
res
# In [7]:
# One-class SVM demo on the classic iris data set: treat "versicolor" as the
# known (positive) class and every other species as novel/negative.
library(e1071)
library(caret)
library(NLP)  # NOTE(review): not used below — kept to preserve the session
library(tm)   # NOTE(review): not used below — kept to preserve the session
data(iris)
# Add a TRUE/FALSE class label (stored as strings) marking versicolor rows,
# so we can later measure how well one-class predictions separate the two.
iris$SpeciesClass[iris$Species == "versicolor"] <- "TRUE"
iris$SpeciesClass[iris$Species != "versicolor"] <- "FALSE"
trainPositive <- subset(iris, SpeciesClass == "TRUE")
testnegative <- subset(iris, SpeciesClass == "FALSE")
# Fixed: seq_len() is safe even for an empty frame (1:nrow would yield 1:0).
inTrain <- createDataPartition(seq_len(nrow(trainPositive)), p = 0.6, list = FALSE)
# Train on positive rows only: the 4 measurement columns (1:4) as predictors
# and the SpeciesClass label (column 6) for later evaluation.
trainpredictors <- trainPositive[inTrain, 1:4]
trainLabels <- trainPositive[inTrain, 6]
# Held-out positives plus all negatives form the test set.
testPositive <- trainPositive[-inTrain, ]
testPosNeg <- rbind(testPositive, testnegative)
testpredictors <- testPosNeg[, 1:4]
testLabels <- testPosNeg[, 6]
# Sanity-check the frames before modelling.
print(head(iris))
print('')
print(head(trainpredictors))
# In [8]:
# Fit a one-class SVM on the positive examples only. At prediction time the
# model answers: "does this observation look like the training class?"
svm.model <- svm(
  trainpredictors,
  y = NULL,                    # unsupervised: no labels for one-classification
  type = 'one-classification',
  nu = 0.10,                   # expected proportion of outliers in the data
  scale = TRUE,
  kernel = "radial"
)
# Parameter notes:
#   nu     - expected fraction of outliers / margin errors
#   kernel - linear vs radial (RBF, non-linear)
#   gamma  - RBF width; controls the influence of individual training samples
#            and hence the smoothness of the model. Low gamma: low variance /
#            high bias; high gamma: high variance / low bias.
# Full signature for reference:
#   svm(x, y = NULL, scale = TRUE, type = NULL, kernel = "radial",
#       degree = 3, gamma = if (is.vector(x)) 1 else 1 / ncol(x),
#       coef0 = 0, cost = 1, nu = 0.5, class.weights = NULL, cachesize = 40,
#       tolerance = 0.001, epsilon = 0.1, shrinking = TRUE, cross = 0,
#       probability = FALSE, fitted = TRUE, ..., subset, na.action = na.omit)
# In [9]:
# Score both splits and cross-tabulate predictions against the true labels.
svm.predtrain <- predict(svm.model, trainpredictors)
svm.predtest <- predict(svm.model, testpredictors)
confTrain <- table(Predicted = svm.predtrain, Reference = trainLabels)
confTest <- table(Predicted = svm.predtest, Reference = testLabels)
# Accuracy / sensitivity / specificity on the held-out data, then the raw
# contingency tables for both splits.
confusionMatrix(confTest, positive = 'TRUE')
print(confTrain)
print(confTest)
# In [ ]: