In [74]:
%load_ext rmagic

Mutations


In [75]:
%%R


# Folder (named after the run date) that holds the exported CSV files.
dir = "25-05-2014"

# Each mutation log is a space-separated file without a header row, so the column names
# are supplied here. stringsAsFactors is set explicitly: R >= 4.0 defaults it to FALSE,
# but the summary and plotting cells of this notebook need `input`, `type` and `class`
# to be factors (per-level counts, and `col = data$class` style colouring).
mutations.mod = read.csv(file.path(dir, "mutations", "mutations.mod.csv"),
                         col.names = c("input", "type", "new", "old", "pos", "class"),
                         header = FALSE, sep = " ", stringsAsFactors = TRUE)
# The extension and "con" logs carry one extra column, `size`.
mutations.ext = read.csv(file.path(dir, "mutations", "mutations.ext.csv"),
                         col.names = c("input", "type", "new", "old", "pos", "size", "class"),
                         header = FALSE, sep = " ", stringsAsFactors = TRUE)
mutations.con = read.csv(file.path(dir, "mutations", "mutations.con.csv"),
                         col.names = c("input", "type", "new", "old", "pos", "size", "class"),
                         header = FALSE, sep = " ", stringsAsFactors = TRUE)

1) Byte Modification


In [76]:
%%R
# Make the byte-modification records the working data set for the cells that follow.
data <- mutations.mod
# Distinct values of the `input` and `class` columns, in factor-level order;
# the plotting cells use them as legend entries.
types <- levels(factor(data[["input"]]))
classes <- levels(factor(data[["class"]]))

Summary:


In [77]:
%%R
# Per-column overview of the current working data set (printed explicitly so it shows up in the cell output).
print(x = summary(object = data))


  input       type           new              old             pos       
 arg :1025   mod:1501   Min.   :  0.00   Min.   :  0.0   Min.   : 0.00  
 file: 476              1st Qu.: 45.00   1st Qu.: 47.0   1st Qu.: 0.00  
                        Median : 65.00   Median : 82.0   Median :33.00  
                        Mean   : 91.05   Mean   :101.1   Mean   :34.19  
                        3rd Qu.:116.00   3rd Qu.:146.0   3rd Qu.:50.00  
                        Max.   :255.00   Max.   :255.0   Max.   :95.00  
 class   
 B: 421  
 R:1080  
         
         
         
         

Distribution:


In [78]:
%%R

# One histogram per numeric field of the working data set: old byte, new byte, position.
hist(x = data$old, col = "lightgreen", main = "old byte")
hist(x = data$new, col = "lightgreen", main = "new byte")
hist(x = data$pos, col = "lightgreen",
     main = "position of the modification (0%-100%)")


Correlation:

with input type


In [79]:
%%R

# Pairwise scatter plots of new byte, old byte and position, one colour per input type.
# `col = data$input` uses the factor's integer codes as colour indices.
plot(x=data$new, y=data$old, col = data$input)
# Legend for the first plot, placed at data coordinates (210, 240); factor(types) yields
# the same integer codes as the point colours, so the key matches the points.
legend(210, 240, types, pch=1, cex = 1.4, col = factor(types))
plot(x=data$new, y=data$pos, col = data$input)
plot(x=data$old, y=data$pos, col = data$input)


with class


In [80]:
%%R

# Old byte against new byte, coloured by class; the legend is attached to this first plot only.
plot(data$old, data$new, col = data$class)
legend(x = 210, y = 240, legend = classes, col = factor(classes), pch = 1, cex = 1.4)
# Position against each of the two byte values, same colouring.
plot(data$old, data$pos, col = data$class)
plot(data$new, data$pos, col = data$class)


2) Byte Extension


In [81]:
%%R
# Switch the working data set to the byte-extension records.
data <- mutations.ext
# Recompute the legend entries (distinct `input` and `class` values) for this data set.
types <- levels(factor(data[["input"]]))
classes <- levels(factor(data[["class"]]))

Summary


In [82]:
%%R
# Per-column overview of the current working data set (printed explicitly so it shows up in the cell output).
print(x = summary(object = data))


  input      type          new              old              pos       
 arg :569   ext:794   Min.   :  0.00   Min.   :  0.00   Min.   : 0.00  
 file:225             1st Qu.: 64.25   1st Qu.: 45.00   1st Qu.: 0.00  
                      Median :125.00   Median : 65.00   Median :30.00  
                      Mean   :126.63   Mean   : 62.06   Mean   :32.64  
                      3rd Qu.:188.50   3rd Qu.: 78.00   3rd Qu.:50.00  
                      Max.   :255.00   Max.   :255.00   Max.   :95.00  
      size      class  
 Min.   :   0   B:179  
 1st Qu.:2400   R:615  
 Median :5000          
 Mean   :4928          
 3rd Qu.:7300          
 Max.   :9900          

Distribution


In [83]:
%%R

# One histogram per numeric field of the working data set: old byte, new byte, size, position.
hist(x = data$old, col = "lightgreen", main = "old byte")
hist(x = data$new, col = "lightgreen", main = "new byte")
hist(x = data$size, col = "lightgreen", main = "size of extension")
hist(x = data$pos, col = "lightgreen",
     main = "position of the extension (0%-100%)")


Correlation

with input type:


In [84]:
%%R

# New byte against old byte, coloured by input type; the legend is attached to this first plot only.
plot(data$new, data$old, col = data$input)
legend(x = 200, y = 240, legend = types, col = factor(types), pch = 1, cex = 1.4)
# New byte against position and against extension size, same colouring.
plot(data$new, data$pos, col = data$input)
plot(data$new, data$size, col = data$input)



In [85]:
%%R

# Remaining field pairs (old/pos, old/size, pos/size), coloured by input type.
plot(data$old, data$pos, col = data$input)
plot(data$old, data$size, col = data$input)
plot(data$pos, data$size, col = data$input)


with class:


In [86]:
%%R

# New byte against old byte, coloured by class; the legend is attached to this first plot only.
plot(data$new, data$old, col = data$class)
legend(x = 200, y = 240, legend = classes, col = factor(classes), pch = 1, cex = 1.4)
# New byte against position and against extension size, same colouring.
plot(data$new, data$pos, col = data$class)
plot(data$new, data$size, col = data$class)



In [87]:
%%R

# Remaining field pairs (old/pos, old/size, pos/size), coloured by class.
plot(data$old, data$pos, col = data$class)
plot(data$old, data$size, col = data$class)
plot(data$pos, data$size, col = data$class)


Events

Sizes


In [88]:
%%R

# Folder (named after the run date) that holds the exported CSV files.
dir = "25-05-2014"
# Space-separated, header-less file with one event size and its class per row.
# stringsAsFactors is set explicitly: R >= 4.0 defaults it to FALSE, but the summary and
# plotting cells need `class` to be a factor (per-level counts, `col = data$class`).
events.sizes = read.csv(file.path(dir, "event_sizes", "events.sizes.csv"),
                        col.names = c("size", "class"),
                        header = FALSE, sep = " ", stringsAsFactors = TRUE)

# Make the event sizes the working data set and collect the distinct class labels for legends.
data = events.sizes
classes = levels(factor(data$class))

Summary


In [89]:
%%R
# Per-column overview of the current working data set (printed explicitly so it shows up in the cell output).
print(x = summary(object = data))


      size        class   
 Min.   :    10   B:1154  
 1st Qu.:    48   R:2217  
 Median :   219           
 Mean   :  5914           
 3rd Qu.:  2756           
 Max.   :168050           

Distribution


In [90]:
%%R
# Histogram of all event sizes, then of the sizes below 1000 only.
hist(x = data$size, col = "lightgreen", main = "sizes")
hist(x = data$size[data$size<1000], col = "lightgreen", main = "sizes < 1000")


Correlation


In [91]:
%%R

# One-dimensional strip of event sizes: every event is drawn as a '|' tick at y = 1,
# coloured by its class.
plot(data.frame(data$size,1), type = 'o', pch = '|', ylab = '', col = data$class)
# The legend uses the same '|' marker as the plotted points so the key matches the plot.
legend(140000, 1.4, classes, pch='|', cex = 1.4, col = factor(classes))
# Same strip restricted to sizes below 1000; the class vector is subset with the same mask
# so that each remaining point keeps its own colour.
plot(data.frame(data$size[data$size<1000],1), type = 'o', pch = '|', ylab = '', col = data$class[data$size<1000])