In [5]:
using DataFrames
using Gadfly
using Vega

In [6]:
function mergedata()
    event = readtable("../data/event_type.csv")
    resource = readtable("../data/resource_type.csv")
    severity = readtable("../data/severity_type.csv")
    log_feature = readtable("../data/log_feature.csv")
    train = readtable("../data/train.csv")
    test = readtable("../data/test.csv")

    common = join(join(join(event,resource,on=:id,kind=:outer),severity,on=:id,kind=:outer),log_feature,on=:id,kind=:o\
uter)
    @show size(common)

    train = join(train,common,on=:id,kind=:left)
    test = join(test,common,on=:id,kind=:left)

    @show size(train),size(test)
end


Out[6]:
mergedata (generic function with 1 method)

In [7]:
ev = readcsv("../data/event_type.csv")


Out[7]:
31171x2 Array{Any,2}:
      "id"  "event_type"   
  6597      "event_type 11"
  8011      "event_type 15"
  2597      "event_type 15"
  5022      "event_type 15"
  5022      "event_type 11"
  6852      "event_type 11"
  6852      "event_type 15"
  5611      "event_type 15"
 14838      "event_type 15"
 14838      "event_type 11"
  2588      "event_type 15"
  2588      "event_type 11"
     ⋮                     
  6288      "event_type 11"
 13296      "event_type 11"
  1989      "event_type 11"
 15206      "event_type 11"
 15084      "event_type 11"
  8114      "event_type 11"
  8955      "event_type 11"
  3761      "event_type 11"
  8720      "event_type 11"
  6488      "event_type 11"
   878      "event_type 11"
  4464      "event_type 11"

In [10]:
length(unique(ev[:,1])),length(unique(ev[:,2]))


Out[10]:
(18553,54)

In [11]:
re = readcsv("../data/resource_type.csv")


Out[11]:
21077x2 Array{Any,2}:
      "id"  "resource_type"  
  6597      "resource_type 8"
  8011      "resource_type 8"
  2597      "resource_type 8"
  5022      "resource_type 8"
  6852      "resource_type 8"
  5611      "resource_type 8"
 14838      "resource_type 8"
  2588      "resource_type 8"
  4848      "resource_type 8"
  6914      "resource_type 8"
  5337      "resource_type 8"
 10460      "resource_type 8"
     ⋮                       
  6288      "resource_type 8"
 13296      "resource_type 8"
  1989      "resource_type 8"
 15206      "resource_type 8"
 15084      "resource_type 8"
  8114      "resource_type 8"
  8955      "resource_type 8"
  3761      "resource_type 8"
  8720      "resource_type 8"
  6488      "resource_type 8"
   878      "resource_type 8"
  4464      "resource_type 8"

In [12]:
length(unique(re[:,1])),length(unique(re[:,2]))


Out[12]:
(18553,11)

In [13]:
se = readcsv("../data/severity_type.csv")


Out[13]:
18553x2 Array{Any,2}:
      "id"  "severity_type"  
  6597      "severity_type 2"
  8011      "severity_type 2"
  2597      "severity_type 2"
  5022      "severity_type 1"
  6852      "severity_type 1"
  5611      "severity_type 2"
 14838      "severity_type 1"
  2588      "severity_type 1"
  4848      "severity_type 1"
  6914      "severity_type 1"
  5337      "severity_type 1"
 10460      "severity_type 1"
     ⋮                       
  6288      "severity_type 1"
 13296      "severity_type 1"
  1989      "severity_type 1"
 15206      "severity_type 1"
 15084      "severity_type 1"
  8114      "severity_type 2"
  8955      "severity_type 1"
  3761      "severity_type 1"
  8720      "severity_type 1"
  6488      "severity_type 2"
   878      "severity_type 2"
  4464      "severity_type 1"

In [14]:
length(unique(se[:,1])),length(unique(se[:,2]))


Out[14]:
(18553,6)

In [15]:
log = readcsv("../data/log_feature.csv")


Out[15]:
58672x3 Array{Any,2}:
      "id"  "log_feature"    "volume"
  6597      "feature 68"    6        
  8011      "feature 68"    7        
  2597      "feature 68"    1        
  5022      "feature 172"   2        
  5022      "feature 56"    1        
  5022      "feature 193"   4        
  5022      "feature 71"    3        
  6852      "feature 201"   2        
  6852      "feature 56"    1        
  6852      "feature 80"    2        
  5611      "feature 80"    2        
 14838      "feature 203"   5        
     ⋮                               
  3761      "feature 87"    1        
  3761      "feature 209"   1        
  3761      "feature 54"    4        
  3761      "feature 170"   4        
  8720      "feature 170"   4        
  8720      "feature 155"  10        
  8720      "feature 54"    1        
  8720      "feature 209"   1        
  6488      "feature 54"    3        
   878      "feature 62"    1        
  4464      "feature 209"   1        
  4464      "feature 87"    2        

In [16]:
length(unique(log[])),length(unique(log[:,2]))


LoadError: syntax: unhandled expr (error unexpected "]" in argument list)
while loading In[16], in expression starting on line 1

 [inlined code] from ./essentials.jl:78
 in include_string(::ASCIIString, ::ASCIIString) at ./loading.jl:371

In [ ]: