In [3]:
using DataFrames
using Gadfly
using Vega
In [4]:
"""
    mergedata(; datadir="../data")

Read the six competition CSV files from `datadir`, outer-join the event,
resource, severity and log-feature tables on `:id`, then left-join that
combined table onto the train and test tables.

The sizes of the combined table and of the two joined tables are printed
with `@show`.

# Keywords
- `datadir`: directory holding `event_type.csv`, `resource_type.csv`,
  `severity_type.csv`, `log_feature.csv`, `train.csv` and `test.csv`.
  Defaults to `"../data"`.

# Returns
The tuple `(size(train), size(test))`, i.e. the value of the final `@show`.
The joined tables themselves are local to the function and are not returned.
"""
function mergedata(; datadir="../data")
    event = readtable(joinpath(datadir, "event_type.csv"))
    resource = readtable(joinpath(datadir, "resource_type.csv"))
    severity = readtable(joinpath(datadir, "severity_type.csv"))
    log_feature = readtable(joinpath(datadir, "log_feature.csv"))
    train = readtable(joinpath(datadir, "train.csv"))
    test = readtable(joinpath(datadir, "test.csv"))

    # Fold the four auxiliary tables together one join at a time; an outer
    # join keeps ids that appear in only some of the tables.
    common = join(event, resource, on=:id, kind=:outer)
    common = join(common, severity, on=:id, kind=:outer)
    common = join(common, log_feature, on=:id, kind=:outer)
    @show size(common)

    # Left joins keep every train/test row even when `common` has no match.
    train = join(train, common, on=:id, kind=:left)
    test = join(test, common, on=:id, kind=:left)

    # `@show` evaluates to the value it prints, so this tuple is the result.
    @show size(train),size(test)
end
Out[4]:
In [5]:
# Load the event-type file as a plain matrix. `readcsv` does not treat the
# first line specially, so row 1 holds the column names.
ev = readcsv("../data/event_type.csv")
Out[5]:
In [6]:
# Number of distinct values in columns 1 and 2. Row 1 (the header) is skipped
# so that the column name is not counted as a data value.
length(unique(ev[2:end,1])),length(unique(ev[2:end,2]))
Out[6]:
In [7]:
# Load the resource-type file as a plain matrix. `readcsv` does not treat the
# first line specially, so row 1 holds the column names.
re = readcsv("../data/resource_type.csv")
Out[7]:
In [8]:
# Number of distinct values in columns 1 and 2. Row 1 (the header) is skipped
# so that the column name is not counted as a data value.
length(unique(re[2:end,1])),length(unique(re[2:end,2]))
Out[8]:
In [9]:
# Load the severity-type file as a plain matrix. `readcsv` does not treat the
# first line specially, so row 1 holds the column names.
se = readcsv("../data/severity_type.csv")
Out[9]:
In [10]:
# Number of distinct values in columns 1 and 2. Row 1 (the header) is skipped
# so that the column name is not counted as a data value.
length(unique(se[2:end,1])),length(unique(se[2:end,2]))
Out[10]:
In [11]:
# Load the log-feature file as a plain matrix. `readcsv` does not treat the
# first line specially, so row 1 holds the column names.
# NOTE(review): `log` is also the name of Base's logarithm function; this
# global shadows it for the rest of the session. The name is kept because
# other cells may refer to it.
log = readcsv("../data/log_feature.csv")
Out[11]:
In [12]:
# Number of distinct values in columns 1 and 2. Row 1 (the header) is skipped
# so that the column name is not counted as a data value.
length(unique(log[2:end,1])),length(unique(log[2:end,2]))
Out[12]:
In [13]:
# Load the training file as a plain matrix. `readcsv` does not treat the
# first line specially, so row 1 holds the column names.
tr = readcsv("../data/train.csv")
Out[13]:
In [14]:
# Number of distinct values in columns 1, 2 and 3. Row 1 (the header) is
# skipped so that the column name is not counted as a data value.
length(unique(tr[2:end,1])),length(unique(tr[2:end,2])),length(unique(tr[2:end,3]))
Out[14]:
In [15]:
# Drop row 1 of the raw training matrix and keep only its first and third
# columns.
train = tr[2:end, [1, 3]]
Out[15]:
In [16]:
# For each candidate value 0, 1, 2, 3: how many rows have that value in the
# second column of `train`.
[count(v -> v == i, train[:, 2]) for i in 0:3]
Out[16]:
In [ ]:
In [ ]:
In [ ]: