In [ ]:
# ENV["MPLBACKEND"] = "Agg"
using Plots, DataFrames, OnlineAI
# Select the PyPlot backend and set the default figure size for later plots.
pyplot()
default(size = (500, 300))

# Read the semicolon-separated white-wine quality CSV that lives under the
# ExamplePlots package directory.
df = readtable(
    joinpath(Pkg.dir("ExamplePlots"), "examples", "meetup", "winequality-white.csv"),
    separator = ';'
)

# Plain-array form of the table, for the calls below that take a matrix.
M = Array(df)

# Display the second row as a quick look at the parsed columns.
df[2, :]
In [ ]:
# Grid of histograms: one subplot per column of M, titled with the column names
# (transposed so that each name is applied to its own subplot).
sp = subplot(
    M,
    n = ncols(M),
    lt = :hist,
    size = (1000, 900),
    title = names(df)'
)
# sp.o.fig[:subplots_adjust](hspace=.5); sp
In [ ]:
# Find the variables most strongly correlated with wine quality.
# The last column of the correlation matrix (minus its own diagonal entry)
# holds each predictor's correlation with the final column of M.
C = cor(M)

# Rank predictors by absolute correlation (largest first), keep the top six,
# and put the surviving column indices back into ascending order.
indices = sort(sortperm(abs(C[1:end-1, end]), rev=true)[1:6])

# Column names of the selected predictors.
nms = names(df)[indices]
In [ ]:
# Columns for the correlation plot: the selected predictors followed by the
# quality column. Quality is addressed as the last column of M (the same
# assumption the ranking step makes with `C[1:end-1, end]`) instead of a
# hard-coded column number, so this keeps working if the column count changes.
idx_w_quality = vcat(indices, size(M, 2))

# Pairwise correlation plot of the chosen columns, labelled with the predictor
# names plus :quality (transposed into a row of labels).
corrplot(
    M[:, idx_w_quality],
    size = (1200, 1200),
    labels = vcat(nms, :quality)'
)
In [ ]:
# Notes: grouping by string labels, DataFrame column names used as data, and
# separate opacities per group.

# Bucket every numeric quality score into one of three string labels:
# below 5, above 7, or anything in between.
# NOTE(review): the leading space in " Tastes like wine..." presumably makes
# that label sort first, so it lines up with the first entry of the per-group
# marker/opacity arrays below -- confirm before changing the text.
quality = ASCIIString[
    if q < 5
        "Low Quality"
    elseif q > 7
        "High Quality"
    else
        " Tastes like wine..."
    end for q in df[:quality]
]

# Use the first and last of the six selected column names as the x and y fields.
# fields = (:citric_acid, :alcohol)
fields = [nms[1], nms[6]]
default(xlab=fields[1], ylab=fields[2])

# Scatter of the two fields, grouped by quality label, with per-group marker
# and opacity settings and smoothing enabled.
scatter(
    df, fields...,
    group = quality,
    m = [2 3 3],
    w = 0,
    smooth = true,
    opacity = [0.1 1 1]
)
# plot!(xlim=(.985,1.005),ylim=(7,15))
# plot!(xlab=fields[1], ylab=fields[2])
# gui()
In [ ]:
# Turn off grid lines and legends for the plots that follow.
default(grid=false, leg=false)

# Same two fields as the grouped scatter, but split into three side-by-side
# subplots (one row), one per quality label, titled with the sorted labels.
subplot(
    df, fields...,
    group   = quality,
    marker  = 3,
    line    = (:scatter,0),
    smooth  = true,
    opacity = [0.05 1 1],
    title   = sort(unique(quality))',
    n = 3, nr = 1,
    size    = (1000,400),
    linky   = true
)
In [ ]:
# Hexbin view of the same two fields, grouped by the raw :quality column:
# seven subplots in a single row, titled "Quality: 3" through "Quality: 9".
sp = subplot(
    df, fields...,
    group = :quality,
    line  = (:hexbin, ColorGradient(:heat,[0,0.01,1])),
    n = 7, nr = 1,
    title = map(i->"Quality: $i", (3:9)'),
    size  = (1200,300),
    linky = true
)
# sp.o.fig[:subplots_adjust](wspace=.6); sp
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Quality against density with smoothing enabled; the x-axis is limited to
# the range 0.99 to 1.
scatter(
    df, :density, :quality,
    smooth = true,
    m = (1,0.01),
    xlim = (0.99,1)
)
In [ ]: