In [ ]:
# ENV["MPLBACKEND"] = "Agg"

using Plots, DataFrames, OnlineAI

# Select the PyPlot backend and set the default figure size.
pyplot()
default(size = (500, 300))

# Read the semicolon-separated white-wine table from the ExamplePlots package directory.
df = readtable(
    joinpath(Pkg.dir("ExamplePlots"), "examples", "meetup", "winequality-white.csv");
    separator = ';'
)

# Matrix form of the table, for the calls that take an array rather than a DataFrame.
M = Array(df)

# Show the second row of the table.
df[2, :]

In [ ]:
# Histogram subplots of M, one per column (n = ncols(M)).
# The column names are passed transposed — presumably so that each subplot
# receives its own title; confirm against the Plots version in use.
sp = subplot(
    M;
    n = ncols(M),
    lt = :hist,
    size = (1000, 900),
    title = names(df)'
)

# Optional backend-specific spacing tweak, left disabled:
# sp.o.fig[:subplots_adjust](hspace=.5); sp

In [ ]:
# Pick the six columns whose correlation with wine quality (the last column of M)
# is largest in absolute value, then put those column indices back in ascending order.
C = cor(M)
indices = sort(sortperm(C[1:end-1, end], by = abs, rev = true)[1:6])
nms = names(df)[indices]

In [ ]:
# Correlation plot of the selected feature columns together with quality.
# Quality is taken to be the last column of M — the same convention the
# correlation ranking uses via `end` — instead of a hard-coded column number,
# so this cell keeps selecting the right column if the table width changes.
idx_w_quality = vcat(indices, size(M, 2))
corrplot(M[:, idx_w_quality], size = (1200,1200), labels = vcat(nms,:quality)')

In [ ]:
# Notes: grouping by string labels, DataFrame column names for data, separate opacities.

# Bucket every wine into one of three labels from its quality score:
# above 7, below 5, or everything in between.
# NOTE(review): the leading space in the middle label presumably makes it sort
# ahead of the other two — confirm before "cleaning it up".
quality = ASCIIString[
    (q > 7 ? "High Quality" :
     q < 5 ? "Low Quality" :
     " Tastes like wine...") for q in df[:quality]
]

# Use the first and the sixth (last) of the selected column names as the x/y fields.
# fields = (:citric_acid, :alcohol)
fields = nms[[1,6]]
default(xlab = first(fields), ylab = last(fields))

# Scatter of the two fields, grouped by the label vector built above, with
# per-group marker and opacity settings.
scatter(
    df, fields...;
    group = quality,
    m = [2 3 3],
    w = 0,
    smooth = true,
    opacity = [0.1 1 1]
)

# plot!(xlim=(.985,1.005),ylim=(7,15))
# plot!(xlab=fields[1], ylab=fields[2])
# gui()

In [ ]:
# Switch off the grid and the legend in the plotting defaults.
default(grid = false, leg = false)

# Three subplots in a single row (n = 3, nr = 1), grouped by the quality labels
# and titled with the sorted unique labels.
# NOTE(review): `linky = true` presumably links the y axes across panels — confirm.
subplot(
    df, fields...;
    group = quality, marker = 3, line = (:scatter, 0),
    smooth = true, opacity = [0.05 1 1],
    title = sort(unique(quality))',
    n = 3, nr = 1, size = (1000, 400), linky = true
)

In [ ]:
# Seven hexbin subplots in one row, grouped by `:quality`.
# NOTE(review): the titles assume the quality scores are exactly 3 through 9 and
# that the groups come out in that order — confirm against the data.
sp = subplot(
    df, fields...;
    group = :quality,
    line = (:hexbin, ColorGradient(:heat, [0, 0.01, 1])),
    n = 7, nr = 1,
    title = map(i -> "Quality: $i", (3:9)'),
    size = (1200, 300), linky = true
)

# Optional backend-specific spacing tweak, left disabled:
# sp.o.fig[:subplots_adjust](wspace=.6); sp

In [ ]:


In [ ]:


In [ ]:


In [ ]:
# Scatter of quality against density, with the x axis limited to 0.99–1.
# NOTE(review): m = (1, 0.01) presumably means marker size 1 at opacity 0.01 — confirm.
scatter(
    df, :density, :quality;
    smooth = true,
    m = (1, 0.01),
    xlim = (0.99, 1)
)

In [ ]: