Multi Dimensional Data Visualisation

Chapter 05 - Two Dimensional: p = 2

Load libraries


In [1]:
library(repr)
options(repr.plot.width=8, repr.plot.height=4.5)
options(jupyter.plot_mimetypes = 'image/png')

In [2]:
# Load the libraries
library(ggplot2)
library(scales)

In [3]:
# Modify the bw theme
theme_viz <- theme_bw() + theme(
  legend.background = element_rect(fill = "transparent", colour = NA),
  axis.line = element_line(colour = "grey85"),
  panel.background = element_rect(fill = "transparent", colour = NA),
  panel.border = element_rect(colour = NA),
  plot.background = element_rect(fill = "transparent", colour = NA))

In [4]:
# Set it as the defualt
theme_set(theme_viz)

In [5]:
# ColorBrewer class for palette PuRd
# http://colorbrewer2.org/?type=sequential&scheme=PuRd&n=7
PuRdclass1 <- "#f1eef6"
PuRdclass2 <- "#d4b9da"
PuRdclass3 <- "#c994c7"
PuRdclass4 <- "#df65b0"
PuRdclass5 <- "#e7298a"
PuRdclass6 <- "#ce1256"
PuRdclass7 <- "#91003f"

Load data


In [6]:
str(diamonds)


Classes 'tbl_df', 'tbl' and 'data.frame':	53940 obs. of  10 variables:
 $ carat  : num  0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
 $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
 $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
 $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
 $ depth  : num  61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
 $ table  : num  55 61 65 58 58 57 57 55 61 61 ...
 $ price  : int  326 326 327 334 335 336 336 337 337 338 ...
 $ x      : num  3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
 $ y      : num  3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
 $ z      : num  2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...

Categorical vs. Categorical


In [7]:
# Get the summary for diamonds$cut
summary(diamonds$cut)


Out[7]:
Fair
1610
Good
4906
Very Good
12082
Premium
13791
Ideal
21551

In [8]:
# Set the defualt scale and dataset
g <- ggplot(diamonds) + scale_fill_brewer(palette = "PuRd") + scale_color_brewer(palette = "PuRd")

Dot Chart


In [21]:
cutClarityDot <- g + aes(cut, clarity, color = clarity) + geom_jitter(size = 1, alpha = 0.2)
cutClarityDot



In [22]:
ggsave("../assets/img/cutClarityDot.png", cutClarityDot, width = 8, height = 4.5, units = "in", bg = "transparent")

Bar Chart - Stacked


In [15]:
cutClarityBarStack <- g + aes(cut, fill = clarity) + geom_bar(position = 'stack')
cutClarityBarStack



In [12]:
ggsave("../assets/img/cutClarityBar.png", cutClarityBar, width = 8, height = 4.5, units = "in", bg = "transparent")

Bar Chart - Staggered


In [14]:
cutClarityBarDodge <- g + aes(cut, fill = clarity) + geom_bar(position = "dodge") 
cutClarityBarDodge



In [16]:
ggsave("../assets/img/cutClarityBarDodge.png", cutClarityBarDodge, width = 8, height = 4.5, units = "in", bg = "transparent")

CoxComb Chart


In [15]:
cutCoxcomb <- g + aes(cut, fill = cut) + geom_bar(width = 1) + coord_polar() 
cutCoxcomb



In [16]:
ggsave("../assets/img/cutCoxcomb.png", cutCoxcomb, width = 8, height = 4.5, units = "in", bg = "transparent")

Stacked Chart


In [17]:
cutStacked <- g + aes(x = "", fill = cut) + geom_bar(width = 0.5) + xlab("cut")
cutStacked



In [18]:
ggsave("../assets/img/cutStacked.png", cutStacked, width = 8, height = 4.5, units = "in", bg = "transparent")

Bullseye Chart


In [19]:
cutBullseye <- g + aes(x = "", fill = cut) + geom_bar(width = 1) + xlab("cut") + coord_polar(theta = "x") 
cutBullseye



In [20]:
ggsave("../assets/img/cutBullseye.png", cutBullseye, width = 8, height = 4.5, units = "in", bg = "transparent")

Pie Chart


In [21]:
cutPie <- g + aes(x = "", fill = cut) + geom_bar() + xlab("cut") + coord_polar(theta = "y") 
cutPie



In [22]:
ggsave("../assets/img/cutPie.png", cutPie, width = 8, height = 4.5, units = "in", bg = "transparent")

Quantiative Visualisation


In [23]:
summary(diamonds$price)


Out[23]:
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    326     950    2401    3933    5324   18820 

Dot Plot


In [24]:
priceDot <- g + aes('', price) + geom_jitter(size = 1, alpha = 0.1, width = 0.25, color = PuRdclass4) +
    coord_flip() + xlab('price')
priceDot



In [25]:
ggsave("../assets/img/priceDot.png", priceDot, width = 8, height = 4.5, units = "in", bg = "transparent")

Histogram


In [45]:
priceHist <- g + aes(price) + geom_histogram(bins = 400, fill = PuRdclass4)
priceHist



In [27]:
ggsave("../assets/img/priceHist.png", priceHist, width = 8, height = 4.5, units = "in", bg = "transparent")

In [50]:
priceHistLog <- g + aes(log10(price)) + geom_histogram(bins = 400, fill = PuRdclass4)
priceHistLog


Frequency Polygon


In [52]:
priceFreqpoly <- g + aes(log10(price)) + geom_freqpoly(bins = 400, fill = PuRdclass4)
priceFreqpoly


Error: Unknown parameters: fill

In [29]:
ggsave("../assets/img/priceFreqpoly.png", priceFreqpoly, width = 8, height = 4.5, units = "in", bg = "transparent")

Density Plot


In [53]:
priceDensity <- g + aes(log10(price)) + geom_density(adjust = 1/10, fill = PuRdclass4) + scale_y_continuous(labels = comma)
priceDensity



In [31]:
ggsave("../assets/img/priceDensity.png", priceDensity, width = 8, height = 4.5, units = "in", bg = "transparent")

In [ ]: