In [1]:
# Make sure the Cairo package is installed (the log below reports nothing to install).
# NOTE(review): presumably needed for the PNG export further down -- TODO confirm.
Pkg.add("Cairo")


INFO: No packages to install, update or remove
INFO: Package database updated

In [2]:
# Refresh the package metadata and look for updates to the installed packages.
Pkg.update()


INFO: Updating METADATA...
INFO: Updating RCalling...
INFO: Computing changes...
INFO: No packages to install, update or remove

In [3]:
using Gadfly

In [5]:
# A simple plot of 10 random x values against 10 random y values.
# NOTE(review): this call is not the last expression of the cell, so it does not
# appear to be rendered -- Out[5] seems to hold a single plot only.
plot(x = rand(10), y = rand(10))

## Same kind of data, now with explicit geometries: Geom.point and Geom.line

plot(x = rand(10), y = rand(10), Geom.point, Geom.line)
# Note the capitalisation (`Geom`, not `geom`); this draws both the points and a line.


Out[5]:
x -1.2 -1.0 -0.8 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 -1.00 -0.95 -0.90 -0.85 -0.80 -0.75 -0.70 -0.65 -0.60 -0.55 -0.50 -0.45 -0.40 -0.35 -0.30 -0.25 -0.20 -0.15 -0.10 -0.05 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75 0.80 0.85 0.90 0.95 1.00 1.05 1.10 1.15 1.20 1.25 1.30 1.35 1.40 1.45 1.50 1.55 1.60 1.65 1.70 1.75 1.80 1.85 1.90 1.95 2.00 -1 0 1 2 -1.0 -0.9 -0.8 -0.7 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 -1.00 -0.95 -0.90 -0.85 -0.80 -0.75 -0.70 -0.65 -0.60 -0.55 -0.50 -0.45 -0.40 -0.35 -0.30 -0.25 -0.20 -0.15 -0.10 -0.05 0.00 0.05 0.10 0.15 0.20 0.25 0.30 0.35 0.40 0.45 0.50 0.55 0.60 0.65 0.70 0.75 0.80 0.85 0.90 0.95 1.00 1.05 1.10 1.15 1.20 1.25 1.30 1.35 1.40 1.45 1.50 1.55 1.60 1.65 1.70 1.75 1.80 1.85 1.90 1.95 2.00 -1 0 1 2 -1.0 -0.9 -0.8 -0.7 -0.6 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 y

In [6]:
## A more elaborate plot: points plus a smoothing line on a square-root y scale,
## with axis labels and a title.
## `2 .^ rand(10)` is written with spaces around `.^` on purpose: `2.^` is
## ambiguous between the literal `2.` followed by `^` and `2` followed by `.^`,
## and newer Julia releases reject the unspaced form.
plot(
    x = 1:10,
    y = 2 .^ rand(10),
    Scale.y_sqrt,
    Geom.point,
    Geom.smooth,
    Guide.xlabel("Stimulus"),
    Guide.ylabel("Response"),
    Guide.title("Some Training")
)
## This adds a smoothing line, a title, and x- and y-axis labels.


Out[6]:
Stimulus -12.5 -10.0 -7.5 -5.0 -2.5 0.0 2.5 5.0 7.5 10.0 12.5 15.0 17.5 20.0 22.5 -10.0 -9.5 -9.0 -8.5 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 12.5 13.0 13.5 14.0 14.5 15.0 15.5 16.0 16.5 17.0 17.5 18.0 18.5 19.0 19.5 20.0 -10 0 10 20 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.52 0.62 0.72 0.82 0.92 1.02 1.12 1.22 1.32 1.42 1.52 1.62 1.72 1.82 1.92 0.002 0.022 0.042 0.062 0.082 0.102 0.122 0.142 0.162 0.182 0.202 0.222 0.242 0.262 0.282 0.302 0.322 0.342 0.362 0.382 0.402 0.422 0.442 0.462 0.482 0.502 0.522 0.542 0.562 0.582 0.602 0.622 0.642 0.662 0.682 0.702 0.722 0.742 0.762 0.782 0.802 0.822 0.842 0.862 0.882 0.902 0.922 0.942 0.962 0.982 1.002 1.022 1.042 1.062 1.082 1.102 1.122 1.142 1.162 1.182 1.202 1.222 1.242 1.262 1.282 1.302 1.322 1.342 1.362 1.382 1.402 1.422 1.442 1.462 1.482 1.502 1.522 1.542 1.562 1.582 1.602 1.622 1.642 1.662 1.682 1.702 1.722 1.742 1.762 1.782 1.802 1.822 0.02 0.52 1.02 1.52 2.02 0.002 0.052 0.102 0.152 0.202 0.252 0.302 0.352 0.402 0.452 0.502 0.552 0.602 0.652 0.702 0.752 0.802 0.852 0.902 0.952 1.002 1.052 1.102 1.152 1.202 1.252 1.302 1.352 1.402 1.452 1.502 1.552 1.602 1.652 1.702 1.752 1.802 1.852 Response Some Training

In [7]:
# Build the same kind of plot as before, but keep it in a variable so that it
# can be exported (PNG here; the original note also mentions PDF and others).
# `2 .^ rand(10)` uses spaces around `.^` because the unspaced `2.^` is
# ambiguous (literal `2.` vs. operator `.^`) and rejected by newer Julia releases.
myplot = plot(
    x = 1:10,
    y = 2 .^ rand(10),
    Scale.y_sqrt,
    Geom.point,
    Geom.smooth,
    Guide.xlabel("Stimulus"),
    Guide.ylabel("Response"),
    Guide.title("Some Training")
)

# Write the plot to "myplot.png" as a 4 inch by 3 inch image.
draw(PNG("myplot.png", 4inch, 3inch), myplot)

In [8]:
myplot
# Evaluating the bare variable name as the last expression of the cell renders the stored plot.


Out[8]:
Stimulus -12.5 -10.0 -7.5 -5.0 -2.5 0.0 2.5 5.0 7.5 10.0 12.5 15.0 17.5 20.0 22.5 -10.0 -9.5 -9.0 -8.5 -8.0 -7.5 -7.0 -6.5 -6.0 -5.5 -5.0 -4.5 -4.0 -3.5 -3.0 -2.5 -2.0 -1.5 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 12.5 13.0 13.5 14.0 14.5 15.0 15.5 16.0 16.5 17.0 17.5 18.0 18.5 19.0 19.5 20.0 -10 0 10 20 -10 -9 -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 0.32 0.42 0.52 0.62 0.72 0.82 0.92 1.02 1.12 1.22 1.32 1.42 1.52 1.62 1.72 1.82 1.92 2.02 0.002 0.022 0.042 0.062 0.082 0.102 0.122 0.142 0.162 0.182 0.202 0.222 0.242 0.262 0.282 0.302 0.322 0.342 0.362 0.382 0.402 0.422 0.442 0.462 0.482 0.502 0.522 0.542 0.562 0.582 0.602 0.622 0.642 0.662 0.682 0.702 0.722 0.742 0.762 0.782 0.802 0.822 0.842 0.862 0.882 0.902 0.922 0.942 0.962 0.982 1.002 1.022 1.042 1.062 1.082 1.102 1.122 1.142 1.162 1.182 1.202 1.222 1.242 1.262 1.282 1.302 1.322 1.342 1.362 1.382 1.402 1.422 1.442 1.462 1.482 1.502 1.522 1.542 1.562 1.582 1.602 1.622 1.642 1.662 1.682 1.702 1.722 1.742 1.762 1.782 1.802 1.822 1.842 1.862 1.882 1.902 1.922 0.02 0.52 1.02 1.52 2.02 0.002 0.052 0.102 0.152 0.202 0.252 0.302 0.352 0.402 0.452 0.502 0.552 0.602 0.652 0.702 0.752 0.802 0.852 0.902 0.952 1.002 1.052 1.102 1.152 1.202 1.252 1.302 1.352 1.402 1.452 1.502 1.552 1.602 1.652 1.702 1.752 1.802 1.852 1.902 1.952 Response Some Training

In [9]:
# Install RDatasets, which supplies the `dataset(...)` calls used in the cells below.
Pkg.add("RDatasets")


INFO: Installing RDatasets v0.1.2
INFO: Package database updated

In [10]:
## We can also plot data frames
using DataFrames
using RDatasets


WARNING: Base.String is deprecated, use AbstractString instead.
  likely near /Users/arindambose/.julia/v0.4/RDatasets/src/dataset.jl:1
WARNING: Base.String is deprecated, use AbstractString instead.
  likely near /Users/arindambose/.julia/v0.4/RDatasets/src/dataset.jl:1
WARNING: Base.String is deprecated, use AbstractString instead.
  likely near /Users/arindambose/.julia/v0.4/RDatasets/src/datasets.jl:1

In [12]:
# Scatter plot built straight from a data frame: the x and y aesthetics are
# given as column names of the iris data set.
plot(dataset("datasets", "iris"), x = "SepalLength",
y = "SepalWidth", Geom.point)

# Here we took the iris data set and plotted SepalLength against SepalWidth.


Out[12]:
SepalLength -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0 4.2 4.4 4.6 4.8 5.0 5.2 5.4 5.6 5.8 6.0 6.2 6.4 6.6 6.8 7.0 7.2 7.4 7.6 7.8 8.0 8.2 8.4 8.6 8.8 9.0 9.2 9.4 9.6 9.8 10.0 10.2 10.4 10.6 10.8 11.0 11.2 11.4 11.6 11.8 12.0 0 5 10 15 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 -1.0 -0.5 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 -0.5 -0.4 -0.3 -0.2 -0.1 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 4.1 4.2 4.3 4.4 4.5 4.6 4.7 4.8 4.9 5.0 5.1 5.2 5.3 5.4 5.5 5.6 5.7 5.8 5.9 6.0 6.1 6.2 6.3 6.4 6.5 6.6 6.7 6.8 6.9 7.0 -2.5 0.0 2.5 5.0 7.5 -0.6 -0.4 -0.2 0.0 0.2 0.4 0.6 0.8 1.0 1.2 1.4 1.6 1.8 2.0 2.2 2.4 2.6 2.8 3.0 3.2 3.4 3.6 3.8 4.0 4.2 4.4 4.6 4.8 5.0 5.2 5.4 5.6 5.8 6.0 6.2 6.4 6.6 6.8 7.0 SepalWidth

In [13]:
# Histogram of the "Wages" column of the SLID data set, with the bars coloured
# by the "Language" column.
plot(dataset("car", "SLID"), 
x = "Wages", color = "Language",
Geom.histogram)

# Here we plot a histogram split by language group.


Out[13]:
Wages -60 -50 -40 -30 -20 -10 0 10 20 30 40 50 60 70 80 90 100 110 -50 -48 -46 -44 -42 -40 -38 -36 -34 -32 -30 -28 -26 -24 -22 -20 -18 -16 -14 -12 -10 -8 -6 -4 -2 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50 52 54 56 58 60 62 64 66 68 70 72 74 76 78 80 82 84 86 88 90 92 94 96 98 100 -50 0 50 100 -50 -45 -40 -35 -30 -25 -20 -15 -10 -5 0 5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100 English Other French Language -250 -200 -150 -100 -50 0 50 100 150 200 250 300 350 400 450 -200 -190 -180 -170 -160 -150 -140 -130 -120 -110 -100 -90 -80 -70 -60 -50 -40 -30 -20 -10 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 -200 0 200 400 -200 -180 -160 -140 -120 -100 -80 -60 -40 -20 0 20 40 60 80 100 120 140 160 180 200 220 240 260 280 300 320 340 360 380 400

In [14]:
## Kernel density estimate of the "Price" column of the diamonds data set
plot(dataset("ggplot2", "diamonds"), x="Price",
Geom.density)


Out[14]:
Price -6×10⁴ -5×10⁴ -4×10⁴ -3×10⁴ -2×10⁴ -1×10⁴ 0 1×10⁴ 2×10⁴ 3×10⁴ 4×10⁴ 5×10⁴ 6×10⁴ 7×10⁴ 8×10⁴ -5.0×10⁴ -4.8×10⁴ -4.6×10⁴ -4.4×10⁴ -4.2×10⁴ -4.0×10⁴ -3.8×10⁴ -3.6×10⁴ -3.4×10⁴ -3.2×10⁴ -3.0×10⁴ -2.8×10⁴ -2.6×10⁴ -2.4×10⁴ -2.2×10⁴ -2.0×10⁴ -1.8×10⁴ -1.6×10⁴ -1.4×10⁴ -1.2×10⁴ -1.0×10⁴ -8.0×10³ -6.0×10³ -4.0×10³ -2.0×10³ 0 2.0×10³ 4.0×10³ 6.0×10³ 8.0×10³ 1.0×10⁴ 1.2×10⁴ 1.4×10⁴ 1.6×10⁴ 1.8×10⁴ 2.0×10⁴ 2.2×10⁴ 2.4×10⁴ 2.6×10⁴ 2.8×10⁴ 3.0×10⁴ 3.2×10⁴ 3.4×10⁴ 3.6×10⁴ 3.8×10⁴ 4.0×10⁴ 4.2×10⁴ 4.4×10⁴ 4.6×10⁴ 4.8×10⁴ 5.0×10⁴ 5.2×10⁴ 5.4×10⁴ 5.6×10⁴ 5.8×10⁴ 6.0×10⁴ 6.2×10⁴ 6.4×10⁴ 6.6×10⁴ 6.8×10⁴ 7.0×10⁴ -5×10⁴ 0 5×10⁴ 1×10⁵ -5.0×10⁴ -4.5×10⁴ -4.0×10⁴ -3.5×10⁴ -3.0×10⁴ -2.5×10⁴ -2.0×10⁴ -1.5×10⁴ -1.0×10⁴ -5.0×10³ 0 5.0×10³ 1.0×10⁴ 1.5×10⁴ 2.0×10⁴ 2.5×10⁴ 3.0×10⁴ 3.5×10⁴ 4.0×10⁴ 4.5×10⁴ 5.0×10⁴ 5.5×10⁴ 6.0×10⁴ 6.5×10⁴ 7.0×10⁴ -0.0005 -0.0004 -0.0003 -0.0002 -0.0001 0.0000 0.0001 0.0002 0.0003 0.0004 0.0005 0.0006 0.0007 0.0008 0.0009 -0.00040 -0.00038 -0.00036 -0.00034 -0.00032 -0.00030 -0.00028 -0.00026 -0.00024 -0.00022 -0.00020 -0.00018 -0.00016 -0.00014 -0.00012 -0.00010 -0.00008 -0.00006 -0.00004 -0.00002 0.00000 0.00002 0.00004 0.00006 0.00008 0.00010 0.00012 0.00014 0.00016 0.00018 0.00020 0.00022 0.00024 0.00026 0.00028 0.00030 0.00032 0.00034 0.00036 0.00038 0.00040 0.00042 0.00044 0.00046 0.00048 0.00050 0.00052 0.00054 0.00056 0.00058 0.00060 0.00062 0.00064 0.00066 0.00068 0.00070 0.00072 0.00074 0.00076 0.00078 0.00080 -0.0005 0.0000 0.0005 0.0010 -0.00040 -0.00035 -0.00030 -0.00025 -0.00020 -0.00015 -0.00010 -0.00005 0.00000 0.00005 0.00010 0.00015 0.00020 0.00025 0.00030 0.00035 0.00040 0.00045 0.00050 0.00055 0.00060 0.00065 0.00070 0.00075 0.00080

In [15]:
## Using pyplot

using PyPlot


WARNING: using PyPlot.plot in module Main conflicts with an existing identifier.
WARNING: using PyPlot.draw in module Main conflicts with an existing identifier.

In [16]:
# Scatter plot of 10 random points; x and y are passed as keyword arguments.
# The returned object is a matplotlib PathCollection (see Out[16]), i.e. this
# `scatter` is the PyPlot one.
p = scatter(x = rand(10),
y = rand(10))


Out[16]:
PyObject <matplotlib.collections.PathCollection object at 0x316bbd350>

In [17]:
# Install the Bokeh package; the log below shows further packages being installed with it.
Pkg.add("Bokeh")


INFO: Cloning cache of Bokeh from git://github.com/bokeh/Bokeh.jl.git
INFO: Cloning cache of Mustache from git://github.com/jverzani/Mustache.jl.git
INFO: Installing Bokeh v0.2.0
INFO: Installing MacroTools v0.2.0
INFO: Installing Mustache v0.0.14
INFO: Installing Requires v0.2.1
INFO: Building Bokeh
INFO: Package database updated

In [18]:
# List the names of the packages the package manager knows about (returned as an array of strings).
Pkg.available()


Out[18]:
755-element Array{AbstractString,1}:
 "AbstractDomains"       
 "Accumulo"              
 "ActiveAppearanceModels"
 "AffineTransforms"      
 "AmplNLWriter"          
 "AndorSIF"              
 "AnsiColor"             
 "AppConf"               
 "AppleAccelerate"       
 "ApproxFun"             
 "Arbiter"               
 "Arduino"               
 "ArgParse"              
 ⋮                       
 "XGBoost"               
 "XSim"                  
 "XSV"                   
 "YAML"                  
 "Yelp"                  
 "Yeppp"                 
 "YT"                    
 "ZChop"                 
 "ZipFile"               
 "Zlib"                  
 "ZMQ"                   
 "ZVSimulator"           

In [19]:
# NOTE: Pkg.add expects an exact package name; "A*" is not expanded as a
# wildcard, so this call fails with "unknown package A*" (see the error below).
Pkg.add("A*")


LoadError: unknown package A*
 in error at /Applications/Julia-0.4.1.app/Contents/Resources/julia/lib/julia/sys.dylib
 [inlined code] from pkg/entry.jl:49
 in anonymous at task.jl:447
while loading In[19], in expression starting on line 1

 in sync_end at /Applications/Julia-0.4.1.app/Contents/Resources/julia/lib/julia/sys.dylib
 [inlined code] from task.jl:422
 in add at pkg/entry.jl:46
 in add at pkg/entry.jl:73
 in anonymous at pkg/dir.jl:31
 in cd at file.jl:22
 in cd at pkg/dir.jl:31
 in add at pkg.jl:23

In [20]:
# Print the installed packages with their versions: the "required" packages
# first, followed by the "additional" ones.
Pkg.status()


21 required packages:
 - Bokeh                         0.2.0
 - Cairo                         0.2.31
 - DataFrames                    0.6.10
 - DataFramesMeta                0.1.0
 - Distances                     0.2.1
 - Distributions                 0.8.7
 - GLM                           0.4.8
 - Gadfly                        0.4.0
 - HypothesisTests               0.2.10
 - IJulia                        1.1.8
 - Interact                      0.2.1
 - MCMC                          0.3.0
 - NHST                          0.0.2
 - PyCall                        1.2.0
 - PyPlot                        2.1.1
 - RCall                         0.3.1
 - RDatasets                     0.1.2
 - Stats                         0.1.0
 - StatsBase                     0.7.4
 - TimeModels                    0.0.3
 - Winston                       0.11.13
54 additional packages:
 - ArrayViews                    0.6.4
 - BinDeps                       0.3.19
 - Calculus                      0.1.14
 - Codecs                        0.1.5
 - ColorTypes                    0.2.0
 - Colors                        0.6.0
 - Compat                        0.7.8
 - Compose                       0.4.0
 - Conda                         0.1.8
 - Contour                       0.0.8
 - DataArrays                    0.2.20
 - DataStructures                0.3.13
 - Dates                         0.4.4
 - Docile                        0.5.19
 - DualNumbers                   0.1.5
 - FactCheck                     0.4.1
 - FixedPointNumbers             0.1.1
 - ForwardDiff                   0.1.2
 - GZip                          0.2.18
 - Graphics                      0.1.3
 - Grid                          0.4.0
 - Hexagons                      0.0.4
 - Homebrew                      0.2.0
 - ImmutableArrays               0.0.11
 - IniFile                       0.2.4
 - Iterators                     0.1.9
 - JSON                          0.5.0
 - KernelDensity                 0.1.2
 - LaTeXStrings                  0.1.6
 - Loess                         0.0.5
 - MacroTools                    0.2.0
 - MathProgBase                  0.3.19
 - Measures                      0.0.1
 - Mustache                      0.0.14
 - NLopt                         0.2.3
 - NaNMath                       0.1.1
 - Nettle                        0.2.0
 - Optim                         0.4.4
 - PDMats                        0.3.6
 - Polynomials                   0.0.4
 - RCalling                      0.0.0-             master (unregistered)
 - Reactive                      0.2.4
 - Reexport                      0.0.3
 - Requires                      0.2.1
 - Roots                         0.1.20
 - SHA                           0.1.2
 - Showoff                       0.0.6
 - SortingAlgorithms             0.0.6
 - StatsFuns                     0.2.0
 - TimeSeries                    0.6.5
 - Tk                            0.3.6
 - URIParser                     0.1.1
 - WoodburyMatrices              0.1.2
 - ZMQ                           0.3.1

In [21]:
# Show the current working directory.
pwd()


Out[21]:
"/Users/arindambose/Documents/julia"

In [23]:
# Bubble-style scatter plot: 50 random points with random marker areas.
n = 50
# Seed the global random number generator before drawing the random values below.
srand(1)
x = rand(n)
y = rand(n)

# Area of a circle (pi * r^2) whose radius is 15 times a random draw.
area = pi .* (15 .* rand(n)).^2
# `s` passes the marker areas and `alpha` the transparency to the scatter call.
scatter(x, y, s = area, alpha = 0.5)


Out[23]:
PyObject <matplotlib.collections.PathCollection object at 0x3183882d0>

In [24]:
# NOTE: there is no `help` function in this session (UndefVarError below);
# the `?repmat` help-mode query in the next cell is what works.
help(repmat)


LoadError: UndefVarError: help not defined
while loading In[24], in expression starting on line 1

In [25]:
?repmat


search: 
Out[25]:
repmat(A, n, m)

Construct a matrix by repeating the given matrix n times in dimension 1 and m times in dimension 2.

repmat


In [26]:
# NOTE: repeated attempt; `help` is still undefined (UndefVarError below).
# Use the `?name` help-mode query instead.
help(repmat)


LoadError: UndefVarError: help not defined
while loading In[26], in expression starting on line 1

In [28]:
using DataFrames
# Build a small two-row data frame with an integer column (A), a numeric
# column holding pi and e (B), and a string column (C).
df = DataFrame(A = [1,2],
B = [pi, e],
C = ["xx", "yy"])
# Print the data frame as a text table.
show(df)


2x3 DataFrames.DataFrame
| Row | A | B       | C    |
|-----|---|---------|------|
| 1   | 1 | 3.14159 | "xx" |
| 2   | 2 | 2.71828 | "yy" |

In [29]:
# Load the iris data set and keep it in `iris` for the cells that follow.
iris = dataset("datasets", "iris")
# Print the column names (they are Symbols, e.g. :SepalLength).
show(names(iris))


[:SepalLength,:SepalWidth,:PetalLength,:PetalWidth,:Species]

In [30]:
# NOTE: `iris:SepalLength` is not column access -- Julia looks up a variable
# named `SepalLength`, hence the UndefVarError below. A column is selected
# with `iris[:SepalLength]`.
describe(iris:SepalLength)


LoadError: UndefVarError: SepalLength not defined
while loading In[30], in expression starting on line 1

In [31]:
# NOTE: this passes only the bare Symbol :SepalLength, with no data frame, and
# `describe` has no method for a Symbol (MethodError below). The column itself
# is `iris[:SepalLength]`.
describe(:SepalLength)


LoadError: MethodError: `describe` has no method matching describe(::Symbol)
Closest candidates are:
  describe(::Any, !Matched::DataFrames.AbstractDataFrame)
  describe{T<:Number}(::Any, !Matched::AbstractArray{T<:Number,N})
  describe{T}(::Any, !Matched::AbstractArray{T,N})
while loading In[31], in expression starting on line 1

In [32]:
# Show the first rows of the data frame (six rows in Out[32]).
head(iris)


Out[32]:
SepalLengthSepalWidthPetalLengthPetalWidthSpecies
15.13.51.40.2setosa
24.93.01.40.2setosa
34.73.21.30.2setosa
44.63.11.50.2setosa
55.03.61.40.2setosa
65.43.91.70.4setosa

In [33]:
# Group the rows by :Species and apply `sum` to each remaining column;
# the result columns are named `<column>_sum` (see Out[33]).
aggregate(iris, :Species, sum)


Out[33]:
SpeciesSepalLength_sumSepalWidth_sumPetalLength_sumPetalWidth_sum
1setosa250.3171.473.112.3
2versicolor296.8138.5213.0000000000000366.3
3virginica329.4148.70000000000002277.6101.3

In [34]:
# Same grouping, but with several functions at once: each remaining column
# yields a `<column>_sum` and a `<column>_mean` column (see Out[34]).
aggregate(iris, :Species, [sum, mean])


Out[34]:
SpeciesSepalLength_sumSepalLength_meanSepalWidth_sumSepalWidth_meanPetalLength_sumPetalLength_meanPetalWidth_sumPetalWidth_mean
1setosa250.35.006171.43.42873.11.46212.30.24600000000000002
2versicolor296.85.936138.52.77213.000000000000034.26000000000000166.31.3259999999999998
3virginica329.46.587999999999999148.700000000000022.974277.65.5520000000000005101.32.026

In [35]:
## Reshape data using the stack function
# NOTE: `help` is not defined in this session (UndefVarError below); use the
# `?stack` help-mode query instead.
help("stack")


LoadError: UndefVarError: help not defined
while loading In[35], in expression starting on line 2

In [36]:
?stack


search: 
Out[36]:

Stacks a DataFrame; convert from a wide to long format

stack(df::AbstractDataFrame, measure_vars, id_vars)
stack(df::AbstractDataFrame, measure_vars)
stack(df::AbstractDataFrame)
melt(df::AbstractDataFrame, id_vars, measure_vars)
melt(df::AbstractDataFrame, id_vars)

Arguments

  • df : the AbstractDataFrame to be stacked

  • measure_vars : the columns to be stacked (the measurement variables), a normal column indexing type, like a Symbol, Vector{Symbol}, Int, etc.; for melt, defaults to all variables that are not id_vars

  • id_vars : the identifier columns that are repeated during stacking, a normal column indexing type; for stack defaults to all variables that are not measure_vars

If neither measure_vars or id_vars are given, measure_vars defaults to all floating point columns.

Result

  • ::DataFrame : the long-format dataframe with column :value holding the values of the stacked columns (measure_vars), with column :variable a Vector of Symbols with the measure_vars name, and with columns for each of the id_vars.

See also stackdf and meltdf for stacking methods that return a view into the original DataFrame. See unstack for converting from long to wide format.

Examples

d1 = DataFrame(a = repeat([1:3;], inner = [4]),
               b = repeat([1:4;], inner = [3]),
               c = randn(12),
               d = randn(12),
               e = map(string, 'a':'l'))

d1s = stack(d1, [:c, :d])
d1s2 = stack(d1, [:c, :d], [:a])
d1m = melt(d1, [:a, :b, :e])
stack stackdf stackplot StackOverflowError vstack hstack unstack


In [37]:
## from wide to long use melt
using StatsBase

In [38]:
# Column names of the iris data frame, returned as an array of Symbols.
names(iris)


Out[38]:
5-element Array{Symbol,1}:
 :SepalLength
 :SepalWidth 
 :PetalLength
 :PetalWidth 
 :Species    

In [39]:
# Summary statistics (mean, minimum, quartiles, median, maximum) of one column,
# selected with `iris[:SepalLength]`.
summarystats(iris[:SepalLength])


Out[39]:
Summary Stats:
Mean:         5.843333
Minimum:      4.300000
1st Quartile: 5.100000
Median:       5.800000
3rd Quartile: 6.400000
Maximum:      7.900000

In [40]:
# `describe` on a single column prints the same summary statistics as
# `summarystats` in the previous cell.
describe(iris[:SepalLength])


Summary Stats:
Mean:         5.843333
Minimum:      4.300000
1st Quartile: 5.100000
Median:       5.800000
3rd Quartile: 6.400000
Maximum:      7.900000

In [41]:
# NOTE: in this session `table` resolves to PyPlot's `table` (see the
# PyPlot.jl frame in the trace below), not to a frequency-table function, so
# the call ends in a Python TypeError.
table(iris[:SepalLength])


LoadError: PyError (:PyObject_Call) <type 'exceptions.TypeError'>
TypeError('table() takes exactly 0 arguments (1 given)',)

while loading In[41], in expression starting on line 1

 [inlined code] from /Users/arindambose/.julia/v0.4/PyCall/src/exception.jl:81
 in pycall at /Users/arindambose/.julia/v0.4/PyCall/src/PyCall.jl:361
 in table at /Users/arindambose/.julia/v0.4/PyPlot/src/PyPlot.jl:460

In [42]:
using StatsBase

In [43]:
# NOTE: `counts` only has methods for integer arrays, while the Species
# column is a PooledDataArray of strings, hence the MethodError below.
# `countmap` handles this column.
mycount = counts(iris[:Species])


LoadError: MethodError: `counts` has no method matching counts(::DataArrays.PooledDataArray{ASCIIString,UInt8,1})
while loading In[43], in expression starting on line 1

In [44]:
?counts


search: 
Out[44]:

No documentation found.

StatsBase.counts is a generic Function.

# 16 methods for generic function "counts":
counts(x::AbstractArray{T<:Integer,N}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:54
counts(x::AbstractArray{T<:Integer,N}, levels::UnitRange{T<:Integer}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:50
counts(x::AbstractArray{T<:Integer,N}, levels::UnitRange{T<:Integer}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:51
counts(x::AbstractArray{T<:Integer,N}, k::Integer) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:52
counts(x::AbstractArray{T<:Integer,N}, k::Integer, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:53
counts(x::AbstractArray{T<:Integer,N}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:55
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:144
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, levels::Tuple{UnitRange{T<:Integer},UnitRange{T<:Integer}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:130
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, levels::Tuple{UnitRange{T<:Integer},UnitRange{T<:Integer}}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:134
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, levels::UnitRange{T<:Integer}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:137
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, levels::UnitRange{T<:Integer}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:138
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, ks::Tuple{Integer,Integer}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:140
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, ks::Tuple{Integer,Integer}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:141
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, k::Integer) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:142
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, k::Integer, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:143
counts(x::AbstractArray{T<:Integer,N}, y::AbstractArray{T<:Integer,N}, wv::StatsBase.WeightVec{W,Vec<:AbstractArray{T<:Real,1}}) at /Users/arindambose/.julia/v0.4/StatsBase/src/counts.jl:145
counts addcounts! countlines count_ones count_zeros count countnz


In [45]:
# Count the occurrences of each distinct value; the result is a Dict mapping
# value => count (50 per species here).
mycount = countmap(iris[:Species])


Out[45]:
Dict{Union{ASCIIString,DataArrays.NAtype},Int64} with 3 entries:
  "virginica"  => 50
  "setosa"     => 50
  "versicolor" => 50

In [46]:
# Like `countmap`, but the Dict values are proportions of the total rather than counts.
proportionmap(iris[:Species])


Out[46]:
Dict{Union{ASCIIString,DataArrays.NAtype},Float64} with 3 entries:
  "virginica"  => 0.3333333333333333
  "setosa"     => 0.3333333333333333
  "versicolor" => 0.3333333333333333

In [51]:
# Install the Tables package directly from its git URL.
# NOTE: this run fails with "Tables already exists" because the package had
# already been cloned. The execution counters (51 here, 48 and 50 below) show
# that these cells were run out of document order.
Pkg.clone("git://github.com/nalimilan/Tables.jl.git")


INFO: Cloning Tables from git://github.com/nalimilan/Tables.jl.git
LoadError: Tables already exists
while loading In[51], in expression starting on line 1

In [48]:
# NOTE: loading fails here because Tables needs NamedArrays, which was not
# yet installed ("NamedArrays not found in path" below).
using Tables


LoadError: LoadError: ArgumentError: NamedArrays not found in path
while loading /Users/arindambose/.julia/v0.4/Tables/src/Tables.jl, in expression starting on line 3
while loading In[48], in expression starting on line 1

 in require at /Applications/Julia-0.4.1.app/Contents/Resources/julia/lib/julia/sys.dylib
 in include at /Applications/Julia-0.4.1.app/Contents/Resources/julia/lib/julia/sys.dylib
 in include_from_node1 at /Applications/Julia-0.4.1.app/Contents/Resources/julia/lib/julia/sys.dylib
 in require at /Applications/Julia-0.4.1.app/Contents/Resources/julia/lib/julia/sys.dylib

In [50]:
# Install NamedArrays, the package that `using Tables` reported as missing.
Pkg.add("NamedArrays")


INFO: Cloning cache of NamedArrays from git://github.com/davidavdav/NamedArrays.jl.git
INFO: Installing NamedArrays v0.4.6
INFO: Package database updated

In [52]:
# NOTE: cloning again fails with "Tables already exists" -- the package is
# already present, so no second clone is needed.
Pkg.clone("git://github.com/nalimilan/Tables.jl.git")


INFO: Cloning Tables from git://github.com/nalimilan/Tables.jl.git
LoadError: Tables already exists
while loading In[52], in expression starting on line 1

In [ ]: