Julia is a high-level, dynamic programming language designed to meet the requirements of high-performance numerical and scientific computing while also being effective for general-purpose programming.
Julia's core is implemented in C and C++ and its parser in Scheme; the LLVM compiler framework is used for just-in-time (JIT) generation of machine code for x86(-64).
Development began in 2009, and the language was open-sourced in February 2012.
It currently has 250+ contributors to the language itself and 400+ overall.
Stable release: v0.2.1 (2014-02-11); pre-release: v0.3 (nightly builds).
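Because methods are compiled on first use via LLVM, the generated machine code can be inspected directly from the REPL. A minimal sketch, assuming v0.2/v0.3 where these tools are plain functions (the @code_native macro form arrived in later releases):
square(x) = x * x
code_native(square, (Int,))   # print the x86 machine code generated for square(::Int)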
In [1]:
subtypes(Number)
Out[1]:
In [2]:
subtypes(Real)
Out[2]:
In [3]:
subtypes(Integer)
Out[3]:
In [4]:
# Floating Point
@show 5/3
# Mathematical Constant
@show pi
# Rational
@show 2//3 + 1
# BigInt
@show big(2) ^ 1000 ;
In [5]:
subtypes(String)
Out[5]:
In [6]:
s = "Hello World"
@show typeof(s)
@show s[7] ;
In [7]:
# Unicode Names and Values
你好 = "(。◕_◕。)ノ "
@show typeof(你好)
@show 你好 ^ 3 ;
In [8]:
type NewType
    i::Integer
    s::String
end
new_t = NewType(33, "this is a NewType")
@show new_t.i
@show new_t.s ;
In [9]:
# Vectors
v = [1, 1]
Out[9]:
In [10]:
# Vector Operations
@show v + [2, 0] # vector addition
@show v + 1 # same as v + [1,1]
@show 5*v # scalar multiplication
Out[10]:
In [11]:
println( "Dot Product : ", dot(v, v) )
println( "Norm : ", norm(v) )
In [12]:
# Matrices
M = [1 1 ; 0 1]
Out[12]:
In [13]:
# Matrix Addition
M + 1 ,
M + [0 0 ; 5 5]
Out[13]:
In [14]:
# Matrix Multiplication
2M ,
M ^ 2 ,
M * v
Out[14]:
In [15]:
# Gaussian Elimination
b = M * v
M \ b # solve back for v
Out[15]:
In [16]:
# Named functions
f(x) = 10x
function g(x)
    return x * 10
end
@show f(5)
@show g(5) ;
In [17]:
# Anonymous functions assigned to variables
h = x -> x * 10
i = function(x)
    x * 10
end
@show h(5)
@show i(5) ;
In [18]:
# Operators are functions
+(4,5)
Out[18]:
In [19]:
p = +
p(2,3)
Out[19]:
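Since operators are ordinary functions, they can also be passed directly to higher-order functions; a small sketch:
reduce(+, [1, 2, 3, 4])    # 10
map(+, [1, 2], [10, 20])   # [11, 22]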
In [20]:
bar(x::String) = println("You entered the string: $x")
bar(x::Integer) = x * 10
bar(x::NewType) = println(x.s)
methods(bar)
Out[20]:
In [21]:
bar("Hello")
bar(new_t)
bar(5)
Out[21]:
In [22]:
# Adding strings (this errors: + is not defined for String values)
"Hello" + "World"
In [23]:
# But the addition operator is a function, so we can use multiple dispatch to define it for strings
+(a::String, b::String) = a * b
"Hello" + "World"
Out[23]:
In [24]:
+(a::Number, b::String) = string(a) + b
+(a::String, b::Number) = a + string(b)
99 + "bottles"
Out[24]:
In [25]:
# Method Overloading
type SimpleObject
    data::Union(Integer, String)
    set::Function
    function SimpleObject()
        this = new()
        this.data = ""
        function setter(x::Integer)
            println("Setting an integer")
            this.data = x
        end
        function setter(x::String)
            println("Setting a string")
            this.data = x
        end
        this.set = setter
        return this
    end
end
obj = SimpleObject()
obj.set(99)
obj.set("hello")
Out[25]:
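The same overloading is more often expressed with plain multiple dispatch on a free function rather than a closure stored inside the object; a sketch (set! is a hypothetical helper name, not part of the example above):
# set! is a hypothetical helper; dispatch picks the method from the argument type
set!(o::SimpleObject, x::Integer) = (println("Setting an integer"); o.data = x)
set!(o::SimpleObject, x::String)  = (println("Setting a string");  o.data = x)
set!(obj, 99)
set!(obj, "hello")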
In [26]:
# Sum of odd integers between 1 and 5
values = 1:5
myMapper = x -> x
myFilter = x -> x % 2 == 1
myReducer = (x,y) -> x + y
mapped = map( myMapper, values )
filtered = filter( myFilter, mapped )
reduced = reduce( myReducer, filtered )
Out[26]:
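For comparison, the same pipeline can be written more compactly with Base helpers; a sketch:
sum(filter(isodd, 1:5))   # 1 + 3 + 5 = 9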
In [27]:
# Code Generation
# Functions for exponentiating to the powers of 1 to 5
for n in 1:5
    s = "power$n(x) = x ^ $n"
    println(s)
    expression = parse(s)
    eval(expression)
end
power5( 2 )
Out[27]:
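Rather than building and parsing strings, the same functions can be generated directly from expressions; a sketch using symbol() and @eval, both in Base for these versions:
for n in 1:5
    fname = symbol("power$n")   # e.g. :power3
    @eval $fname(x) = x ^ $n    # defines power3(x) = x ^ 3, etc.
end
power3(2)                       # 8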
In [28]:
# Macros: Crude Timer Example
macro timeit(expression)
    quote
        t = time()
        result = $expression  # evaluate the expression being timed
        elapsed = time() - t
        println( "elapsed time: ", elapsed )
        result                # last value of the quote block, returned to the caller
    end
end
@timeit cos(2pi)
@timeit cos(2pi)
Out[28]:
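To see exactly what code the macro splices in at the call site, the expansion can be printed; a sketch:
macroexpand(:(@timeit cos(2pi)))   # show the expression the macro generates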
In [29]:
using StatsBase
x = rand(100) # uniform distribution [0,1)
println( "mean: ", mean(x) )
println( "variance: ", var(x) )
println( "skewness: ", skewness(x) )
println( "kurtosis: ", kurtosis(x) )
In [30]:
describe(x)
In [31]:
using Distributions
distr = Normal(0, 2)
println( "pdf @ origin = ", pdf(distr, 0.0) )
println( "cdf @ origin = ", cdf(distr, 0.0) )
In [32]:
x = rand(distr, 1000)
fit_mle(Normal, x)
Out[32]:
In [33]:
using DataFrames
df = DataFrame(
    A = [6, 3, 4],
    B = ["a", "b", "c"],
    C = [1//2, 3//4, 5//6],
    D = [true, true, false]
)
df[:C][2] = NA
df
Out[33]:
In [34]:
# Joins
names = DataFrame(ID = [5, 4], Name = ["Jack", "Jill"])
jobs = DataFrame(ID = [5, 4], Job = ["Lawyer", "Doctor"])
full = join(names, jobs, on = :ID)
Out[34]:
In [35]:
using RDatasets
iris = dataset("datasets", "iris")
head(iris)
Out[35]:
In [36]:
# Group by Species, then compute mean of PetalLength per group
by( iris, :Species, df -> mean(df[:PetalLength]) )
Out[36]:
In [37]:
using ASCIIPlots
x = iris[:PetalLength]
y = iris[:PetalWidth]
scatterplot(x, y)
Out[37]:
In [38]:
using Winston
scatter(x, y, ".")
xlabel("PetalLength")
ylabel("PetalWidth")
Out[38]:
In [39]:
using Gadfly
set_default_plot_size(20cm, 12cm)
plot(iris, x = "PetalLength", y = "PetalWidth", color = "Species", Geom.point)
Out[39]:
In [40]:
# K-means Clustering
using Clustering
features = array(iris[:, 1:4])' # use matrix() on Julia v0.2
result = kmeans( features, 3 ) # onto 3 clusters
plot(iris, x = "PetalLength", y = "PetalWidth", color = result.assignments, Geom.point)
Out[40]:
In [41]:
# Principal Component Analysis
using MultivariateStats
pc = fit(PCA, features; maxoutdim = 2)
reduced = transform(pc, features)
@show size(reduced)
plot(iris, x = reduced[1,:], y = reduced[2,:], color = "Species", Geom.point)
Out[41]:
In [42]:
using MultivariateStats
# Generate a noisy linear system
features = rand(1000, 3) # feature matrix
coeffs = rand(3) # ground truth of weights
targets = features * coeffs + 0.1 * randn(1000) # generate response
# Linear Least Square Regression
coeffs_llsq = llsq(features, targets; bias=false)
# Ridge Regression
coeffs_ridge = ridge(features, targets, 0.1; bias=false) # regularization coef = 0.1
@show coeffs
@show coeffs_llsq
@show coeffs_ridge ;
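For reference, with bias=false the ridge estimate has a closed form; a sketch of computing it by hand, assuming the same regularization coefficient of 0.1:
# (X'X + λI) \ X'y -- the penalized least-squares solution
lambda = 0.1
coeffs_manual = (features' * features + lambda * eye(3)) \ (features' * targets)
@show coeffs_manual ;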
In [43]:
# Cross Validation: K-Fold Example
using MLBase, MultivariateStats
n = length(targets)
# Define training and error evaluation functions
function training(inds)
    coeffs = ridge(features[inds, :], targets[inds], 0.1; bias=false)
    return coeffs
end
function error_evaluation(coeffs, inds)
    y = features[inds, :] * coeffs
    rms_error = sqrt(mean(abs2(targets[inds] .- y)))
    return rms_error
end
# Cross validate
scores = cross_validate(
    inds -> training(inds),
    (coeffs, inds) -> error_evaluation(coeffs, inds),
    n,            # total number of samples
    Kfold(n, 3))  # cross validation plan: 3-fold
# Get the mean and std of scores
@show scores
@show mean_and_std(scores) ;
In [44]:
# Model Tuning: Grid Search
using MLBase, MultivariateStats
# Hold out 20% of records for testing
n_test = int(length(targets) * 0.2)
train_rows = shuffle([1:length(targets)] .> n_test)
features_train, features_test = features[train_rows, :], features[!train_rows, :]
targets_train, targets_test = targets[train_rows], targets[!train_rows]
# Define estimation function
function estfun(regcoef, bias)
    coeffs = ridge(features_train, targets_train, regcoef; bias=bias)
    return bias ? (coeffs[1:end-1], coeffs[end]) : (coeffs, 0.0)
end
# Define error evaluation function as mean squared deviation
evalfun(model) = msd(features_test * model[1] + model[2], targets_test)
result = gridtune(estfun, evalfun,
    ("regcoef", [0.01, 0.1, 1.0]),
    ("bias", [true, false]);
    ord=Reverse,   # smaller msd value indicates a better model
    verbose=true)  # show progress information
best_model, best_config, best_score = result
# Print results
coeffs, bias = best_model
println("Best model:")
println(" coeffs = $(coeffs')"),
println(" bias = $bias")
println("Best config: regcoef = $(best_config[1]), bias = $(best_config[2])")
println("Best score: $(best_score)")
In [45]:
# Regression Tree
using DecisionTree
# Train model, make predictions on test records
model = build_tree(targets_train, features_train)
predictions = apply_tree(model, features_test)
@show cor(targets_test, predictions)
@show R2(targets_test, predictions)
scatter(targets_test, predictions, ".")
xlabel("actual"); ylabel("predicted")
Out[45]:
In [46]:
# Support Vector Machine
using LIBSVM
features = array(iris[:, 1:4])
labels = array(iris[:Species])
# Hold out 20% of records for testing
n_test = int(length(labels) * 0.2)
train_rows = shuffle([1:length(labels)] .> n_test)
features_train, features_test = features[train_rows, :], features[!train_rows, :]
labels_train, labels_test = labels[train_rows], labels[!train_rows]
model = svmtrain(labels_train, features_train')
(predictions, decision_values) = svmpredict(model, features_test')
confusion_matrix(labels_test, predictions)
Out[46]:
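A quick accuracy figure can also be computed directly from the predictions; a sketch:
mean(predictions .== labels_test)   # fraction of test records classified correctly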
In [47]:
# Random Forest
using DecisionTree
# Train forest using 2 random features per split and 10 trees
model = build_forest(labels_train, features_train, 2, 10)
predictions = apply_forest(model, features_test)
# Pretty print of one tree in forest
print_tree(model.trees[1])
confusion_matrix(labels_test, predictions)
Out[47]: