In [1]:
## Print the working directory on this computer
pwd()
Out[1]:
In [4]:
## list the files in the working directory
readdir()
Out[4]:
In [15]:
## Read the GTD-Export.csv file and import it into a data frame.
# Load the DataFrames, Distributions, HypothesisTests, StatsBase and StatsFuns packages
using DataFrames, Distributions, HypothesisTests, StatsBase, StatsFuns
mydata = DataFrame(readcsv("GTD-Export.csv"))
## Simple preprocessing of the data:
##   1. give the columns we keep (x1..x5, x11, x12, x17, x20) descriptive names
##   2. delete row 1 (presumably the header text read in as data -- TODO confirm)
##   3. drop the remaining, unwanted columns
## First rename the variables
mydata = rename!(mydata, Dict(:x1 => :Gtdid,
:x2=>:Date, :x3=>:country, :x4=>:city,
:x5=>:perpetrator, :x11=>:fatalities,
:x12=>:injured, :x17=>:attacktype,
:x20=>:weapon))
## Next, remove row 1 from the data set
mydata = deleterows!(mydata, 1)
## Next, remove the unwanted columns x6..x10, x13..x16,
## x18, x19 and x21..x24 (everything that was not renamed above)
mydata = delete!(mydata, [:x6, :x7, :x8,
:x9, :x10, :x13, :x14, :x15, :x16,
:x18, :x19, :x21, :x22, :x23, :x24])
Out[15]:
In [16]:
mydata
Out[16]:
In [17]:
## Let's describe this data set
describe(mydata)
In [18]:
using DataArrays
In [19]:
describe(mydata[:weapon])
In [20]:
stack(mydata[:weapon])
In [21]:
countmap(mydata[:weapon])
Out[21]:
In [24]:
cityattack = proportionmap(mydata[:city])
Out[24]:
In [25]:
cityattack*100
In [26]:
cityattack[1]
In [27]:
cityattack[:1]
In [28]:
?Dict
Out[28]:
In [29]:
?count
Out[29]:
In [30]:
?counts
Out[30]:
In [31]:
count(cityattack)
In [32]:
# Print every city together with its share of attacks.
# The loop variables are passed to println directly; the quoted form
# println("k", "v") printed the literal text "kv" on every iteration.
for (city, share) in cityattack
    println(city, " => ", share)
end
In [33]:
cityattack[1
]
In [34]:
cityattack[:1]
In [35]:
cityattack[:1,:1]
In [36]:
cityattack
Out[36]:
In [37]:
keys(cityattack)
Out[37]:
In [38]:
values(cityattack)
Out[38]:
In [39]:
# The Dict values iterator supports no arithmetic; collect it into a Vector
# first, then scale every proportion to a percentage.
collect(values(cityattack)) .* 100
In [40]:
# collect materialises the Dict's value iterator as a Vector;
# Array(...) is not a constructor that accepts an iterator.
myvals = collect(values(cityattack))
In [42]:
# collect (rather than wrapping the iterator in [ ]) yields a Vector of the
# proportions themselves, so numeric operations such as myvals * 100 apply.
myvals = collect(values(cityattack))
Out[42]:
In [43]:
myvals * 100
In [44]:
myvals .* 100
In [45]:
myvals .= myvals * 100
In [46]:
myvals = myvals * 100
In [50]:
myvals = collect(myvals);
In [51]:
myvals * 100
In [49]:
# Build the vector 1..10, then double every element (the cell's value).
a = [1:10;]
a * 2
Out[49]:
In [52]:
myvalues = [keys(cityattack), values(cityattack)]
Out[52]:
In [53]:
myvalues[1]
Out[53]:
In [54]:
myvalues[1,2]
In [55]:
println(myvalues[1], myvalues[2])
In [56]:
myvalues[2]*100
In [57]:
[myvalues[2]] * 100
In [93]:
## Print each city's share of attacks as a percentage, one value per line
for share in values(cityattack)
    println(share * 100)
end
In [92]:
percent
In [79]:
?round
Out[79]:
In [94]:
# A Dict is looked up, not called like a function. `get` returns the stored
# proportion, or 0.0 when "Lahore" is not one of the keys.
get(cityattack, "Lahore", 0.0)
In [95]:
myvalues
Out[95]:
In [96]:
size(myvalues)
Out[96]:
In [97]:
ndims(myvalues)
Out[97]:
In [98]:
eye(3)
Out[98]:
In [99]:
reshape(myvalues, 25)
In [100]:
reshape(myvalues, 2)
Out[100]:
In [101]:
reshape(myvalues, 2, 2)
In [102]:
reshape(myvalues, 2, 25)
In [ ]:
In [103]:
length(myvalues)
Out[103]:
In [104]:
length(myvalues[1])
Out[104]:
In [105]:
length(myvalues[2])
Out[105]:
In [110]:
a = [10, 20, 30, 40]
Out[110]:
In [111]:
reshape(a, 2,2)
Out[111]:
In [112]:
reshape(myvalues, 2, 2)
In [113]:
round(myvalues[2] * 100, 2)
In [114]:
myvalues[2] * 100
In [116]:
[myvalues[2]]
Out[116]:
In [117]:
[myvalues[2]] * 100
In [118]:
[myvalues[2] * 100]
In [119]:
a = [1 2 3 4]
Out[119]:
In [120]:
myvalues[2].*100
In [121]:
[myvalues[2]] .* 100
In [123]:
a = [10, 20, 30]
Out[123]:
In [124]:
typeof(myvalues)
Out[124]:
In [125]:
myvalues
Out[125]:
In [126]:
size(myvalues)
Out[126]:
In [127]:
ndims(myvalues)
Out[127]:
In [128]:
eye(3)
Out[128]:
In [129]:
diagm(2, 4)
In [130]:
diagm([2,4])
Out[130]:
In [131]:
size(eye(3))
Out[131]:
In [133]:
Array{Int64, 1} == Vector{Int64}
Out[133]:
In [134]:
a = [10, 20, 30, 40]
Out[134]:
In [135]:
reshape(a, 2, 2)
Out[135]:
In [136]:
reshape(myvalues, 1, 2)
Out[136]:
In [139]:
myvals = reshape(myvalues, 2, 1)
Out[139]:
In [140]:
myvals
Out[140]:
In [141]:
myvals[2]
Out[141]:
In [142]:
myvals[1,1]
Out[142]:
In [143]:
myvals[1,2]
In [144]:
myvals[2,1]
Out[144]:
In [145]:
myvals[2]*100
In [146]:
squeeze(myvals, 1)
In [147]:
myvals[1:3]
In [148]:
myvals[1:2]
Out[148]:
In [149]:
myvals
Out[149]:
In [150]:
myvals1 = [myvals[1]; myvals[2]]
Out[150]:
In [151]:
# Last element of myvals (the name was previously misspelled as `myval`,
# which is not defined anywhere in this notebook).
myvals[end]
In [153]:
myvals[end][2]
In [154]:
myvals .* 100
In [155]:
myvals[2] .* 10
In [156]:
names(mydata)
Out[156]:
In [157]:
?by
Out[157]:
In [164]:
# Count attacks per city (the counts are read back from column :x1), then
# add each city's share of all attacks as a percentage rounded to 2 decimals.
results = by(mydata, :city, nrow)
x2 = let n = results[:x1]
    round(n / sum(n) * 100, 2)
end
results[:percent] = x2
Out[164]:
In [165]:
results
Out[165]:
In [172]:
using PyPlot
# plot() takes numeric vectors, not a DataFrame plus a column name:
# pull the :percent column out and hand it over as a plain array.
PyPlot.plot(convert(Array, results[:percent]))
In [174]:
rcountry = by(mydata, :country, nrow)
Out[174]:
In [175]:
?by
Out[175]:
In [177]:
by(mydata, ["country", "weapon"], countmap)
In [179]:
by(mydata, "country", nrow)
In [195]:
results3 = by(mydata, :country, nrow)
Out[195]:
In [194]:
results2 = by(mydata, [:country, :weapon],nrow)
#results3 = melt(results2, [:x1, :weapon])
Out[194]:
In [190]:
?stack
Out[190]:
In [196]:
results3
Out[196]:
In [221]:
Gadfly.plot(results3, x = "country", y = "x1", Geom.bar)
Out[221]:
In [222]:
?xtab
Out[222]:
In [209]:
using PyPlot
In [213]:
# plot() takes numeric vectors, not a DataFrame plus a column symbol:
# pull the :x1 count column out and hand it over as a plain array.
PyPlot.plot(convert(Array, results3[:x1]))
In [224]:
using DataArrays
In [226]:
?xtabs
Out[226]:
In [227]:
mydata
Out[227]:
In [230]:
xtab(mydata, "perpetrator", "city")
In [231]:
Pkg.add("Stats")
In [232]:
using Stats
In [233]:
?reshape
Out[233]:
In [234]:
reshape(results2, 2)
In [235]:
reshape(myvals, 2)
Out[235]:
In [242]:
citystats = by(mydata, [:weapon], nrow)
Out[242]:
In [243]:
using RDatasets
In [244]:
iris = dataset("datasets", "iris")
Out[244]:
In [245]:
names(iris)
Out[245]:
In [246]:
by(iris, :Species, size)
Out[246]:
In [247]:
by(mydata, :country, size)
Out[247]:
In [259]:
by(mydata, :injury, sum)
In [250]:
mydata[:injured]
Out[250]:
In [253]:
median(mydata[:injured])
In [252]:
by(iris, :Species, df -> mean(df[:PetalLength]))
Out[252]:
In [260]:
by(mydata, :country, size)
Out[260]:
In [261]:
aggregate(mydata, :country, sum )
In [262]:
names(mydata)
Out[262]:
In [263]:
typeof(mydata[:injured])
Out[263]:
In [265]:
by(iris, :Species, df -> DataFrame(N = size(df, 1)))
Out[265]:
In [266]:
# Per-species mean (m) and variance (s²) of the PetalLength column
by(iris, :Species) do group
    petal = group[:PetalLength]
    DataFrame(m = mean(petal), s² = var(petal))
end
Out[266]:
In [270]:
aggregate(iris, :Species, mean)
Out[270]:
In [279]:
# Print the number of rows in every (country, city) group, one per line
for group in groupby(mydata, [:country, :city])
    println(nrow(group))
end
In [280]:
names(mydata)
Out[280]:
In [285]:
convert(UInt64, "injured")
In [286]:
typeof(mydata[:injured])
Out[286]:
In [287]:
summary(mydata[:injured])
Out[287]:
In [288]:
summary(iris[:PetalLength])
Out[288]:
In [289]:
DataArray(mydata[:injured])
Out[289]:
In [291]:
# Mean of the :injured column per country. The anonymous function must read
# its per-group argument `df`; reading `mydata` inside it would compute the
# same whole-table mean for every country.
by(mydata, :country, df -> mean(df[:injured]))
In [292]:
mydata
Out[292]:
In [293]:
mydata2 = DataFrame(readdlm("wolfriver.csv", ','))
In [294]:
pwd()
Out[294]:
In [295]:
readdir()
Out[295]:
In [302]:
mydata2 = DataFrame(readcsv("health.csv"))
mydata2 = deleterows!(mydata2, 1)
Out[302]:
In [303]:
describe(mydata2)
In [305]:
mean(mydata2[:x2])
Out[305]:
In [319]:
# Mean of column :x4 within each :x3 group. The anonymous function must read
# its per-group argument `df1`; reading `mydata2` inside it would return the
# same whole-table mean for every group.
by(mydata2, :x3, df1 -> mean(df1[:x4]))
Out[319]:
In [321]:
# Report whether `a` is below or above 5; nothing is printed when a == 5.
a = 3
if a < 5
    println("small")
end
# Kept as the final expression so the cell still evaluates to `false`
# when the "large" branch is not taken.
a > 5 ? println("large") : false
Out[321]:
In [324]:
# Choose the message with the ternary, then print it once
println(a <= 10 ? "hello" : "not")
In [326]:
@which ^
Out[326]:
In [327]:
mymat = [2 1 1 1]
Out[327]:
In [328]:
w = [1,1]
Out[328]:
In [331]:
# 2x2 Float64 matrix with rows (2, 1) and (1, 1), written as a matrix literal
M = [2.0 1.0; 1.0 1.0]
Out[331]:
In [332]:
Pkg.add("Lapack")
In [334]:
Pkg.add("lapack")
In [335]:
?writedlm
Out[335]:
In [337]:
writedlm("mydata2.csv", mydata2)
In [338]:
?writecsv
Out[338]:
In [339]:
writecsv("mydata2.csv", mydata2)
In [340]:
writecsv(mydata2.csv, mydata2)
In [345]:
writetable("mydata2.csv", mydata2)
In [346]:
readdir()
Out[346]:
In [347]:
# Smallest Float64 strictly greater than 1.0. `ans` holds the result of the
# previously executed cell, so it cannot refer to a literal written one line
# earlier in the same cell; pass the value to nextfloat explicitly.
nextfloat(1.0)
In [348]:
1.0
Out[348]:
In [349]:
nextfloat(ans)
Out[349]:
In [350]:
describe(mydata)
In [351]:
summary(mydata[:injured])
Out[351]:
In [ ]: