In [1]:
# One-time environment setup: refresh the package metadata, then install the
# two packages this notebook loads directly (JSON and PyPlot).
Pkg.update()
for pkgname in ("JSON", "PyPlot")
    Pkg.add(pkgname)
end
In [2]:
# Do file processing in parallel: top the session up to 9 processes in total.
# The count is clamped at zero so that re-running this cell, or starting Julia
# with more than 9 processes already, never asks addprocs for a negative
# number of workers.
addprocs(max(0, 9 - nprocs()))
Out[2]:
In [3]:
@everywhere begin
using JSON
# Extend Base.haskey so that asking `nothing` for a string key answers `false`
# instead of raising a MethodError.
# NOTE(review): both `haskey` and `Nothing` belong to Base, so this method is
# type piracy; it is visible to all code running in the same process.
import Base.haskey
function haskey(a::Nothing, b::ASCIIString)
    return false
end
# Add `count` to the tally stored under `key` in `dict`, creating the entry
# when `key` is not present yet. Returns the updated tally.
#
# The default for `count` is `one(T)` rather than the literal `1`, so the
# two-argument form also works when the dictionary's value type is an integer
# type other than `Int` (the literal would not match `count::T`).
function increment!{S,T<:Integer}(dict::Dict{S,T}, key::S, count::T=one(T))
    # A single lookup with a zero fallback covers both the "seen" and the
    # "first time" cases.
    total = get(dict, key, zero(T)) + count
    dict[key] = total
    return total
end
# Read consecutive JSON values from the file `filename` and tally how often
# each `entry["geo"]["coordinates"]` value occurs.
#
# Returns a Dict mapping each coordinates array to its number of occurrences.
# Entries for which the coordinates cannot be looked up or tallied (KeyError
# or MethodError) are skipped. Any other error is propagated to the caller.
# Side effects: prints the loop timing (via @time) and the number of records
# that were actually tallied.
function parsefile(filename)
    locations = Dict{Array{Any,1},Int}()
    datafile = open(filename)
    n = 0
    try
        @time while true
            thisentry = nothing
            try
                thisentry = JSON.parse(datafile)
            catch parse_error
                # Running out of input is the normal way for the loop to end;
                # every other parse failure is a real error.
                if isa(parse_error, EOFError) && eof(datafile)
                    break
                else
                    rethrow()
                end
            end
            try
                thiscoordinates = thisentry["geo"]["coordinates"]
                #println(n, " ", thisentry["id"]) #DEBUG: Print message id
                increment!(locations, thiscoordinates)
                # Count the record only once it has really been tallied, so
                # the total printed below excludes skipped entries.
                n += 1
            catch key_error
                # KeyError: a key is absent. MethodError: a value on the path
                # has a type that cannot be indexed or tallied (for example
                # `nothing`). Both mean "no usable location"; skip the entry.
                if isa(key_error, KeyError) || isa(key_error, MethodError)
                    continue
                else
                    rethrow()
                end
            end
        end
        println("Loaded ", n, " records from ", datafile.name)
    finally
        # Release the file handle even when an error escapes the loop.
        close(datafile)
    end
    locations
end
end
# One input file per month number, 04 through 11, of 2012.
filenames = [@sprintf("/data/2012_%02d.json", month) for month=4:11]

# Parse every file with pmap; the result holds one tally dictionary per file.
locations_all = pmap(parsefile, filenames)

# Merge the per-file tallies into a single dictionary of
# coordinates => total count.
locations = Dict{Array{Any,1},Int}()
for location_data in locations_all
    for (coord, count) in location_data
        increment!(locations, coord, count)
    end
end

println("Loaded ", length(locations), " distinct locations")
In [9]:
# Flatten the tally dictionary into three parallel arrays for pyplot: entry i
# of each array describes the same point. Only points seen at least 50 times
# are kept.
latitudes = Float64[]
longitudes = Float64[]
frequencies = Int[]
for (point, count) in locations
    # Skip sparsely used locations.
    count >= 50 || continue
    push!(latitudes, point[1])
    push!(longitudes, point[2])
    push!(frequencies, count)
end
# Number of points that passed the threshold (shown as the cell output).
length(latitudes)
Out[9]:
This code uses Python's matplotlib and its Basemap toolkit to plot the selected points (those with at least 50 tweets) on a map of Eastern Massachusetts. The area of each dot is proportional to the number of tweets from that point.
In [25]:
using PyPlot
using PyCall
@pyimport mpl_toolkits.basemap as basemap
# Marker size grows with the square root of the count, so the marker area is
# proportional to the count; the most frequent point gets size 30.
scalefactor = 30 / sqrt(maximum(frequencies))

# Mercator map at high ("h") resolution, bounded by the lower-left and
# upper-right corners given here.
m = basemap.Basemap(projection="merc", resolution="h",
                    llcrnrlat=42.18, llcrnrlon=-71.3,
                    urcrnrlat=42.54, urcrnrlon=-70.825)

# Background: map boundary fill, continent fill, then coastlines.
m[:drawmapboundary](fill_color="#4771a5")
m[:fillcontinents](color="#555555")
m[:drawcoastlines]()

# One red dot per retained location.
for (lon, lat, freq) in zip(longitudes, latitudes, frequencies)
    m[:plot](lon, lat, "ro",
             markersize=sqrt(freq)*scalefactor, latlon=true)
end