By Stuart Geiger and Jamie Whitacre, made at a SciPy 2016 sprint. See the rendered, interactive, embeddable map here.
In [ ]:
!pip install pygithub
!pip install geopy
!pip install ipywidgets
In [ ]:
from github import Github
In [ ]:
# These are my private login credentials, stored in ghlogin.py
import ghlogin
In [ ]:
g = Github(login_or_token=ghlogin.gh_user, password=ghlogin.gh_passwd)
With this Github object, you can get all kinds of Github objects, which you can then further explore.
In [ ]:
user = g.get_user("staeiou")
In [143]:
from geopy.geocoders import Nominatim
We can plot points on a map using ipyleaflet and ipywidgets. We first set up a map object, which is created with various parameters. Then we create Marker objects, which are then appended to the map. We then display the map inline in this notebook.
In [144]:
import ipywidgets
from ipyleaflet import (
Map,
Marker,
TileLayer, ImageOverlay,
Polyline, Polygon, Rectangle, Circle, CircleMarker,
GeoJSON,
DrawControl
)
In [ ]:
For our mapping script, we want to get profiles for everyone who has made a commit to any of the repositories in the Jupyter organization, find their location (if any), then add it to a list. The API has a get_contributors function for repo objects, which returns a list of contributors ordered by number of commits, but not one that works across all repos in an org. So we have to iterate through all the repos in the org, and run the get_contributors method for each one. We also want to make sure we don't add any duplicates to our list, which would over-represent some areas, so we keep track of people in a dictionary.
I've written a few functions to make it easy to retrieve and map an organization's contributors.
In [ ]:
def get_org_contributor_locations(github, org_name, geolocator=None):
    """
    For a GitHub organization, get location for contributors to any repo in the org.

    github is the object used to reach the API; it must provide
    get_organization(org_name).get_repos(), and each repo must provide
    get_contributors(). geolocator is optional: any object with a
    geocode(location_string) method can be passed; when omitted, a geopy
    Nominatim geocoder is created.

    Returns a dictionary of {username URLS : geopy Locations}, then a dictionary
    of various metadata. Users without a profile location are counted in
    'no_loc_count' and left out of the first dictionary; a user whose location
    string could not be geocoded is kept with a value of None.

    Each user is processed once per organization: later appearances in other
    repos only increase 'duplicate_count'. A user whose lookup raised an error
    is not marked as processed, so the lookup is retried if the same user
    shows up again in another repo.
    """
    if geolocator is None:
        # Nominatim's usage policy asks for an identifying user agent, and
        # geopy 2.x refuses to run with its default one.
        geolocator = Nominatim(user_agent="github-org-contributor-map")

    # Set up empty containers and metadata variables
    contributor_locs = {}
    # Every user already handled, including those with no location. The
    # result dictionary alone cannot tell us this, because it has no entry
    # for location-less users and holds None for ungeocodable ones.
    seen_users = set()
    none_count = 0
    error_count = 0
    user_loc_count = 0
    duplicate_count = 0

    # For each repo in the organization
    for repo in github.get_organization(org_name).get_repos():
        # For each contributor in the repo
        for contributor in repo.get_contributors():
            # Progress indicator: one dot per contributor visited
            print('.', end="")
            user_url = contributor.url
            if user_url in seen_users:
                duplicate_count += 1
                continue
            # Try-Except block to handle API errors
            try:
                profile_location = contributor.location
                if profile_location is None:
                    none_count += 1
                else:
                    # Get coordinates for location string from the geocoder.
                    # Key is the user's URL, value is the geocoded location object.
                    contributor_locs[user_url] = geolocator.geocode(profile_location)
                    user_loc_count += 1
            except Exception:
                print('!', end="")
                error_count += 1
            else:
                seen_users.add(user_url)

    return contributor_locs, {'no_loc_count': none_count, 'user_loc_count': user_loc_count,
                              'duplicate_count': duplicate_count, 'error_count': error_count}
In [ ]:
def map_location_dict(map_obj, org_location_dict):
    """
    Maps the locations in a dictionary of {ids : geoPy Locations}.
    Must be passed a map object, then the dictionary. Returns the map object.

    Only the dictionary's values are used. Each value that is not None must
    expose latitude and longitude attributes; None values are skipped.
    The markers are added to map_obj with `+=`, and that same object is returned.
    """
    for location in org_location_dict.values():
        if location is None:
            continue
        map_obj += Marker(location=[location.latitude, location.longitude])
    return map_obj
In [ ]:
In [ ]:
jupyter_orgs = ['jupyter', 'ipython', 'jupyter-attic','jupyterhub']
In [ ]:
In [ ]:
# Results for every organization in jupyter_orgs, keyed by organization name:
# orgs_location_dict holds the first value returned by
# get_org_contributor_locations, orgs_metadata_dict holds the second.
orgs_location_dict = {}
orgs_metadata_dict = {}
for org in jupyter_orgs:
    # For a status update, print when we get to a new org in the list
    print(org)
    orgs_location_dict[org], orgs_metadata_dict[org] = get_org_contributor_locations(g,org)
In [ ]:
orgs_metadata_dict
In [ ]:
# Initial view of the map widget.
center = [30, 5]
zoom = 2
jupyter_orgs_maps = Map(default_tiles=TileLayer(opacity=1.0), center=center, zoom=zoom,
                        layout=ipywidgets.Layout(height="600px"))
# map_location_dict adds the markers to the map it is given and returns that
# same map, so the result is not added back onto the map with `+=`.
for org_location_dict in orgs_location_dict.values():
    map_location_dict(jupyter_orgs_maps, org_location_dict)
In [ ]:
jupyter_orgs_maps
In [ ]:
def org_dict_to_csv(org_location_dict, filename, hashed_usernames = True):
    """
    Write a dictionary of {user : location} to a CSV file.

    Takes the dictionary and the output filename, plus an optional
    hashed_usernames flag. The file gets a "user, longitude, latitude"
    header followed by one row per user whose location is not None.

    By default the user column holds the SHA-1 hex digest of the user key
    instead of the key itself, for privacy reasons. Think carefully about
    publishing location data about uniquely identifiable users. Hashing
    allows you to check unique users without revealing personal information.

    Returns None on success. If anything goes wrong, the exception is
    returned to the caller instead of being raised.
    """
    try:
        import hashlib
        with open(filename, 'w') as csv_file:
            csv_file.write("user, longitude, latitude\n")
            for user, location in org_location_dict.items():
                # Users without a geocoded location get no row at all
                if location is None:
                    continue
                user_output = user
                if hashed_usernames:
                    user_output = hashlib.sha1(user.encode('utf-8')).hexdigest()
                fields = (user_output, str(location.longitude), str(location.latitude))
                csv_file.write(", ".join(fields) + "\n")
    except Exception as e:
        return e
In [146]:
def csv_to_js_var(input_file, output_file):
    """
    Convert a CSV file into a JavaScript file that defines `addressPoints`.

    The CSV is read with pandas and must have the columns 'user',
    ' longitude' and ' latitude' (the last two with a leading space).
    The output file contains a single statement,
    `var addressPoints = [...];`, where each entry is a
    [latitude, longitude, user] list.
    """
    import json
    import pandas as pd

    columns = pd.read_csv(input_file).to_dict()
    with open(output_file, 'w') as js_file:
        rows = zip(columns['user'].values(),
                   columns[' longitude'].values(),
                   columns[' latitude'].values())
        # Each point is written latitude first, then longitude, then the user
        points = [[latitude, longitude, user] for user, longitude, latitude in rows]
        js_file.write('var addressPoints = ' + json.dumps(points, indent=2) + ';')
In [ ]:
In [ ]:
In [ ]:
In [ ]:
def org_dict_to_geojson(org_location_dict, filename, hashed_usernames = True):
    """
    Outputs a dict of users : locations to a GeoJSON file.
    Requires org_location_dict and filename, optional hashed_usernames parameter.
    Uses hashes of usernames by default for privacy reasons. Think carefully
    about publishing location data about uniquely identifiable users. Hashing
    allows you to check unique users without revealing personal information.

    The file holds one FeatureCollection with a Point feature per user whose
    location is not None; the user (or its SHA-1 hex digest) is stored in the
    feature's "name" property. Returns None.
    """
    import hashlib
    import json

    features = []
    for user, location in org_location_dict.items():
        if location is None:
            continue
        if hashed_usernames:
            user_output = hashlib.sha1(user.encode('utf-8')).hexdigest()
        else:
            user_output = user
        features.append({
            "type": "Feature",
            "geometry": {
                "type": "Point",
                # GeoJSON positions are ordered longitude first, then latitude
                "coordinates": [location.longitude, location.latitude],
            },
            "properties": {
                "name": user_output,
            },
        })

    collection = {"type": "FeatureCollection", "features": features}
    # Serialising with the json module (instead of hand-written templates)
    # avoids a trailing comma after the last feature and escapes names properly.
    with open(filename, 'w') as f:
        json.dump(collection, f, indent=2)
In [ ]:
In [ ]:
In [ ]:
org_dict_to_csv(orgs_location_dict['ipython'], "org_data/ipython.csv")
In [147]:
# Export every organization's locations to CSV, then turn that CSV into a
# JavaScript file.
for org_name, org_location_dict in orgs_location_dict.items():
    csv_path = "org_data/" + org_name + ".csv"
    # org_dict_to_csv returns (rather than raises) any exception it hits, so
    # check its result before the CSV is consumed by csv_to_js_var.
    export_error = org_dict_to_csv(org_location_dict, csv_path)
    if export_error is not None:
        raise export_error
    csv_to_js_var(csv_path, "org_data/" + org_name + ".js")
In [ ]:
def csv_to_org_dict(filename):
    """
    Not implemented yet: the body is only this docstring, so calling the
    function ignores filename and returns None.

    TODO: Write function to read an outputted CSV file back to an org_dict.
    Should convert lon/lat pairs to geopy Location objects for full compatibility.
    Also, think about a general class object for org_dicts.
    """
Note that this will have duplicates across the organizations, as it is just getting the location data from each of the organizations and putting it into a different dictionary.
In [ ]:
In [ ]:
In [ ]: