If you see this text, you may want to enable the nbextension "Collapsible Headings", so you can hide this in common usage.
In [ ]:
# set values here - you can also override below
# The API token is read from the second line of the ".credentials" file.
# The file is opened in a ``with`` block so the handle is closed right away.
with open(".credentials", "r") as credentials_file:
    api_key = credentials_file.readlines()[1].strip()
In [ ]:
# GitHub organizations whose members and outside collaborators are checked.
orgs_to_check = [
    "mozilla",
    "mozilla-services",
    "mozilla-mobile",
    "mozilla-partners",
    "taskcluster",
    "mozilla-conduit",
    "mozilla-lockwise",
    "mozilla-platform-ops",
    "nss-dev",
    "mozilla-releng",
    "mozilla-private",
    "mozilla-frontend-infra",
    "mozilla-bteam",
    "iodide-project",
    "mozilla-games",
    "mozillaReality",
    "mozilla-standards",
    "mozilla-tw",
    "mozilla-extensions",
]
In [ ]:
import github3
def print_limits():
    """Print when the GitHub API rate limit resets and how many calls remain."""
    # Ask for the limits once, so both numbers come from the same response
    # and only one request is made.
    rate = gh.rate_limit()["rate"]
    print("reset at: {}, remaining {}".format(rate["reset"], rate["remaining"]))


# Log in once; the cells below all use the module-level ``gh`` session.
try:
    gh = github3.login(token=api_key)
    print("You are authenticated as {}".format(gh.me().login))
except ConnectionError:
    # NOTE(review): this names the builtin ConnectionError -- confirm it is
    # the exception type github3 actually raises when the connection fails.
    print_limits()
try:
from functools import lru_cache
except ImportError:
from backports.functools_lru_cache import lru_cache
From here on, use gh to access all data
In [ ]:
@lru_cache(maxsize=32)
def _search_for_user(user):
    """Search GitHub for user accounts matching *user*.

    The search is restricted with the ``type:user`` qualifier.  The number
    of hits is printed and the hits are returned as a list.  Results are
    memoized per search term by ``lru_cache``.
    """
    query = "type:user " + user
    candidates = list(gh.search_users(query=query))
    print("found {} potentials for {}".format(len(candidates), user))
    return candidates
def get_user_counts(user):
    """Yield every search hit for *user*.

    If *user* looks like an email address (contains ``@``), the hits for
    the mailbox name (the part before the first ``@``) are yielded as well,
    after the hits for the full address.  Each search only runs when the
    generator reaches it.
    """
    search_terms = [user]
    # if it was an email addr, try again with the mailbox name
    if '@' in user:
        search_terms.append(user.split('@')[0])
    for term in search_terms:
        for hit in _search_for_user(term):
            yield hit
In [ ]:
displayed_users = set()  # cache to avoid duplicate output


def show_users(user_list):
    """Print details for the search hits in *user_list* and return their logins.

    Duplicates in *user_list* are collapsed first.  With more than 10
    distinct hits nothing is looked up: a notice is printed and an empty
    list is returned.  Otherwise each hit not yet recorded in
    ``displayed_users`` is recorded there, its user record is refreshed and
    printed, and the logins of all distinct hits are returned.
    """
    global displayed_users
    candidates = set(user_list)
    if len(candidates) > 10:
        print("... too many to be useful ...")
        return []
    for hit in candidates:
        if hit in displayed_users:
            # already printed during this run
            continue
        displayed_users.add(hit)
        account = hit.user
        account.refresh()
        print(account.login, account.name, account.location, account.email)
    return [hit.login for hit in candidates]
In [ ]:
class OutsideCollaboratorIterator(github3.structs.GitHubIterator):
    """Iterator over the ``/outside_collaborators`` listing of an organization.

    Items are produced as ``github3.users.ShortUser`` objects, using the
    organization's own session.
    """

    def __init__(self, org):
        endpoint = org.url + "/outside_collaborators"
        super(OutsideCollaboratorIterator, self).__init__(
            count=-1,  # get all
            url=endpoint,
            cls=github3.users.ShortUser,
            session=org.session,
        )
@lru_cache(maxsize=32)
def get_collaborators(org):
    """Return the lower-cased logins of *org*'s outside collaborators.

    The list is memoized per organization by ``lru_cache``.
    """
    logins = []
    for collaborator in OutsideCollaboratorIterator(org):
        logins.append(collaborator.login.lower())
    return logins
def is_collaborator(org, login):
    """Return True if *login* is an outside collaborator of *org* (case-insensitive)."""
    wanted = login.lower()
    return wanted in get_collaborators(org)
# provide same interface for members -- but the iterator is free :D
@lru_cache(maxsize=32)
def get_members(org):
    """Return the lower-cased logins of *org*'s members.

    The list is memoized per organization by ``lru_cache``.
    """
    logins = []
    for member in org.members():
        logins.append(member.login.lower())
    return logins
def is_member(org, login):
    """Return True if *login* is a member of *org* (case-insensitive)."""
    wanted = login.lower()
    return wanted in get_members(org)
In [ ]:
def check_login_perms(logins):
    """Report which of the audited organizations each login has access to.

    Every login is checked against every organization named in
    ``orgs_to_check``, both as a member and as an outside collaborator.
    One line with a link to the matching GitHub page is printed per hit,
    and a "No permissions found" line is printed for a login with no hits.

    logins -- iterable of GitHub login names (strings)

    Returns True if at least one login had a hit, otherwise False.
    """
    any_perms = False
    # The organization objects do not depend on the login, so they are
    # fetched once rather than once per login.  They are fetched on first
    # use so that an empty ``logins`` still makes no API calls.
    organizations = None
    for login in logins:
        if organizations is None:
            organizations = [gh.organization(name) for name in orgs_to_check]
        has_perms = False
        for org in organizations:
            if is_member(org, login):
                url = "https://github.com/orgs/{}/people?utf8=%E2%9C%93&query={}".format(org.login, login)
                print("{} has {} as a member: {}".format(org.login, login, url))
                has_perms = True
            if is_collaborator(org, login):
                url = "https://github.com/orgs/{}/outside-collaborators?utf8=%E2%9C%93&query={}".format(org.login, login)
                print("{} has {} as a collaborator: {}".format(org.login, login, url))
                has_perms = True
        if has_perms:
            any_perms = True
        else:
            print("No permissions found for {}".format(login))
    return any_perms
In [ ]:
import re
import os
re_flags = re.MULTILINE | re.IGNORECASE


def _search_field(pattern, text):
    """Return the first capture group of *pattern* in *text*, or None if absent."""
    found = re.search(pattern, text, re_flags)
    return found.group(1) if found else None


def process_from_email(email_body):
    """Extract possible GitHub identities from an email body and check them.

    The body is scanned for "Full Name:", "Email:", "Github Profile:",
    "Zimbra Alias:", "IM:", "Bugzilla Email:" and "Dept Name:" lines
    (case-insensitively).  The values found become search terms, each term
    is searched on GitHub, and the resulting logins (plus the declared
    GitHub profile, if any) are passed to ``check_login_perms``.  All
    results are printed; nothing is returned.

    A ``github3.exceptions.ForbiddenError`` raised while searching or
    checking is reported as a rate-limit problem instead of propagating.
    """
    global displayed_users

    # get rid of white space
    # Lines are joined with "\n" rather than os.linesep: "$" in the patterns
    # below only anchors before "\n", so a "\r\n" separator would leave a
    # stray "\r" at the end of every captured value.
    email_body = "\n".join(
        line.strip() for line in email_body.splitlines() if line.strip()
    )

    user = set()

    # Extract data from internal email format
    full_name = _search_field(r'^Full Name: (?P<full_name>\S.*)$', email_body)
    if full_name is not None:
        # add base and some variations
        user.add(full_name)
        # remove spaces
        user.add(full_name.replace(' ', ''))
        # reversed no spaces
        user.add(''.join(full_name.split()[::-1]))

    primary_email = _search_field(r'^Email: (?P<primary_email>.*)$', email_body)
    if primary_email is not None:
        user.add(primary_email)
        quoted_email = primary_email.replace('@', '%40')
        print("Check these URLs for Heroku activity:")
        print(" Mozillians: https://mozillians.org/en-US/search/?q={}".format(quoted_email))
        print(" Heroku: https://dashboard.heroku.com/teams/mozillacorporation/access?filter={}".format(quoted_email))
    else:
        # Without an email address there is nothing to put in the URLs;
        # say so instead of failing on a missing value.
        print("No 'Email:' line found; skipping the Mozillians and Heroku URLs.")
    print(email_body)

    declared_github = _search_field(r'^Github Profile: (?P<github_profile>.*)$', email_body)
    if declared_github is not None:
        user.add(declared_github)

    other_email = _search_field(r'^Zimbra Alias: (?P<other_email>.*)$', email_body)
    if other_email is not None:
        user.add(other_email)

    # we consider each token in the IM line as a possible GitHub login
    # Only spaces/tabs are skipped after the label: "\s*" would also skip the
    # newline of an empty "IM:" line and capture the following line instead.
    im_line = _search_field(r'^IM:[ \t]*(.*)$', email_body)
    if im_line is not None:
        user.update(
            token.group(1)
            for token in re.finditer(r'\W*((\w+)(?:\s+\w+)*)', im_line)
        )

    bz_email = _search_field(r'^Bugzilla Email: (?P<bz_email>.*)$', email_body)
    if bz_email is not None:
        user.add(bz_email)

    # grab the department name, for a heuristic on whether we expect to find perms
    expect_github_login = False
    department_name = _search_field(r'^\s*Dept Name: (?P<dept_name>\S.*)$', email_body)
    if department_name is not None:
        department_name = department_name.lower()
        dept_keys_infering_github = ["firefox", "engineering", "qa", "operations"]
        expect_github_login = any(
            key in department_name for key in dept_keys_infering_github
        )

    # clean up some noise, case insensitively
    # the tokens to ignore are added based on discovery,
    # they tend to cause the searches to get rate limited.
    user = {x.lower() for x in user if x and (len(x) > 2)}
    user = user - {None, "irc", "slack", "skype", "b", 'hotmail', 'mozilla', 'ro', 'com', 'softvision', 'mail',
                   'twitter', 'blog', 'https', 'jabber', 'net', 'github', 'gmail',
                   'facebook', 'guy', 'pdx', 'yahoo', }

    # Start each run with an empty "already displayed" cache.
    displayed_users = set()
    try:
        print("Trying '{}'".format("', '".join(user)))
        guesses = set()
        for term in user:
            new = show_users(get_user_counts(term))
            guesses.update({x.lower() for x in new})
        # include declared_github if it exists
        if declared_github:
            guesses.add(declared_github.lower())
        print("Checking logins {}".format(guesses))
        found_perms = False
        if guesses:
            found_perms = check_login_perms(guesses)
        elif expect_github_login:
            print("\nWARNING: expected GitHub login for dept '{}'".format(department_name))
        print("Finished all reporting.")
        if declared_github and not found_perms:
            # print some text to copy/paste into email
            print(", even for declared login '{}'.".format(declared_github))
        if expect_github_login and not found_perms:
            print("WARNING: expected GitHub permissions for dept '{}'".format(department_name))
    except github3.exceptions.ForbiddenError as e:
        print("API limit reached, try again in 5 minutes.\n")
        print(str(e))
        print(gh.rate_limit())
Currently, there are two common use cases:
For anything else, you're on your own!
All usage requires the following setup:
Usage steps - for each user:
1. Copy entire text of email
2. Paste between the ``"""`` marks in the cell below.
3. Execute that cell
The cell below should have the following text:
process_from_email(r"""
# paste email body here
""")
Or if you're not processing an email, fake the two fields 'email:' and 'im:':
process_from_email(r"""
# comma separated list
im: various possible names comma
# Only 1 email
email: primary_email@mozilla.com
""")
In [ ]:
# Paste the email body between the triple quotes below, then run this cell.
process_from_email(r"""
""")
In [ ]:
# List the GitHub logins to check (as strings) between the brackets, then run this cell.
check_login_perms([
])