In this notebook we do some simple analysis of information about members registered on meetup.com. We extract the info using the official Meetup API, for which registered members can obtain their own API key.
N.B. This is a work in progress.
In [1]:
%matplotlib inline
import re
import os
import json
import requests
import pandas as pd
In [2]:
# Base URL of the Meetup API.
server = "https://api.meetup.com"
# URL name of the Meetup group examined first in this notebook.
group_urlname = "Python-Users-Berlin-PUB"
from meetup_api_key import key
Get information about a group on Meetup.com:
In [3]:
# Build the URL from the `server` base defined above instead of repeating the
# literal, let requests encode the query string, and set a timeout so a stalled
# connection cannot hang the notebook.
requests.get("%s/%s" % (server, group_urlname), params={"key": key}, timeout=30).json()
Out[3]:
Get information about two members of that group:
In [4]:
# Request one small page of member records for the group.
url = server + "/2/members?offset=1&page=2&order=name&group_urlname=%s&key=%s" % (group_urlname, key)
# The timeout keeps a stalled connection from hanging the notebook.
info = requests.get(url, timeout=30).json()
# hide key so it doesn't show up in some repository:
for f in ('next', 'url'):
    # Raw string: '\w' inside a plain literal is an invalid escape sequence.
    info['meta'][f] = re.sub(r'key=\w+', 'key=******', info['meta'][f])
In [5]:
# Show the parsed response; the API key in its paging metadata was masked above.
info
Out[5]:
In [6]:
def get_all_members(group_urlname, verbose=False):
    """Read members info from a sequence of pages.

    Requests the first ``/2/members`` page for the group and then follows the
    ``meta.next`` link of each response until it is empty, collecting the
    ``results`` list of every page.

    Args:
        group_urlname: URL name of the Meetup group to query.
        verbose: When true, print each requested URL (API key masked), the
            running total and the page's ``meta.count``, plus a final summary.
            When false, nothing is printed.

    Returns:
        A list holding the ``results`` entries of all pages, in request order.

    Raises:
        KeyError: If a response lacks the ``results`` or ``meta`` entries.
    """
    total = []

    def fetch(page_url):
        # The timeout keeps a stalled connection from hanging the notebook.
        return requests.get(page_url, timeout=30).json()

    def report(page_url, page_info):
        # Mask the API key so it never ends up in saved notebook output.
        print(re.sub(r'key=\w+', 'key=******', page_url))
        print(len(total), page_info['meta']['count'])

    # NOTE(review): if Meetup's `offset` is a zero-based page index, starting
    # at 1 skips the first page of members -- confirm against the API docs.
    offset = 1
    page = 200
    url = "{server}/2/members?offset={offset}&format=json&group_urlname={group_urlname}&page={page}&key={key}&order=name"
    url = url.format(server=server, offset=offset, page=page, group_urlname=group_urlname, key=key)

    while url:
        # Rebind `info` on every iteration so the `next` link actually advances.
        info = fetch(url)
        total.extend(info['results'])
        if verbose:
            report(url, info)
        # An empty or missing `next` link marks the last page.
        url = info['meta'].get('next')

    if verbose:
        print('found %d members' % len(total))
    return total
In [7]:
# Reuse the cached member list when the file exists; otherwise download it
# once and store it for later runs.
path = 'pub-members.json'
if os.path.exists(path):
    # Context managers close the file handles deterministically.
    with open(path) as cache_file:
        members = json.load(cache_file)
else:
    members = get_all_members('Python-Users-Berlin-PUB')
    with open(path, 'w') as cache_file:
        json.dump(members, cache_file)
In [8]:
# Inspect the first member record to see what a single entry looks like.
members[0]
Out[8]:
In [9]:
# The 'topics' entry of the first member record.
members[0]['topics']
Out[9]:
In [10]:
# The same 'topics' entry rendered as a DataFrame.
pd.DataFrame(members[0]['topics'])
Out[10]:
Now build a dataframe with this information for all members:
In [11]:
# Flatten every member's topics into one list of records and build the frame
# in a single step. This also works for an empty member list or members with
# no topics, and gives the frame a unique index instead of the repeated
# per-member row labels that concatenating one frame per member produces.
df = pd.DataFrame([topic for m in members for topic in m['topics']])
In [12]:
# Number of topic rows collected across all members.
len(df)
Out[12]:
In [13]:
# Count the rows per topic name, sort the counts in ascending order and keep
# the last 20 entries (the most frequent topics) for a horizontal bar chart.
s = (
    df.groupby('name')
    .size()
    .sort_values(ascending=True)
    .iloc[-20:]
)
s.plot.barh(
    title='Most cited topics people are interested in',
    figsize=(10, 5),
)
Out[13]:
In [14]:
# Load the members of the PyData-Berlin group, reusing the cached file when it
# exists and downloading (then caching) the list otherwise.
path = 'pydata-members.json'
if os.path.exists(path):
    # Context managers close the file handles deterministically.
    with open(path) as cache_file:
        members = json.load(cache_file)
else:
    members = get_all_members('PyData-Berlin')
    with open(path, 'w') as cache_file:
        json.dump(members, cache_file)
In [15]:
# Topic ranking for the member list currently held in `members`.
# Build the frame once from the flattened topic records; this also works for
# an empty member list or members with no topics, and yields a unique index.
df = pd.DataFrame([topic for m in members for topic in m['topics']])
# Count rows per topic name and keep the 20 most frequent, sorted ascending.
s = df.groupby('name').size().sort_values(ascending=True)[-20:]
s.plot.barh(title='Most cited topics people are interested in', figsize=(10, 5))
Out[15]: