In [79]:
# need pandas
import pandas as pd

In [80]:
# create a dataframe for the learners csv
df_learn=pd.read_csv("learn.csv")

In [82]:
# What were the most popular topics?
df_learn.sort("topic").sort("popularity",ascending=False)


Out[82]:
topic popularity teacher
23 advanced git 22.0 Ross Barnowski
21 advanced git 22.0 Aaron Culich
24 advanced git 22.0 Katy Huff
22 advanced git 22.0 Kyle Barbary
16 spark/hadoop 22.0 Spark Team (Zhao & Jey?)
6 advanced python 20.0 Sven Chilton
7 advanced python 20.0 Matthias Boussonier?
30 vizualization 16.0 Jennifer Jones (matplotlib)
27 vizualization 16.0 John Naulty (highchart)
29 vizualization 16.0 Biye (d3)
28 vizualization 16.0 Ross Barnowski (pyqtgraph)
14 pandas 14.0 Sven Chilton
13 pandas 14.0 Jennifer Jones
44 LaTeX and whatnot 14.0 Jennifer Jones (LaTeX)
12 pandas 14.0 Sean Wahl
43 LaTeX and whatnot 14.0 Chris Paciorek (r)
15 pandas 14.0 notes on databases from Josh Rehak
18 workflows/pipelines 14.0 Jess Hamrick
42 LaTeX and whatnot 14.0 Mike Pacer (latex & rst & fonts)
25 Webscraping 14.0 John Bohannon
26 Webscraping 14.0 Sven Chilton (twitter)
40 computer vision & image analysis 13.0 Stefan van der Walt
36 gpus & parallelization 13.0 Aaron Culich
35 gpus & parallelization 13.0 Biye
3 scikit-learn 13.0 Shannon McCurdy
2 scikit-learn 13.0 Ross Barnowski
19 beginner git 10.0 Harrison Dekker
11 high performance python 10.0 Chick Markley
20 beginner git 10.0 John Naulty
9 cython/wrapping/forthon/swig/f2py 9.0 Kyle Barbary
8 cython/wrapping/forthon/swig/f2py 9.0 Ross Barnowski
10 cython/wrapping/forthon/swig/f2py 9.0 Sven Chilton
31 javascript 8.5 John Naulty (johnny5library)
32 javascript 8.5 Biye
39 webservers 8.0 Alex Goodell
33 make 7.0 Katy Huff
34 make 7.0 Chris Paciorek
0 advanced julia 7.0 Kyle Barbary
37 sampling techniques 7.0 Daniel Turek
38 sampling techniques 7.0 Shannon McCurdy
1 advanced julia 7.0 Chris' friend
5 intro python 7.0 John Bohannon
4 intro python 7.0 Alex Goodell
17 R 6.0 Daniel Turek?
41 hardware & embedded systems 4.0 John Bohannon (lightning talk)

In [130]:
# We only have time for 12 meetings this semester
# So, we pick the 12 most popular topics
fall_topics=df_learn.drop_duplicates(cols='topic', inplace=False).sort("popularity", ascending=False)[:12]["topic"]

In [131]:
# Given this, list the topics and the teachers in order of topic popularity
topic_mask = df_learn.isin(fall_topics.values)["topic"]
df_learn[topic_mask].sort("topic").sort("popularity",ascending=False)


Out[131]:
topic popularity teacher
23 advanced git 22 Ross Barnowski
21 advanced git 22 Aaron Culich
22 advanced git 22 Kyle Barbary
24 advanced git 22 Katy Huff
16 spark/hadoop 22 Spark Team (Zhao & Jey?)
6 advanced python 20 Sven Chilton
7 advanced python 20 Matthias Boussonier?
27 vizualization 16 John Naulty (highchart)
28 vizualization 16 Ross Barnowski (pyqtgraph)
29 vizualization 16 Biye (d3)
30 vizualization 16 Jennifer Jones (matplotlib)
44 LaTeX and whatnot 14 Jennifer Jones (LaTeX)
14 pandas 14 Sven Chilton
15 pandas 14 notes on databases from Josh Rehak
13 pandas 14 Jennifer Jones
18 workflows/pipelines 14 Jess Hamrick
12 pandas 14 Sean Wahl
43 LaTeX and whatnot 14 Chris Paciorek (r)
26 Webscraping 14 Sven Chilton (twitter)
25 Webscraping 14 John Bohannon
42 LaTeX and whatnot 14 Mike Pacer (latex & rst & fonts)
3 scikit-learn 13 Shannon McCurdy
2 scikit-learn 13 Ross Barnowski
35 gpus & parallelization 13 Biye
40 computer vision & image analysis 13 Stefan van der Walt
36 gpus & parallelization 13 Aaron Culich
11 high performance python 10 Chick Markley

In [ ]: