In [19]:
# need pandas
import pandas as pd

In [20]:
# Load learn.csv into a DataFrame; the displayed output shows two columns,
# `topic` and `popularity`.
df_learn = pd.read_csv("learn.csv")

In [21]:
# What were the most popular topics?
# Rank topics from most to least requested. `DataFrame.sort` was deprecated
# in favour of `sort_values` and has since been removed from pandas, so
# `sort_values` is required for this cell to run on a current install.
df_learn.sort_values("popularity", ascending=False)


Out[21]:
topic popularity
6 make 18
10 julia 18
3 text editors 17
18 fhs 17
1 matplotlib 15
19 ipython 15
37 advanced git 14
17 shell 13
12 linking 13
29 computer architectures 12
26 profiling 12
21 C 12
36 cmake 12
4 seaborne 12
24 c++ 11
40 testing 11
32 parallel programming 11
35 shiny 11
39 hardware drivers 10
8 scikit-learn 10
7 cuda 10
2 d3.js 9
28 microcontrollers 9
0 r 9
41 regex 9
16 python wrapping 9
14 git 8
9 sql 8
5 latex 7
31 pandas vs. r ...language battle 7
13 pandas 7
23 java 6
11 python 6
15 python guis 6
33 stan 5
22 scoping in interpreted languages 4
30 xeon phi 4
20 pymc 4
42 text analysis 3
34 linear programming 2
27 functional languages 2
38 licensing 1
25 object orientation 1
43 none 0

In [22]:
# Load teach.csv into a DataFrame; the displayed output shows one row per
# (name, topic) pair, so a person appears once for every topic they listed.
df_teach = pd.read_csv("teach.csv")

# What did people say they could teach? (bare expression -> rich display)
df_teach


Out[22]:
name topic
0 Katy Huff cmake
1 Katy Huff fhs
2 Katy Huff shell
3 Katy Huff profiling
4 Rachel Slaybaugh latex
5 Rachel Slaybaugh git
6 Rachel Slaybaugh testing
7 Rochelle Terman r
8 Rochelle Terman latex
9 Rochelle Terman text analysis
10 Rochelle Terman regex
11 Caroline Sofiatti seaborne
12 Caroline Sofiatti matplotlib
13 Caroline Sofiatti d3.js
14 Denia Djokic none
15 Britta Fiore none
16 Chris Paciorek r
17 Chris Paciorek shell
18 Chris Paciorek parallel programming
19 Chris Paciorek gpu
20 Chris Paciorek make
21 Alex Chong microcontrollers
22 Alex Chong computer architectures
23 Greg Telian python
24 Greg Telian microcontrollers
25 Sean Wahl python
26 Sean Wahl matplotlib
27 Min RK ipython
28 Min RK shell
29 Min RK python wrapping
30 James Kendrick none
31 Sven Chilton python
32 Sven Chilton c++
33 Sven Chilton latex
34 Jose Buraschi python
35 Jose Buraschi sql
36 Jose Buraschi spark
37 Andrew Greenop none
38 Joey Curtis pandas
39 Joey Curtis latex
40 Joey Curtis text editors
41 Anders Priest microcontrollers
42 Daniel Turek r
43 Daniel Turek stan
44 Daniel Turek nimble
45 Karthik Ram shiny
46 Tenzing Joshi pandas
47 Tenzing Joshi python
48 Tenzing Joshi latex
49 Kelly Rowland googletest
50 Madicken Munk none
51 Thomas Kluyver ipython
52 Thomas Kluyver pythonguis
53 Thomas Kluyver licensing
54 Thomas Kluyver C
55 Kyle Barbary julia
56 Daniel Wooten xeon phi

In [23]:
# How many people showed up yesterday?
# Each person has one row per topic offered, so count distinct names rather
# than rows. Rows whose topic is "none" still contribute their name.
len(df_teach["name"].unique())


Out[23]:
25

In [24]:
# So, let's schedule these topics largely in order of popularity.
# Which teachers should we get on the schedule first?
#
# Inner join on `topic`: only topics present in BOTH frames survive, so a
# teacher's topic that nobody requested (and a requested topic with no
# teacher) is dropped. A topic with several volunteers yields one row each.
# `sort_values` replaces `DataFrame.sort`, which has been removed from pandas.
(
    df_learn
    .merge(df_teach, on="topic", how="inner")
    .sort_values("popularity", ascending=False)
)


Out[24]:
topic popularity name
15 julia 18 Kyle Barbary
13 make 18 Chris Paciorek
6 text editors 17 Joey Curtis
28 fhs 17 Katy Huff
3 matplotlib 15 Caroline Sofiatti
4 matplotlib 15 Sean Wahl
29 ipython 15 Min RK
30 ipython 15 Thomas Kluyver
26 shell 13 Chris Paciorek
25 shell 13 Katy Huff
27 shell 13 Min RK
33 profiling 12 Katy Huff
31 C 12 Thomas Kluyver
37 computer architectures 12 Alex Chong
42 cmake 12 Katy Huff
7 seaborne 12 Caroline Sofiatti
41 shiny 11 Karthik Ram
44 testing 11 Rachel Slaybaugh
32 c++ 11 Sven Chilton
39 parallel programming 11 Chris Paciorek
36 microcontrollers 9 Anders Priest
34 microcontrollers 9 Alex Chong
35 microcontrollers 9 Greg Telian
45 regex 9 Rochelle Terman
1 r 9 Chris Paciorek
0 r 9 Rochelle Terman
24 python wrapping 9 Min RK
2 r 9 Daniel Turek
5 d3.js 9 Caroline Sofiatti
14 sql 8 Jose Buraschi
23 git 8 Rachel Slaybaugh
8 latex 7 Rachel Slaybaugh
9 latex 7 Rochelle Terman
11 latex 7 Joey Curtis
12 latex 7 Tenzing Joshi
10 latex 7 Sven Chilton
21 pandas 7 Joey Curtis
22 pandas 7 Tenzing Joshi
16 python 6 Greg Telian
17 python 6 Sean Wahl
18 python 6 Sven Chilton
19 python 6 Jose Buraschi
20 python 6 Tenzing Joshi
40 stan 5 Daniel Turek
38 xeon phi 4 Daniel Wooten
46 text analysis 3 Rochelle Terman
43 licensing 1 Thomas Kluyver
47 none 0 Denia Djokic
48 none 0 Britta Fiore
49 none 0 James Kendrick
50 none 0 Andrew Greenop
51 none 0 Madicken Munk

In [25]:
# What topics were popular, but had no volunteer teacher?
# Keep the learner topics that never appear in the teachers' topic column,
# most requested first. `sort_values` replaces the removed `DataFrame.sort`.
#
# NOTE: `isin` compares strings exactly, so spelling variants count as
# "no teacher" -- e.g. "python guis" is reported as unmatched although
# teach.csv contains "pythonguis".
has_teacher = df_learn["topic"].isin(df_teach["topic"])
df_learn[~has_teacher].sort_values("popularity", ascending=False)


Out[25]:
topic popularity
37 advanced git 14
12 linking 13
7 cuda 10
8 scikit-learn 10
39 hardware drivers 10
31 pandas vs. r ...language battle 7
15 python guis 6
23 java 6
20 pymc 4
22 scoping in interpreted languages 4
27 functional languages 2
34 linear programming 2
25 object orientation 1