In [19]:
# need pandas
import pandas as pd
In [20]:
# Load the learner survey (read from "learn.csv") into a DataFrame.
df_learn = pd.read_csv("learn.csv")
In [21]:
# What were the most popular topics?
# Rank topics from most to least requested. `DataFrame.sort` was removed
# from pandas (0.20+); `sort_values` is the supported replacement and
# returns a new, sorted frame without modifying `df_learn`.
df_learn.sort_values("popularity", ascending=False)
Out[21]:
topic
popularity
6
make
18
10
julia
18
3
text editors
17
18
fhs
17
1
matplotlib
15
19
ipython
15
37
advanced git
14
17
shell
13
12
linking
13
29
computer architectures
12
26
profiling
12
21
C
12
36
cmake
12
4
seaborne
12
24
c++
11
40
testing
11
32
parallel programming
11
35
shiny
11
39
hardware drivers
10
8
scikit-learn
10
7
cuda
10
2
d3.js
9
28
microcontrollers
9
0
r
9
41
regex
9
16
python wrapping
9
14
git
8
9
sql
8
5
latex
7
31
pandas vs. r ...language battle
7
13
pandas
7
23
java
6
11
python
6
15
python guis
6
33
stan
5
22
scoping in interpreted languages
4
30
xeon phi
4
20
pymc
4
42
text analysis
3
34
linear programming
2
27
functional languages
2
38
licensing
1
25
object orientation
1
43
none
0
In [22]:
# Load the volunteer-teacher responses (read from "teach.csv") into a DataFrame.
df_teach = pd.read_csv("teach.csv")
# Display the frame: what did people say they could teach?
df_teach
Out[22]:
name
topic
0
Katy Huff
cmake
1
Katy Huff
fhs
2
Katy Huff
shell
3
Katy Huff
profiling
4
Rachel Slaybaugh
latex
5
Rachel Slaybaugh
git
6
Rachel Slaybaugh
testing
7
Rochelle Terman
r
8
Rochelle Terman
latex
9
Rochelle Terman
text analysis
10
Rochelle Terman
regex
11
Caroline Sofiatti
seaborne
12
Caroline Sofiatti
matplotlib
13
Caroline Sofiatti
d3.js
14
Denia Djokic
none
15
Britta Fiore
none
16
Chris Paciorek
r
17
Chris Paciorek
shell
18
Chris Paciorek
parallel programming
19
Chris Paciorek
gpu
20
Chris Paciorek
make
21
Alex Chong
microcontrollers
22
Alex Chong
computer architectures
23
Greg Telian
python
24
Greg Telian
microcontrollers
25
Sean Wahl
python
26
Sean Wahl
matplotlib
27
Min RK
ipython
28
Min RK
shell
29
Min RK
python wrapping
30
James Kendrick
none
31
Sven Chilton
python
32
Sven Chilton
c++
33
Sven Chilton
latex
34
Jose Buraschi
python
35
Jose Buraschi
sql
36
Jose Buraschi
spark
37
Andrew Greenop
none
38
Joey Curtis
pandas
39
Joey Curtis
latex
40
Joey Curtis
text editors
41
Anders Priest
microcontrollers
42
Daniel Turek
r
43
Daniel Turek
stan
44
Daniel Turek
nimble
45
Karthik Ram
shiny
46
Tenzing Joshi
pandas
47
Tenzing Joshi
python
48
Tenzing Joshi
latex
49
Kelly Rowland
googletest
50
Madicken Munk
none
51
Thomas Kluyver
ipython
52
Thomas Kluyver
pythonguis
53
Thomas Kluyver
licensing
54
Thomas Kluyver
C
55
Kyle Barbary
julia
56
Daniel Wooten
xeon phi
In [23]:
# How many people showed up yesterday?
# Count the distinct values in the "name" column; dropna=False keeps a
# missing name (if any) in the count, exactly as len(unique()) would.
df_teach["name"].nunique(dropna=False)
Out[23]:
25
In [24]:
# So, let's schedule these topics largely in order of popularity.
# What teachers should we get on the schedule first?
# Join learners' topics to the teachers offering them on the shared "topic"
# column. `merge` defaults to an inner join, so only topics present in both
# frames appear. `DataFrame.sort` was removed from pandas (0.20+), so the
# ranking uses `sort_values` instead.
df_learn.merge(df_teach, on="topic").sort_values("popularity", ascending=False)
Out[24]:
topic
popularity
name
15
julia
18
Kyle Barbary
13
make
18
Chris Paciorek
6
text editors
17
Joey Curtis
28
fhs
17
Katy Huff
3
matplotlib
15
Caroline Sofiatti
4
matplotlib
15
Sean Wahl
29
ipython
15
Min RK
30
ipython
15
Thomas Kluyver
26
shell
13
Chris Paciorek
25
shell
13
Katy Huff
27
shell
13
Min RK
33
profiling
12
Katy Huff
31
C
12
Thomas Kluyver
37
computer architectures
12
Alex Chong
42
cmake
12
Katy Huff
7
seaborne
12
Caroline Sofiatti
41
shiny
11
Karthik Ram
44
testing
11
Rachel Slaybaugh
32
c++
11
Sven Chilton
39
parallel programming
11
Chris Paciorek
36
microcontrollers
9
Anders Priest
34
microcontrollers
9
Alex Chong
35
microcontrollers
9
Greg Telian
45
regex
9
Rochelle Terman
1
r
9
Chris Paciorek
0
r
9
Rochelle Terman
24
python wrapping
9
Min RK
2
r
9
Daniel Turek
5
d3.js
9
Caroline Sofiatti
14
sql
8
Jose Buraschi
23
git
8
Rachel Slaybaugh
8
latex
7
Rachel Slaybaugh
9
latex
7
Rochelle Terman
11
latex
7
Joey Curtis
12
latex
7
Tenzing Joshi
10
latex
7
Sven Chilton
21
pandas
7
Joey Curtis
22
pandas
7
Tenzing Joshi
16
python
6
Greg Telian
17
python
6
Sean Wahl
18
python
6
Sven Chilton
19
python
6
Jose Buraschi
20
python
6
Tenzing Joshi
40
stan
5
Daniel Turek
38
xeon phi
4
Daniel Wooten
46
text analysis
3
Rochelle Terman
43
licensing
1
Thomas Kluyver
47
none
0
Denia Djokic
48
none
0
Britta Fiore
49
none
0
James Kendrick
50
none
0
Andrew Greenop
51
none
0
Madicken Munk
In [25]:
# What topics were popular, but had no volunteer teacher?
# Keep the learner rows whose topic never appears in the teachers' topic
# column, then rank them by popularity. `DataFrame.sort` was removed from
# pandas (0.20+); `sort_values` is the supported replacement.
# NOTE(review): `isin` compares strings exactly, so spelling variants between
# the two files are reported as "no teacher" -- e.g. 'python guis' here vs
# 'pythonguis' in the teacher data looks like one such case; confirm.
df_learn[~df_learn["topic"].isin(df_teach["topic"])].sort_values(
    "popularity", ascending=False
)
Out[25]:
topic
popularity
37
advanced git
14
12
linking
13
7
cuda
10
8
scikit-learn
10
39
hardware drivers
10
31
pandas vs. r ...language battle
7
15
python guis
6
23
java
6
20
pymc
4
22
scoping in interpreted languages
4
27
functional languages
2
34
linear programming
2
25
object orientation
1
Content source: paul-laskowski/berkeley
Similar notebooks: