In [79]:
# need pandas
import pandas as pd
In [80]:
# create a dataframe for the learners csv
df_learn=pd.read_csv("learn.csv")
In [82]:
# What were the most popular topics?
df_learn.sort("topic").sort("popularity",ascending=False)
Out[82]:
topic
popularity
teacher
23
advanced git
22.0
Ross Barnowski
21
advanced git
22.0
Aaron Culich
24
advanced git
22.0
Katy Huff
22
advanced git
22.0
Kyle Barbary
16
spark/hadoop
22.0
Spark Team (Zhao & Jey?)
6
advanced python
20.0
Sven Chilton
7
advanced python
20.0
Matthias Boussonier?
30
vizualization
16.0
Jennifer Jones (matplotlib)
27
vizualization
16.0
John Naulty (highchart)
29
vizualization
16.0
Biye (d3)
28
vizualization
16.0
Ross Barnowski (pyqtgraph)
14
pandas
14.0
Sven Chilton
13
pandas
14.0
Jennifer Jones
44
LaTeX and whatnot
14.0
Jennifer Jones (LaTeX)
12
pandas
14.0
Sean Wahl
43
LaTeX and whatnot
14.0
Chris Paciorek (r)
15
pandas
14.0
notes on databases from Josh Rehak
18
workflows/pipelines
14.0
Jess Hamrick
42
LaTeX and whatnot
14.0
Mike Pacer (latex & rst & fonts)
25
Webscraping
14.0
John Bohannon
26
Webscraping
14.0
Sven Chilton (twitter)
40
computer vision & image analysis
13.0
Stefan van der Walt
36
gpus & parallelization
13.0
Aaron Culich
35
gpus & parallelization
13.0
Biye
3
scikit-learn
13.0
Shannon McCurdy
2
scikit-learn
13.0
Ross Barnowski
19
beginner git
10.0
Harrison Dekker
11
high performance python
10.0
Chick Markley
20
beginner git
10.0
John Naulty
9
cython/wrapping/forthon/swig/f2py
9.0
Kyle Barbary
8
cython/wrapping/forthon/swig/f2py
9.0
Ross Barnowski
10
cython/wrapping/forthon/swig/f2py
9.0
Sven Chilton
31
javascript
8.5
John Naulty (johnny5library)
32
javascript
8.5
Biye
39
webservers
8.0
Alex Goodell
33
make
7.0
Katy Huff
34
make
7.0
Chris Paciorek
0
advanced julia
7.0
Kyle Barbary
37
sampling techniques
7.0
Daniel Turek
38
sampling techniques
7.0
Shannon McCurdy
1
advanced julia
7.0
Chris' friend
5
intro python
7.0
John Bohannon
4
intro python
7.0
Alex Goodell
17
R
6.0
Daniel Turek?
41
hardware & embedded systems
4.0
John Bohannon (lightning talk)
In [130]:
# We only have time for 12 meetings this semester
# So, we pick the 12 most popular topics
fall_topics=df_learn.drop_duplicates(cols='topic', inplace=False).sort("popularity", ascending=False)[:12]["topic"]
In [131]:
# Given this, list the topics and the teachers in order of topic popularity
topic_mask = df_learn.isin(fall_topics.values)["topic"]
df_learn[topic_mask].sort("topic").sort("popularity",ascending=False)
Out[131]:
topic
popularity
teacher
23
advanced git
22
Ross Barnowski
21
advanced git
22
Aaron Culich
22
advanced git
22
Kyle Barbary
24
advanced git
22
Katy Huff
16
spark/hadoop
22
Spark Team (Zhao & Jey?)
6
advanced python
20
Sven Chilton
7
advanced python
20
Matthias Boussonier?
27
vizualization
16
John Naulty (highchart)
28
vizualization
16
Ross Barnowski (pyqtgraph)
29
vizualization
16
Biye (d3)
30
vizualization
16
Jennifer Jones (matplotlib)
44
LaTeX and whatnot
14
Jennifer Jones (LaTeX)
14
pandas
14
Sven Chilton
15
pandas
14
notes on databases from Josh Rehak
13
pandas
14
Jennifer Jones
18
workflows/pipelines
14
Jess Hamrick
12
pandas
14
Sean Wahl
43
LaTeX and whatnot
14
Chris Paciorek (r)
26
Webscraping
14
Sven Chilton (twitter)
25
Webscraping
14
John Bohannon
42
LaTeX and whatnot
14
Mike Pacer (latex & rst & fonts)
3
scikit-learn
13
Shannon McCurdy
2
scikit-learn
13
Ross Barnowski
35
gpus & parallelization
13
Biye
40
computer vision & image analysis
13
Stefan van der Walt
36
gpus & parallelization
13
Aaron Culich
11
high performance python
10
Chick Markley
In [ ]:
Content source: thehackerwithin/berkeley
Similar notebooks: