In [ ]:
import pandas as pd
In [ ]:
# Load the proposals table; the file is tab-separated.
df = pd.read_csv("cfp.tsv", sep="\t")
# Every row starts out unselected; the tagging cells flip individual rows.
df["selected"] = False
In [ ]:
# 2015 selections, one title per line, grouped by proposal type.
# Entries are lowercase and several look like truncated titles; the tagging
# loop strips each line and uses it as a case-insensitive substring of
# df.title, so blank first/last lines and surrounding whitespace are harmless.
_TALKS_2015 = """
simple hacks to make your django website faster
pretty printing in python
machine learning techniques for building a large scale
laying out your django projects
python and riak
building flexible filesystems with fuse-python
symengine: the future fast core of computer algebra systems
test driven development with ansible
explore big data using simple python code
introduction to nipype and how do we create
python load balancer: 0 to 1 million requests per second
creating, deployment & customizing
building nextgen iot solutions
consuming government data with python and d3
python traceback for humans
how to build microservices using zeromq and wsgi
rip nagios. hello docker shinken
building offensive web security framework in python
how to detect phishing urls using pyspark decision trees
fedmsg: the message bus of fedora infrastructure
concurrent data processing in python
analyzing arguments during a debate using natural language processing
avoiding common pitfalls of datetime from a webapp
python 3 metaprogramming
rest apis - what, why and how
solving logical puzzles with natural language processing
"""

_WORKSHOPS_2015 = """
getting started with ansible
let's learn statistics
using devstack to contribute to openstack
building nextgen iot solutions using python and cloud
reasoning under uncertainty with python
python on your mobile phone(advanced concepts)
django projects the right way
symbolic computation with python, sympy
thinking in functions
"""

# Key order (talks, then workshops) is the order the tagging loop visits them.
sel_2015 = {"talks": _TALKS_2015, "workshops": _WORKSHOPS_2015}
In [ ]:
# 2016 selections, one title per line, grouped by proposal type.
# Entries are lowercase and several look like truncated titles; the tagging
# loop strips each line and uses it as a case-insensitive substring of
# df.title, so blank first/last lines and surrounding whitespace are harmless.
_TALKS_2016 = """
hacking the python ast
helix and salt: case study in high volume and distributed python applications
realtime microservices with server side flux
building an automatic keyphrase extraction system using nltk
testing native binaries using cffi and py.test
the trends in choosing licenses in python ecosystem
good bye, call stack; hello, event driven architectures
algorithmic music generation
python byte code hacks
load testing using locust.io
continuous integration for data scientists
building companion chatbot with python
deploying your python backend with
big data analysis using pyspark
flying a drone
containerize upstream projects effortlessly
financial modelling and simulation with python
micropython - porting python to microcontrollers
creating a recommendation engine based on nlp and contextual
open source health monitoring and evaluation systems
concurrency in modern robots
building a secure iot platform using paho and flask
don't write tests, generate them
real time sentiment analysis with apache storm and python
"""

_WORKSHOPS_2016 = """
building a lie detector: multi-modal sentiment analysis
docker workshop
optimizing neural networks with theano
productive coding with pycharm
demystifying the django rest framework
scaling django with kubernetes
"""

# Key order (talks, then workshops) is the order the tagging loop visits them.
sel_2016 = {"talks": _TALKS_2016, "workshops": _WORKSHOPS_2016}
In [ ]:
# 2016
# Flag each 2016 selection in df. A title is flagged only when it identifies
# exactly one row; anything ambiguous or unmatched is printed for manual review.
for title in sel_2016.values():
    # One candidate per line; drop surrounding whitespace and blank lines.
    titles = [t.strip() for t in title.splitlines()]
    titles = [t for t in titles if t]
    for tl in titles:
        # regex=False: the entries are plain text, and characters such as "."
        # in "locust.io" / "py.test" must match literally, not as regex syntax.
        # na=False: a row with a missing title can never match, and keeps the
        # mask strictly boolean so it is usable for row selection.
        xdf = df[df.title.str.contains(tl, case=False, regex=False, na=False)]
        if xdf.shape[0] != 1:
            print(tl)
        else:
            df.loc[xdf.index[0], "selected"] = True
In [ ]:
# 2015
# Flag each 2015 selection in df. When a title matches several rows, the
# candidates are narrowed to the proposal type of the section being processed;
# a title is flagged only if exactly one row is left, otherwise it is printed
# for manual review.
for prop_type, title in sel_2015.items():
    # One candidate per line; drop surrounding whitespace and blank lines.
    titles = [t.strip() for t in title.splitlines()]
    titles = [t for t in titles if t]
    for tl in titles:
        # regex=False: entries are plain text; "(advanced concepts)" would
        # otherwise be parsed as a regex group and never match the real title.
        # na=False: rows with a missing title never match and the mask stays
        # strictly boolean.
        xdf = df[df.title.str.contains(tl, case=False, regex=False, na=False)]
        if xdf.shape[0] > 1:
            # Several hits (e.g. a talk title that is a prefix of a workshop
            # title): keep only rows whose type matches this section.
            # NOTE(review): assumes the `type` column equals the section key
            # ignoring case ("Workshops" is the only value visible in the
            # original code) -- confirm the label used for talks.
            xdf = xdf[xdf["type"].astype(str).str.lower() == prop_type]
        if xdf.shape[0] == 1:
            df.loc[xdf.index[0], "selected"] = True
        else:
            # Still ambiguous or not found: report it instead of skipping
            # silently, as the 2016 cell does.
            print(tl)
In [ ]:
# How many proposals ended up selected vs. not selected.
df.selected.value_counts()
In [ ]:
# Fraction of all proposals that are flagged as selected.
print(df.selected.sum() / len(df.index))
In [ ]:
# Write the tagged table back out as tab-separated text, without the row index.
df.to_csv(path_or_buf="tagged.tsv", index=False, sep="\t")