In [1]:
import re
import pandas as pd
In [2]:
# Load the tab-separated proposals file into the working frame.
df = pd.read_csv("cfp.tsv", sep="\t")
In [3]:
# Preview the first five rows to check that the file parsed as expected.
df.head(n=5)
Out[3]:
In [4]:
# Selected 2015 proposals, keyed by proposal type ("talks" / "workshops").
# Each value is a newline-separated block of lower-case title fragments, one
# proposal per line; some lines are only a prefix of the full title.
# NOTE(review): presumably meant to be matched against df.title the same way
# sel_2016 is in the tagging cell — confirm.
sel_2015 = dict(
    talks="""
simple hacks to make your django website faster
pretty printing in python
machine learning techniques for building a large scale
laying out your django projects
python and riakdb
building flexible filesystems with fuse-python
symengine: the future fast core of computer algebra systems
test driven development with ansible
explore big data using simple python code
introduction to nipype and how do we create
python load balancer: 0 to 1 million requests per second
creating, deployment & customizing
building nextgen iot solutions
consuming government data with python and d3
python traceback for humans
how to build microservices using zeromq and wsgi
rip nagios. hello docker shinken
building offensive web security framework in python
how to detect phishing urls using pyspark decision trees
fedmsg: the message bus of fedora infrastructure
concurrent data processing in python
analyzing arguments during a debate using natural language processing
avoiding common pitfalls of datetime from a webapp
python 2 metaprogramming, macros, madness & more
rest apis - what, why and how
solving logical puzzles with natural language processing
""",
    workshops="""
getting started with ansible
let's learn statistics
using devstack to contribute to openstack
building nextgen iot solutions using python and cloud
reasoning under uncertainty with python
python on your mobile phone(advanced concepts)
django projects the right way
symbolic computation with python, sympy
thinking in functions
""",
)
In [5]:
# Selected 2016 proposals, keyed by proposal type ("talks" / "workshops").
# Each value is a newline-separated block of lower-case title fragments, one
# proposal per line; the tagging cell looks each fragment up in df.title
# (case-insensitively) among the rows whose year is 2016.
sel_2016 = dict(
    talks="""
hacking the python ast
helix and salt: case study in high volume and distributed python applications
realtime microservices with server side flux
building an automatic keyphrase extraction system using nltk
testing native binaries using cffi and py.test
the trends in choosing licenses in python ecosystems
good bye, call stack; hello, event driven architectures
algorithmic music generation
python byte code hacks
load testing using locust.io
continuous integration for data scientists
building companion chatbot with python
deploying your python backend with
big data analysis using pyspark
flying a drone
containerize upstream projects effortlessly
financial modelling and simulation with python
micropython - porting python to microcontrollers
creating a recommendation engine based on nlp and contextual
open source health monitoring and evaluation systems
concurrency in modern robots
building a secure iot platform using paho and flask
don't write tests, generate them
real time sentiment analysis with apache storm and python
""",
    workshops="""
building a lie detector: multi-modal sentiment analysis
docker workshop
talking to machines: optimizing neural networks with theano
productive coding with pycharm
demystifying the django rest framework
scaling django with kubernetes
""",
)
In [6]:
# Start every row with an empty proposal type; the tagging cell fills it in
# for the rows it matches.
df.loc[:, "proposal_type"] = ""
In [7]:
# Every proposal counts as not selected until the tagging cell flips it.
df.loc[:, "selected"] = False
In [17]:
def tag_selected(frame, selections, year):
    """Flag the rows of one conference year that appear in a selection list.

    Each non-blank line of every value in ``selections`` is looked up as a
    literal, case-insensitive substring of ``frame["title"]`` among the rows
    whose ``year`` equals ``year``. When exactly one row matches, its
    ``selected`` cell is set to True and its ``proposal_type`` cell to the
    dictionary key the line came from. Otherwise the line is printed so it
    can be resolved by hand.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that is read through its ``title`` and ``year`` columns and
        updated in place.
    selections : dict
        Maps a proposal type to a newline-separated block of title fragments.
    year : int
        Value compared against ``frame["year"]``.

    Returns
    -------
    list of str
        The fragments that matched zero rows or more than one row, in the
        order they were printed.
    """
    unresolved = []
    # Loop-invariant: compute the year mask once instead of once per fragment.
    in_year = frame["year"] == year
    for proposal_type, block in selections.items():
        for line in block.splitlines():
            fragment = line.strip()
            if not fragment:
                continue
            # regex=False: fragments such as "...phone(advanced concepts)" or
            # "locust.io" contain regex metacharacters and must match verbatim.
            # na=False: a missing title is a non-match, not an invalid mask.
            title_hit = frame["title"].str.contains(
                fragment, case=False, regex=False, na=False
            )
            # Combine both conditions in one aligned mask rather than indexing
            # an already-filtered frame with a full-length boolean Series.
            rel_ix = frame.index[title_hit & in_year]
            if len(rel_ix) != 1:
                print(fragment)
                unresolved.append(fragment)
            else:
                frame.loc[rel_ix[0], "selected"] = True
                frame.loc[rel_ix[0], "proposal_type"] = proposal_type
    return unresolved


# Tag both years in one pass. The previous version of this cell only handled
# sel_2016, so a fresh Restart & Run All never applied sel_2015.
for year, selections in ((2015, sel_2015), (2016, sel_2016)):
    tag_selected(df, selections, year)
In [20]:
# Inspect every proposal whose title mentions theano (any letter case).
df.loc[df["title"].str.contains("theano", case=False)]
Out[20]:
In [21]:
# Manual fix-up: mark row 214 as selected.
# NOTE(review): 214 is a hard-coded row label, presumably the 2016 theano
# workshop found with the title lookup in the previous cell — confirm against
# cfp.tsv, since the label changes if the input file is reordered.
df.loc[214, "selected"] = True
# Keep the row consistent with the automatically tagged ones, which always get
# a proposal type; the only theano fragment in the selection lists is a workshop.
df.loc[214, "proposal_type"] = "workshops"
In [22]:
# Sanity check: how many proposals ended up selected vs. not selected.
df["selected"].value_counts()
Out[22]:
In [23]:
# Write the tagged table as tab-separated text, leaving out the index column.
df.to_csv("tagged.tsv", index=False, sep="\t")
In [ ]: