Experiments with the results of the Data Bootcamp entry poll.
This IPython notebook was created by Dave Backus for the NYU Stern course Data Bootcamp.
In [5]:
import pandas as pd # data package
import matplotlib.pyplot as plt # graphics
import sys # system module, used to get Python version
import datetime as dt # date tools, used to note current date
# Report the interpreter version, the pandas version and the run date,
# one "label value" line each.
for label, value in (
    ('\nPython version: ', sys.version),
    ('Pandas version: ', pd.__version__),
    ("Today's date:", dt.date.today()),
):
    print(label, value)
In [6]:
# Build the address of the poll results from its folder and file name,
# then read the CSV (row 0 supplies the column names) into a DataFrame.
url1 = 'http://pages.stern.nyu.edu/~dbackus/Data/'
url2 = 'Data-Bootcamp-entry-poll_s16.csv'
file = url2                        # bare file name, kept next to the full url
url = ''.join([url1, url2])
ep = pd.read_csv(url, header=0)
print('Dimensions:', ep.shape)     # (rows, columns)
In [10]:
# Fix variable names: replace the column labels read from the CSV with
# short, title-cased ones, then display the dtype of each column.
# The list needs one entry per column of `ep` for the assignment to succeed.
variables = list(map(str.title, [
    'time', 'program', 'career', 'programming', 'stats',
    'media', 'other', 'major', 'data', 'why', 'topics',
]))
ep.columns = variables
ep.dtypes
Out[10]:
In [9]:
# Summarize results: for every column, print its name followed by the
# five most frequent responses and their counts.
for var in ep.columns:
    print(
        '\n', var, '\n',
        ep[var].value_counts().head(5),
        sep='',
    )
In [ ]:
In [23]:
# For the first ten responses, flag (1/0) whether the 'Stats' answer contains
# the substring 'one'; missing answers count as 0. Multiplying by 1 turns the
# boolean result into integers.
ep['Stats'].head(10).str.contains('one', na=False) * 1
Out[23]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: