In [1]:
# This is a Python comment.
# This cell contains Python code.
# Executing the cell evaluates the code and shows the value of the
# final expression (here, the sum below) as the cell's output.
2+2
Out[1]:
In [2]:
# live code some graphics here
import matplotlib.pyplot as plt
# IPython magic: render matplotlib figures inline, below the cell
%matplotlib inline
# plot the first five digits of pi; given a single list, matplotlib
# uses the list positions 0, 1, 2, ... as the x values
plt.plot([3,1,4,1,5])
Out[2]:
In [3]:
# switch matplotlib to the "fivethirtyeight" style sheet;
# this changes the look of figures drawn from here on
plt.style.use("fivethirtyeight")
In [4]:
# draw the same five digits again to see the effect of the new style
plt.plot([3,1,4,1,5])
Out[4]:
In [5]:
# your turn: plot some additional digits of pi
import sympy
In [6]:
# convert pi to a list of its decimal digits (to be plotted next)
# evaluate pi to 100 significant digits and take the string form
pi_str = str(sympy.N(sympy.pi, 100))
# drop the decimal point, then turn every remaining character into an int
pi_digits = list(map(int, pi_str.replace('.', '')))
In [7]:
# plot the digits of pi, in order, against their position in the list
plt.plot(pi_digits)
Out[7]:
In [8]:
# live-code an example of loading the VA data CSV with pandas here
import pandas as pd
In [9]:
# load the CSV (the path is relative to the current working directory)
# into a DataFrame; low_memory=False tells pandas not to parse the file
# in chunks, which avoids mixed-type inference within a column
df = pd.read_csv('../3-data/IHME_PHMRC_VA_DATA_ADULT_Y2013M09D11_0.csv', low_memory=False)
In [10]:
# The DataFrame.iloc indexer selects rows and columns by "integer location".
# Here: rows 5-9 and columns 5-9 (the end of each slice is excluded).
df.iloc[5:10, 5:10]
Out[10]:
In [11]:
# If you are new to this sort of thing, what do you think this does?
# (hint: a slice with no start value begins at the first position)
df.iloc[5:10, :10]
Out[11]:
In [12]:
# I don't have time to show you the details now, but I find that
# pandas DataFrames have really done things well. For example,
# a column can be read as an attribute of the DataFrame:
df.gs_text34
Out[12]:
In [13]:
# count how many rows carry each distinct value of the gs_text34 column
# (by default the most frequent value is listed first)
df.gs_text34.value_counts()
Out[13]:
In [14]:
# you can guess what the next line does,
# even if you have never used python before:
import sklearn.neighbors
In [15]:
# here is how sklearn creates a "classifier":
# a k-nearest-neighbors classifier with all default settings
clf = sklearn.neighbors.KNeighborsClassifier()
In [16]:
# I didn't mention `numpy` before, but this is "the fundamental
# package for scientific computing with Python"
import numpy as np
In [17]:
# Hand sklearn plain NumPy arrays instead of pandas objects:
#   X -- 2-D array (one row per record) holding the single column 'va46'
#   y -- 1-D array with the values of the 'gs_text34' column
X = np.array(df[['va46']])
y = np.array(df['gs_text34'])
In [18]:
# one nice thing about sklearn is that it has all sorts of
# fancy machine learning methods, but they all follow a
# common pattern: first, fit the model to features X and labels y
clf.fit(X, y)
Out[18]:
In [19]:
# predict the label for one new example whose single feature (va46)
# has the value 19; the input is a list of rows, one row per example
clf.predict([[19]])
Out[19]: