This IPython Notebook goes through some common packages and more complicated aspects of Python. It contains examples and problems to work on.
In [3]:
# Best practice is to set up all your imports at the top of the script / notebook
import numpy as np # Numpy is a package for scientific computing (linear algebra) -
import scipy as sp # Scipy is a package for math, science and engineering -
import pandas as pd # Pandas is a package for data structures -
import matplotlib.pyplot as plt # Matplotlib is a package for plotting -
import statsmodels.api as sm # Stats models is a package for statistics -
# This sets a higher resolution for figures
%config InlineBackend.figure_format = 'retina'
# Pylab: pylab is part of the plotting library matplotlib, and imports matplotlib and numpy packages.
# The inline argument makes all your plots appear inline with the notebook
%pylab inline
# This is a useful tutorial about pylab:
# Note that numpy and matplotlib were redundantly imported here (as they are imported with pylab)
In [75]:
# A package is basically just a collection of functions, grouped together
# If you want a full list of what's in a package, you should consult its documentation.
# A quick check of what's in a package can be done with the dir command
In [104]:
## Numpy things
# Numpy is best for linear algebra, and for handling matrix-type data.
# It provides its own array type for vectors / matrices, which is more practical than plain lists.
list_vector = [1, 2, 3, 4, 5]
np_vector = np.array([1, 2, 3, 4, 5])
# Each message is formatted into a single string and passed to print(...),
# so the cell runs (and prints the same text) on both Python 2 and Python 3.
print('List vector:  {}  is of type  {}'.format(list_vector, type(list_vector)))
print('Numpy vector:  {}  is of type  {} \n'.format(np_vector, type(np_vector)))
# Numpy comes with a set of functions to manipulate these arrays
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7, 8])
c = np.concatenate([a, b])  # joins the two vectors end to end
print('Concatenated Vector:  {}'.format(c))
In [99]:
# Numpy also has functions to create special matrices (just like the matlab ones)
zeros_matrix = np.zeros((2, 2))
ones_matrix = np.ones((2, 2))
# The label and the matrix are formatted into one string so that print(...) works on
# both Python 2 and Python 3 and the matrix starts cleanly on its own line.
print('Zeros Matrix: \n{}'.format(zeros_matrix))
print('Ones Matrix: \n{}'.format(ones_matrix))
# On numpy arrays, + and * both work element by element (* is NOT a matrix product)
summed_matrix = zeros_matrix + ones_matrix
multiplied_matrix = zeros_matrix * ones_matrix
print('Summed Matrix: \n{}'.format(summed_matrix))
print('Multiplied Matrix: \n{}'.format(multiplied_matrix))
In [101]:
# Indexing in Python
# Python is a 0-based language. That means that everything starts counting at zero.
v = [1, 2, 3, 4]
# print(...) with one pre-formatted string runs on both Python 2 and Python 3
print('First Index:  {}'.format(v[0]))
print('Second Index:  {}'.format(v[1]))
# Numpy matrices take one index per dimension: [row, column]
print('Matrix Index:  {}'.format(zeros_matrix[0, 0]))
In [103]:
## Example data, plotting
# Create a simple sinusoid, and plot it
# 1 second of data samples at spacing of 1/1000 seconds
# (np.arange / np.pi are referenced through the numpy import explicitly, rather than
#  relying on the bare names that %pylab injects into the namespace)
t = np.arange(0, 1, 1.0/1000)
# sine wave of 10 Hz
s = np.sin(2 * np.pi * 10 * t)
# plot first 20 points of the resulting data
In [6]:
# Import some data from Stats Models (this lets you access example data, like in R)
# get_rdataset downloads the named dataset from the online Rdatasets collection,
# so this cell needs an internet connection.
# Note that it loads the data in a Pandas data frame
# The Duncan data is published under the "carData" package in Rdatasets
# (it used to be listed under "car", which no longer resolves).
duncan_prestige = sm.datasets.get_rdataset("Duncan", "carData")
In [48]:
# Remember to use the help function to figure out what's going on
In [49]:
# Check the doc for what we just imported
# Remember, if you want to check what attributes an object has, do dir(object)
# print(...) with a single argument runs on both Python 2 and Python 3
print(duncan_prestige.title) # Print the title of the data we just loaded
print(' ')
print(duncan_prestige.__doc__)
In [50]:
# The head function lets you print out a defined number of rows of data
In [10]:
In [11]:
# Make a scatter plot of the data
plt.title('Education vs. Income in Duncan Data')
In [12]:
cor_coef, p_value = sp.stats.stats.pearsonr(,
print 'The correlation coefficient is: ', cor_coef
print 'The p-vaue is: ', p_value, '\n'
if p_value < 0.05:
print 'Woohooo! Victory! It\'s Significant! Science Accomplished!'
print 'Awww....'
In [47]:
# Add a best fit line to the plot
# Numpy has a function to fit a line to a set of data. It returns the coefficients of the line equation
fit_line_params = np.polyfit(,, 1)
# Use the line equation to get some points of the line
fit_line_points = np.polyval(fit_line_params, range(0,max(
# Plot the scatter plot again
# hold is a function that holds the current plot so that subsequent plot calls land on the same figure
# Plot the best fit line points
# Add things to plot
plt.title('Education vs. Prestige with Fit Line')
In [ ]:
# Programming Challenge - Temperature converter
# Write a function that converts between Celsius and Fahrenheit
# The formulas for conversion are:
# temp_in_celsius = (5.0/9) * (temp_in_fahrenheit - 32)
# temp_in_fahrenheit = (9.0/5) * temp_in_celsius + 32
# (Write the fractions with floats: in Python 2, 5/9 is integer division and evaluates to 0)
In [54]:
# Download the 'longley' example data from the online Rdatasets collection.
# The second argument is the R package that ships the dataset; longley is part of
# R's core 'datasets' package ('age' is not a package in the collection).
a = sm.datasets.get_rdataset('longley', 'datasets')
In [ ]: