In [2]:
%matplotlib inline
from matplotlib import pyplot as plt

''' Here is a simple line plot example '''

# One nominal-GDP reading per decade, 1950 through 2010 inclusive;
# years[i] pairs with gdp[i].
years = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
gdp = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3]

# Line chart: years along x, GDP along y, drawn as a solid green line
# with a circular marker on every data point.
plt.plot(years, gdp, linestyle='solid', marker='o', color='green')

# Annotate the figure (title and y-axis label) before rendering it.
plt.title("Nominal GDP")
plt.ylabel("Billions of $")
plt.show()



In [8]:
''' Simple bar chart example '''

movies = ['Annie Hall', "Ben Hur", "Casablanca", 'Gandhi', 'West Side Story']
num_oscars = [5, 11, 3, 8, 10]

# One integer x-position per movie. The bars are explicitly centered on these
# positions (align='center') instead of relying on matplotlib's default bar
# alignment, so the tick labels below can reuse exactly the same coordinates
# and always sit under the middle of their bar.
xs = list(range(len(movies)))

# plot bars centered at x-coordinates (xs), with heights (num_oscars);
# the width is left at matplotlib's default
plt.bar(xs, num_oscars, align='center')

plt.ylabel('# of Academy Awards')
plt.title('Famous Old Movies')

# label the x-axis with the movie names, one at each bar center
plt.xticks(xs, movies)
plt.show()



In [21]:
''' Using a Bar Chart for a Histogram '''

from collections import Counter

grades = [81.2, 95.7, 99.1, 86, 88, 71.3, 0.0, 75.8, 82, 67, 100, 97, 75, 82.5, 78.0]


def decile(grade):
    """Return the lower bound of the ten-point bucket that `grade` falls in.

    For example 81.2 maps to 80.0 and 67 maps to 60; a grade of exactly 100
    maps to 100 (its own bucket).
    """
    return grade // 10 * 10


# Number of grades in each bucket, keyed by the bucket's lower bound.
histogram = Counter(decile(grade) for grade in grades)

# Draw one bar per bucket: 8 units wide and centered on the bucket's decile
# value, so it lines up with the matching x tick set below. The alignment is
# passed explicitly rather than shifting the x-coordinates by half a bar
# width, which only works when bars are positioned by their left edge.
# The dict views are copied into lists so plt.bar receives plain sequences.
plt.bar(list(histogram.keys()),
        list(histogram.values()),
        8,
        align='center',
        color='g',
        edgecolor='none')

plt.axis([-5, 105, 0, 5])  # x-axis from -5 to 105, y-axis from 0 to 5
plt.xticks([10 * i for i in range(11)])  # ticks at 0, 10, ..., 100
plt.xlabel("Decile")
plt.ylabel("# of Students")
plt.title("Distribution of Exam 1 Grades")
plt.show()



In [10]:
''' line charts '''
%matplotlib inline
from matplotlib import pyplot as plt

# Variance doubles at every step (1, 2, 4, ..., 128); squared bias is the
# same series walked backwards, and total error is their pointwise sum.
variance = [2 ** power for power in range(8)]
bias_squared = list(reversed(variance))
total_error = [sum(pair) for pair in zip(variance, bias_squared)]
xs = list(range(len(variance)))

# Every plt.plot call draws onto the same graph, so looping over the three
# series stacks three lines on one set of axes.
for series, fmt, name in [(variance, 'g-', 'variance'),
                          (bias_squared, 'r-', 'bias**2'),
                          (total_error, 'b:', 'total_error')]:
    plt.plot(xs, series, fmt, label=name, linewidth=4)

# Each line carries a label, so the legend needs no further arguments
# beyond where to put it.
plt.legend(loc='upper center')  # same location as the numeric code 9
plt.title("The Bias-Variance Tradeoff")
plt.xlabel("model complexity")
plt.show()



In [7]:
''' scatterplots '''

# Per-user data from DataSciencester: how many friends each user has and
# the average number of minutes per day they spend on the site.
# Entry i of each list describes the same user.
friends = [70, 65, 72, 63, 71, 64, 60, 64, 67]
minutes = [175, 170, 205, 120, 130, 105, 145, 190, 195]
labels = list('abcdefghi')

plt.scatter(friends, minutes)

# Tag every point with its letter, nudged 5 points right and 5 points down
# from the marker so the text does not sit on top of it.
for point, label in zip(zip(friends, minutes), labels):
    plt.annotate(label, xy=point, xytext=(5, -5), textcoords='offset points')

plt.title("Daily Minutes vs Number of Friends")
plt.xlabel("# of friends")
plt.ylabel("daily minutes spent on the site")
plt.show()



In [12]:
# Another scatter plot, this time of two sets of test grades. The axis
# ranges are left for matplotlib to choose, which can produce a misleading
# picture (hence the title).
test_1_grades = [99, 90, 85, 97, 80]
test_2_grades = [100, 85, 60, 90, 70]

plt.scatter(test_1_grades, test_2_grades)
plt.xlabel("test 1 grade")
plt.ylabel("test 2 grade")
plt.title("Axes Aren't Comparable")
plt.show()


# Free-form notes on other plotting libraries, parked in a string;
# nothing in the cells shown here reads this variable.
TODO = """
Matplot lib is only one plotting library. Though perhaps the most common.
Bokeh is a Python port of the popular D3 JavaScript data display package.
Also, the R package ggplot is available through Python.
"""



In [ ]: