In [6]:
import pandas as pd

# Load the unemployment-rate table and convert its DATE column to datetimes
# (later cells use that column as the x axis of their plots).
unrate = pd.read_csv('UNRATE.csv')
unrate['DATE'] = pd.to_datetime(unrate['DATE'])
# Parenthesised print with a single argument behaves the same on Python 2
# and Python 3, unlike the bare `print x` statement.
print(unrate.head(12))


         DATE  VALUE
0  1948-01-01    3.4
1  1948-02-01    3.8
2  1948-03-01    4.0
3  1948-04-01    3.9
4  1948-05-01    3.5
5  1948-06-01    3.6
6  1948-07-01    3.6
7  1948-08-01    3.9
8  1948-09-01    3.8
9  1948-10-01    3.7
10 1948-11-01    3.8
11 1948-12-01    4.0

In [8]:
import matplotlib.pyplot as plt
# Smoke test of the plotting setup: plot() is called with no data at all,
# so no line is added before the figure is shown.
plt.plot()
plt.show()



In [15]:
# Line chart of the first twelve rows of the series (the 1948 readings).
first_twelve = unrate.iloc[:12]
months, rates = first_twelve['DATE'], first_twelve['VALUE']
plt.plot(months, rates)
# Tilt the x tick labels by 45 degrees.
plt.xticks(rotation=45)
plt.xlabel('MONTH')
plt.ylabel('Unemployment_rate')
plt.title('Month unemployment trend,1948')
plt.show()
#print help(plt.xticks)



In [25]:
import numpy as np

# Create axes in slots 1, 3 and 6 of a 2x4 subplot grid; the other slots
# are never requested.
fig = plt.figure()
ax1, ax2, ax3 = (fig.add_subplot(2, 4, slot) for slot in (1, 3, 6))

# Only the first axes receives data: five random integers drawn from
# [1, 5) on x against 0..4 on y. ax2 and ax3 are left without data.
ax1.plot(np.random.randint(1, 5, size=5), np.arange(5))
plt.show()



In [27]:
# Overlay two consecutive twelve-row windows of the series on one figure:
# rows 0-11 in red and rows 12-23 in blue.
fig = plt.figure(figsize=(10, 5))
for first_row, line_colour in ((0, 'red'), (12, 'blue')):
    year_slice = unrate.iloc[first_row:first_row + 12]
    plt.plot(year_slice['DATE'], year_slice['VALUE'], c=line_colour)
plt.show()



In [1]:
import pandas as pd

# Load the review scores and keep only the film title plus the five score
# columns used by the plotting cells below.
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM','RT_user_norm','Metacritic_user_nom','IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']
norm_reviews = reviews[cols]
# Parenthesised print works on both Python 2 and Python 3.
print(norm_reviews[:1])


                             FILM  RT_user_norm  Metacritic_user_nom  \
0  Avengers: Age of Ultron (2015)           4.3                 3.55   

   IMDB_norm  Fandango_Ratingvalue  Fandango_Stars  
0        3.9                   4.5             5.0  

In [20]:
import matplotlib.pyplot as plt
from numpy import arange

# Axes.bar() is given the x coordinate of each bar, the height of each bar
# and, as the third positional argument, the bar width.
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# Scores of the row labelled 0 become the bar heights. `.loc` replaces the
# deprecated `.ix` indexer (removed in pandas 1.0); with integer row labels
# `.ix[0, ...]` was label-based as well, so the selection is the same.
bar_heights = norm_reviews.loc[0, num_cols].values
print(bar_heights)
# One bar per score column, positioned at x = 1..5.
bar_positions = arange(5) + 1
print(bar_positions)
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, 0.5)
plt.show()


[4.2999999999999998 3.5499999999999998 3.8999999999999999 4.5 5.0]
[1 2 3 4 5]

In [26]:
# Bar chart of bar_heights at bar_positions (both defined in an earlier
# cell), this time with labelled ticks, axis titles and a grid.
tick_positions = range(1, 6)
#print tick_positions
fig, ax = plt.subplots()
ax.bar(bar_positions, bar_heights, width=0.5)

# Put one tick under each bar and label it with the score column name.
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation=45)

ax.set(xlabel='Rating Source', ylabel='Average Rating ', title='FILM')
ax.grid()
plt.show()



In [28]:
import matplotlib.pyplot as plt
from numpy import arange
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue', 'Fandango_Stars']

# Horizontal version of the bar chart: the scores of the row labelled 0 are
# the bar lengths. `.loc` replaces the deprecated `.ix` indexer (removed in
# pandas 1.0) and selects the same row and columns.
bar_widths = norm_reviews.loc[0, num_cols].values
bar_positions = arange(5) + 1
tick_positions = range(1,6)
fig, ax = plt.subplots()
# Third positional argument of barh() is the thickness of each bar.
ax.barh(bar_positions, bar_widths, 0.5)

# The rating sources now sit on the y axis, one tick per bar.
ax.set_yticks(tick_positions)
ax.set_yticklabels(num_cols)
ax.set_ylabel('Rating Source')
ax.set_xlabel('Average Rating')
ax.set_title('Average User Rating For Avengers: Age of Ultron (2015)')
plt.show()



In [31]:
# Scatter plot: Fandango rating on x against Rotten Tomatoes user score on y.
fig, ax = plt.subplots()
fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']
ax.scatter(fandango_scores, rt_scores, linewidths=5)

ax.set(xlabel='Fandango', ylabel='Rotten Tomatoes')

ax.grid()
plt.show()



In [41]:
# switch axes: the same two score columns are plotted twice, with x and y
# swapped in the lower panel.
fig = plt.figure(figsize=(5, 10))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)

fandango_scores = norm_reviews['Fandango_Ratingvalue']
rt_scores = norm_reviews['RT_user_norm']

ax1.scatter(fandango_scores, rt_scores, linewidths=5)
ax1.set(xlabel='Fandango', ylabel='Rotten Tomatoes')

ax2.scatter(rt_scores, fandango_scores, linewidths=5)
ax2.set(xlabel='Rotten Tomatoes', ylabel='Fandango')

# ax2 was added last and is therefore the current axes, so the grid is
# drawn on the lower panel only.
ax2.grid()
plt.show()



In [44]:
# Reload the review scores, this time keeping the film title and four of
# the score columns (Fandango_Stars is left out).
reviews = pd.read_csv('fandango_scores.csv')
cols = ['FILM','RT_user_norm','Metacritic_user_nom','IMDB_norm','Fandango_Ratingvalue']
norm_reviews = reviews[cols]
# Parenthesised print works on both Python 2 and Python 3.
print(norm_reviews[:5])


                             FILM  RT_user_norm  Metacritic_user_nom  \
0  Avengers: Age of Ultron (2015)           4.3                 3.55   
1               Cinderella (2015)           4.0                 3.75   
2                  Ant-Man (2015)           4.5                 4.05   
3          Do You Believe? (2015)           4.2                 2.35   
4   Hot Tub Time Machine 2 (2015)           1.4                 1.70   

   IMDB_norm  Fandango_Ratingvalue  
0       3.90                   4.5  
1       3.55                   4.5  
2       3.90                   4.5  
3       2.70                   4.5  
4       2.55                   3.0  

In [46]:
# Count how often each distinct score occurs, then order the counts by
# score value (the index) instead of by frequency.
fandango_distribution = norm_reviews['Fandango_Ratingvalue'].value_counts().sort_index()
imdb_distribution = norm_reviews['IMDB_norm'].value_counts().sort_index()

# Parenthesised print works on both Python 2 and Python 3.
print(fandango_distribution)
print(imdb_distribution)


2.7     2
2.8     2
2.9     5
3.0     4
3.1     3
3.2     5
3.3     4
3.4     9
3.5     9
3.6     8
3.7     9
3.8     5
3.9    12
4.0     7
4.1    16
4.2    12
4.3    11
4.4     7
4.5     9
4.6     4
4.8     3
Name: Fandango_Ratingvalue, dtype: int64
2.00     1
2.10     1
2.15     1
2.20     1
2.30     2
2.45     2
2.50     1
2.55     1
2.60     2
2.70     4
2.75     5
2.80     2
2.85     1
2.90     1
2.95     3
3.00     2
3.05     4
3.10     1
3.15     9
3.20     6
3.25     4
3.30     9
3.35     7
3.40     1
3.45     7
3.50     4
3.55     7
3.60    10
3.65     5
3.70     8
3.75     6
3.80     3
3.85     4
3.90     9
3.95     2
4.00     1
4.05     1
4.10     4
4.15     1
4.20     2
4.30     1
Name: IMDB_norm, dtype: int64

In [64]:
# Three histograms of the Fandango ratings are drawn on the same axes:
# default binning, 20 bins, and 20 bins restricted to the range 4-5.
fig, ax = plt.subplots()
fandango_scores = norm_reviews['Fandango_Ratingvalue']
for hist_options in ({}, {'bins': 20}, {'range': (4, 5), 'bins': 20}):
    ax.hist(fandango_scores, **hist_options)
# Fix the count axis to 0-50.
ax.set_ylim((0, 50))
ax.grid()
plt.show()
#print help(plt.hist)



In [68]:
# Box plot of the Rotten Tomatoes user scores on a fixed 0-5 y axis.
fig, ax = plt.subplots()
rt_scores = norm_reviews['RT_user_norm']
ax.boxplot(rt_scores)
ax.set_xticklabels(['Rotten Tomatoes'])
ax.set_ylim((0, 5))
plt.show()



In [77]:
num_cols = ['RT_user_norm', 'Metacritic_user_nom', 'IMDB_norm', 'Fandango_Ratingvalue']

# One box per score column, drawn side by side; the mean of each column is
# shown as a line in addition to the median.
fig, ax = plt.subplots()
score_matrix = norm_reviews[num_cols].values
ax.boxplot(score_matrix, showmeans=True, meanline=True)
ax.set_xticklabels(num_cols, rotation=50)
ax.set_ylim((0, 5))

plt.show()



In [88]:
#print help(plt.boxplot)

In [78]:
import pandas as pd
import matplotlib.pyplot as plt

# NOTE: the misspelled name `women_degress` is kept as-is because the
# following cells read it under that name.
women_degress = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
years, biology_share = women_degress['Year'], women_degress['Biology']
# Line chart of the Biology column over the years.
plt.plot(years, biology_share)
plt.show()



In [82]:
# Plot the Biology column (labelled "women") together with its complement
# to 100 (labelled "men") on the same axes.
years = women_degress['Year']
women_share = women_degress['Biology']
plt.plot(years, women_share, c='blue', label='women')
plt.plot(years, 100 - women_share, c='green', label='men')
plt.legend(loc='center')
plt.title('Percentage of Biology Degrees Awarded by gender')
plt.grid()
plt.show()
#print help(plt.legend)



In [90]:
# Same women/men Biology chart through the Axes interface, with tick marks
# requested on all four sides of the plot.
fig, ax = plt.subplots()
years = women_degress['Year']
women_share = women_degress['Biology']
ax.plot(years, women_share, label='Women')
ax.plot(years, 100 - women_share, label='Men')

ax.tick_params(bottom=True, top=True, left=True, right=True)
ax.set_title('Percentage of Biology Degrees Awarded By Gender')
ax.legend(loc="upper right")

plt.show()

#print help(plt.tick_params)



In [97]:
# Women/men Biology chart with the chart junk removed: no tick marks and
# no axis spines.
fig, ax = plt.subplots()
ax.plot(women_degress['Year'], women_degress['Biology'], c='blue', label='Women')
ax.plot(women_degress['Year'], 100-women_degress['Biology'], c='green', label='Men')
# Booleans are used instead of the strings "off": current Matplotlib
# releases do not translate "on"/"off", so the non-empty string "off" is
# truthy and would leave the ticks visible.
ax.tick_params(bottom=False, top=False, left=False, right=False)

# Hide all four borders of the plotting area.
for key,spine in ax.spines.items():
    spine.set_visible(False)

# The location must be passed as `loc=`; a bare positional string is taken
# as the legend labels rather than as a position.
ax.legend(loc='upper right')
plt.show()



In [103]:
#print help(ax.spines.items())

In [104]:
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']

fig = plt.figure(figsize=(12, 12))

# One panel per major in a 2x2 grid: the major's column in blue ("Women")
# and its complement to 100 in green ("Men").
for sp, major in enumerate(major_cats):
    ax = fig.add_subplot(2, 2, sp + 1)
    women_share = women_degress[major]
    ax.plot(women_degress['Year'], women_share, c='blue', label='Women')
    ax.plot(women_degress['Year'], 100 - women_share, c='green', label='Men')

# These pyplot calls act on the current axes, i.e. the last panel created
# by the loop above.
plt.legend(loc='upper right')
plt.grid()
plt.show()



In [106]:
import pandas as pd 
import matplotlib.pyplot as plt

women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']

# RGB line colours scaled from 0-255 to the 0-1 range. Dividing by 255.0
# forces float division: this notebook uses Python 2 print statements, and
# on Python 2 `107/255` is integer division, which collapses every channel
# to 0 or 1 (black and pure red instead of the intended colours).
cb_dark_blue = (0/255.0, 107/255.0, 164/255.0)
cb_orange = (255/255.0, 128/255.0, 14/255.0)

fig = plt.figure(figsize=(12,12))

# One panel per major in a 2x2 grid, with shared axis limits so the panels
# are directly comparable.
for sp in range(0,4):
    ax = fig.add_subplot(2,2,sp+1)
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women')
    ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men')
    # Hide all four borders of the panel.
    for key,spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0,100)
    ax.set_title(major_cats[sp])
    # Booleans instead of "on"/"off" strings: current Matplotlib releases
    # treat the string "off" as truthy. The right-hand ticks stay enabled,
    # as in the original call.
    ax.tick_params(bottom=False, top=False, left=False, right=True)


# Acts on the current axes, i.e. the last panel created by the loop.
plt.legend(loc='upper right')
plt.show()


<matplotlib.figure.Figure at 0x118b99d90>

In [107]:
# RGB line colours scaled from 0-255 to the 0-1 range. Dividing by 255.0
# forces float division; on Python 2 the integer form `107/255` evaluates
# to 0 and the colours degrade to black and pure red.
cb_dark_blue = (0/255.0, 107/255.0, 164/255.0)
cb_orange = (255/255.0, 128/255.0, 14/255.0)

fig = plt.figure(figsize=(12, 12))

for sp in range(0,4):
    ax = fig.add_subplot(2,2,sp+1)
    # Set the line width when specifying how each line should look.
    ax.plot(women_degrees['Year'], women_degrees[major_cats[sp]], c=cb_dark_blue, label='Women', linewidth=10)
    ax.plot(women_degrees['Year'], 100-women_degrees[major_cats[sp]], c=cb_orange, label='Men', linewidth=10)
    # Hide all four borders of the panel.
    for key,spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0,100)
    ax.set_title(major_cats[sp])
    # Booleans instead of "off" strings: current Matplotlib releases treat
    # the non-empty string "off" as truthy and would keep the ticks.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Acts on the current axes, i.e. the last panel created by the loop.
plt.legend(loc='upper right')
plt.show()



In [ ]:


In [110]:
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
fig = plt.figure(figsize=(18, 3))

# Six panels in a single row, one per category, sharing the same axis
# limits so the panels are directly comparable.
for sp in range(0,6):
    ax = fig.add_subplot(1,6,sp+1)
    ax.plot(women_degrees['Year'], women_degrees[stem_cats[sp]], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100-women_degrees[stem_cats[sp]], c=cb_orange, label='Men', linewidth=3)
    # Hide all four borders of the panel.
    for key,spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0,100)
    ax.set_title(stem_cats[sp])
    # Booleans instead of "off" strings: current Matplotlib releases treat
    # the non-empty string "off" as truthy and would keep the ticks.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

# Acts on the current axes, i.e. the last panel created by the loop.
plt.legend(loc='upper right')
plt.show()



In [111]:
def _draw_stem_panel(fig, index, category):
    """Add one women/men line panel for `category` to a 1x6 subplot row.

    Parameters:
        fig: figure that receives the new axes.
        index: zero-based panel position; the subplot slot is index + 1.
        category: name of the women_degrees column to plot.

    Returns:
        The axes that was created and drawn on.
    """
    ax = fig.add_subplot(1, 6, index + 1)
    ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100-women_degrees[category], c=cb_orange, label='Men', linewidth=3)
    # Hide all four borders of the panel.
    for key, spine in ax.spines.items():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    # Booleans instead of "off" strings: current Matplotlib releases treat
    # the non-empty string "off" as truthy and would keep the ticks.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
    return ax


# First figure: the lines are identified by a legend, which pyplot places
# on the current axes (the last panel created).
fig = plt.figure(figsize=(18, 3))
for sp in range(0,6):
    ax = _draw_stem_panel(fig, sp, stem_cats[sp])
plt.legend(loc='upper right')
plt.show()

# Second figure: same panels, but the lines are labelled with text placed
# directly on the first and last panels instead of a legend.
fig = plt.figure(figsize=(18, 3))
for sp in range(0,6):
    ax = _draw_stem_panel(fig, sp, stem_cats[sp])

    if sp == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
    elif sp == 5:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')
plt.show()



In [ ]: