Topic: Challenge Set 3
Subject: Pandas
Date: 01/30/2017
Name: Prashant Tatineni
In [76]:
import pandas as pd
import matplotlib.pyplot as plt
import dateutil.parser
import numpy as np
%matplotlib inline
In [11]:
# Load the 2013 movies data set from a CSV file, resolved relative to the
# notebook's working directory.
df = pd.read_csv('2013_movies.csv')
In [12]:
# Preview the first rows to sanity-check that the file loaded.
df.head()
Out[12]:
Challenge 1
In [13]:
# Parse every raw ReleaseDate value with dateutil and keep the result in a
# separate column so the original text is preserved.
df['ReleaseDateParsed'] = df['ReleaseDate'].apply(dateutil.parser.parse)
In [14]:
# Reference date for the day count below: 2012-12-31, so a release on
# 2013-01-01 is day 1.
zerotime = dateutil.parser.parse('2012-12-31')
In [15]:
# Whole days elapsed between the zerotime reference date and each release.
# The vectorised `.dt.days` accessor replaces the row-by-row `apply`, and the
# column is assigned once instead of briefly holding timedelta values.
df['Days'] = (df['ReleaseDateParsed'] - zerotime).dt.days
In [68]:
# Domestic gross against release timing. Days is the day count measured from
# the zerotime reference date, so the x label is built from zerotime itself.
plt.scatter(df['Days'], df['DomesticTotalGross'])
plt.xlabel('Days since {:%Y-%m-%d}'.format(zerotime))
plt.ylabel('Domestic Total Gross')
plt.title('Domestic Total Gross by Release Date');
Challenge 2
In [70]:
# Domestic gross against runtime, labelled so the figure stands on its own.
plt.scatter(df['Runtime'], df['DomesticTotalGross'])
plt.xlabel('Runtime')
plt.ylabel('Domestic Total Gross')
plt.title('Domestic Total Gross by Runtime');
Challenge 3
In [16]:
# Average runtime and domestic gross for each rating.
# The two numeric columns are selected *before* aggregating: calling .mean()
# on the whole grouped frame also tries to average the string and datetime
# columns, which raises TypeError on pandas >= 2.0.
df.groupby('Rating')[['Runtime', 'DomesticTotalGross']].mean()
Out[16]:
Challenge 4
In [27]:
# Release timing vs. domestic gross for G-rated movies only.
# The rating filter is evaluated once and reused for both axes.
g_movies = df.loc[df['Rating'] == 'G']
x = g_movies['Days']
y = g_movies['DomesticTotalGross']
plt.scatter(x, y)
plt.xlabel('Days')
plt.ylabel('Domestic Total Gross')
plt.title('Domestic Total Gross by Release Day (Rating: G)');
In [23]:
# Release timing vs. domestic gross for PG-rated movies only.
# The rating filter is evaluated once and reused for both axes.
pg_movies = df.loc[df['Rating'] == 'PG']
x = pg_movies['Days']
y = pg_movies['DomesticTotalGross']
plt.scatter(x, y)
plt.xlabel('Days')
plt.ylabel('Domestic Total Gross')
plt.title('Domestic Total Gross by Release Day (Rating: PG)');
In [24]:
# Release timing vs. domestic gross for PG-13-rated movies only.
# The rating filter is evaluated once and reused for both axes.
pg13_movies = df.loc[df['Rating'] == 'PG-13']
x = pg13_movies['Days']
y = pg13_movies['DomesticTotalGross']
plt.scatter(x, y)
plt.xlabel('Days')
plt.ylabel('Domestic Total Gross')
plt.title('Domestic Total Gross by Release Day (Rating: PG-13)');
In [25]:
# Release timing vs. domestic gross for R-rated movies only.
# The rating filter is evaluated once and reused for both axes.
r_movies = df.loc[df['Rating'] == 'R']
x = r_movies['Days']
y = r_movies['DomesticTotalGross']
plt.scatter(x, y)
plt.xlabel('Days')
plt.ylabel('Domestic Total Gross')
plt.title('Domestic Total Gross by Release Day (Rating: R)');
Challenge 5
In [33]:
# Director with the highest mean domestic gross.
# Only the gross column is aggregated: averaging the whole grouped frame
# would also hit the string and datetime columns, which raises TypeError on
# pandas >= 2.0. The double brackets keep the result a one-column DataFrame.
(
    df.groupby('Director')[['DomesticTotalGross']]
    .mean()
    .sort_values('DomesticTotalGross', ascending=False)
    .head(1)
)
Out[33]:
Challenge 6
In [38]:
# Calendar month number of each release, taken with the vectorised `.dt`
# accessor instead of a row-by-row `apply`.
df['Month'] = df['ReleaseDateParsed'].dt.month
In [78]:
# Per-month statistics of the numeric columns.
# Non-numeric columns (titles, directors, parsed dates) are dropped before
# grouping because mean()/std() over them raises TypeError on pandas >= 2.0.
numeric_by_month = df.select_dtypes(include=[np.number]).groupby('Month')
# Mean
m = numeric_by_month.mean()
# Standard Deviation
s = numeric_by_month.std()
# Calculate Standard Error: std / sqrt(number of non-null observations)
n = df.groupby('Month').count()
sem = s.DomesticTotalGross / np.sqrt(n.DomesticTotalGross)
# Months actually present in the data, taken from the aggregated index rather
# than a hard-coded 1..12 list, so bar positions always line up with m and sem.
i = m.index.tolist()
In [83]:
# Mean domestic gross per release month; error bars show the standard error.
# Bar positions come from the aggregated frame's own index so they cannot
# drift out of step with the heights when a month has no releases.
plt.bar(m.index, m.DomesticTotalGross, color='green', yerr=sem)
plt.xticks(m.index)
plt.xlabel('Month')
plt.ylabel('Mean Domestic Total Gross')
plt.title('Mean Domestic Total Gross by Month in 2013');