In [13]:
# %sh
# # echo "Before update: "
# # python --version
# # pip freeze | grep pandas
# # wget http://repo.continuum.io/archive/Anaconda2-4.0.0-Linux-x86_64.sh
# # bash Anaconda2-4.0.0-Linux-x86_64.sh
# # pip install pandas
# echo "After update: "
# python --version
# pip freeze | grep pandas
In [14]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [15]:
# %sh
# wget https://raw.githubusercontent.com/pmk2109/DataQuest/master/Guided%20Project-%20Visualizing%20Pixar\'s%20Roller%20Coaster/PixarMovies.csv
In [16]:
# Read the Pixar dataset from the CSV file in the working directory.
pixar_movies = pd.read_csv(filepath_or_buffer="PixarMovies.csv")
# Last expression of the cell: shows the (rows, columns) tuple.
pixar_movies.shape
Out[16]:
In [17]:
# Preview the first 15 rows of the frame as loaded (before any cleaning).
pixar_movies.head(15)
Out[17]:
In [18]:
# Show the dtype pandas inferred for each column; useful for spotting
# columns that still need conversion before plotting.
pixar_movies.dtypes
Out[18]:
In [19]:
# Summary statistics of the loaded frame. With default arguments describe()
# summarises the numeric columns only, so columns still stored as text at
# this point are left out of the table.
pixar_movies.describe()
Out[19]:
In [20]:
# Data cleaning
# The cleaned columns are derived with .assign() on a new frame instead of
# being written back into pixar_movies. The loaded frame therefore stays
# untouched and this cell can be re-run safely: the original in-place version
# failed on a second run (.str on already-converted float columns) and would
# have multiplied "IMDB Score" by 10 again.
pixar_clean = pixar_movies.assign(**{
    # Strip the trailing "%" sign and convert the remaining text to float.
    "Domestic %": lambda df: df["Domestic %"].str.rstrip("%").astype("float"),
    "International %": lambda df: df["International %"].str.rstrip("%").astype("float"),
    # Rescale by a factor of 10 -- presumably so the column is comparable with
    # the other score columns it is plotted against; TODO confirm the scales.
    "IMDB Score": lambda df: df["IMDB Score"] * 10,
})

# Drop every row that has at least one missing value, then use the "Movie"
# column as the index. Chained (no inplace=True) so a new frame is returned.
filtered_pixar = pixar_clean.dropna().set_index("Movie")

# Check results: row/column counts before and after filtering
print("Pixar movies shape: {0} rows x {1} columns".format(pixar_movies.shape[0], pixar_movies.shape[1]))
print("Filtered Pixar movies shape: {0} rows x {1} columns".format(filtered_pixar.shape[0], filtered_pixar.shape[1]))
filtered_pixar.head(3)
Out[20]:
In [21]:
# Columns holding the review scores to compare.
scores = ["RT Score", "IMDB Score", "Metacritic Score"]
critics_reviews = filtered_pixar[scores]

# Generate line plot: one line per score column, one x position per row of
# filtered_pixar. An explicit Axes is created so the figure can be given a
# title and axis labels and stands on its own when the notebook is skimmed.
fig, ax = plt.subplots(figsize=(10, 6))
critics_reviews.plot(ax=ax)
ax.set_title("Critic review scores by movie")
ax.set_xlabel("Movie")
ax.set_ylabel("Score")
plt.show()
In [22]:
# Generate box plot: one box per column of critics_reviews, drawn on an
# explicit Axes so the figure carries a title and a y-axis label.
fig, ax = plt.subplots(figsize=(10, 6))
critics_reviews.boxplot(ax=ax)
ax.set_title("Distribution of critic review scores")
ax.set_ylabel("Score")
plt.show()
In [23]:
# Columns holding the two revenue percentages of each movie.
revenue = ["Domestic %", "International %"]
revenue_proportions = filtered_pixar[revenue]

# Generate stacked bar plot: one bar per row of filtered_pixar, with the two
# percentage columns stacked on top of each other. Drawn on an explicit Axes
# so the figure can be titled and labelled.
fig, ax = plt.subplots(figsize=(10, 6))
revenue_proportions.plot(kind="bar", stacked=True, ax=ax)
ax.set_title("Domestic vs. international revenue share by movie")
ax.set_xlabel("Movie")
ax.set_ylabel("Share of revenue (%)")
plt.show()
In [ ]: