In [1]:
    
import pandas as pd
# Read the raw survey export into a DataFrame. The file is decoded as
# ISO-8859-1 (Latin-1) instead of pandas' default UTF-8.
star_wars = pd.read_csv(
    "../data/GP08/star_wars.csv",
    encoding="ISO-8859-1",
)
    
In [2]:
    
# Keep only the rows that carry a respondent id; rows where it is missing are dropped.
star_wars = star_wars.dropna(subset=["RespondentID"])
    
In [3]:
    
# Preview the first rows of the survey data after the RespondentID filter.
star_wars.head()
    
    Out[3]:
In [4]:
    
# List every column label; the positional slices used in later cells index into this.
star_wars.columns
    
    Out[4]:
In [5]:
    
# Turn the two Yes/No survey questions into booleans. Any answer that is
# neither "Yes" nor "No" (a missing answer included) has no entry in the
# mapping and therefore comes out of Series.map as NaN.
yes_no = {"Yes": True, "No": False}
yes_no_questions = [
    "Have you seen any of the 6 films in the Star Wars franchise?",
    "Do you consider yourself to be a fan of the Star Wars film franchise?",
]
for col in yes_no_questions:
    answers = star_wars[col]
    star_wars[col] = answers.map(yes_no)

# Show the first rows so the converted columns can be inspected.
star_wars.head()
    
    Out[5]:
In [6]:
    
import numpy as np
# Each "seen" checkbox column holds the film's title when the respondent
# ticked it and NaN otherwise. Map every title to True and NaN to False.
film_titles = [
    "Star Wars: Episode I  The Phantom Menace",
    "Star Wars: Episode II  Attack of the Clones",
    "Star Wars: Episode III  Revenge of the Sith",
    "Star Wars: Episode IV  A New Hope",
    "Star Wars: Episode V The Empire Strikes Back",
    "Star Wars: Episode VI Return of the Jedi",
]
movie_mapping = {title: True for title in film_titles}
movie_mapping[np.nan] = False

# Columns at positions 3-8 are the six checkbox columns.
seen_columns = star_wars.columns[3:9]
for col in seen_columns:
    star_wars[col] = star_wars[col].map(movie_mapping)
    
In [7]:
    
# Give the six "which films have you seen" checkbox columns short names,
# seen_1 through seen_6, in their existing left-to-right order.
seen_old_names = [
    "Which of the following Star Wars films have you seen? Please select all that apply.",
    "Unnamed: 4",
    "Unnamed: 5",
    "Unnamed: 6",
    "Unnamed: 7",
    "Unnamed: 8",
]
seen_new_names = ["seen_{}".format(number) for number in range(1, 7)]
star_wars = star_wars.rename(columns=dict(zip(seen_old_names, seen_new_names)))

# Show the first rows to confirm the new column names.
star_wars.head()
    
    Out[7]:
In [8]:
    
# Give the six film-ranking columns short names, ranking_1 through
# ranking_6, in their existing left-to-right order.
ranking_old_names = [
    "Please rank the Star Wars films in order of preference with 1 being your favorite film in the franchise and 6 being your least favorite film.",
    "Unnamed: 10",
    "Unnamed: 11",
    "Unnamed: 12",
    "Unnamed: 13",
    "Unnamed: 14",
]
ranking_new_names = ["ranking_{}".format(number) for number in range(1, 7)]
star_wars = star_wars.rename(columns=dict(zip(ranking_old_names, ranking_new_names)))

# Show the first rows to confirm the new column names.
star_wars.head()
    
    Out[8]:
In [9]:
    
# Cast the six ranking columns (positions 9-14) to float so they can be averaged.
ranking_cols = star_wars.columns[9:15]
star_wars[ranking_cols] = star_wars[ranking_cols].astype(float)
    
In [10]:
    
# Average of each of the six ranking columns (positions 9-14).
star_wars.iloc[:, 9:15].mean()
    
    Out[10]:
In [11]:
    
%matplotlib inline
import matplotlib.pyplot as plt
# Average rank given to each film. The survey asked for 1 = favorite and
# 6 = least favorite, so a SHORTER bar means a better-liked film.
ranking_means = star_wars[star_wars.columns[9:15]].mean()
bar_positions = list(range(len(ranking_means)))

fig, ax = plt.subplots()
ax.bar(bar_positions, ranking_means)
# Label every bar with its source column so the films can be told apart.
ax.set_xticks(bar_positions)
ax.set_xticklabels(ranking_means.index)
ax.set_xlabel("Ranking column")
ax.set_ylabel("Mean rank (1 = favorite, 6 = least favorite)")
ax.set_title("Average ranking per film")
# plt.show() renders the figure without echoing the bar-container repr.
plt.show()
    
    Out[11]:
    
In [12]:
    
# Number of True values in each of the six "seen" columns (positions 3-8),
# i.e. how many respondents ticked each film.
star_wars.iloc[:, 3:9].sum()
    
    Out[12]:
In [13]:
    
# How many respondents ticked each film: summing a "seen" column counts
# its True values.
seen_counts = star_wars[star_wars.columns[3:9]].sum()
bar_positions = list(range(len(seen_counts)))

fig, ax = plt.subplots()
ax.bar(bar_positions, seen_counts)
# Label every bar with its source column so the films can be told apart.
ax.set_xticks(bar_positions)
ax.set_xticklabels(seen_counts.index)
ax.set_xlabel("Seen column")
ax.set_ylabel("Respondents who have seen the film")
ax.set_title("Number of respondents who have seen each film")
# plt.show() renders the figure without echoing the bar-container repr.
plt.show()
    
    Out[13]:
    
In [14]:
    
# Partition respondents by the value recorded in the "Gender" column.
# Rows whose value is neither "Male" nor "Female" land in neither subset.
gender = star_wars["Gender"]
males = star_wars[gender == "Male"]
females = star_wars[gender == "Female"]
    
In [15]:
    
# Average film ranking for each gender subset, one figure per group
# (males first, then females). The survey asked for 1 = favorite and
# 6 = least favorite, so a SHORTER bar means a better-liked film.
for group_label, group in (("Male", males), ("Female", females)):
    group_means = group[group.columns[9:15]].mean()
    bar_positions = list(range(len(group_means)))

    fig, ax = plt.subplots()
    ax.bar(bar_positions, group_means)
    # Label every bar with its source column so the films can be told apart.
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(group_means.index)
    ax.set_xlabel("Ranking column")
    ax.set_ylabel("Mean rank (1 = favorite, 6 = least favorite)")
    # The title is what distinguishes the two otherwise identical charts.
    ax.set_title("Average ranking per film - {} respondents".format(group_label))
    plt.show()
    
    
    
In [16]:
    
# Number of respondents in each gender subset who ticked each film, one
# figure per group (males first, then females). These are raw counts, so
# the two charts are only directly comparable if the groups are similar in size.
for group_label, group in (("Male", males), ("Female", females)):
    group_counts = group[group.columns[3:9]].sum()
    bar_positions = list(range(len(group_counts)))

    fig, ax = plt.subplots()
    ax.bar(bar_positions, group_counts)
    # Label every bar with its source column so the films can be told apart.
    ax.set_xticks(bar_positions)
    ax.set_xticklabels(group_counts.index)
    ax.set_xlabel("Seen column")
    ax.set_ylabel("Respondents who have seen the film")
    # The title is what distinguishes the two otherwise identical charts.
    ax.set_title("Films seen - {} respondents".format(group_label))
    plt.show()
    
    
    
In [ ]: