In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [3]:
# Path of the survey CSV; it is relative, so it is resolved against the
# notebook's working directory.
file_name = "flying-etiquette.csv"

# Load the responses into a DataFrame that the remaining cells read from.
data = pd.read_csv(file_name)

In [3]:
# Preview the first rows to sanity-check the loaded columns.
data.head()


Out[3]:
RespondentID How often do you travel by plane? Do you ever recline your seat when you fly? How tall are you? Do you have any children under 18? In a row of three seats, who should get to use the two arm rests? In a row of two seats, who should get to use the middle arm rest? Who should have control over the window shade? Is itrude to move to an unsold seat on a plane? Generally speaking, is it rude to say more than a few words tothe stranger sitting next to you on a plane? ... Is itrude to wake a passenger up if you are trying to walk around? In general, is itrude to bring a baby on a plane? In general, is it rude to knowingly bring unruly children on a plane? Have you ever used personal electronics during take off or landing in violation of a flight attendant's direction? Have you ever smoked a cigarette in an airplane bathroom when it was against the rules? Gender Age Household Income Education Location (Census Region)
0 3436139758 Once a year or less NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 3434278696 Once a year or less About half the time 6'3" Yes The arm rests should be shared The arm rests should be shared Everyone in the row should have some say No, not rude at all No, not at all rude ... No, not at all rude No, not at all rude No, not at all rude No No Male 30-44 NaN Graduate degree Pacific
2 3434275578 Once a year or less Usually 5'8" No Whoever puts their arm on the arm rest first The arm rests should be shared The person in the window seat should have excl... No, not rude at all No, not at all rude ... Yes, somewhat rude Yes, somewhat rude Yes, very rude No No Male 30-44 $100,000 - $149,999 Bachelor degree Pacific
3 3434268208 Once a year or less Always 5'11" No The arm rests should be shared The arm rests should be shared Everyone in the row should have some say No, not rude at all No, not at all rude ... Yes, somewhat rude Yes, somewhat rude Yes, very rude No No Male 30-44 $0 - $24,999 Bachelor degree Pacific
4 3434250245 Once a month or less About half the time 5'7" No The person in the middle seat gets both arm rests The person in aisle Everyone in the row should have some say No, not rude at all No, not at all rude ... Yes, somewhat rude Yes, somewhat rude Yes, very rude Yes No Male 30-44 $50,000 - $99,999 Bachelor degree Pacific

5 rows × 27 columns


In [16]:
# Count how many respondents gave each "Household Income" answer.
# NOTE(review): the output saved with this cell lists age brackets (e.g. "45-60"),
# not income ranges, so it looks stale relative to this code -- re-run to confirm.
data["Household Income"].value_counts()


Out[16]:
45-60    275
> 60     258
30-44    254
18-29    220
dtype: int64

Now let's apply some filters and build cross-tabulations of the responses.


In [26]:
# Group the respondents by census region.
# NOTE(review): the name `gender_edu_demos` suggests gender/education groups,
# but the grouping key used here is the location column.
gender_edu_demos = data.groupby("Location (Census Region)")
# Column labels of the per-group first() frame, i.e. the columns other than the
# grouping key; the plotting loop iterates over these.
questions = gender_edu_demos.first().columns

In [27]:
# For every survey question, chart the share of each answer within each
# census region (one horizontal bar group per answer, one bar per region).
for q in questions:
    if q not in ["RespondentID","Questions"]:
        # Build the frame from a dict of Series so the row index is the union
        # of the answers seen in any region. Assigning columns one at a time
        # to an empty DataFrame pins the index to the first region's answers
        # and silently drops every answer that region never gave.
        df = pd.DataFrame({
            name: group[q].value_counts(normalize=True)
            for name, group in gender_edu_demos
        })

        df.plot(kind="barh", title=q)
        plt.show()



In [8]:
# Select the "bring a baby" answers, grouped by how often the respondent flies.
rude_vs_freq = data.groupby("How often do you travel by plane?")["In general, is itrude to bring a baby on a plane?"]

In [9]:
# Horizontal bar chart of the raw (not normalized) answer counts within each
# travel-frequency group.
rude_vs_freq.value_counts().plot(kind='barh')


Out[9]:
<matplotlib.axes.AxesSubplot at 0x107eaa4d0>

In [33]:
# NOTE(review): create_all_graphs is defined in a later cell of this notebook,
# so on a fresh top-to-bottom run this call raises NameError unless the helper
# cell is executed first -- consider moving the definition above this cell.
create_all_graphs(data)



In [30]:
# Compare how "Pacific" respondents answer one question against respondents
# from every other census region combined.
q = "In general, is itrude to bring a baby on a plane?"
travel_data = data.groupby("Location (Census Region)")[q]

# Pool the raw answers into the two buckets first and normalize afterwards.
# The previous version called Series.append and discarded its return value
# (append is not in-place), so "Rest of Nation" only ever held the first
# non-Pacific region; it was also aligned to the West Coast index.
answers_by_bucket = {"West Coast": [], "Rest of Nation": []}
for location, group in travel_data:
    bucket = "West Coast" if location == "Pacific" else "Rest of Nation"
    answers_by_bucket[bucket].append(group)

# A dict of Series yields a row index that is the union of both answer sets.
df = pd.DataFrame({
    bucket: pd.concat(groups).value_counts(normalize=True)
    for bucket, groups in answers_by_bucket.items()
    if groups  # pd.concat rejects an empty list; leave that column out instead
})

df.plot(kind='barh',title=q, figsize=(10.67,5.33))
#title = "charts/" + "01-" + q.replace(" ","_") + "_results_.png"
#plt.savefig(title,format="png")



In [32]:
## helper functions here ##
def create_all_graphs(data, output_dir="charts"):
    """Plot, save and show the answer distribution of every survey column.

    For each column of ``data`` except "RespondentID" and "Questions", draws a
    horizontal bar chart of the normalized value counts, saves it as
    ``<output_dir>/<column name with spaces replaced by underscores>_results.png``
    and then displays it.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are charted one by one.
    output_dir : str, optional
        Directory the PNG files are written to; created when missing.
        Defaults to "charts", the location used previously.
    """
    # savefig does not create missing directories, so make sure it exists.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    for index, question in enumerate(data.columns):
        if question not in ["RespondentID","Questions"]:
            # Positional column access via .iloc; DataFrame.icol is no longer
            # available in current pandas releases.
            answers = data.iloc[:, index]
            answers.value_counts(normalize=True).plot(kind='barh',title=question, figsize=(10.67,5.33) )
            title = os.path.join(output_dir, question.replace(" ","_") + "_results.png")
            plt.savefig(title,format="png")
            plt.show()