In [2]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
In [3]:
# Load the survey responses into a DataFrame. The path is relative, so the CSV
# must sit in the notebook's working directory.
file_name = "flying-etiquette.csv"
data = pd.read_csv(file_name)
In [3]:
# Preview the first rows of the loaded data to check the columns parsed sensibly.
data.head()
Out[3]:
In [16]:
# Number of respondents per "Household Income" answer (missing answers are
# excluded by value_counts' default dropna=True).
data["Household Income"].value_counts()
Out[16]:
Now let's filter the responses and build some cross-tabulations.
In [26]:
# Group the respondents by census region. NOTE: despite its name,
# `gender_edu_demos` is keyed on "Location (Census Region)", not on gender or
# education.
gender_edu_demos = data.groupby("Location (Census Region)")
# The columns of the per-group first() frame are every column of `data` except
# the grouping key (which becomes the index); these are the candidate questions.
questions = gender_edu_demos.first().columns
In [27]:
# For every question column, compare the normalized answer distribution across
# the groups in `gender_edu_demos`, drawing one horizontal bar chart per question.
for q in questions:
    # Presumably identifier / header columns rather than survey answers; skip them.
    if q in ("RespondentID", "Questions"):
        continue
    # Build the frame from a dict of Series in a single step so the row index is
    # the union of every group's answers. Assigning columns one at a time to an
    # empty DataFrame pins the index to the first group's answers and silently
    # drops any answer that the first group never gave.
    df = pd.DataFrame({
        name: group[q].value_counts(normalize=True)
        for name, group in gender_edu_demos
    }).fillna(0)  # an answer nobody in a group gave has a share of 0
    if df.empty:
        # Nobody answered this question; plotting an empty frame would raise.
        continue
    df.plot(kind="barh", title=q)
    plt.show()
In [8]:
# Select the baby-on-a-plane answers, grouped by how often the respondent flies.
# The "itrude" spelling is the column key used consistently in this notebook
# (presumably mirroring the CSV header) -- do not "correct" it here.
rude_vs_freq = data.groupby("How often do you travel by plane?")["In general, is itrude to bring a baby on a plane?"]
In [9]:
# Raw (not normalized) answer counts for each travel-frequency group, drawn as
# one horizontal bar per (frequency, answer) pair.
rude_vs_freq.value_counts().plot(kind='barh')
Out[9]:
In [33]:
# NOTE(review): create_all_graphs is defined in the helper-functions cell that
# appears later in this notebook; on a fresh kernel that cell must be run before
# this one, otherwise this call raises NameError.
create_all_graphs(data)
In [30]:
# Compare how the Pacific region ("West Coast") answers the baby-on-a-plane
# question against all other census regions pooled together.
q = "In general, is itrude to bring a baby on a plane?"
# Kept for any later cell that may still reference it; not needed below.
travel_data = data.groupby("Location (Census Region)")[q]

region = data["Location (Census Region)"]
is_west = region == "Pacific"
# Respondents with no recorded region are left out of both columns, matching
# what iterating over the groupby did (groupby drops missing keys).
is_rest = region.notna() & ~is_west

# Pool all non-Pacific respondents BEFORE normalizing. The previous loop called
# Series.append, which returns a new Series instead of modifying in place (and
# was removed in pandas 2.0), so "Rest of Nation" only ever contained the first
# non-Pacific region. Building the frame from a dict also aligns both columns
# on the union of answers instead of on whichever column was assigned first.
df = pd.DataFrame({
    "West Coast": data.loc[is_west, q].value_counts(normalize=True),
    "Rest of Nation": data.loc[is_rest, q].value_counts(normalize=True),
}).fillna(0)  # an answer nobody in a column's population gave has a share of 0
df.plot(kind='barh', title=q, figsize=(10.67, 5.33))
#title = "charts/" + "01-" + q.replace(" ","_") + "_results_.png"
#plt.savefig(title,format="png")
In [32]:
## helper functions here ##
def create_all_graphs(data):
    """Plot, save and show an answer-distribution chart for each column of ``data``.

    For every column except "RespondentID" and "Questions", draws a horizontal
    bar chart of ``value_counts(normalize=True)``, saves it as
    ``charts/<column name with spaces replaced by underscores>_results.png``
    and then shows it.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are charted one by one (assumed to hold one survey
        question per column). Column labels must be strings.

    Returns
    -------
    None
    """
    skipped = ("RespondentID", "Questions")
    out_dir = "charts"
    for index, question in enumerate(data.columns):
        if question in skipped:
            continue
        # Select by position with .iloc: DataFrame.icol has been removed from
        # pandas. Positional access also stays unambiguous if labels repeat.
        shares = data.iloc[:, index].value_counts(normalize=True)
        if shares.empty:
            # Column has no answers at all; plotting an empty Series raises.
            continue
        # savefig does not create missing directories, so make sure it exists.
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        shares.plot(kind='barh', title=question, figsize=(10.67, 5.33))
        title = "charts/" + question.replace(" ", "_") + "_results.png"
        plt.savefig(title, format="png")
        plt.show()