In [ ]:
import os
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
In [ ]:
# Pandas options: no limit on the number of displayed columns, and allow up to
# 100000 characters per cell before the text is truncated.
pd.set_option(
    'display.max_columns', None,
    'display.max_colwidth', 100000,
)
In [ ]:
# We'll save plots here
plotdir = "../website/static/img/rsvp"

# Show where the relative path above is resolved from.
print(os.getcwd())

# Create the output directory (and any missing parents) from the same variable
# the plotting helpers use, so the created path and the save path cannot drift
# apart.
if not os.path.isdir(plotdir):
    os.makedirs(plotdir)

# List the parent image directory as a quick sanity check.
print(sorted(os.listdir(os.path.dirname(plotdir))))
In [ ]:
# Seaborn options: use the "ticks" style. set_style() replaces the complete
# set of style parameters, so only the last style selected has any effect.
sns.set_style("ticks")
In [ ]:
# Load results.csv, strip surrounding whitespace and single quotes from the
# column names, and index the rows by the (name, partner) pair.
df = (
    pd.read_csv('results.csv')
    .rename(columns=lambda k: k.strip().strip("'"))
    .set_index(['name', 'partner'])
)
In [ ]:
def disp(col):
    """Return a one-column DataFrame with the non-null entries of ``df[col]``.

    The rows keep their original index; only the requested column is kept.
    """
    column = df[col]
    return pd.DataFrame(column[pd.notnull(column)])
In [ ]:
# Each of the following cells shows the non-null entries of one column via
# disp(); the call is the last expression of its cell so the table is rendered.
disp('marriage_advice')
In [ ]:
disp('marriage_observations')
In [ ]:
disp('impression')
In [ ]:
disp('other')
In [ ]:
disp('tradition')
In [ ]:
disp('next_country')
In [ ]:
disp('book_recommend')
In [ ]:
disp('allergies')
In [ ]:
disp('science_grade')
In [ ]:
def to_numbers(series):
    """Parse a Series of loosely formatted numeric answers into a float array.

    Null entries are skipped. Each remaining entry is handled as follows:

    * real numbers (a column pandas already parsed as numeric) are used as-is;
    * text containing ``,`` is split on commas and every piece is converted;
    * otherwise, text containing ``and`` is split on ``and`` the same way;
    * otherwise, text containing a space contributes only its first token;
    * anything else is converted directly.

    Before conversion each piece has surrounding whitespace removed and then
    any leading/trailing ``$``, ``°``, ``C`` or ``c`` characters stripped.

    Parameters
    ----------
    series : pandas.Series
        The raw answers.

    Returns
    -------
    numpy.ndarray
        All parsed values, in order; one entry may contribute several values.

    Raises
    ------
    ValueError
        If a piece cannot be converted by ``float`` after stripping.
    """
    def parse(token):
        # Whitespace first, then currency/temperature decoration.
        return float(token.strip().strip('$°Cc'))

    vals = []
    for val in series.dropna():
        # Numeric columns yield ints/floats, on which the substring tests
        # below would raise TypeError; they need no parsing anyway.
        if isinstance(val, (int, float, np.number)):
            vals.append(float(val))
            continue
        if ',' in val:
            vals.extend(parse(piece) for piece in val.split(','))
        elif 'and' in val:
            vals.extend(parse(piece) for piece in val.split('and'))
        elif ' ' in val:
            # Keep only the leading token, e.g. the number in "7 years".
            vals.append(parse(val.split()[0]))
        else:
            vals.append(parse(val))
    return np.asarray(vals)
def visualize(dist, bw=0.3, xlabel=None, save=False):
    """Draw a violin plot of ``dist`` with its mean marked and labelled.

    Parameters
    ----------
    dist : array-like with a ``mean()`` method
        The values to plot.
    bw : float
        Passed through to ``sns.violinplot`` as ``bw``.
    xlabel : str, optional
        Label for the x axis; left unset when ``None``.
    save : str or False
        When truthy, the file name (inside ``plotdir``) the figure is saved to.
    """
    accent = '#ac1735'

    sns.violinplot(dist,
                   inner='stick', bw=bw,
                   vert=False, color="coolwarm_r", lw=2)

    # Mark the mean with a vertical line and put its value just to the right.
    mean = dist.mean()
    plt.axvline(mean, lw=2, color=accent)
    plt.text(mean * 1.03, 1.42,
             "Mean: %.2f" % mean,
             fontsize='x-large', color=accent)

    # No y tick marks, and no left spine.
    plt.yticks(())
    if xlabel is not None:
        plt.xlabel(xlabel)
    sns.despine(left=True)
    plt.tight_layout()

    if save:
        target = os.path.join(plotdir, save)
        plt.savefig(target)
In [ ]:
# Work on a copy so the manual corrections below do not alter df.
ages = df['age'].copy()
# Hand-entered replacement values for specific rows, addressed by position.
# .iloc states the positional intent explicitly; a bare integer inside []
# on the (name, partner) index only works through pandas' deprecated
# positional fallback.
ages.iloc[0] = '77,82'
ages.iloc[6] = '32,32'
ages.iloc[7] = '25,26'
ages.iloc[12] = '28,29'
visualize(to_numbers(ages), bw=0.3, xlabel="Age", save="age.svg")
In [ ]:
# Distribution plots, one per cell: each column is parsed with to_numbers()
# and drawn with visualize(). Calls that pass save= also write the figure to
# that file name inside plotdir; the others are only displayed.
visualize(to_numbers(df['temperature']), bw=0.4, xlabel="Ideal temperature", save="temp.svg")
In [ ]:
visualize(to_numbers(df['siblings']), bw=0.3, xlabel="# of siblings", save="siblings.svg")
In [ ]:
visualize(to_numbers(df['places_lived']), bw=0.3, xlabel="# of places lived", save="places.svg")
In [ ]:
visualize(to_numbers(df['cities_lived']), xlabel="# of cities lived")
In [ ]:
visualize(to_numbers(df['broken_bones']), bw=0.4, xlabel="# of broken bones", save="bones.svg")
In [ ]:
visualize(to_numbers(df['shoe_size']), bw=0.4, xlabel="Shoe size")
In [ ]:
# Work on a copy so the manual corrections below do not alter df.
wage = df['wage'].copy()
# Hand-entered corrections for specific rows, addressed by position via
# .iloc (a bare integer inside [] on the (name, partner) index relies on
# pandas' deprecated positional fallback). Row 6 is blanked out so that
# to_numbers() skips it.
wage.iloc[0] = '0.25'
wage.iloc[6] = np.nan
wage.iloc[9] = '8.00'
visualize(to_numbers(wage), bw=0.3, xlabel="Hourly wage of first job", save="wage.svg")
In [ ]:
def bars(counts, na='None', title=None, save=False):
    """Draw a horizontal bar chart of ``counts`` on a 3.5 x 4 inch figure.

    Parameters
    ----------
    counts : pandas.Series
        Values to plot, labelled by the Series index (callers in this file
        pass the result of ``value_counts()``). It is not modified.
    na : label, default 'None'
        Index label to leave out of the chart, if present.
    title : str, optional
        Figure title; left unset when ``None``.
    save : str or False
        When truthy, the file name (inside ``plotdir``) the figure is saved to.
    """
    # Filter the rows to show rather than assigning NaN to ``counts[na]``:
    # that assignment would modify the caller's Series and force an integer
    # count Series to be upcast to float just to hold the NaN.
    shown = counts[counts.index != na].dropna()

    ax = shown.plot(kind="barh")
    # Reverse the y axis so the first row of ``shown`` is drawn at the top.
    ax.invert_yaxis()
    # Restrict x tick positions to whole numbers.
    ax.get_xaxis().set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
    plt.axvline(0, c='k')
    sns.despine()
    plt.gcf().set_size_inches([3.5, 4])
    if title is not None:
        plt.title(title)
    plt.tight_layout()
    if save:
        plt.savefig(os.path.join(plotdir, save))
In [ ]:
# Pool the respondent's and the partner's answers before counting them.
receive_answers = pd.concat([df['my_receive_affection'],
                             df['partner_receive_affection']])
bars(receive_answers.value_counts(),
     na='Prefer not to say',
     title="How I receive affection",
     save="receive.svg")
In [ ]:
# Pool the respondent's and the partner's answers before counting them.
give_answers = pd.concat([df['my_give_affection'],
                          df['partner_give_affection']])
bars(give_answers.value_counts(),
     na='Prefer not to say',
     title="How I give affection",
     save="give.svg")
In [ ]:
# Work on a copy so the manual corrections below do not alter df.
cg = df['card_game'].copy()
# Hand-entered replacement values for specific rows, addressed by position
# via .iloc (a bare integer inside [] on the (name, partner) index relies on
# pandas' deprecated positional fallback).
cg.iloc[3] = 'Euchre'
cg.iloc[15] = 'Dutch Blitz'
bars(cg.value_counts(), title="Card game", save="cardgame.svg")
In [ ]:
# Work on a copy so the manual corrections below do not alter df.
cn = df['car_name'].copy()
# Hand-entered replacement values for specific rows, addressed by position
# via .iloc (a bare integer inside [] on the (name, partner) index relies on
# pandas' deprecated positional fallback).
cn.iloc[5] = "C1"
cn.iloc[13] = "It depends..."
cn.iloc[15] = "It depends..."
bars(cn.value_counts(), title="Car name", save="carname.svg")
In [ ]:
# Bar charts of answer counts, one column per cell; each figure is also
# saved under plotdir with the given file name.
bars(df['morning_night'].value_counts(), title="Time of day", save="timeofday.svg")
In [ ]:
# NOTE(review): the column key is spelled 'accomodation' (single "m") while
# the title and file name use "Accommodation" -- presumably the key matches
# the CSV header; confirm before renaming it.
bars(df['accomodation'].value_counts(), title="Accommodation", save="accommodation.svg")
In [ ]:
bars(df['wine'].value_counts(), title="Wine", save="wine.svg")
In [ ]:
bars(df['hot_beverage'].value_counts(), title="Hot beverage", save="beverage.svg")
In [ ]:
bars(df['bread'].value_counts(), title="Bread", save="bread.svg")
In [ ]:
bars(df['cake'].value_counts(), title="Cake", save="cake.svg")