In [ ]:
import os
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [ ]:
# Pandas options
# Do not elide columns when a frame is displayed.
pd.set_option('display.max_columns', None)
# Raise the per-cell character limit so long text values are shown in full.
pd.set_option('display.max_colwidth', 100000)

In [ ]:
!pwd
# We'll save plots here
!mkdir -p ../website/static/img/rsvp
!ls ../website/static/img
plotdir = "../website/static/img/rsvp"

In [ ]:
# Seaborn options
# NOTE(review): both calls set the global seaborn style; the second call
# presumably supersedes the first, leaving "ticks" in effect — confirm.
sns.set_style("white")
sns.set_style("ticks")

In [ ]:
# Load results.csv, trim surrounding whitespace and single quotes from the
# column names, then index the rows by (name, partner).
df = (
    pd.read_csv('results.csv')
    .rename(columns=lambda label: label.strip().strip("'"))
    .set_index(['name', 'partner'])
)

Long-form text


In [ ]:
def disp(col):
    """Return a one-column DataFrame with the non-null entries of ``df[col]``.

    Reads the notebook-level ``df``; rows whose value in ``col`` is null are
    left out, and the original row index is kept.
    """
    column = df[col]
    answered = column[column.notnull()]
    return pd.DataFrame(answered)

In [ ]:
# Each cell in this section shows the non-null responses of one column.
disp('marriage_advice')

In [ ]:
disp('marriage_observations')

In [ ]:
disp('impression')

In [ ]:
disp('other')

Short-form text


In [ ]:
# Each cell in this section shows the non-null responses of one column.
disp('tradition')

In [ ]:
disp('next_country')

In [ ]:
disp('book_recommend')

In [ ]:
disp('allergies')

In [ ]:
disp('science_grade')

Numerical


In [ ]:
def to_numbers(series):
    """Flatten a column of loosely formatted answers into a float array.

    Null entries are skipped. Every other entry is read as text and may hold:

    * several values separated by commas (``"25, 26"``),
    * several values separated by the word ``and`` (``"3 and 4"``),
    * a value followed by free text (``"5 or so"``) — only the first
      whitespace-separated token is used,
    * a single value (``"$8.00"``, ``"21°C"``).

    Leading/trailing ``$``, ``°``, ``C`` and ``c`` characters are removed from
    each token before conversion.

    Parameters
    ----------
    series : pandas.Series
        Column of answers; entries may be strings or already-numeric values.

    Returns
    -------
    numpy.ndarray
        1-D float array with one element per parsed value (so an entry such
        as ``"25, 26"`` contributes two elements).

    Raises
    ------
    ValueError
        If a non-empty token cannot be converted to ``float``.
    """
    strip_chars = '$°Cc'
    vals = []
    for val in series.dropna():
        # A column that pandas parsed as numeric yields numbers rather than
        # strings; normalise to text so the substring tests below are valid.
        text = str(val)
        if ',' in text:
            pieces = text.split(',')
        elif 'and' in text:
            pieces = text.split('and')
        else:
            # Covers both "5" and "5 or so": keep only the first token.
            # A blank entry produces no tokens and is skipped.
            pieces = text.split()[:1]
        for piece in pieces:
            token = piece.strip().strip(strip_chars)
            # Skip empty fragments (e.g. from a trailing comma) instead of
            # failing on float('').
            if token:
                vals.append(float(token))
    return np.asarray(vals, dtype=float)

def visualize(dist, bw=0.3, xlabel=None, save=False):
    """Draw a horizontal violin plot of ``dist`` and mark its mean.

    Parameters
    ----------
    dist : array-like with a ``.mean()`` method
        Values to plot (the callers pass the output of ``to_numbers``).
    bw : float
        Bandwidth forwarded to ``sns.violinplot``.
    xlabel : str or None
        Label for the x axis; no label is set when ``None``.
    save : str or False
        File name, relative to the notebook-level ``plotdir``, to write the
        figure to. Any falsy value skips saving.
    """
    mean = dist.mean()
    accent = '#ac1735'

    violin_options = dict(inner='stick', bw=bw,
                          vert=False, color="coolwarm_r", lw=2)
    sns.violinplot(dist, **violin_options)

    # Vertical rule at the mean, with a text label slightly to its right.
    plt.axvline(mean, color=accent, lw=2)
    label = "Mean: %.2f" % mean
    # NOTE(review): y=1.42 presumably places the label above the violin for
    # the seaborn version this was written against — confirm.
    plt.text(mean * 1.03, 1.42, label, color=accent, fontsize='x-large')

    plt.yticks(())
    if xlabel is not None:
        plt.xlabel(xlabel)
    sns.despine(left=True)
    plt.tight_layout()

    if not save:
        return
    plt.savefig(os.path.join(plotdir, save))

In [ ]:
# Manual overrides for individual rows before parsing. Rows are addressed by
# position with .iloc: the frame is indexed by (name, partner), so a bare
# integer key only works through pandas' deprecated positional fallback.
ages = df['age'].copy()
ages.iloc[0] = '77,82'
ages.iloc[6] = '32,32'
ages.iloc[7] = '25,26'
ages.iloc[12] = '28,29'
visualize(to_numbers(ages), bw=0.3, xlabel="Age", save="age.svg")

In [ ]:
# Each cell below parses one column with to_numbers and plots its distribution.
visualize(to_numbers(df['temperature']), bw=0.4, xlabel="Ideal temperature", save="temp.svg")

In [ ]:
visualize(to_numbers(df['siblings']), bw=0.3, xlabel="# of siblings", save="siblings.svg")

In [ ]:
visualize(to_numbers(df['places_lived']), bw=0.3, xlabel="# of places lived", save="places.svg")

In [ ]:
# No `save` argument, so this plot is only displayed, not written to plotdir.
visualize(to_numbers(df['cities_lived']), xlabel="# of cities lived")

In [ ]:
visualize(to_numbers(df['broken_bones']), bw=0.4, xlabel="# of broken bones", save="bones.svg")

In [ ]:
# No `save` argument, so this plot is only displayed, not written to plotdir.
visualize(to_numbers(df['shoe_size']), bw=0.4, xlabel="Shoe size")

In [ ]:
# Manual overrides for individual rows before parsing. Rows are addressed by
# position with .iloc: the frame is indexed by (name, partner), so a bare
# integer key only works through pandas' deprecated positional fallback.
wage = df['wage'].copy()
wage.iloc[0] = '0.25'
# Setting the entry to NaN makes to_numbers skip this row entirely.
wage.iloc[6] = np.nan
wage.iloc[9] = '8.00'
visualize(to_numbers(wage), bw=0.3, xlabel="Hourly wage of first job", save="wage.svg")

Bars


In [ ]:
def bars(counts, na='None', title=None, save=False):
    """Draw a horizontal bar chart of category counts.

    Parameters
    ----------
    counts : pandas.Series
        Counts indexed by category label (the callers pass the result of
        ``value_counts()``). The Series is not modified.
    na : label
        Category to leave out of the chart when present in ``counts.index``.
    title : str or None
        Plot title; no title is set when ``None``.
    save : str or False
        File name, relative to the notebook-level ``plotdir``, to write the
        figure to. Any falsy value skips saving.
    """
    # Remove the excluded category on a new Series. Assigning NaN in place
    # would mutate the caller's object and upcast integer counts to float.
    if na in counts.index:
        counts = counts.drop(na)
    ax = counts.dropna().plot(kind="barh")
    # Bars are drawn bottom-up; invert so the first entry appears at the top.
    ax.invert_yaxis()
    # Counts are whole numbers, so restrict x ticks to integers.
    ax.get_xaxis().set_major_locator(matplotlib.ticker.MaxNLocator(integer=True))
    plt.axvline(0, c='k')
    sns.despine()
    plt.gcf().set_size_inches([3.5, 4])
    if title is not None:
        plt.title(title)
    plt.tight_layout()
    if save:
        plt.savefig(os.path.join(plotdir, save))

In [ ]:
# Pool the `my_*` and `partner_*` columns into one tally; the
# 'Prefer not to say' category is passed as `na` so bars() leaves it out.
bars(pd.concat([df['my_receive_affection'],
                df['partner_receive_affection']]).value_counts(),
     na='Prefer not to say', title="How I receive affection", save="receive.svg")

In [ ]:
# Same pooling as above, for the "give affection" pair of columns.
bars(pd.concat([df['my_give_affection'],
                df['partner_give_affection']]).value_counts(),
     na='Prefer not to say', title="How I give affection", save="give.svg")

In [ ]:
# Manual overrides for individual rows before counting. Rows are addressed by
# position with .iloc: the frame is indexed by (name, partner), so a bare
# integer key only works through pandas' deprecated positional fallback.
cg = df['card_game'].copy()
cg.iloc[3] = 'Euchre'
cg.iloc[15] = 'Dutch Blitz'
bars(cg.value_counts(), title="Card game", save="cardgame.svg")

In [ ]:
# Manual overrides for individual rows before counting. Rows are addressed by
# position with .iloc: the frame is indexed by (name, partner), so a bare
# integer key only works through pandas' deprecated positional fallback.
cn = df['car_name'].copy()
cn.iloc[5] = "C1"
cn.iloc[13] = "It depends..."
cn.iloc[15] = "It depends..."
bars(cn.value_counts(), title="Car name", save="carname.svg")

In [ ]:
# Each cell below tallies one column and saves the bar chart into plotdir.
bars(df['morning_night'].value_counts(), title="Time of day", save="timeofday.svg")

In [ ]:
# NOTE(review): the column key is spelled 'accomodation' (single "m");
# presumably that matches the CSV header — confirm before renaming.
bars(df['accomodation'].value_counts(), title="Accommodation", save="accommodation.svg")

In [ ]:
bars(df['wine'].value_counts(), title="Wine", save="wine.svg")

In [ ]:
bars(df['hot_beverage'].value_counts(), title="Hot beverage", save="beverage.svg")

In [ ]:
bars(df['bread'].value_counts(), title="Bread", save="bread.svg")

In [ ]:
bars(df['cake'].value_counts(), title="Cake", save="cake.svg")