Psych 45: Semantic memory demo stats


In [37]:
%matplotlib inline
import os

import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide seaborn styling: 'ticks' axes style with the large 'poster'
# context, font scale left at 1.
sns.set(style='ticks', context='poster', font_scale=1)

In [38]:
# Location of the exported ratings CSV. Defaults to the original author's local
# path; set the PSYCH45_DATA_CSV environment variable to point somewhere else
# without editing the notebook.
DATA_CSV = os.environ.get(
    'PSYCH45_DATA_CSV',
    '/Users/Pam_sf_wang/Documents/Psych45/Psych45_semantic_demo_temp.csv')

# The file has two header rows (category, then item), read as a two-level
# column index; the first column (the submission timestamp) becomes the row
# index.
data = pd.read_csv(DATA_CSV, index_col=[0], header=[0, 1],
                   skipinitialspace=True)
data.head()


Out[38]:
fruit sport bird vehicle ... crime vegetable Unnamed: 19_level_0 Unnamed: 20_level_0 Unnamed: 21_level_0 Unnamed: 22_level_0
Timestamp apple fig strawberry football hockey wrestling ostrich wren robin boat ... embezzling murder vagrancy parsley carrot onion Unnamed: 19_level_1 Unnamed: 20_level_1 Unnamed: 21_level_1 Unnamed: 22_level_1
4/26/16 11:47 1 4 2 1 1 5 5 2 1 3 ... 4 2 6 6 2 3 NaN NaN NaN NaN
4/28/16 11:04 2 6 2 1 2 2 2 5 3 2 ... 2 1 2 3 1 1 NaN NaN NaN NaN
4/28/16 11:05 1 3 1 1 1 2 2 1 1 2 ... 2 1 5 1 1 2 NaN NaN NaN NaN
4/28/16 11:06 1 1 1 1 1 1 3 1 1 1 ... 1 1 7 7 1 1 NaN NaN NaN NaN
4/28/16 11:24 1 2 1 2 2 2 1 2 1 1 ... 2 1 2 4 1 3 NaN NaN NaN NaN

5 rows × 22 columns


In [39]:
# Report how many students have responded so far: the number of non-missing
# entries in the first rating column. The parenthesised print form and the
# explicit positional `.iloc[0]` keep this cell working on both Python 2 and
# Python 3 and avoid relying on `[]` falling back to positional lookup.
n_students = data.count().iloc[0]
print('We currently have data from ' + str(n_students) + ' students.')


We currently have data from 340 students.

In [42]:
# Reshape the wide (category, item) x respondent table into long form: one row
# per single rating. reset_index() produces the labels 'level_0', 'Timestamp'
# and 'level_2' for the three index levels, which are renamed to what they
# actually hold.
df = (
    data.unstack()
        .reset_index(name='rating')
        .rename(columns={'level_0': 'category',
                         'Timestamp': 'item',
                         'level_2': 'timestamp'})
)

# Keep only rows with a finite rating (drops the NaN entries, e.g. those coming
# from the empty "Unnamed" columns of the CSV).
has_rating = np.isfinite(df['rating'])
df = df.loc[has_rating]
df.head()


Out[42]:
category item timestamp rating
0 fruit apple 4/26/16 11:47 1
1 fruit apple 4/28/16 11:04 2
2 fruit apple 4/28/16 11:05 1
3 fruit apple 4/28/16 11:06 1
4 fruit apple 4/28/16 11:24 1

In [43]:
# Distinct categories in order of first appearance in the long table; the
# summary figure draws one panel per entry.
category_list = df['category'].unique()
category_list


Out[43]:
array(['fruit', 'sport', 'bird', 'vehicle', 'crime', 'vegetable'], dtype=object)

In [44]:



Out[44]:
array(['fruit', 'sport', 'bird', 'vehicle', 'crime', 'vegetable'], dtype=object)

In [45]:
# Summary figure: one panel per category showing the mean rating of each item
# with a 95% confidence interval. All panels share the rating (y) axis.
f, axes = plt.subplots(ncols=len(category_list), figsize=(15, 3), sharey=True)
plt.locator_params(nbins=5)

# plt.subplots returns a bare Axes rather than an array when there is only one
# column; normalise so the zip below also works for a single category.
axes = np.atleast_1d(axes)

for ax, category in zip(axes, category_list):
    # Filter once and reuse for both the plot and the tick labels.
    category_df = df.loc[df['category'] == category]
    # Items in order of first appearance. Passing this as `order` guarantees
    # the plotted positions and the tick labels below stay in sync.
    items = list(category_df['item'].unique())

    # Dashed reference line at a rating of 1.
    ax.hlines(y=1, xmin=-1, xmax=4, linestyles='dashed', colors='green')

    # `jitter` and `alpha` are not pointplot parameters: older seaborn ignored
    # them and newer releases reject unknown keywords, so they are omitted.
    sns.pointplot(x='item', y='rating', order=items,
                  ci=95, palette=['darkgray'],
                  data=category_df, ax=ax)

    ax.set_title(category)
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.set_xticklabels(items, rotation=90)

# A single y-axis label for the whole figure instead of one per panel.
f.text(0.07, 0.5, 'Rating', va='center', rotation='vertical', fontsize='xx-large')
sns.despine()



In [19]:
# Individual fruit ratings: one large, translucent, jittered marker per
# response, with a fixed item order and one colour per item.
fruit_order = ['apple', 'strawberry', 'fig']
fruit_colors = ['limegreen', 'hotpink', 'mediumpurple']
fruit_ratings = df.loc[df.category == 'fruit']

f, ax = plt.subplots(ncols=1, figsize=(10, 4), sharey=True)
g = sns.stripplot(
    x='item', y='rating',
    data=fruit_ratings,
    order=fruit_order,
    palette=fruit_colors,
    jitter=True, alpha=.1, size=12, linewidth=1,
    ax=ax,
)
g.set_xlabel('')


Out[19]:
<matplotlib.text.Text at 0x110a1e250>

In [20]:
# Individual sport ratings: one large, translucent, jittered marker per
# response, with a fixed item order and one colour per item.
sport_order = ['football', 'hockey', 'wrestling']
sport_colors = ['peru', 'black', 'blue']
sport_ratings = df.loc[df.category == 'sport']

f, ax = plt.subplots(ncols=1, figsize=(10, 4), sharey=True)
g = sns.stripplot(
    x='item', y='rating',
    data=sport_ratings,
    order=sport_order,
    palette=sport_colors,
    jitter=True, alpha=.1, size=12, linewidth=1,
    ax=ax,
)
g.set_xlabel('')


Out[20]:
<matplotlib.text.Text at 0x110a4c950>

In [21]:
# Individual vehicle ratings: one large, translucent, jittered marker per
# response, with a fixed item order and one colour per item.
vehicle_order = ['car', 'boat', 'tricycle']
vehicle_colors = ['gray', 'deepskyblue', 'crimson']
vehicle_ratings = df.loc[df.category == 'vehicle']

f, ax = plt.subplots(ncols=1, figsize=(10, 4), sharey=True)
g = sns.stripplot(
    x='item', y='rating',
    data=vehicle_ratings,
    order=vehicle_order,
    palette=vehicle_colors,
    jitter=True, alpha=.1, size=12, linewidth=1,
    ax=ax,
)
g.set_xlabel('')


Out[21]:
<matplotlib.text.Text at 0x110a07850>

In [ ]: