Psych 45: Semantic memory demo stats


In [9]:
%matplotlib inline

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy as sp
# Global seaborn theme for every figure below: tick-style axes at poster scale.
sns.set(context='poster', style='ticks', font_scale=1)

In [11]:
# Read the class responses straight from the course site. The first two CSV
# rows become a two-level column header and the first column becomes the row
# index.
# NOTE(review): the recorded run of this cell ended in "HTTP Error 404: Not
# Found" -- confirm the file is still hosted at this URL before re-running.
data = pd.read_csv(
    'http://web.stanford.edu/class/psych45/demos/Semantic_memory.csv',
    header=[0, 1],
    index_col=[0],
    skipinitialspace=True
)
data.head()


---------------------------------------------------------------------------
HTTPError                                 Traceback (most recent call last)
<ipython-input-11-2881ea899176> in <module>()
      1 data = pd.read_csv('http://web.stanford.edu/class/psych45/demos/Semantic_memory.csv', index_col=[0],
----> 2                    header=[0, 1], skipinitialspace=True)
      3 data.head()

//anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    472                     skip_blank_lines=skip_blank_lines)
    473 
--> 474         return _read(filepath_or_buffer, kwds)
    475 
    476     parser_f.__name__ = name

//anaconda/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    236 
    237     filepath_or_buffer, _ = get_filepath_or_buffer(filepath_or_buffer,
--> 238                                                    encoding)
    239 
    240     if kwds.get('date_parser', None) is not None:

//anaconda/lib/python2.7/site-packages/pandas/io/common.pyc in get_filepath_or_buffer(filepath_or_buffer, encoding)
    135 
    136     if _is_url(filepath_or_buffer):
--> 137         req = _urlopen(str(filepath_or_buffer))
    138         return maybe_read_encoded_stream(req, encoding)
    139 

//anaconda/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    152     else:
    153         opener = _opener
--> 154     return opener.open(url, data, timeout)
    155 
    156 def install_opener(opener):

//anaconda/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    435         for processor in self.process_response.get(protocol, []):
    436             meth = getattr(processor, meth_name)
--> 437             response = meth(req, response)
    438 
    439         return response

//anaconda/lib/python2.7/urllib2.pyc in http_response(self, request, response)
    548         if not (200 <= code < 300):
    549             response = self.parent.error(
--> 550                 'http', request, response, code, msg, hdrs)
    551 
    552         return response

//anaconda/lib/python2.7/urllib2.pyc in error(self, proto, *args)
    473         if http_err:
    474             args = (dict, 'default', 'http_error_default') + orig_args
--> 475             return self._call_chain(*args)
    476 
    477 # XXX probably also want an abstract factory that knows when it makes

//anaconda/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    407             func = getattr(handler, meth_name)
    408 
--> 409             result = func(*args)
    410             if result is not None:
    411                 return result

//anaconda/lib/python2.7/urllib2.pyc in http_error_default(self, req, fp, code, msg, hdrs)
    556 class HTTPDefaultErrorHandler(BaseHandler):
    557     def http_error_default(self, req, fp, code, msg, hdrs):
--> 558         raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
    559 
    560 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 404: Not Found

In [4]:
# Report how many responses have been collected so far: the number of
# non-missing entries in the first column of `data`.
# - `.iloc[0]` selects that first count by position explicitly instead of
#   relying on an integer key falling back to positional lookup.
# - A single parenthesised argument prints the same text whether `print` is the
#   Python 2 statement or the Python 3 function.
print('We currently have data from ' + str(data.count().iloc[0]) + ' students.')


We currently have data from 125 students.

In [5]:
# Reshape the wide table into long form -- one row per column-pair / row-label
# combination with its value in `rating` -- then give the index-derived columns
# readable names.
df = (
    data
    .unstack()
    .reset_index(name='rating')
    .rename(columns={'level_0': 'category',
                     'Timestamp': 'item',
                     'level_2': 'timestamp'})
)
df.head()


Out[5]:
category item timestamp rating
0 fruit apple 4/26/16 11:47 1
1 fruit apple 4/28/16 11:04 2
2 fruit apple 4/28/16 11:05 1
3 fruit apple 4/28/16 11:06 1
4 fruit apple 4/28/16 11:24 1

In [6]:
# Distinct category labels, in order of first appearance in `df`.
category_list = df['category'].unique()

In [7]:
# One panel per category, sharing the y-axis: the mean rating of every item in
# that category with a 95% confidence interval.
# `squeeze=False` + `ravel()` keeps `axes` a 1-D array even when there is only
# a single category, so the loop below never receives a bare Axes object.
f, axes = plt.subplots(ncols=len(category_list), figsize=(15, 3), sharey=True,
                       squeeze=False)
axes = axes.ravel()
plt.locator_params(nbins=5)

for ax, category in zip(axes, category_list):
    # Subset once per panel; it feeds both the plot and the tick labels.
    category_df = df.loc[df['category'] == category]

    # Dashed green reference line at a rating of 1.
    ax.hlines(y=1, xmin=-1, xmax=4, linestyles='dashed', colors='green')

    # `jitter` and `alpha` are intentionally not passed here: they are not
    # pointplot parameters (they belong to strip-style plots), and seaborn
    # releases that forward extra keywords to the line artist would reject
    # `jitter`.
    g = sns.pointplot(x='item', y='rating', ax=ax,
                      ci=95, palette=['darkgray'],
                      data=category_df)
    g.set_title(category)
    g.set_ylabel('')
    g.set_xlabel('')
    g.set_xticklabels(category_df['item'].unique(), rotation=90)

# A single figure-level y label stands in for the per-panel labels cleared above.
f.text(0.07, 0.5, 'Rating', va='center', rotation='vertical', fontsize='xx-large')
sns.despine()



In [34]:
# Every individual 'fruit' rating as a jittered, mostly transparent point, so
# that overlapping responses build up visible density. Items are drawn in a
# fixed order, one colour per item.
f, ax = plt.subplots(ncols=1, figsize=(10, 4), sharey=True)
g = sns.stripplot(
    x='item', y='rating',
    data=df.loc[df['category'] == 'fruit'],
    order=['apple', 'strawberry', 'fig'],
    palette=['limegreen', 'hotpink', 'mediumpurple'],
    jitter=True, alpha=0.1, size=12, linewidth=1,
    ax=ax
)
g.set_xlabel('')


Out[34]:
<matplotlib.text.Text at 0x115b0b5d0>

In [35]:
# Every individual 'sport' rating as a jittered, mostly transparent point, so
# that overlapping responses build up visible density. Items are drawn in a
# fixed order, one colour per item.
f, ax = plt.subplots(ncols=1, figsize=(10, 4), sharey=True)
g = sns.stripplot(
    x='item', y='rating',
    data=df.loc[df['category'] == 'sport'],
    order=['football', 'hockey', 'wrestling'],
    palette=['peru', 'black', 'blue'],
    jitter=True, alpha=0.1, size=12, linewidth=1,
    ax=ax
)
g.set_xlabel('')


Out[35]:
<matplotlib.text.Text at 0x115b2ee10>

In [40]:
# Every individual 'vehicle' rating as a jittered, mostly transparent point, so
# that overlapping responses build up visible density. Items are drawn in a
# fixed order, one colour per item.
f, ax = plt.subplots(ncols=1, figsize=(10, 4), sharey=True)
g = sns.stripplot(
    x='item', y='rating',
    data=df.loc[df['category'] == 'vehicle'],
    order=['car', 'boat', 'tricycle'],
    palette=['gray', 'deepskyblue', 'crimson'],
    jitter=True, alpha=0.1, size=12, linewidth=1,
    ax=ax
)
g.set_xlabel('')


Out[40]:
<matplotlib.text.Text at 0x114f9dc90>

In [ ]: