In [2]:

    
# -*- coding: utf-8 -*-
%matplotlib inline    
print "importing libraries"
import pandas as pd #for dealing with csv import
import os # for joining paths and filenames sensibly
import matplotlib.pyplot as plt  # Matplotlib's pyplot: MATLAB-like syntax

print "loading datafiles"
filename=os.path.join('data','crowdstorm_dataset.csv') 
df = pd.read_csv(filename)

print "analysing data"
#This will help http://pandas.pydata.org/pandas-docs/stable/indexing.html


#EXAMPLE: let's look at a single row (ie one player-ref interaction
df.iloc[4]


#EXAMPLE: take a single column of the data file
# nb this is nonsense for a variable like height because each player contributes multple entries (1 per interaction)

#Don't do this for proper analysis, since variable ordering is implicit
#...and probably screwed up by dropping nan values

#tolist() converts from series to list
#dropna() drops nans


height=df['height'].dropna().tolist()

print "plotting"

plt.clf()
plt.hist(height,20)

print "saving plots"
plt.title('Height histogram for '+ str(len(height))+ ' dyads')
plt.xlabel('Height in cm')
plt.ylabel('frequency')
plt.savefig('figs/height_hist.png', dpi=300, facecolor='w', edgecolor='w',
            orientation='portrait', papertype=None, format=None,
            transparent=False, bbox_inches=None, pad_inches=0.1)
            
            
"""
TODO

Calculate an aggregate "red cardness" variable from yellowReds and redCards variables (and yellowCard?)
Calculate an estimate skin colour variabel from rater1 and rater2 variables

Are these two related?
"""









    



importing libraries
loading datafiles






    



---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-2-5c86c9442f11> in <module>()
      8 print "loading datafiles"
      9 filename=os.path.join('data','crowdstorm_dataset.csv')
---> 10 df = pd.read_csv(filename)
     11 
     12 print "analysing data"

/Library/Python/2.7/site-packages/pandas-0.13.1_213_gc174c3d-py2.7-macosx-10.9-intel.egg/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format)
    419                     infer_datetime_format=infer_datetime_format)
    420 
--> 421         return _read(filepath_or_buffer, kwds)
    422 
    423     parser_f.__name__ = name

/Library/Python/2.7/site-packages/pandas-0.13.1_213_gc174c3d-py2.7-macosx-10.9-intel.egg/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    217 
    218     # Create the parser.
--> 219     parser = TextFileReader(filepath_or_buffer, **kwds)
    220 
    221     if nrows is not None:

/Library/Python/2.7/site-packages/pandas-0.13.1_213_gc174c3d-py2.7-macosx-10.9-intel.egg/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    501             self.options['has_index_names'] = kwds['has_index_names']
    502 
--> 503         self._make_engine(self.engine)
    504 
    505     def _get_options_with_defaults(self, engine):

/Library/Python/2.7/site-packages/pandas-0.13.1_213_gc174c3d-py2.7-macosx-10.9-intel.egg/pandas/io/parsers.pyc in _make_engine(self, engine)
    609     def _make_engine(self, engine='c'):
    610         if engine == 'c':
--> 611             self._engine = CParserWrapper(self.f, **self.options)
    612         else:
    613             if engine == 'python':

/Library/Python/2.7/site-packages/pandas-0.13.1_213_gc174c3d-py2.7-macosx-10.9-intel.egg/pandas/io/parsers.pyc in __init__(self, src, **kwds)
    971         kwds['allow_leading_cols'] = self.index_col is not False
    972 
--> 973         self._reader = _parser.TextReader(src, **kwds)
    974 
    975         # XXX

/Library/Python/2.7/site-packages/pandas-0.13.1_213_gc174c3d-py2.7-macosx-10.9-intel.egg/pandas/parser.so in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3200)()

/Library/Python/2.7/site-packages/pandas-0.13.1_213_gc174c3d-py2.7-macosx-10.9-intel.egg/pandas/parser.so in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:5559)()

IOError: File data/crowdstorm_dataset.csv does not exist

Project: red_card

Crowdstorming analysis, see: https://osf.io/gvm2z/

Research Question 1: Are soccer referees more likely to give red cards to dark skin toned players than light skin toned players?

Research Question 2: Are soccer referees from countries high in skin-tone prejudice more likely to award red cards to dark skin toned players?