Deadline for initial analysis: June 15th
In [2]:
# -*- coding: utf-8 -*-
%matplotlib inline
print "importing libraries"
import pandas as pd #for dealing with csv import
import os # for joining paths and filenames sensibly
import matplotlib.pyplot as plt # Matplotlib's pyplot: MATLAB-like syntax
print "loading datafiles"
filename=os.path.join('data','crowdstorm_dataset.csv')
df = pd.read_csv(filename)
print "analysing data"
#This will help http://pandas.pydata.org/pandas-docs/stable/indexing.html
#EXAMPLE: let's look at a single row (ie one player-ref interaction
df.iloc[4]
#EXAMPLE: take a single column of the data file
# nb this is nonsense for a variable like height because each player contributes multple entries (1 per interaction)
#Don't do this for proper analysis, since variable ordering is implicit
#...and probably screwed up by dropping nan values
#tolist() converts from series to list
#dropna() drops nans
height=df['height'].dropna().tolist()
print "plotting"
plt.clf()
plt.hist(height,20)
print "saving plots"
plt.title('Height histogram for '+ str(len(height))+ ' dyads')
plt.xlabel('Height in cm')
plt.ylabel('frequency')
plt.savefig('figs/height_hist.png', dpi=300, facecolor='w', edgecolor='w',
orientation='portrait', papertype=None, format=None,
transparent=False, bbox_inches=None, pad_inches=0.1)
"""
TODO
Calculate an aggregate "red cardness" variable from yellowReds and redCards variables (and yellowCard?)
Calculate an estimated skin colour variable from rater1 and rater2 variables
Are these two related?
"""
In [ ]: