In [3]:
from __future__ import division
from scipy import stats
import random, pymongo
import sklearn.metrics as metrics
import matplotlib.pyplot as plt
import pandas as pd
import rpy2.robjects as robjects

%matplotlib inline

# Short alias for rpy2's R interpreter object, used to look up R functions
# by name. NOTE(review): the cells shown here call `robjects.r` directly and
# never use this alias -- confirm it is still needed.
r = robjects.r

In [4]:
# Read the CSV keyed on "_unit_id" and keep only the rows whose "confidence"
# exceeds 0.5. The trailing .copy() gives an independent frame rather than
# something tied to the frame that was read.
df = (
    pd.read_csv("final-cf.csv", index_col="_unit_id")
      .query("confidence > 0.5")
      .copy()
)

# The two gender columns that are compared against each other.
# NOTE(review): the variable names suggest "gender-cf" is treated as the
# reference labels and "gender" as the predictions -- confirm.
y_true, y_pred = df["gender-cf"], df["gender"]


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-4-9bd00e794ea8> in <module>()
----> 1 df = pd.read_csv("final-cf.csv", index_col="_unit_id")
      2 
      3 df = df.query("confidence > 0.5").copy()
      4 
      5 y_true = df["gender-cf"]

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in parser_f(filepath_or_buffer, sep, dialect, compression, doublequote, escapechar, quotechar, quoting, skipinitialspace, lineterminator, header, index_col, names, prefix, skiprows, skipfooter, skip_footer, na_values, na_fvalues, true_values, false_values, delimiter, converters, dtype, usecols, engine, delim_whitespace, as_recarray, na_filter, compact_ints, use_unsigned, low_memory, buffer_lines, warn_bad_lines, error_bad_lines, keep_default_na, thousands, comment, decimal, parse_dates, keep_date_col, dayfirst, date_parser, memory_map, float_precision, nrows, iterator, chunksize, verbose, encoding, squeeze, mangle_dupe_cols, tupleize_cols, infer_datetime_format, skip_blank_lines)
    468                     skip_blank_lines=skip_blank_lines)
    469 
--> 470         return _read(filepath_or_buffer, kwds)
    471 
    472     parser_f.__name__ = name

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in _read(filepath_or_buffer, kwds)
    244 
    245     # Create the parser.
--> 246     parser = TextFileReader(filepath_or_buffer, **kwds)
    247 
    248     if (nrows is not None) and (chunksize is not None):

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, f, engine, **kwds)
    560             self.options['has_index_names'] = kwds['has_index_names']
    561 
--> 562         self._make_engine(self.engine)
    563 
    564     def _get_options_with_defaults(self, engine):

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in _make_engine(self, engine)
    697     def _make_engine(self, engine='c'):
    698         if engine == 'c':
--> 699             self._engine = CParserWrapper(self.f, **self.options)
    700         else:
    701             if engine == 'python':

/usr/local/lib/python2.7/site-packages/pandas/io/parsers.pyc in __init__(self, src, **kwds)
   1064         kwds['allow_leading_cols'] = self.index_col is not False
   1065 
-> 1066         self._reader = _parser.TextReader(src, **kwds)
   1067 
   1068         # XXX

pandas/parser.pyx in pandas.parser.TextReader.__cinit__ (pandas/parser.c:3163)()

pandas/parser.pyx in pandas.parser.TextReader._setup_parser_source (pandas/parser.c:5779)()

IOError: File final-cf.csv does not exist

In [ ]:
# Convert the 'questions_total' column of each group into an R integer
# vector so it can be handed to R functions through rpy2.
# NOTE(review): `females` and `males` are not defined in any of the cells
# shown here -- this cell relies on state created elsewhere; confirm where
# they come from before running on a fresh kernel.
females_r, males_r = (
    robjects.IntVector(list(group['questions_total']))
    for group in (females, males)
)

In [ ]:
# Two-sample Wilcoxon test, executed in R, on the two `questions_total`
# vectors (`females_r`, `males_r`).
#   alternative="two.sided" -- written out in full; the previous value "t"
#                              only worked because R partially matches the
#                              argument value against its allowed choices.
#   correct=True            -- apply the continuity correction.
#   exact=False             -- do not compute an exact p-value.
# `result` holds the R object returned by wilcox.test.
wilr = robjects.r['wilcox.test']
result = wilr(females_r, males_r, alternative="two.sided", correct=True, exact=False)

In [ ]:
# Toy example of Cohen's kappa on two short label vectors, evaluated in the
# embedded R session through rpy2.
# This cell previously contained raw R statements, which a Python kernel
# cannot parse; the same R code is now passed to R as a string. The R
# variables x, y, xy.df and ck are still created on the R side.
# NOTE(review): `cohen.kappa` is assumed to come from the R package `psych`,
# which must be installed in the R library -- confirm.
ck = robjects.r('''
    library(psych)
    x <- c("red", "yellow", "blue", "red")
    y <- c("red", "blue", "blue", "red")
    xy.df <- data.frame(x, y)
    ck <- cohen.kappa(xy.df)
    ck
''')

# Show the full result, then only its `agree` component (ck$agree in R).
print(ck)
print(ck.rx2('agree'))

In [ ]: