VoxCharta Part II

Stats by institution

Michael Gully-Santiago

December 17, 2014

This notebook looks for differences between institutions in how VoxCharta is used.


In [2]:
%%html
<script type="text/javascript">
    // Global flag: true while the notebook's code-input areas are visible.
    show = true;

    // Hide the code inputs if they are showing, reveal them otherwise,
    // then flip the flag so the next click does the opposite.
    function toggle() {
        var inputs = $('div.input');
        show ? inputs.hide() : inputs.show();
        show = !show;
    }
</script>
<h2><a href="javascript:toggle()" target="_self">Click to toggle code input</a></h2>


Click to toggle code input


In [3]:
%pylab inline
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import Series, DataFrame


Populating the interactive namespace from numpy and matplotlib

In [5]:
# Institute-level table: a bare '-' in the CSV is read as missing, and
# 'DateStarted' is parsed as a datetime column.
institutes = pd.read_csv(
    'data/institutes.csv',
    parse_dates=['DateStarted'],
    na_values='-',
)

# User-level table, indexed by the 'User' column, with 'DateUser' parsed as a datetime.
users = pd.read_csv(
    'data/users.csv',
    index_col='User',
    parse_dates=['DateUser'],
)

# Preview the first few user rows.
users.head()


Out[5]:
Votes Up Down Comments Posts DateUser Affiliation
User
tomo 2348 2348 0 0 0 2012-07-05 NTHU
Reiko 1902 1902 0 21 1 2011-11-13 AIfA-Cosmo
Franz Bauer 1384 1382 2 0 0 2011-03-26 PUC-Institute of Astrophysics
Brian Siana 1361 1361 0 0 0 2010-08-18 UCR
Andrew Zentner 1214 1212 2 0 3 2011-02-15 U. Pittsburgh

In [6]:
# Rank users by vote count, largest first (rank 1 = most votes).
# method='first' breaks ties by order of appearance, so every rank is distinct.
users['Rank'] = users['Votes'].rank(ascending=False, method='first')

In [11]:
# Group the user table by each user's 'Affiliation' label for per-institution statistics.
affiliations = users.groupby('Affiliation')

In [24]:
# Index the institutes table by the 'Institute' column.
# Guarded so that re-running this cell is a no-op: once 'Institute' has become
# the index it is no longer a column, and an unguarded set_index would raise KeyError.
if 'Institute' in institutes.columns:
    institutes = institutes.set_index('Institute')

In [25]:
# Number of user rows per affiliation label.
users_per_affiliation = affiliations.size()

# Column assignment aligns on the institutes index; institutes whose index label
# has no matching affiliation are left as NaN.
institutes['membership'] = users_per_affiliation

In [34]:
# Joint distribution of the 'Users' and 'Votes' columns, one point per institute.
# x/y are passed by keyword: newer seaborn releases make them keyword-only, so
# the positional form raises TypeError there, while keywords work on old and new versions.
sns.jointplot(x='Users', y='Votes', data=institutes, kind='scatter', xlim=[0, 60], ylim=[0, 2000])


Out[34]:
<seaborn.axisgrid.JointGrid at 0x115c371d0>

In [35]:
# "Meanness" = Down / Votes for each institute.
# .div() is kept (rather than a bare `/`) because it performs true division
# regardless of the interpreter's division semantics.
institutes['meanness'] = institutes['Down'].div(institutes['Votes'])

In [46]:
# Down/Votes ratio against total votes, one point per institute.
# x/y are passed by keyword so the call works on seaborn versions where they are keyword-only.
sns.jointplot(x='Votes', y='meanness', data=institutes, xlim=[100, 5000], ylim=[0, 0.2])

# Institutes with more than 2000 votes and a Down/Votes ratio above 0.14.
mean_place = (institutes.Votes > 2000) & (institutes.meanness > 0.14)

# Name of the first matching institute (raises IndexError if nothing matches).
# Single-argument print(...) runs unchanged on Python 2 and Python 3.
print(institutes.index[mean_place].values[0])


unaffiliated
Out[46]:
'\nplt.annotate(institutes.index[mean_place].values[0], \n             xy=(institutes[\'Votes\'][mean_place], institutes[\'meanness\'][mean_place]), \n             textcoords=\'offset points\',\n             fontsize=16.0,\n             arrowprops=dict(arrowstyle="fancy", #linestyle="dashed",\n                color="0.5",\n                shrinkB=9,\n                connectionstyle="arc3,rad=0.3",\n                ),\n            )\n'

In [50]:
# Per-user "meanness" = Down / Votes, using .div() for true division.
users['meanness'] = users['Down'].div(users['Votes'])

# Preview the table with the newly added column.
users.head()


Out[50]:
Votes Up Down Comments Posts DateUser Affiliation Rank meanness
User
tomo 2348 2348 0 0 0 2012-07-05 NTHU 1 0.000000
Reiko 1902 1902 0 21 1 2011-11-13 AIfA-Cosmo 2 0.000000
Franz Bauer 1384 1382 2 0 0 2011-03-26 PUC-Institute of Astrophysics 3 0.001445
Brian Siana 1361 1361 0 0 0 2010-08-18 UCR 4 0.000000
Andrew Zentner 1214 1212 2 0 3 2011-02-15 U. Pittsburgh 5 0.001647

In [62]:
from __future__ import division
tot_peeps = users.Votes[users.Votes > 30].count()
print tot_peeps
neg_voters = (users.Down > 0) & (users.Votes > 30)
mean_peeps = np.sum(neg_voters)
print mean_peeps
print mean_peeps/tot_peeps

sns.jointplot('Votes', 'Down', data=users[neg_voters], xlim=[20, 1000], ylim=[0, 20])


813
94
0.115621156212
Out[62]:
<seaborn.axisgrid.JointGrid at 0x123073850>