In [2]:
%%html
<!-- Renders a heading link that shows or hides the notebook's code-input areas (div.input). -->
<script type="text/javascript">
// Visibility flag for the code inputs. Assigned without `var`, so it lives
// on the global object and persists between clicks.
show = true;

// Flip the visibility of every `div.input` element, then invert the flag.
// Assumes jQuery (`$`) is already available on the page.
function toggle() {
  var inputs = $('div.input');
  if (show) {
    inputs.hide();
  } else {
    inputs.show();
  }
  show = !show;
}
</script>
<h2><a href="javascript:toggle()" target="_self">Click to toggle code input</a></h2>
In [3]:
%pylab inline
import numpy as np
from numpy.random import randn
import pandas as pd
from scipy import stats
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import Series, DataFrame
In [5]:
# Load the two source tables.
# institutes: '-' entries are read as missing values and 'DateStarted' is parsed as dates.
institutes = pd.read_csv(
    'data/institutes.csv',
    na_values='-',
    parse_dates=['DateStarted']
)
# users: indexed by the 'User' column, with 'DateUser' parsed as dates.
users = pd.read_csv(
    'data/users.csv',
    index_col='User',
    parse_dates=['DateUser']
)
# Preview the first rows of the users table.
users.head()
Out[5]:
In [6]:
# Rank users by vote count, largest first. method='first' breaks ties by
# order of appearance, so every user gets a distinct rank.
users['Rank'] = users['Votes'].rank(ascending=False, method='first')
In [11]:
# Group the users table by each user's 'Affiliation' value.
affiliations = users.groupby('Affiliation')
In [24]:
# Index the institutes table by institute name.
# Guarded so that re-running this cell is a no-op: once 'Institute' has been
# moved into the index it is no longer a column, and calling set_index on it
# a second time would raise KeyError.
if institutes.index.name != 'Institute':
    institutes = institutes.set_index('Institute')
In [25]:
# Number of user rows per affiliation. Assignment aligns this series with the
# index of `institutes`, so index labels with no matching affiliation get NaN.
members_per_affiliation = affiliations.size()
institutes['membership'] = members_per_affiliation
In [34]:
# Joint distribution of an institute's 'Users' and 'Votes' columns, with the
# axes clipped to the region of interest.
# x/y are passed by keyword: current seaborn releases no longer accept the
# data variables positionally, while older releases accept both forms.
sns.jointplot(x='Users', y='Votes', data=institutes, kind='scatter',
              xlim=[0, 60], ylim=[0, 2000])
Out[34]:
In [35]:
# "Meanness" of an institute: the 'Down' column divided by the 'Votes' column.
# The .div method is used rather than `/`, which on Python 2 can
# floor-divide integer columns.
institutes['meanness'] = institutes['Down'].div(institutes['Votes'])
In [46]:
# Joint distribution of institute vote counts against the 'meanness' column.
# x/y are passed by keyword: current seaborn releases no longer accept the
# data variables positionally, while older releases accept both forms.
sns.jointplot(x='Votes', y='meanness', data=institutes,
              xlim=[100, 5000], ylim=[0, 0.2])

# Boolean mask selecting institutes with more than 2000 votes and a meanness
# above 0.14.
mean_place = (institutes.Votes > 2000) & (institutes.meanness > 0.14)

# Print the index label of the first matching institute. Raises IndexError if
# no institute satisfies both thresholds.
# Single-argument print(...) produces the same output on Python 2 and 3.
print(institutes.index[mean_place].values[0])
Out[46]:
In [50]:
# Per-user "meanness": the 'Down' column divided by the 'Votes' column.
# The .div method is used rather than `/`, which on Python 2 can
# floor-divide integer columns.
users['meanness'] = users['Down'].div(users['Votes'])
# Preview the table with the new column.
users.head()
Out[50]:
In [62]:
from __future__ import division
tot_peeps = users.Votes[users.Votes > 30].count()
print tot_peeps
neg_voters = (users.Down > 0) & (users.Votes > 30)
mean_peeps = np.sum(neg_voters)
print mean_peeps
print mean_peeps/tot_peeps
sns.jointplot('Votes', 'Down', data=users[neg_voters], xlim=[20, 1000], ylim=[0, 20])
Out[62]: