Community: SuperUser

Initialization and data import are at the end of this notebook. They were placed at the bottom so the analysis is easier to read, but they must be run first for the analysis to work as expected. Click here to go there.

Data Summary

Women



In [4]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column of the female users.
females.describe()









    Out[4]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       788.000000
       788.000000
       788.000000
       303.000000
       788.000000
       788.000000
         788.000000
    
    
      mean
         0.165580
         1.648477
         5.181472
        -0.021532
         1.914466
         2.753807
         238.482234
    
    
      std
         0.309572
        11.771632
        32.435824
         0.884182
         9.667593
         7.462421
         727.567127
    
    
      min
         0.000000
         0.000000
         0.000000
        -1.581139
        -4.000000
         0.000000
          51.000000
    
    
      25%
         0.000000
         0.000000
         0.000000
        -0.811107
         0.000000
         0.000000
         101.000000
    
    
      50%
         0.000000
         0.000000
         1.000000
        -0.214010
         0.333333
         1.000000
         116.000000
    
    
      75%
         0.222222
         1.000000
         2.000000
         0.707107
         1.909382
         3.000000
         156.000000
    
    
      max
         1.000000
       273.000000
       761.000000
         2.626129
       236.000000
       102.000000
       15164.000000



In [5]:

    
# Per-column median for the female users.
females.median()









    Out[5]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               1.000000
mean_utility               -0.214010
questions_avg               0.333333
questions_total             1.000000
reputation                116.000000
dtype: float64

Men



In [6]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column of the male users.
males.describe()









    Out[6]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       18264.000000
       18264.000000
       18264.000000
       7859.000000
       18264.000000
       18264.000000
       18264.000000
    
    
      mean
           0.179639
           1.698806
           5.264838
          0.033028
           1.571798
           2.522120
         267.312308
    
    
      std
           0.317351
          16.896761
          42.707281
          0.869367
          11.400197
           6.882908
        1473.342909
    
    
      min
           0.000000
           0.000000
           0.000000
         -2.000000
          -5.000000
           0.000000
          50.000000
    
    
      25%
           0.000000
           0.000000
           0.000000
         -0.707107
           0.000000
           0.000000
         101.000000
    
    
      50%
           0.000000
           0.000000
           1.000000
         -0.068199
           0.333333
           1.000000
         118.000000
    
    
      75%
           0.272727
           1.000000
           2.000000
          0.712290
           1.857143
           2.000000
         170.000000
    
    
      max
           1.000000
        1031.000000
        2717.000000
          3.844818
        1213.000000
         249.000000
       99647.000000



In [7]:

    
# Per-column median for the male users.
males.median()









    Out[7]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               1.000000
mean_utility               -0.068199
questions_avg               0.333333
questions_total             1.000000
reputation                118.000000
dtype: float64

Top contributors



In [9]:

    
# Reputation histograms for both genders: the full range first, then zoomed
# in on users with reputation <= 1000 and <= 400.
pyplot.close('all')
histogram(females["reputation"], males["reputation"], 100, "Reputation")
for reputation_cap in (1000, 400):
    histogram(
        females.loc[females["reputation"] <= reputation_cap, "reputation"],
        males.loc[males["reputation"] <= reputation_cap, "reputation"],
        100,
        "Reputation",
    )
pyplot.show()

Top Women



In [10]:

    
# "Top" women: reputation strictly above 400.
top_females = females.loc[females["reputation"] > 400]
top_females.describe()









    Out[10]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       61.000000
        61.000000
        61.000000
       27.000000
        61.000000
        61.000000
          61.000000
    
    
      mean
        0.377280
        16.672131
        49.967213
        0.301585
         7.987154
        12.229508
        1487.803279
    
    
      std
        0.241225
        39.498828
       107.280313
        0.547925
        31.902247
        20.407020
        2274.934327
    
    
      min
        0.000000
         0.000000
         0.000000
       -0.529357
         0.000000
         0.000000
         411.000000
    
    
      25%
        0.222222
         2.000000
         6.000000
        0.000165
         0.000000
         0.000000
         517.000000
    
    
      50%
        0.344828
         4.000000
        18.000000
        0.184230
         2.000000
         4.000000
         726.000000
    
    
      75%
        0.500000
        15.000000
        45.000000
        0.616961
         3.500000
        16.000000
        1285.000000
    
    
      max
        1.000000
       273.000000
       761.000000
        1.413399
       236.000000
       102.000000
       15164.000000



In [11]:

    
# Per-column median for the top female contributors.
top_females.median()









    Out[11]:





accepted_rate               0.344828
answers_accepted_total      4.000000
answers_total              18.000000
mean_utility                0.184230
questions_avg               2.000000
questions_total             4.000000
reputation                726.000000
dtype: float64

Top Men



In [12]:

    
# "Top" men: reputation strictly above 400.
top_males = males.loc[males["reputation"] > 400]
top_males.describe()









    Out[12]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       1525.000000
       1525.000000
       1525.000000
       705.000000
       1525.000000
       1525.000000
        1525.000000
    
    
      mean
          0.340457
         15.854426
         46.017705
         0.311329
          4.718238
         10.862295
        1690.312787
    
    
      std
          0.209724
         56.517607
        141.280692
         0.699377
         32.919486
         19.735440
        4874.398334
    
    
      min
          0.000000
          0.000000
          0.000000
        -1.138071
         -5.000000
          0.000000
         401.000000
    
    
      25%
          0.200000
          2.000000
          7.000000
        -0.120996
          0.000000
          1.000000
         511.000000
    
    
      50%
          0.333333
          5.000000
         16.000000
         0.206316
          2.000000
          4.000000
         713.000000
    
    
      75%
          0.444444
         11.000000
         38.000000
         0.657500
          4.000000
         13.000000
        1346.000000
    
    
      max
          1.000000
       1031.000000
       2717.000000
         3.170435
       1213.000000
        249.000000
       99647.000000



In [13]:

    
# Per-column median for the top male contributors.
top_males.median()









    Out[13]:





accepted_rate               0.333333
answers_accepted_total      5.000000
answers_total              16.000000
mean_utility                0.206316
questions_avg               2.000000
questions_total             4.000000
reputation                713.000000
dtype: float64

Common women contributors



In [14]:

    
# "Common" women: reputation of at most 400.
common_females = females.loc[females["reputation"] <= 400]
common_females.describe()









    Out[14]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       727.000000
       727.000000
       727.000000
       276.000000
       727.000000
       727.000000
       727.000000
    
    
      mean
         0.147817
         0.387895
         1.423659
        -0.053141
         1.404928
         1.958735
       133.656121
    
    
      std
         0.308215
         0.848442
         2.578656
         0.904928
         3.720874
         4.215732
        63.030995
    
    
      min
         0.000000
         0.000000
         0.000000
        -1.581139
        -4.000000
         0.000000
        51.000000
    
    
      25%
         0.000000
         0.000000
         0.000000
        -0.853553
         0.000000
         0.000000
       101.000000
    
    
      50%
         0.000000
         0.000000
         1.000000
        -0.325780
         0.000000
         1.000000
       111.000000
    
    
      75%
         0.000000
         0.000000
         2.000000
         0.707107
         1.500000
         2.000000
       141.000000
    
    
      max
         1.000000
         6.000000
        24.000000
         2.626129
        63.000000
        64.000000
       400.000000



In [15]:

    
# Per-column median for the common female contributors.
common_females.median()









    Out[15]:





accepted_rate               0.00000
answers_accepted_total      0.00000
answers_total               1.00000
mean_utility               -0.32578
questions_avg               0.00000
questions_total             1.00000
reputation                111.00000
dtype: float64

Common men contributors



In [16]:

    
# "Common" men: reputation of at most 400.
common_males = males.loc[males["reputation"] <= 400]
common_males.describe()









    Out[16]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       16739.000000
       16739.000000
       16739.000000
       7154.000000
       16739.000000
       16739.000000
       16739.000000
    
    
      mean
           0.164988
           0.409164
           1.552064
          0.005602
           1.285143
           1.762292
         137.670410
    
    
      std
           0.321421
           0.869438
           2.757249
          0.879639
           6.492368
           3.051703
          62.310956
    
    
      min
           0.000000
           0.000000
           0.000000
         -2.000000
          -4.000000
           0.000000
          50.000000
    
    
      25%
           0.000000
           0.000000
           0.000000
         -0.721378
           0.000000
           0.000000
         101.000000
    
    
      50%
           0.000000
           0.000000
           1.000000
         -0.146447
           0.000000
           1.000000
         115.000000
    
    
      75%
           0.166667
           1.000000
           2.000000
          0.737146
           1.500000
           2.000000
         150.000000
    
    
      max
           1.000000
          17.000000
          45.000000
          3.844818
         743.000000
          47.000000
         400.000000



In [17]:

    
# Per-column median for the common male contributors.
common_males.median()









    Out[17]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               1.000000
mean_utility               -0.146447
questions_avg               0.000000
questions_total             1.000000
reputation                115.000000
dtype: float64

Second Question: Are the contributions made by both genders perceived by the community as having the same quality?

Hypothesis 1: Both genders have the same acceptance rate.

H0: acceptanceRate(Males) = acceptanceRate(Females);

H1: acceptanceRate(Males) != acceptanceRate(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we leave out users who didn't post any answers.



In [18]:

    
# Accepted rate of the users who posted at least one answer.
females_acc_rate = females.loc[females['answers_total'] > 0, 'accepted_rate']
males_acc_rate = males.loc[males['answers_total'] > 0, 'accepted_rate']

The data's shape



In [19]:

    
# Shape diagnostics for both samples; the QQ plots use "norm" as reference distribution.
show_data_shape(
    sample1=females_acc_rate,
    sample2=males_acc_rate,
    dist="norm",
    bins=30,
    aspect="Accepted Rate",
)









    












    












    












    












    



Levene's test:  0.859952344621
Skewness for Females:  0.962686576701
Skewness for Males:  0.984564339402

Hypothesis test



In [20]:

    
# p-values of the two-sample tests on the accepted-rate samples.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(females_acc_rate, males_acc_rate)[1]))
print("%s %s" % ("Two-sample unpaired t-test: ",
                 stats.ttest_ind(females_acc_rate, males_acc_rate)[1]))
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(females_acc_rate, males_acc_rate,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.999985911864
Two-sample unpaired t-test:  0.837588594046
Two-sample Mann Whitney U test:  0.902526177263

Looking at the top contributors



In [21]:

    
# Accepted rate of the top contributors who posted at least one answer.
top_females_acc_rate = top_females.loc[top_females['answers_total'] > 0, 'accepted_rate']
top_males_acc_rate = top_males.loc[top_males['answers_total'] > 0, 'accepted_rate']

The data's shape



In [22]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=top_females_acc_rate,
    sample2=top_males_acc_rate,
    dist="expon",
    bins=30,
    aspect="Accepted Rate",
)









    












    












    












    












    



Levene's test:  0.421231233001
Skewness for Females:  1.04377695226
Skewness for Males:  0.88917740164

Hypothesis test



In [23]:

    
# p-values of the two-sample tests on the top contributors' accepted rates.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(top_females_acc_rate, top_males_acc_rate)[1]))
# print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_acc_rate, top_males_acc_rate, equal_var=False)[1]
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(top_females_acc_rate, top_males_acc_rate,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.517712004393
Two-sample Mann Whitney U test:  0.199668144595

Looking at the common contributors



In [24]:

    
# Accepted rate of the common contributors who posted at least one answer.
common_females_acc_rate = common_females.loc[common_females['answers_total'] > 0, 'accepted_rate']
common_males_acc_rate = common_males.loc[common_males['answers_total'] > 0, 'accepted_rate']

The data's shape



In [25]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=common_females_acc_rate,
    sample2=common_males_acc_rate,
    dist="expon",
    bins=30,
    aspect="Accepted Rate",
)









    












    












    












    












    



Levene's test:  0.888485773203
Skewness for Females:  1.02395148841
Skewness for Males:  1.00897523968

Hypothesis test



In [26]:

    
# p-values of the two-sample tests on the common contributors' accepted rates.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(common_females_acc_rate, common_males_acc_rate)[1]))
print("%s %s" % ("Two-sample unpaired t-test: ",
                 stats.ttest_ind(common_females_acc_rate, common_males_acc_rate)[1]))
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(common_females_acc_rate, common_males_acc_rate,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.999976750102
Two-sample unpaired t-test:  0.888485773201
Two-sample Mann Whitney U test:  0.882075787725

Hypothesis 2: The mean utility of the answer for each user is the same between genders.

H0: meanUtilityBy(Males) = meanUtilityBy(Females);

H1: meanUtilityBy(Males) != meanUtilityBy(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we leave out users who didn't post any answers.



In [27]:

    
# Mean utility per user; users without a value (NaN) are left out.
females_mean_utility = females.loc[females['mean_utility'].notnull(), 'mean_utility']
males_mean_utility = males.loc[males['mean_utility'].notnull(), 'mean_utility']

The data's shape



In [28]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=females_mean_utility,
    sample2=males_mean_utility,
    dist="expon",
    bins=30,
    aspect="Mean Utility Answers",
)









    












    












    












    












    



Levene's test:  0.610969055971
Skewness for Females:  0.49164056925
Skewness for Males:  0.464702622392

Hypothesis test



In [30]:

    
# p-values of the two-sample tests on the mean-utility samples.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(females_mean_utility, males_mean_utility)[1]))
print("%s %s" % ("Two-sample unpaired t-test: ",
                 stats.ttest_ind(females_mean_utility, males_mean_utility)[1]))
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(females_mean_utility, males_mean_utility,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.263044672617
Two-sample unpaired t-test:  0.284080186296
Two-sample Mann Whitney U test:  0.222665770297

Looking at the top contributors



In [31]:

    
# Mean utility per top contributor; users without a value (NaN) are left out.
top_females_mean_utility = top_females.loc[top_females['mean_utility'].notnull(), 'mean_utility']
top_males_mean_utility = top_males.loc[top_males['mean_utility'].notnull(), 'mean_utility']

The data's shape



In [32]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=top_females_mean_utility,
    sample2=top_males_mean_utility,
    dist="expon",
    bins=30,
    aspect="Mean Utility Answers",
)









    












    












    












    












    



Levene's test:  0.250648647929
Skewness for Females:  0.603584133533
Skewness for Males:  0.946775902152

Hypothesis test



In [33]:

    
# p-values of the two-sample tests on the top contributors' mean utility.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(top_females_mean_utility, top_males_mean_utility)[1]))
# print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_mean_utility, top_males_mean_utility, equal_var=False)[1]
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(top_females_mean_utility, top_males_mean_utility,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.858087125685
Two-sample Mann Whitney U test:  0.930901063935

Looking at the common contributors



In [34]:

    
# Mean utility per common contributor; users without a value (NaN) are left out.
common_females_mean_utility = common_females.loc[common_females['mean_utility'].notnull(), 'mean_utility']
common_males_mean_utility = common_males.loc[common_males['mean_utility'].notnull(), 'mean_utility']

The data's shape



In [35]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=common_females_mean_utility,
    sample2=common_males_mean_utility,
    dist="expon",
    bins=30,
    aspect="Mean Utility Answers",
)









    












    












    












    












    



Levene's test:  0.669413702501
Skewness for Females:  0.554597546791
Skewness for Males:  0.477422213681

Hypothesis test



In [36]:

    
# p-values of the two-sample tests on the common contributors' mean utility.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(common_females_mean_utility, common_males_mean_utility)[1]))
print("%s %s" % ("Two-sample unpaired t-test: ",
                 stats.ttest_ind(common_females_mean_utility, common_males_mean_utility)[1]))
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(common_females_mean_utility, common_males_mean_utility,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.143978331
Two-sample unpaired t-test:  0.276862026631
Two-sample Mann Whitney U test:  0.171205762895

Hypothesis 3: The mean of the scores of the questions made by each user doesn't change between genders.

H0: questionScoreMean(Males) = questionScoreMean(Females);

H1: questionScoreMean(Males) != questionScoreMean(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we leave out users who didn't post any questions.



In [37]:

    
# Average question score of the users who posted at least one question.
females_questions_mean = females.loc[females['questions_total'] > 0, 'questions_avg']
males_questions_mean = males.loc[males['questions_total'] > 0, 'questions_avg']

The shape of the data



In [38]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=females_questions_mean,
    sample2=males_questions_mean,
    dist="expon",
    bins=30,
    aspect="Mean Score Questions",
)









    












    












    












    












    



Levene's test:  0.384390776645
Skewness for Females:  15.9799781413
Skewness for Males:  66.0843131017

Hypothesis test



In [39]:

    
# p-values of the two-sample tests on the question-score samples.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(females_questions_mean, males_questions_mean)[1]))
print("%s %s" % ("Two-sample unpaired t-test: ",
                 stats.ttest_ind(females_questions_mean, males_questions_mean)[1]))
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(females_questions_mean, males_questions_mean,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.235373755179
Two-sample unpaired t-test:  0.483082872243
Two-sample Mann Whitney U test:  0.307601673266

Looking at the top contributors



In [40]:

    
# Average question score of the top contributors who posted at least one question.
top_females_questions_mean = top_females.loc[top_females['questions_total'] > 0, 'questions_avg']
top_males_questions_mean = top_males.loc[top_males['questions_total'] > 0, 'questions_avg']

The data's shape



In [41]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=top_females_questions_mean,
    sample2=top_males_questions_mean,
    dist="expon",
    bins=30,
    aspect="Mean Score Questions",
)









    












    












    












    












    



Levene's test:  0.436436983966
Skewness for Females:  5.47090033262
Skewness for Males:  28.9467519731

Hypothesis test



In [42]:

    
# p-values of the two-sample tests on the top contributors' question scores.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(top_females_questions_mean, top_males_questions_mean)[1]))
# print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_questions_mean, top_males_questions_mean, equal_var=False)[1]
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(top_females_questions_mean, top_males_questions_mean,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.553323276216
Two-sample Mann Whitney U test:  0.629538542561

Looking at the common contributors



In [43]:

    
# Average question score of the common contributors who posted at least one question.
common_females_questions_mean = common_females.loc[common_females['questions_total'] > 0, 'questions_avg']
common_males_questions_mean = common_males.loc[common_males['questions_total'] > 0, 'questions_avg']

The data's shape



In [44]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=common_females_questions_mean,
    sample2=common_males_questions_mean,
    dist="expon",
    bins=30,
    aspect="Mean Score Questions",
)









    












    












    












    












    



Levene's test:  0.562164833112
Skewness for Females:  7.41428196711
Skewness for Males:  73.6346541163

Hypothesis test



In [45]:

    
# p-values of the two-sample tests on the common contributors' question scores.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(common_females_questions_mean, common_males_questions_mean)[1]))
print("%s %s" % ("Two-sample unpaired t-test: ",
                 stats.ttest_ind(common_females_questions_mean, common_males_questions_mean)[1]))
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(common_females_questions_mean, common_males_questions_mean,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.193728907933
Two-sample unpaired t-test:  0.783415213503
Two-sample Mann Whitney U test:  0.268812655017

Hypothesis 4: Reputation is the same between genders.

H0: reputation(Males) = reputation(Females);

H1: reputation(Males) != reputation(Females).



In [46]:

    
# Reputation of every user, per gender (no filtering).
females_reputation = females.loc[:, 'reputation']
males_reputation = males.loc[:, 'reputation']



In [47]:

    
# Shape diagnostics for both samples; the QQ plots use "expon" as reference distribution.
show_data_shape(
    sample1=females_reputation,
    sample2=males_reputation,
    dist="expon",
    bins=50,
    aspect="Reputation",
)









    












    












    












    












    



Levene's test:  0.618743396293
Skewness for Females:  14.0647577589
Skewness for Males:  40.8431733142

Hypothesis test



In [48]:

    
# p-values of the two-sample tests on the reputation samples.
# Each print receives one pre-formatted string, so the cell prints the same
# text on Python 2 and Python 3.
print("%s %s" % ("Two-sample Kolmogorov-Smirnov test: ",
                 stats.ks_2samp(females_reputation, males_reputation)[1]))
print("%s %s" % ("Two-sample unpaired t-test: ",
                 stats.ttest_ind(females_reputation, males_reputation)[1]))
# Request the two-sided p-value explicitly (needs SciPy >= 0.17) instead of
# doubling the result: newer SciPy already defaults to two-sided, where
# `2 * p` would overstate the p-value.
print("%s %s" % ("Two-sample Mann Whitney U test: ",
                 stats.mannwhitneyu(females_reputation, males_reputation,
                                    alternative='two-sided')[1]))









    



Two-sample Kolmogorov-Smirnov test:  0.0507162704314
Two-sample unpaired t-test:  0.584785773801
Two-sample Mann Whitney U test:  0.0171976083892

Initialization

Here you can find the data importing and some useful functions used for analysing the data. Please, run this first, otherwise the analysis will not work.

Importing the data from the MongoDB database and loading it into a pandas DataFrame for easy manipulation.



In [1]:

    
from __future__ import division
import pymongo, time, pylab, numpy, pandas
from scipy import stats
import matplotlib as mpl
from matplotlib import pyplot

%matplotlib inline

# Connect to the local MongoDB server and select the per-user statistics
# collection of the community under analysis.
community = 'superuser'
client = pymongo.MongoClient('localhost', 27017)
stats_db = client[community].statistics

# Only users with at least one question, answer or comment.
active_users_filter = {'$or': [
    {'questions_total': {'$gt': 0}},
    {'answers_total': {'$gt': 0}},
    {'comments_total': {'$gt': 0}},
]}

# Fields to fetch for each user; the MongoDB _id is excluded.
wanted_fields = {
    u'_id': False,
    u'accepted_rate': True,
    u'reputation': True,
    u'questions_avg': True,
    u'answers_total': True,
    u'gender': True,
    u'questions_total': True,
    u'answers_accepted_total': True,
    u'mean_utility': True,
}

cursor = stats_db.find(active_users_filter, wanted_fields)

# Materialize the whole result set into one DataFrame, then split it by gender.
df = pandas.DataFrame(list(cursor))

males = df[df['gender'] == 'Male']
females = df[df['gender'] == 'Female']

Utility functions for plotting.



In [2]:

    
# Reset matplotlib's rc settings to their defaults, then apply the 'ggplot' style.
pyplot.rcdefaults()
mpl.style.use('ggplot')

def histogram(sample1, sample2, bins, aspect):
    """Draw side-by-side histograms of the two samples.

    sample1 goes in the left ("Females") panel and sample2 in the right
    ("Males") panel. `bins` is passed straight to Axes.hist and `aspect` is
    the text that prefixes each panel title. The figure is created but not
    shown; the caller is expected to call pyplot.show().
    """
    axes = pyplot.subplots(nrows=1, ncols=2, figsize=(15,7))[1]
    panels = (
        (sample1, " by Females - Histogram"),
        (sample2, " by Males - Histogram"),
    )
    for axis, (values, title_suffix) in zip(axes, panels):
        axis.hist(list(values), bins)
        axis.set_title(aspect + title_suffix)

def pdf_plot(sample1, sample2, aspect):
    """Draw side-by-side density plots of the two samples.

    Both samples must offer a pandas-style ``.plot(ax=..., kind="density")``
    method. sample1 goes in the left ("Females") panel and sample2 in the
    right ("Males") panel; `aspect` prefixes each panel title. The figure is
    created but not shown.
    """
    _fig, (left_ax, right_ax) = pyplot.subplots(nrows=1, ncols=2, figsize=(15,7))

    # Keep whatever axes object the plot call hands back, then title it.
    left_ax = sample1.plot(ax=left_ax, kind="density")
    right_ax = sample2.plot(ax=right_ax, kind="density")

    left_ax.set_title(aspect + " by Females - Density")
    right_ax.set_title(aspect + " by Males - Density")

    
    
def boxplot(sample1, sample2, aspect):
    """Draw side-by-side boxplots of the two samples.

    sample1 goes in the left ("Females") panel and sample2 in the right
    ("Males") panel; `aspect` prefixes each panel title. The figure is
    created but not shown.
    """
    axes = pyplot.subplots(nrows=1, ncols=2, figsize=(15,7))[1]
    panels = (
        (sample1, " by Females - Boxplot"),
        (sample2, " by Males - Boxplot"),
    )
    for axis, (values, title_suffix) in zip(axes, panels):
        axis.boxplot(list(values))
        axis.set_title(aspect + title_suffix)
    

def qq_plot(sample1, sample2, distribution, aspect):
    """Draw side-by-side probability (QQ) plots of the two samples.

    Each sample is plotted with scipy.stats.probplot against the theoretical
    distribution named by `distribution` (it is passed as probplot's `dist`
    and appended to the panel title, so it must be a string). sample1 goes in
    the left ("Females") panel and sample2 in the right ("Males") panel;
    `aspect` prefixes each panel title. The figure is created but not shown.
    """
    fig, axes = pyplot.subplots(nrows=1, ncols=2, figsize=(15,7))

    # probplot draws through the pyplot state machine, so make the target
    # panel the current axes explicitly. pyplot.subplot(12x) is avoided here:
    # depending on the matplotlib version it may create a new axes instead of
    # reusing the one from subplots(), leaving the title set below on an axes
    # that is no longer drawn.
    pyplot.sca(axes[0])
    stats.probplot(list(sample1), dist=distribution, plot=pyplot)
    # Set after probplot so it replaces the title probplot puts on the panel.
    axes[0].set_title(aspect + " by Females - QQPlot "+ distribution)

    pyplot.sca(axes[1])
    stats.probplot(list(sample2), dist=distribution, plot=pyplot)
    # "Males" (plural) to match the titles of every other plot helper.
    axes[1].set_title(aspect + " by Males - QQPlot "+ distribution)

Utility functions for describing the data.



In [3]:

    
def describe(sample1, sample2):
    """Print summary statistics and the median of each sample.

    Both samples must offer pandas-style ``.describe()`` and ``.median()``
    methods. The output for sample1 comes first, then a blank line, then the
    output for sample2. Nothing is returned.

    Every print call receives a single pre-formatted argument, so the text is
    the same on Python 2 (where this prints exactly what the former print
    statements did) and Python 3.
    """
    print(sample1.describe())
    print("%s %s" % ("Median: ", sample1.median()))
    print("")
    print(sample2.describe())
    print("%s %s" % ("Median: ", sample2.median()))
    
def show_data_shape(sample1, sample2, dist, bins, aspect):
    """Plot and print shape diagnostics for two samples.

    Closes all open figures, then draws a histogram, a density plot, a QQ
    plot against the distribution named by `dist`, and a boxplot for both
    samples (sample1 is labelled "Females", sample2 "Males") and shows them.
    `bins` is forwarded to the histogram and `aspect` prefixes every title.
    Afterwards it prints the p-value of Levene's test on the two samples and
    the skewness of each sample. Nothing is returned.

    Every print call receives a single pre-formatted argument, so the text is
    the same on Python 2 (where this prints exactly what the former print
    statements did) and Python 3.
    """
    pyplot.close('all')

    # One figure per diagnostic, all shown together below.
    histogram(sample1, sample2, bins, aspect)
    pdf_plot(sample1, sample2, aspect)
    qq_plot(sample1, sample2, dist, aspect)
    boxplot(sample1, sample2, aspect)
    pyplot.show()

    # Index [1] of the Levene result is the p-value.
    print("%s %s" % ("Levene's test: ", stats.levene(sample1, sample2)[1]))

    print("%s %s" % ("Skewness for Females: ", stats.skew(sample1)))
    print("%s %s" % ("Skewness for Males: ", stats.skew(sample2)))



In [ ]:

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	788.000000	788.000000	788.000000	303.000000	788.000000	788.000000	788.000000
mean	0.165580	1.648477	5.181472	-0.021532	1.914466	2.753807	238.482234
std	0.309572	11.771632	32.435824	0.884182	9.667593	7.462421	727.567127
min	0.000000	0.000000	0.000000	-1.581139	-4.000000	0.000000	51.000000
25%	0.000000	0.000000	0.000000	-0.811107	0.000000	0.000000	101.000000
50%	0.000000	0.000000	1.000000	-0.214010	0.333333	1.000000	116.000000
75%	0.222222	1.000000	2.000000	0.707107	1.909382	3.000000	156.000000
max	1.000000	273.000000	761.000000	2.626129	236.000000	102.000000	15164.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	18264.000000	18264.000000	18264.000000	7859.000000	18264.000000	18264.000000	18264.000000
mean	0.179639	1.698806	5.264838	0.033028	1.571798	2.522120	267.312308
std	0.317351	16.896761	42.707281	0.869367	11.400197	6.882908	1473.342909
min	0.000000	0.000000	0.000000	-2.000000	-5.000000	0.000000	50.000000
25%	0.000000	0.000000	0.000000	-0.707107	0.000000	0.000000	101.000000
50%	0.000000	0.000000	1.000000	-0.068199	0.333333	1.000000	118.000000
75%	0.272727	1.000000	2.000000	0.712290	1.857143	2.000000	170.000000
max	1.000000	1031.000000	2717.000000	3.844818	1213.000000	249.000000	99647.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	61.000000	61.000000	61.000000	27.000000	61.000000	61.000000	61.000000
mean	0.377280	16.672131	49.967213	0.301585	7.987154	12.229508	1487.803279
std	0.241225	39.498828	107.280313	0.547925	31.902247	20.407020	2274.934327
min	0.000000	0.000000	0.000000	-0.529357	0.000000	0.000000	411.000000
25%	0.222222	2.000000	6.000000	0.000165	0.000000	0.000000	517.000000
50%	0.344828	4.000000	18.000000	0.184230	2.000000	4.000000	726.000000
75%	0.500000	15.000000	45.000000	0.616961	3.500000	16.000000	1285.000000
max	1.000000	273.000000	761.000000	1.413399	236.000000	102.000000	15164.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	1525.000000	1525.000000	1525.000000	705.000000	1525.000000	1525.000000	1525.000000
mean	0.340457	15.854426	46.017705	0.311329	4.718238	10.862295	1690.312787
std	0.209724	56.517607	141.280692	0.699377	32.919486	19.735440	4874.398334
min	0.000000	0.000000	0.000000	-1.138071	-5.000000	0.000000	401.000000
25%	0.200000	2.000000	7.000000	-0.120996	0.000000	1.000000	511.000000
50%	0.333333	5.000000	16.000000	0.206316	2.000000	4.000000	713.000000
75%	0.444444	11.000000	38.000000	0.657500	4.000000	13.000000	1346.000000
max	1.000000	1031.000000	2717.000000	3.170435	1213.000000	249.000000	99647.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	727.000000	727.000000	727.000000	276.000000	727.000000	727.000000	727.000000
mean	0.147817	0.387895	1.423659	-0.053141	1.404928	1.958735	133.656121
std	0.308215	0.848442	2.578656	0.904928	3.720874	4.215732	63.030995
min	0.000000	0.000000	0.000000	-1.581139	-4.000000	0.000000	51.000000
25%	0.000000	0.000000	0.000000	-0.853553	0.000000	0.000000	101.000000
50%	0.000000	0.000000	1.000000	-0.325780	0.000000	1.000000	111.000000
75%	0.000000	0.000000	2.000000	0.707107	1.500000	2.000000	141.000000
max	1.000000	6.000000	24.000000	2.626129	63.000000	64.000000	400.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	16739.000000	16739.000000	16739.000000	7154.000000	16739.000000	16739.000000	16739.000000
mean	0.164988	0.409164	1.552064	0.005602	1.285143	1.762292	137.670410
std	0.321421	0.869438	2.757249	0.879639	6.492368	3.051703	62.310956
min	0.000000	0.000000	0.000000	-2.000000	-4.000000	0.000000	50.000000
25%	0.000000	0.000000	0.000000	-0.721378	0.000000	0.000000	101.000000
50%	0.000000	0.000000	1.000000	-0.146447	0.000000	1.000000	115.000000
75%	0.166667	1.000000	2.000000	0.737146	1.500000	2.000000	150.000000
max	1.000000	17.000000	45.000000	3.844818	743.000000	47.000000	400.000000