Community: Programmers

Initialization and data import are at the end of this notebook. They were placed at the bottom so that the analysis is easier to read, but they must be run first for the analysis to work as expected. Click here to go there.

Data Summary

Women



In [5]:

    
females.describe()









    Out[5]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       383.000000
       383.000000
       383.000000
       182.000000
       383.000000
       383.000000
         383.000000
    
    
      mean
         0.066655
         0.819843
         5.007833
        -0.136622
         4.998534
         1.402089
         472.519582
    
    
      std
         0.199117
         4.582738
        19.049466
         0.786598
        12.313895
         3.761919
        1833.622104
    
    
      min
         0.000000
         0.000000
         0.000000
        -1.333772
        -3.000000
         0.000000
          51.000000
    
    
      25%
         0.000000
         0.000000
         0.000000
        -0.621652
         0.000000
         0.000000
         101.000000
    
    
      50%
         0.000000
         0.000000
         1.000000
        -0.330430
         0.000000
         1.000000
         127.000000
    
    
      75%
         0.000000
         0.000000
         2.000000
         0.166927
         5.000000
         1.000000
         213.500000
    
    
      max
         1.000000
        62.000000
       173.000000
         4.401387
       107.000000
        42.000000
       24391.000000



In [6]:

    
females.median()









    Out[6]:





accepted_rate               0.00000
answers_accepted_total      0.00000
answers_total               1.00000
mean_utility               -0.33043
questions_avg               0.00000
questions_total             1.00000
reputation                127.00000
dtype: float64

Men



In [7]:

    
males.describe()









    Out[7]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       8731.000000
       8731.000000
       8731.000000
       4973.000000
       8731.000000
       8731.000000
        8731.000000
    
    
      mean
          0.081216
          0.852022
          5.113847
         -0.055142
          3.697275
          0.931394
         421.304776
    
    
      std
          0.219704
          6.046695
         25.712635
          0.809089
         16.506019
          2.419752
        1759.993363
    
    
      min
          0.000000
          0.000000
          0.000000
         -1.808970
         -5.000000
          0.000000
          50.000000
    
    
      25%
          0.000000
          0.000000
          0.000000
         -0.573311
          0.000000
          0.000000
         103.000000
    
    
      50%
          0.000000
          0.000000
          1.000000
         -0.274029
          0.000000
          0.000000
         133.000000
    
    
      75%
          0.000000
          0.000000
          2.000000
          0.281594
          3.708333
          1.000000
         239.000000
    
    
      max
          1.000000
        231.000000
        796.000000
          9.653682
       1100.000000
         76.000000
       62314.000000



In [8]:

    
males.median()









    Out[8]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               1.000000
mean_utility               -0.274029
questions_avg               0.000000
questions_total             0.000000
reputation                133.000000
dtype: float64

Top contributors



In [9]:

    
# Start from a clean slate so only the figures created below are shown.
pyplot.close('all')
# Reputation histograms (100 bins) for females vs. males over the full range.
histogram(females["reputation"], males["reputation"], 100, "Reputation")
# Same comparison restricted to users with reputation <= 1000.
histogram(females[females["reputation"]<= 1000]["reputation"], males[males["reputation"]<= 1000]["reputation"], 100, "Reputation")
# Same comparison restricted to reputation <= 450, the cut-off the following
# cells use to split "top" from "common" contributors.
histogram(females[females["reputation"]<= 450]["reputation"], males[males["reputation"]<= 450]["reputation"], 100, "Reputation")
pyplot.show()

Top Women



In [10]:

    
top_females = females[females["reputation"]> 450]
top_females.describe()









    Out[10]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       46.000000
       46.000000
        46.000000
       30.000000
        46.000000
       46.000000
          46.000000
    
    
      mean
        0.137440
        6.021739
        33.934783
        0.187713
        12.613522
        5.717391
        2842.021739
    
    
      std
        0.126228
       12.082106
        45.645690
        0.912327
        20.468899
        9.427296
        4687.361966
    
    
      min
        0.000000
        0.000000
         0.000000
       -0.694013
         0.000000
        0.000000
         454.000000
    
    
      25%
        0.024621
        1.000000
         6.500000
       -0.327317
         0.000000
        0.000000
         685.000000
    
    
      50%
        0.100847
        2.000000
        16.000000
        0.037226
         7.785714
        1.500000
        1223.000000
    
    
      75%
        0.218690
        3.750000
        32.250000
        0.339956
        14.612069
        7.000000
        2505.250000
    
    
      max
        0.500000
       62.000000
       173.000000
        4.401387
       107.000000
       42.000000
       24391.000000



In [11]:

    
top_females.median()









    Out[11]:





accepted_rate                0.100847
answers_accepted_total       2.000000
answers_total               16.000000
mean_utility                 0.037226
questions_avg                7.785714
questions_total              1.500000
reputation                1223.000000
dtype: float64

Top Men



In [12]:

    
top_males = males[males["reputation"]> 450]
top_males.describe()









    Out[12]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       1113.000000
       1113.000000
       1113.000000
       816.000000
       1113.000000
       1113.000000
        1113.000000
    
    
      mean
          0.177299
          5.757412
         31.483378
         0.325455
          9.892585
          2.809524
        2234.580413
    
    
      std
          0.200537
         16.070229
         66.073985
         0.893799
         38.515613
          5.647596
        4528.080814
    
    
      min
          0.000000
          0.000000
          0.000000
        -1.386750
         -2.000000
          0.000000
         451.000000
    
    
      25%
          0.000000
          0.000000
          5.000000
        -0.176646
          0.000000
          0.000000
         606.000000
    
    
      50%
          0.133333
          2.000000
         12.000000
         0.140681
          3.000000
          1.000000
         910.000000
    
    
      75%
          0.250000
          5.000000
         28.000000
         0.564684
         10.200000
          3.000000
        1815.000000
    
    
      max
          1.000000
        231.000000
        796.000000
         9.653682
       1100.000000
         76.000000
       62314.000000



In [13]:

    
top_males.median()









    Out[13]:





accepted_rate               0.133333
answers_accepted_total      2.000000
answers_total              12.000000
mean_utility                0.140681
questions_avg               3.000000
questions_total             1.000000
reputation                910.000000
dtype: float64

Common women contributors



In [14]:

    
common_females = females[females["reputation"] <= 450]
common_females.describe()









    Out[14]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       337.000000
       337.000000
       337.000000
       152.000000
       337.00000
       337.000000
       337.000000
    
    
      mean
         0.056994
         0.109792
         1.059347
        -0.200636
         3.95910
         0.813056
       149.086053
    
    
      std
         0.205333
         0.340420
         1.823142
         0.746181
        10.35644
         1.135497
        79.347610
    
    
      min
         0.000000
         0.000000
         0.000000
        -1.333772
        -3.00000
         0.000000
        51.000000
    
    
      25%
         0.000000
         0.000000
         0.000000
        -0.663396
         0.00000
         0.000000
       101.000000
    
    
      50%
         0.000000
         0.000000
         0.000000
        -0.397628
         0.00000
         1.000000
       121.000000
    
    
      75%
         0.000000
         0.000000
         1.000000
         0.082377
         4.00000
         1.000000
       159.000000
    
    
      max
         1.000000
         2.000000
        15.000000
         4.063193
       101.00000
         6.000000
       450.000000



In [15]:

    
common_females.median()









    Out[15]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               0.000000
mean_utility               -0.397628
questions_avg               0.000000
questions_total             1.000000
reputation                121.000000
dtype: float64

Common men contributors



In [16]:

    
common_males = males[males["reputation"] <= 450]
common_males.describe()









    Out[16]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       7618.000000
       7618.000000
       7618.000000
       4157.000000
       7618.000000
       7618.000000
       7618.000000
    
    
      mean
          0.067178
          0.135337
          1.261223
         -0.129852
          2.792132
          0.656997
        156.382778
    
    
      std
          0.218875
          0.416205
          1.984983
          0.769721
          9.447919
          1.209820
         79.189708
    
    
      min
          0.000000
          0.000000
          0.000000
         -1.808970
         -5.000000
          0.000000
         50.000000
    
    
      25%
          0.000000
          0.000000
          0.000000
         -0.615233
          0.000000
          0.000000
        101.000000
    
    
      50%
          0.000000
          0.000000
          1.000000
         -0.353553
          0.000000
          0.000000
        123.000000
    
    
      75%
          0.000000
          0.000000
          1.000000
          0.165700
          3.000000
          1.000000
        178.000000
    
    
      max
          1.000000
          5.000000
         23.000000
          7.413026
        331.000000
         26.000000
        449.000000



In [17]:

    
common_males.median()









    Out[17]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               1.000000
mean_utility               -0.353553
questions_avg               0.000000
questions_total             0.000000
reputation                123.000000
dtype: float64

Second Question: Are the contributions made by both genders perceived by the community as having the same quality?

Hypothesis 1: Both genders have the same acceptance rate.

H0: acceptanceRate(Males) = acceptanceRate(Females);

H1: acceptanceRate(Males) != acceptanceRate(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we exclude users who didn't post any answers.



In [18]:

    
females_acc_rate = females[females['answers_total'] > 0]['accepted_rate']
males_acc_rate = males[males['answers_total'] > 0]['accepted_rate']

The data's shape



In [28]:

    
show_data_shape(females_acc_rate, males_acc_rate, "norm", 30, "Accepted Rate")









    












    












    












    












    



Levene's test:  0.728592261497
Skewness for Females:  2.56628745179
Skewness for Males:  2.36882125452

Hypothesis test



In [29]:

    
# Report element [1] (the p-value in SciPy's result tuples) of three
# two-sample tests comparing the accepted rate of females vs. males.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(females_acc_rate, males_acc_rate)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(females_acc_rate, males_acc_rate)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(females_acc_rate, males_acc_rate)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.99843735701
Two-sample unpaired t-test:  0.728592261498
Two-sample Mann Whitney U test:  0.761297791248

Looking at the top contributors



In [30]:

    
top_females_acc_rate = top_females[top_females['answers_total'] > 0]['accepted_rate']
top_males_acc_rate = top_males[top_males['answers_total'] > 0]['accepted_rate']

The data's shape



In [31]:

    
show_data_shape(top_females_acc_rate, top_males_acc_rate, "expon", 30, "Accepted Rate")









    












    












    












    












    



Levene's test:  0.139907387493
Skewness for Females:  0.724413426319
Skewness for Males:  2.05749012295

Hypothesis test



In [57]:

    
# Report element [1] (the p-value in SciPy's result tuples) of the two-sample
# tests comparing the accepted rate of top female vs. top male contributors.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(top_females_acc_rate, top_males_acc_rate)[1]
# NOTE(review): the Welch t-test (equal_var=False) is disabled for this
# subset; the reason is not recorded here -- confirm it is intentional.
# print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_acc_rate, top_males_acc_rate, equal_var=False)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(top_females_acc_rate, top_males_acc_rate)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.721167823567
Two-sample Mann Whitney U test:  0.688345914725

Looking at the common contributors



In [33]:

    
common_females_acc_rate = common_females[common_females['answers_total'] > 0]['accepted_rate']
common_males_acc_rate = common_males[common_males['answers_total'] > 0]['accepted_rate']

The data's shape



In [34]:

    
show_data_shape(common_females_acc_rate, common_males_acc_rate, "expon", 30, "Accepted Rate")









    












    












    












    












    



Levene's test:  0.998758098787
Skewness for Females:  2.51308705037
Skewness for Males:  2.45809441702

Hypothesis test



In [61]:

    
# Report element [1] (the p-value in SciPy's result tuples) of three
# two-sample tests comparing the accepted rate of common female vs. male
# contributors.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(common_females_acc_rate, common_males_acc_rate)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(common_females_acc_rate, common_males_acc_rate)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(common_females_acc_rate, common_males_acc_rate)[1]









    



Two-sample Kolmogorov-Smirnov test:  1.0
Two-sample unpaired t-test:  0.99875809878
Two-sample Mann Whitney U test:  0.881386829017

Hypothesis 2: The mean utility of the answer for each user is the same between genders.

H0: meanUtilityBy(Males) = meanUtilityBy(Females);

H1: meanUtilityBy(Males) != meanUtilityBy(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we exclude users who didn't post any answers.



In [35]:

    
females_mean_utility = females['mean_utility'].dropna()
males_mean_utility = males['mean_utility'].dropna()

The data's shape



In [36]:

    
show_data_shape(females_mean_utility, males_mean_utility, "expon", 30, "Mean Utility Answers")









    












    












    












    












    



Levene's test:  0.393929417368
Skewness for Females:  2.36391634488
Skewness for Males:  2.07820309422

Hypothesis test



In [64]:

    
# Report element [1] (the p-value in SciPy's result tuples) of three
# two-sample tests comparing the mean answer utility of females vs. males.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(females_mean_utility, males_mean_utility)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(females_mean_utility, males_mean_utility)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(females_mean_utility, males_mean_utility)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.199388168274
Two-sample unpaired t-test:  0.181711166699
Two-sample Mann Whitney U test:  0.121796616593

Looking at the top contributors



In [39]:

    
top_females_mean_utility = top_females['mean_utility'].dropna()
top_males_mean_utility = top_males['mean_utility'].dropna()

The data's shape



In [40]:

    
show_data_shape(top_females_mean_utility, top_males_mean_utility, "expon", 30, "Mean Utility Answers")









    












    












    












    












    



Levene's test:  0.914433866537
Skewness for Females:  3.3721538152
Skewness for Males:  4.1265993826

Hypothesis test



In [67]:

    
# Report element [1] (the p-value in SciPy's result tuples) of the two-sample
# tests comparing the mean answer utility of top female vs. top male
# contributors.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(top_females_mean_utility, top_males_mean_utility)[1]
# NOTE(review): the Welch t-test (equal_var=False) is disabled for this
# subset; the reason is not recorded here -- confirm it is intentional.
# print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_mean_utility, top_males_mean_utility, equal_var=False)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(top_females_mean_utility, top_males_mean_utility)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.243691134256
Two-sample Mann Whitney U test:  0.151675715549

Looking at the common contributors



In [41]:

    
common_females_mean_utility = common_females['mean_utility'].dropna()
common_males_mean_utility = common_males['mean_utility'].dropna()

The data's shape



In [42]:

    
show_data_shape(common_females_mean_utility, common_males_mean_utility, "expon", 30, "Mean Utility Answers")









    












    












    












    












    



Levene's test:  0.540587665681
Skewness for Females:  2.0204157619
Skewness for Males:  1.49730987513

Hypothesis test



In [43]:

    
# Report element [1] (the p-value in SciPy's result tuples) of three
# two-sample tests comparing the mean answer utility of common female vs.
# male contributors.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(common_females_mean_utility, common_males_mean_utility)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(common_females_mean_utility, common_males_mean_utility)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(common_females_mean_utility, common_males_mean_utility)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.162034229676
Two-sample unpaired t-test:  0.265011417789
Two-sample Mann Whitney U test:  0.215199989705

Hypothesis 3: The mean of the scores of the questions made by each user doesn't change between genders.

H0: questionScoreMean(Males) = questionScoreMean(Females);

H1: questionScoreMean(Males) != questionScoreMean(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we exclude users who didn't post any questions.



In [44]:

    
females_questions_mean = females[females['questions_total'] > 0]['questions_avg']
males_questions_mean = males[males['questions_total'] > 0]['questions_avg']

The shape of the data



In [45]:

    
show_data_shape(females_questions_mean, males_questions_mean, "expon", 30, "Mean Score Questions")









    












    












    












    












    



Levene's test:  0.59464892137
Skewness for Females:  3.8414267351
Skewness for Males:  26.6955300255

Hypothesis test



In [46]:

    
# Report element [1] (the p-value in SciPy's result tuples) of three
# two-sample tests comparing the mean question score of females vs. males.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(females_questions_mean, males_questions_mean)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(females_questions_mean, males_questions_mean)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(females_questions_mean, males_questions_mean)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.535352764773
Two-sample unpaired t-test:  0.716499112038
Two-sample Mann Whitney U test:  0.891724322274

Looking at the top contributors



In [47]:

    
top_females_questions_mean = top_females[top_females['questions_total'] > 0]['questions_avg']
top_males_questions_mean = top_males[top_males['questions_total'] > 0]['questions_avg']

The data's shape



In [48]:

    
show_data_shape(top_females_questions_mean, top_males_questions_mean, "expon", 30, "Mean Score Questions")









    












    












    












    












    



Levene's test:  0.95770294747
Skewness for Females:  2.84914923928
Skewness for Males:  17.0412063252

Hypothesis test



In [49]:

    
# Report element [1] (the p-value in SciPy's result tuples) of the two-sample
# tests comparing the mean question score of top female vs. top male
# contributors.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(top_females_questions_mean, top_males_questions_mean)[1]
# NOTE(review): the Welch t-test (equal_var=False) is disabled for this
# subset; the reason is not recorded here -- confirm it is intentional.
# print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_questions_mean, top_males_questions_mean, equal_var=False)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(top_females_questions_mean, top_males_questions_mean)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.303993271641
Two-sample Mann Whitney U test:  0.124785533252

Looking at the common contributors



In [50]:

    
common_females_questions_mean = common_females[common_females['questions_total'] > 0]['questions_avg']
common_males_questions_mean = common_males[common_males['questions_total'] > 0]['questions_avg']

The data's shape



In [51]:

    
show_data_shape(common_females_questions_mean, common_males_questions_mean, "expon", 30, "Mean Score Questions")









    












    












    












    












    



Levene's test:  0.305260370133
Skewness for Females:  4.09606543196
Skewness for Males:  10.9504884314

Hypothesis test



In [52]:

    
# Report element [1] (the p-value in SciPy's result tuples) of three
# two-sample tests comparing the mean question score of common female vs.
# male contributors.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(common_females_questions_mean, common_males_questions_mean)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(common_females_questions_mean, common_males_questions_mean)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(common_females_questions_mean, common_males_questions_mean)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.674317041232
Two-sample unpaired t-test:  0.49163797857
Two-sample Mann Whitney U test:  0.794475444927

Hypothesis 4: Reputation is the same between genders.

H0: reputation(Males) = reputation(Females);

H1: reputation(Males) != reputation(Females).



In [53]:

    
females_reputation = females['reputation']
males_reputation = males['reputation']



In [54]:

    
show_data_shape(females_reputation, males_reputation, "expon", 50, "Reputation")









    












    












    












    












    



Levene's test:  0.552625372729
Skewness for Females:  9.51646785135
Skewness for Males:  16.8041524801

Hypothesis test



In [55]:

    
# Report element [1] (the p-value in SciPy's result tuples) of three
# two-sample tests comparing the reputation of females vs. males.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(females_reputation, males_reputation)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(females_reputation, males_reputation)[1]
# NOTE(review): the doubling presumes stats.mannwhitneyu returns a one-sided
# p-value; with a SciPy whose default is already two-sided this overstates
# the p-value by 2x -- confirm against the installed SciPy version.
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(females_reputation, males_reputation)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.00321579664365
Two-sample unpaired t-test:  0.577952825719
Two-sample Mann Whitney U test:  0.0663889795178

Initialization

Here you can find the data import and some utility functions used to analyse the data. Please run this section first; otherwise the analysis will not work.

Importing the data from the MongoDB database and loading it into a pandas DataFrame for easy manipulation.



In [2]:

    
from __future__ import division
import pymongo, time, pylab, numpy, pandas
from scipy import stats
import matplotlib as mpl
from matplotlib import pyplot

%matplotlib inline

# Connect to the MongoDB server on localhost and pick the `statistics`
# collection of the database named after the community under study.
client = pymongo.MongoClient('localhost', 27017)

community = 'programmers'
stats_db = client[community]['statistics']

# Keep only users with at least one question, answer or comment.
active_user_filter = {'$or': [
    {'questions_total': {'$gt': 0}},
    {'answers_total': {'$gt': 0}},
    {'comments_total': {'$gt': 0}},
]}

# Projection: fetch just the fields the analysis uses and drop Mongo's `_id`.
wanted_fields = {
    u'_id': False,
    u'accepted_rate': True,
    u'reputation': True,
    u'questions_avg': True,
    u'answers_total': True,
    u'gender': True,
    u'questions_total': True,
    u'answers_accepted_total': True,
    u'mean_utility': True,
}

cursor = stats_db.find(active_user_filter, wanted_fields)

# Materialise the cursor so the whole result set lands in one DataFrame.
df = pandas.DataFrame(list(cursor))

# Split the users by their recorded gender label.
males = df.loc[df['gender'] == 'Male']
females = df.loc[df['gender'] == 'Female']

Utility functions for plotting.



In [27]:

    
pyplot.rcdefaults()
mpl.style.use('ggplot')

def histogram(sample1, sample2, bins, aspect):
    """Draw two histograms side by side on a new 15x7 figure.

    sample1 -- values for the left panel (titled as the Females sample).
    sample2 -- values for the right panel (titled as the Males sample).
    bins    -- forwarded as the second positional argument of ``Axes.hist``.
    aspect  -- text prepended to each panel title.
    """
    _, (left_ax, right_ax) = pyplot.subplots(nrows=1, ncols=2, figsize=(15, 7))
    panels = (
        (left_ax, sample1, " by Females - Histogram"),
        (right_ax, sample2, " by Males - Histogram"),
    )
    for ax, values, title_suffix in panels:
        # Each sample is converted to a plain list before being plotted.
        ax.hist(list(values), bins)
        ax.set_title(aspect + title_suffix)

def pdf_plot(sample1, sample2, aspect):
    """Draw density plots of two samples side by side on a new 15x7 figure.

    sample1 -- object exposing a pandas-style ``.plot(ax=..., kind=...)``
               method; drawn on the left panel (titled as the Females sample).
    sample2 -- same, drawn on the right panel (titled as the Males sample).
    aspect  -- text prepended to each panel title.
    """
    fig, axes = pyplot.subplots(nrows=1, ncols=2, figsize=(15,7))

    # The plots are drawn directly onto the axes passed via `ax`, so the
    # return values are not needed and `axes` is left untouched.
    sample1.plot(ax=axes[0], kind="density")
    sample2.plot(ax=axes[1], kind="density")

    axes[0].set_title(aspect + " by Females - Density")
    axes[1].set_title(aspect + " by Males - Density")

    
    
def boxplot(sample1, sample2, aspect):
    """Draw two boxplots side by side on a new 15x7 figure.

    sample1 -- values for the left panel (titled as the Females sample).
    sample2 -- values for the right panel (titled as the Males sample).
    aspect  -- text prepended to each panel title.
    """
    _, (left_ax, right_ax) = pyplot.subplots(nrows=1, ncols=2, figsize=(15, 7))
    panels = (
        (left_ax, sample1, " by Females - Boxplot"),
        (right_ax, sample2, " by Males - Boxplot"),
    )
    for ax, values, title_suffix in panels:
        # Each sample is converted to a plain list before being plotted.
        ax.boxplot(list(values))
        ax.set_title(aspect + title_suffix)
    

def qq_plot(sample1, sample2, distribution, aspect):
    """Draw probability (Q-Q) plots of two samples against `distribution`.

    sample1      -- values for the left panel (titled as the Females sample).
    sample2      -- values for the right panel (titled as the Males sample).
    distribution -- distribution name passed to ``stats.probplot`` as `dist`;
                    it is also concatenated into the panel titles, so it must
                    be a string.
    aspect       -- text prepended to each panel title.
    """
    fig, axes = pyplot.subplots(nrows=1, ncols=2, figsize=(15,7))

    panels = (
        (axes[0], sample1, " by Females - QQPlot "),
        (axes[1], sample2, " by Males - QQPlot "),
    )
    for ax, sample, title_suffix in panels:
        # probplot draws through the pyplot module, i.e. on the *current*
        # axes. Select the axes created above explicitly instead of relying
        # on pyplot.subplot(121/122) resolving to the very same axes object,
        # so the title set below always lands on the axes holding the plot.
        pyplot.sca(ax)
        stats.probplot(list(sample), dist=distribution, plot=pyplot)
        # Set the title after plotting so that any title applied while
        # plotting does not replace it.
        ax.set_title(aspect + title_suffix + distribution)

Utility functions for describing the data.



In [4]:

    
def describe(sample1, sample2):
    """Print summary statistics and the median of two samples.

    For each sample, prints the result of its ``describe()`` method followed
    by a "Median:" line; a blank line separates the two reports.

    The print calls use a single pre-formatted argument so that the emitted
    text is the same under the Python 2 print statement and the Python 3
    print function.
    """
    print(sample1.describe())
    # Two spaces after the colon: the label's own trailing space plus the
    # separator the original comma-separated print statement inserted.
    print("Median:  %s" % (sample1.median(),))
    print("")
    print(sample2.describe())
    print("Median:  %s" % (sample2.median(),))
    
def show_data_shape(sample1, sample2, dist, bins, aspect):
    """Show the shape of two samples graphically and numerically.

    Closes all open figures, then draws a histogram, a density plot, a Q-Q
    plot against `dist` and a boxplot for both samples, shows them, and
    finally prints element [1] of ``stats.levene`` (the p-value in SciPy's
    result tuple) and the skewness of each sample.

    sample1 -- first sample; labelled "Females" in plots and printed output.
    sample2 -- second sample; labelled "Males" in plots and printed output.
    dist    -- distribution name forwarded to `qq_plot`.
    bins    -- bin specification forwarded to `histogram`.
    aspect  -- text used in the plot titles.

    The print calls use a single pre-formatted argument so that the emitted
    text is the same under the Python 2 print statement and the Python 3
    print function.
    """
    pyplot.close('all')

    # Graphical summaries: histogram, density, Q-Q plot and boxplot.
    histogram(sample1, sample2, bins, aspect)
    pdf_plot(sample1, sample2, aspect)
    qq_plot(sample1, sample2, dist, aspect)
    boxplot(sample1, sample2, aspect)
    pyplot.show()

    # Numerical summaries. Two spaces follow each colon: the label's own
    # trailing space plus the separator the original comma-separated print
    # statement inserted.
    print("Levene's test:  %s" % (stats.levene(sample1, sample2)[1],))
    print("Skewness for Females:  %s" % (stats.skew(sample1),))
    print("Skewness for Males:  %s" % (stats.skew(sample2),))



In [ ]:

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	383.000000	383.000000	383.000000	182.000000	383.000000	383.000000	383.000000
mean	0.066655	0.819843	5.007833	-0.136622	4.998534	1.402089	472.519582
std	0.199117	4.582738	19.049466	0.786598	12.313895	3.761919	1833.622104
min	0.000000	0.000000	0.000000	-1.333772	-3.000000	0.000000	51.000000
25%	0.000000	0.000000	0.000000	-0.621652	0.000000	0.000000	101.000000
50%	0.000000	0.000000	1.000000	-0.330430	0.000000	1.000000	127.000000
75%	0.000000	0.000000	2.000000	0.166927	5.000000	1.000000	213.500000
max	1.000000	62.000000	173.000000	4.401387	107.000000	42.000000	24391.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	8731.000000	8731.000000	8731.000000	4973.000000	8731.000000	8731.000000	8731.000000
mean	0.081216	0.852022	5.113847	-0.055142	3.697275	0.931394	421.304776
std	0.219704	6.046695	25.712635	0.809089	16.506019	2.419752	1759.993363
min	0.000000	0.000000	0.000000	-1.808970	-5.000000	0.000000	50.000000
25%	0.000000	0.000000	0.000000	-0.573311	0.000000	0.000000	103.000000
50%	0.000000	0.000000	1.000000	-0.274029	0.000000	0.000000	133.000000
75%	0.000000	0.000000	2.000000	0.281594	3.708333	1.000000	239.000000
max	1.000000	231.000000	796.000000	9.653682	1100.000000	76.000000	62314.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	46.000000	46.000000	46.000000	30.000000	46.000000	46.000000	46.000000
mean	0.137440	6.021739	33.934783	0.187713	12.613522	5.717391	2842.021739
std	0.126228	12.082106	45.645690	0.912327	20.468899	9.427296	4687.361966
min	0.000000	0.000000	0.000000	-0.694013	0.000000	0.000000	454.000000
25%	0.024621	1.000000	6.500000	-0.327317	0.000000	0.000000	685.000000
50%	0.100847	2.000000	16.000000	0.037226	7.785714	1.500000	1223.000000
75%	0.218690	3.750000	32.250000	0.339956	14.612069	7.000000	2505.250000
max	0.500000	62.000000	173.000000	4.401387	107.000000	42.000000	24391.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	1113.000000	1113.000000	1113.000000	816.000000	1113.000000	1113.000000	1113.000000
mean	0.177299	5.757412	31.483378	0.325455	9.892585	2.809524	2234.580413
std	0.200537	16.070229	66.073985	0.893799	38.515613	5.647596	4528.080814
min	0.000000	0.000000	0.000000	-1.386750	-2.000000	0.000000	451.000000
25%	0.000000	0.000000	5.000000	-0.176646	0.000000	0.000000	606.000000
50%	0.133333	2.000000	12.000000	0.140681	3.000000	1.000000	910.000000
75%	0.250000	5.000000	28.000000	0.564684	10.200000	3.000000	1815.000000
max	1.000000	231.000000	796.000000	9.653682	1100.000000	76.000000	62314.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	337.000000	337.000000	337.000000	152.000000	337.00000	337.000000	337.000000
mean	0.056994	0.109792	1.059347	-0.200636	3.95910	0.813056	149.086053
std	0.205333	0.340420	1.823142	0.746181	10.35644	1.135497	79.347610
min	0.000000	0.000000	0.000000	-1.333772	-3.00000	0.000000	51.000000
25%	0.000000	0.000000	0.000000	-0.663396	0.00000	0.000000	101.000000
50%	0.000000	0.000000	0.000000	-0.397628	0.00000	1.000000	121.000000
75%	0.000000	0.000000	1.000000	0.082377	4.00000	1.000000	159.000000
max	1.000000	2.000000	15.000000	4.063193	101.00000	6.000000	450.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	7618.000000	7618.000000	7618.000000	4157.000000	7618.000000	7618.000000	7618.000000
mean	0.067178	0.135337	1.261223	-0.129852	2.792132	0.656997	156.382778
std	0.218875	0.416205	1.984983	0.769721	9.447919	1.209820	79.189708
min	0.000000	0.000000	0.000000	-1.808970	-5.000000	0.000000	50.000000
25%	0.000000	0.000000	0.000000	-0.615233	0.000000	0.000000	101.000000
50%	0.000000	0.000000	1.000000	-0.353553	0.000000	0.000000	123.000000
75%	0.000000	0.000000	1.000000	0.165700	3.000000	1.000000	178.000000
max	1.000000	5.000000	23.000000	7.413026	331.000000	26.000000	449.000000