Community: Mathematics

Initialization and data import are at the end of this notebook. They were placed at the bottom so that the analysis is easier to read, but they must be run first for the analysis to work as expected. Click here to go there.

Data Summary

Women



In [4]:

    
females.describe()









    Out[4]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       633.000000
       633.000000
        633.000000
       161.000000
       633.000000
       633.000000
         633.000000
    
    
      mean
         0.128670
         2.690363
          7.385466
        -0.310565
         3.486357
         9.857820
         387.303318
    
    
      std
         0.270712
        21.932986
         48.434478
         0.753054
        14.726966
        17.905307
        1958.974331
    
    
      min
         0.000000
         0.000000
          0.000000
        -2.132227
        -1.000000
         0.000000
          50.000000
    
    
      25%
         0.000000
         0.000000
          0.000000
        -0.916840
         0.750000
         1.000000
          83.000000
    
    
      50%
         0.000000
         0.000000
          0.000000
        -0.453412
         1.750000
         4.000000
         116.000000
    
    
      75%
         0.058824
         1.000000
          2.000000
         0.100167
         3.250000
        11.000000
         198.000000
    
    
      max
         1.000000
       508.000000
       1057.000000
         2.873922
       356.000000
       178.000000
       45388.000000



In [5]:

    
females.median()









    Out[5]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               0.000000
mean_utility               -0.453412
questions_avg               1.750000
questions_total             4.000000
reputation                116.000000
dtype: float64

Men



In [6]:

    
males.describe()









    Out[6]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       7492.000000
       7492.000000
       7492.000000
       2191.000000
       7492.000000
       7492.000000
         7492.000000
    
    
      mean
          0.144217
          8.853043
         21.642285
         -0.270100
          2.778413
          6.569541
          868.969434
    
    
      std
          0.263682
         91.971931
        191.770553
          0.780461
          5.692874
         18.395609
         6045.880132
    
    
      min
          0.000000
          0.000000
          0.000000
         -1.857143
        -12.000000
          0.000000
           50.000000
    
    
      25%
          0.000000
          0.000000
          0.000000
         -0.918454
          0.000000
          1.000000
          101.000000
    
    
      50%
          0.000000
          0.000000
          1.000000
         -0.464991
          1.600000
          2.000000
          121.000000
    
    
      75%
          0.242058
          1.000000
          3.000000
          0.180160
          3.166667
          5.000000
          242.250000
    
    
      max
          1.000000
       4828.000000
       8667.000000
          3.681959
        170.000000
        650.000000
       221779.000000



In [7]:

    
males.median()









    Out[7]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               1.000000
mean_utility               -0.464991
questions_avg               1.600000
questions_total             2.000000
reputation                121.000000
dtype: float64

Top contributors



In [8]:

    
# Reputation histograms for both genders: first the full range, then the
# same view restricted to reputation <= 1000 and reputation <= 300.
pyplot.close('all')
histogram(females["reputation"], males["reputation"], 100, "Reputation")
for reputation_cap in (1000, 300):
    capped_females = females[females["reputation"] <= reputation_cap]["reputation"]
    capped_males = males[males["reputation"] <= reputation_cap]["reputation"]
    histogram(capped_females, capped_males, 100, "Reputation")
pyplot.show()

Top Women



In [9]:

    
top_females = females[females["reputation"]> 300]
top_females.describe()









    Out[9]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       123.000000
       123.000000
        123.000000
       47.000000
       123.000000
       123.000000
         123.000000
    
    
      mean
         0.280473
        12.983740
         34.398374
       -0.140989
         3.851004
        25.650407
        1524.268293
    
    
      std
         0.277420
        48.559915
        105.940266
        0.831035
         5.975449
        33.073436
        4272.204505
    
    
      min
         0.000000
         0.000000
          0.000000
       -1.372813
         0.000000
         0.000000
         302.000000
    
    
      25%
         0.000000
         0.000000
          1.000000
       -0.775685
         1.264245
         4.000000
         363.500000
    
    
      50%
         0.250000
         2.000000
          5.000000
       -0.190794
         2.638298
        14.000000
         504.000000
    
    
      75%
         0.483160
         8.000000
         27.500000
        0.184591
         4.200341
        34.000000
        1341.500000
    
    
      max
         1.000000
       508.000000
       1057.000000
        2.873922
        51.000000
       178.000000
       45388.000000



In [10]:

    
top_females.median()









    Out[10]:





accepted_rate               0.250000
answers_accepted_total      2.000000
answers_total               5.000000
mean_utility               -0.190794
questions_avg               2.638298
questions_total            14.000000
reputation                504.000000
dtype: float64

Top Men



In [11]:

    
top_males = males[males["reputation"]> 300]
top_males.describe()









    Out[11]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       1683.000000
       1683.000000
       1683.000000
       605.000000
       1683.000000
       1683.000000
         1683.000000
    
    
      mean
          0.312377
         38.579323
         92.901961
        -0.129997
          4.761064
         19.014854
         3451.025550
    
    
      std
          0.224611
        191.129956
        396.505569
         0.704042
          9.673287
         35.062562
        12416.953799
    
    
      min
          0.000000
          0.000000
          0.000000
        -1.837117
          0.000000
          0.000000
          301.000000
    
    
      25%
          0.150000
          1.000000
          5.000000
        -0.659126
          1.500000
          2.000000
          421.500000
    
    
      50%
          0.323232
          5.000000
         15.000000
        -0.228141
          2.909091
          8.000000
          751.000000
    
    
      75%
          0.450000
         17.000000
         50.000000
         0.275737
          5.133333
         22.000000
         1965.000000
    
    
      max
          1.000000
       4828.000000
       8667.000000
         2.468788
        170.000000
        650.000000
       221779.000000



In [12]:

    
top_males.median()









    Out[12]:





accepted_rate               0.323232
answers_accepted_total      5.000000
answers_total              15.000000
mean_utility               -0.228141
questions_avg               2.909091
questions_total             8.000000
reputation                751.000000
dtype: float64

Common women contributors



In [13]:

    
common_females = females[females["reputation"] <= 300]
common_females.describe()









    Out[13]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       510.000000
       510.000000
       510.000000
       114.000000
       510.000000
       510.000000
       510.000000
    
    
      mean
         0.092058
         0.207843
         0.870588
        -0.380477
         3.398413
         6.049020
       113.094118
    
    
      std
         0.256195
         0.574454
         1.963047
         0.710585
        16.146064
         7.815879
        50.373253
    
    
      min
         0.000000
         0.000000
         0.000000
        -2.132227
        -1.000000
         0.000000
        50.000000
    
    
      25%
         0.000000
         0.000000
         0.000000
        -0.995145
         0.643908
         1.000000
        74.000000
    
    
      50%
         0.000000
         0.000000
         0.000000
        -0.500692
         1.527778
         3.000000
       105.000000
    
    
      75%
         0.000000
         0.000000
         1.000000
         0.000000
         3.000000
         8.000000
       136.750000
    
    
      max
         1.000000
         4.000000
        16.000000
         1.414214
       356.000000
        73.000000
       298.000000



In [14]:

    
common_females.median()









    Out[14]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               0.000000
mean_utility               -0.500692
questions_avg               1.527778
questions_total             3.000000
reputation                105.000000
dtype: float64

Common men contributors



In [15]:

    
common_males = males[males["reputation"] <= 300]
common_males.describe()









    Out[15]:






  
    
      
      accepted_rate
      answers_accepted_total
      answers_total
      mean_utility
      questions_avg
      questions_total
      reputation
    
  
  
    
      count
       5809.000000
       5809.000000
       5809.000000
       1586.000000
       5809.000000
       5809.000000
       5809.000000
    
    
      mean
          0.095497
          0.240661
          0.996729
         -0.323545
          2.203993
          2.963849
        120.888793
    
    
      std
          0.253963
          0.657856
          2.194680
          0.801489
          3.637619
          4.747850
         46.529120
    
    
      min
          0.000000
          0.000000
          0.000000
         -1.857143
        -12.000000
          0.000000
         50.000000
    
    
      25%
          0.000000
          0.000000
          0.000000
         -1.000000
          0.000000
          1.000000
        101.000000
    
    
      50%
          0.000000
          0.000000
          0.000000
         -0.554213
          1.166667
          1.000000
        111.000000
    
    
      75%
          0.000000
          0.000000
          1.000000
          0.103371
          2.750000
          3.000000
        136.000000
    
    
      max
          1.000000
          7.000000
         41.000000
          3.681959
         56.000000
         77.000000
        300.000000



In [16]:

    
common_males.median()









    Out[16]:





accepted_rate               0.000000
answers_accepted_total      0.000000
answers_total               0.000000
mean_utility               -0.554213
questions_avg               1.166667
questions_total             1.000000
reputation                111.000000
dtype: float64

Second Question: Are the contributions made by both genders perceived by the community as having the same quality?

Hypothesis 1: Both genders have the same acceptance rate.

H0: acceptanceRate(Males) = acceptanceRate(Females);

H1: acceptanceRate(Males) != acceptanceRate(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we exclude users who didn't post any answers.



In [17]:

    
females_acc_rate = females[females['answers_total'] > 0]['accepted_rate']
males_acc_rate = males[males['answers_total'] > 0]['accepted_rate']

The data's shape



In [18]:

    
show_data_shape(females_acc_rate, males_acc_rate, "norm", 30, "Accepted Rate")









    












    












    












    












    



Levene's test:  0.0745382705961
Skewness for Females:  0.977747918161
Skewness for Males:  0.989887017827

Hypothesis test



In [20]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing accepted rates between females and males.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(females_acc_rate, males_acc_rate)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(females_acc_rate, males_acc_rate)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(females_acc_rate, males_acc_rate)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.60203965179
Two-sample unpaired t-test:  0.500845759825
Two-sample Mann Whitney U test:  0.831373005814

Looking at the top contributors



In [21]:

    
top_females_acc_rate = top_females[top_females['answers_total'] > 0]['accepted_rate']
top_males_acc_rate = top_males[top_males['answers_total'] > 0]['accepted_rate']

The data's shape



In [22]:

    
show_data_shape(top_females_acc_rate, top_males_acc_rate, "expon", 30, "Accepted Rate")









    












    












    












    












    



Levene's test:  0.00128821547976
Skewness for Females:  0.683407631269
Skewness for Males:  0.583352766716

Hypothesis test



In [35]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing accepted rates between top females and top males.
# equal_var=False makes ttest_ind run Welch's t-test, which does not assume
# equal variances in the two groups.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(top_females_acc_rate, top_males_acc_rate)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_acc_rate, top_males_acc_rate, equal_var=False)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(top_females_acc_rate, top_males_acc_rate)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.265989745973
Two-sample unpaired t-test:  0.831296026267
Two-sample Mann Whitney U test:  0.747456900038

Looking at the common contributors



In [24]:

    
common_females_acc_rate = common_females[common_females['answers_total'] > 0]['accepted_rate']
common_males_acc_rate = common_males[common_males['answers_total'] > 0]['accepted_rate']

The data's shape



In [25]:

    
show_data_shape(common_females_acc_rate, common_males_acc_rate, "expon", 30, "Accepted Rate")









    












    












    












    












    



Levene's test:  0.411875597209
Skewness for Females:  1.11503475464
Skewness for Males:  1.19370211812

Hypothesis test



In [26]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing accepted rates between common females and common males.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(common_females_acc_rate, common_males_acc_rate)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(common_females_acc_rate, common_males_acc_rate)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(common_females_acc_rate, common_males_acc_rate)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.939367889999
Two-sample unpaired t-test:  0.411875597209
Two-sample Mann Whitney U test:  0.413618034424

Hypothesis 2: The mean utility of the answer for each user is the same between genders.

H0: meanUtilityBy(Males) = meanUtilityBy(Females);

H1: meanUtilityBy(Males) != meanUtilityBy(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we exclude users who didn't post any answers.



In [27]:

    
females_mean_utility = females['mean_utility'].dropna()
males_mean_utility = males['mean_utility'].dropna()

The data's shape



In [28]:

    
show_data_shape(females_mean_utility, males_mean_utility, "expon", 30, "Mean Utility Answers")









    












    












    












    












    



Levene's test:  0.552537303879
Skewness for Females:  0.849859950013
Skewness for Males:  0.825663202935

Hypothesis test



In [29]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing the per-user mean utility between females and males.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(females_mean_utility, males_mean_utility)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(females_mean_utility, males_mean_utility)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(females_mean_utility, males_mean_utility)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.790025277308
Two-sample unpaired t-test:  0.524550651542
Two-sample Mann Whitney U test:  0.687477529226

Looking at the top contributors



In [30]:

    
top_females_mean_utility = top_females['mean_utility'].dropna()
top_males_mean_utility = top_males['mean_utility'].dropna()

The data's shape



In [31]:

    
show_data_shape(top_females_mean_utility, top_males_mean_utility, "expon", 30, "Mean Utility Answers")









    












    












    












    












    



Levene's test:  0.344568452302
Skewness for Females:  1.19070826049
Skewness for Males:  0.682611124181

Hypothesis test



In [36]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing the per-user mean utility between top females and top males.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(top_females_mean_utility, top_males_mean_utility)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_mean_utility, top_males_mean_utility)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(top_females_mean_utility, top_males_mean_utility)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.845267092916
Two-sample unpaired t-test:  0.919022513862
Two-sample Mann Whitney U test:  0.652550355829

Looking at the common contributors



In [37]:

    
common_females_mean_utility = common_females['mean_utility'].dropna()
common_males_mean_utility = common_males['mean_utility'].dropna()

The data's shape



In [38]:

    
show_data_shape(common_females_mean_utility, common_males_mean_utility, "expon", 30, "Mean Utility Answers")









    












    












    












    












    



Levene's test:  0.327600750414
Skewness for Females:  0.55503753211
Skewness for Males:  0.921351935568

Hypothesis test



In [39]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing the per-user mean utility between common females and common males.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(common_females_mean_utility, common_males_mean_utility)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(common_females_mean_utility, common_males_mean_utility)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(common_females_mean_utility, common_males_mean_utility)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.837714053936
Two-sample unpaired t-test:  0.460718341709
Two-sample Mann Whitney U test:  0.851591120135

Hypothesis 3: The mean of the scores of the questions made by each user doesn't change between genders.

H0: questionScoreMean(Males) = questionScoreMean(Females);

H1: questionScoreMean(Males) != questionScoreMean(Females).

Data

It doesn't make sense to assess the quality of something that hasn't been done, so we exclude users who didn't post any questions.



In [40]:

    
females_questions_mean = females[females['questions_total'] > 0]['questions_avg']
males_questions_mean = males[males['questions_total'] > 0]['questions_avg']

The shape of the data



In [41]:

    
show_data_shape(females_questions_mean, males_questions_mean, "expon", 30, "Mean Score Questions")









    












    












    












    












    



Levene's test:  0.148278324114
Skewness for Females:  20.5260349738
Skewness for Males:  11.6192250068

Hypothesis test



In [42]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing the per-user mean question score between females and males.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(females_questions_mean, males_questions_mean)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(females_questions_mean, males_questions_mean)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(females_questions_mean, males_questions_mean)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.0908173739591
Two-sample unpaired t-test:  0.159608306156
Two-sample Mann Whitney U test:  0.328418967307

Looking at the top contributors



In [43]:

    
top_females_questions_mean = top_females[top_females['questions_total'] > 0]['questions_avg']
top_males_questions_mean = top_males[top_males['questions_total'] > 0]['questions_avg']

The data's shape



In [44]:

    
show_data_shape(top_females_questions_mean, top_males_questions_mean, "expon", 30, "Mean Score Questions")









    












    












    












    












    



Levene's test:  0.384910868319
Skewness for Females:  4.88830181792
Skewness for Males:  9.30506186793

Hypothesis test



In [45]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing the per-user mean question score between top females and top males.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(top_females_questions_mean, top_males_questions_mean)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(top_females_questions_mean, top_males_questions_mean)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(top_females_questions_mean, top_males_questions_mean)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.0338573627755
Two-sample unpaired t-test:  0.221597960377
Two-sample Mann Whitney U test:  0.00586207254643

Looking at the common contributors



In [46]:

    
common_females_questions_mean = common_females[common_females['questions_total'] > 0]['questions_avg']
common_males_questions_mean = common_males[common_males['questions_total'] > 0]['questions_avg']

The data's shape



In [47]:

    
show_data_shape(common_females_questions_mean, common_males_questions_mean, "expon", 30, "Mean Score Questions")









    












    












    












    












    



Levene's test:  0.00213338022459
Skewness for Females:  19.2346597408
Skewness for Males:  4.4053185306

Hypothesis test



In [48]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing the per-user mean question score between common females and
# common males.
# equal_var=False makes ttest_ind run Welch's t-test, which does not assume
# equal variances in the two groups.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(common_females_questions_mean, common_males_questions_mean)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(common_females_questions_mean, common_males_questions_mean, equal_var=False)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(common_females_questions_mean, common_males_questions_mean)[1]









    



Two-sample Kolmogorov-Smirnov test:  0.127606871149
Two-sample unpaired t-test:  0.205704064264
Two-sample Mann Whitney U test:  0.342905385484

Hypothesis 4: Reputation is the same between genders.

H0: reputation(Males) = reputation(Females);

H1: reputation(Males) != reputation(Females).



In [49]:

    
females_reputation = females['reputation']
males_reputation = males['reputation']



In [50]:

    
show_data_shape(females_reputation, males_reputation, "expon", 50, "Reputation")









    












    












    












    












    



Levene's test:  0.0520858308753
Skewness for Females:  19.6422145484
Skewness for Males:  21.8976253218

Hypothesis test



In [51]:

    
# Print the p-value (element [1] of each result) of three two-sample tests
# comparing reputation between females and males.
# NOTE(review): the Mann-Whitney p-value is doubled, presumably to turn a
# one-sided value into a two-sided one -- confirm against the installed SciPy.
print "Two-sample Kolmogorov-Smirnov test: ", stats.ks_2samp(females_reputation, males_reputation)[1]
print "Two-sample unpaired t-test: ", stats.ttest_ind(females_reputation, males_reputation)[1]
print "Two-sample Mann Whitney U test: ",2* stats.mannwhitneyu(females_reputation, males_reputation)[1]









    



Two-sample Kolmogorov-Smirnov test:  7.17077135815e-15
Two-sample unpaired t-test:  0.0460230753578
Two-sample Mann Whitney U test:  8.59848057824e-07

Initialization

Here you can find the data importing and some useful functions used for analysing the data. Please, run this first, otherwise the analysis will not work.

Importing the data from the MongoDB database and inserting it into a pandas DataFrame for easy manipulation.



In [1]:

    
from __future__ import division
import pymongo, time, pylab, numpy, pandas
from scipy import stats
import matplotlib as mpl
from matplotlib import pyplot

%matplotlib inline

# Open the MongoDB server on localhost and pick the `statistics` collection
# of the database named after the community under analysis.
community = 'math'
client = pymongo.MongoClient('localhost', 27017)
stats_db = client[community].statistics

# Keep only users with at least one question, answer or comment.
active_user_query = {
    '$or': [
        {'questions_total': {'$gt': 0}},
        {'answers_total': {'$gt': 0}},
        {'comments_total': {'$gt': 0}},
    ]
}

# Fetch just the fields used by the analysis; the Mongo `_id` is left out.
selected_fields = {
    u'_id': False,
    u'accepted_rate': True,
    u'reputation': True,
    u'questions_avg': True,
    u'answers_total': True,
    u'gender': True,
    u'questions_total': True,
    u'answers_accepted_total': True,
    u'mean_utility': True,
}

cursor = stats_db.find(active_user_query, selected_fields)

# Materialise the cursor once and load every document into a DataFrame.
df = pandas.DataFrame(list(cursor))

# Per-gender views used throughout the notebook.
males = df[df['gender'] == 'Male']
females = df[df['gender'] == 'Female']

Utility functions for plotting.



In [2]:

    
# Restore matplotlib's default rc settings, then apply the 'ggplot' style
# sheet so every figure in the notebook shares the same look.
pyplot.rcdefaults()
mpl.style.use('ggplot')

def histogram(sample1, sample2, bins, aspect):
    """Draw two side-by-side histograms on a new 15x7 figure.

    sample1 goes in the left panel (titled "... by Females") and sample2 in
    the right panel (titled "... by Males").

    Parameters:
        sample1, sample2: iterables of values; each is converted to a list
            before plotting.
        bins: passed straight to ``Axes.hist`` as its bins argument.
        aspect: string used as the prefix of both panel titles.
    """
    _, panels = pyplot.subplots(nrows=1, ncols=2, figsize=(15, 7))
    groups = ((sample1, "Females"), (sample2, "Males"))
    for panel, (values, group) in zip(panels, groups):
        panel.hist(list(values), bins)
        panel.set_title(aspect + " by " + group + " - Histogram")

def pdf_plot(sample1, sample2, aspect):
    """Draw two side-by-side density plots on a new 15x7 figure.

    Each sample's own ``.plot(kind="density")`` method does the drawing, so
    both samples must provide it (the notebook passes pandas Series).
    sample1 goes in the left panel (titled "... by Females") and sample2 in
    the right panel (titled "... by Males").

    Parameters:
        sample1, sample2: objects exposing ``.plot(ax=..., kind="density")``.
        aspect: string used as the prefix of both panel titles.
    """
    _, (left_panel, right_panel) = pyplot.subplots(nrows=1, ncols=2, figsize=(15, 7))

    # .plot() hands back the axes it drew on; titles are set on that result.
    females_density = sample1.plot(ax=left_panel, kind="density")
    males_density = sample2.plot(ax=right_panel, kind="density")

    females_density.set_title(aspect + " by Females - Density")
    males_density.set_title(aspect + " by Males - Density")

    
    
def boxplot(sample1, sample2, aspect):
    """Draw two side-by-side box plots on a new 15x7 figure.

    sample1 goes in the left panel (titled "... by Females") and sample2 in
    the right panel (titled "... by Males").

    Parameters:
        sample1, sample2: iterables of values; each is converted to a list
            before plotting.
        aspect: string used as the prefix of both panel titles.
    """
    _, panels = pyplot.subplots(nrows=1, ncols=2, figsize=(15, 7))
    groups = ((sample1, "Females"), (sample2, "Males"))
    for panel, (values, group) in zip(panels, groups):
        panel.boxplot(list(values))
        panel.set_title(aspect + " by " + group + " - Boxplot")
    

def qq_plot(sample1, sample2, distribution, aspect):
    """Draw two side-by-side probability (Q-Q) plots on a new 15x7 figure.

    sample1 goes in the left panel (titled "... by Females") and sample2 in
    the right panel (titled "... by Males"). Both are compared against the
    same theoretical distribution.

    Parameters:
        sample1, sample2: iterables of values; each is converted to a list
            before being passed to ``scipy.stats.probplot``.
        distribution: distribution name handed to ``probplot`` as ``dist``
            (the notebook uses "norm" and "expon"); also appended to the
            panel titles, so it must be a string.
        aspect: string used as the prefix of both panel titles.
    """
    _, panels = pyplot.subplots(nrows=1, ncols=2, figsize=(15, 7))
    groups = ((sample1, "Females"), (sample2, "Males"))
    for panel, (values, group) in zip(panels, groups):
        # probplot draws through the pyplot module, i.e. on the *current*
        # axes. Make the intended panel current explicitly instead of relying
        # on pyplot.subplot(121/122) resolving to the very same Axes objects
        # that pyplot.subplots created.
        pyplot.sca(panel)
        stats.probplot(list(values), dist=distribution, plot=pyplot)
        # Set after probplot so this title replaces the one probplot writes.
        panel.set_title(aspect + " by " + group + " - QQPlot " + distribution)

Utility functions for describing the data.



In [3]:

    
def describe(sample1, sample2):
    print sample1.describe()
    print "Median: ", sample1.median()
    print 
    print sample2.describe()
    print "Median: ", sample2.median()
    
def show_data_shape(sample1, sample2, dist, bins, aspect):
    pyplot.close('all')
    #histogram
    histogram(sample1, sample2, bins, aspect)

    #PDF
    pdf_plot(sample1, sample2, aspect)

    #QQPlot
    qq_plot(sample1, sample2, dist, aspect)

    #boxplot
    boxplot(sample1,sample2, aspect)
    pyplot.show()

    #Levene
    print "Levene's test: ", stats.levene(sample1, sample2)[1]
    
    #skewness
    print "Skewness for Females: ", stats.skew(sample1)
    print "Skewness for Males: ", stats.skew(sample2)



In [ ]:

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	633.000000	633.000000	633.000000	161.000000	633.000000	633.000000	633.000000
mean	0.128670	2.690363	7.385466	-0.310565	3.486357	9.857820	387.303318
std	0.270712	21.932986	48.434478	0.753054	14.726966	17.905307	1958.974331
min	0.000000	0.000000	0.000000	-2.132227	-1.000000	0.000000	50.000000
25%	0.000000	0.000000	0.000000	-0.916840	0.750000	1.000000	83.000000
50%	0.000000	0.000000	0.000000	-0.453412	1.750000	4.000000	116.000000
75%	0.058824	1.000000	2.000000	0.100167	3.250000	11.000000	198.000000
max	1.000000	508.000000	1057.000000	2.873922	356.000000	178.000000	45388.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	7492.000000	7492.000000	7492.000000	2191.000000	7492.000000	7492.000000	7492.000000
mean	0.144217	8.853043	21.642285	-0.270100	2.778413	6.569541	868.969434
std	0.263682	91.971931	191.770553	0.780461	5.692874	18.395609	6045.880132
min	0.000000	0.000000	0.000000	-1.857143	-12.000000	0.000000	50.000000
25%	0.000000	0.000000	0.000000	-0.918454	0.000000	1.000000	101.000000
50%	0.000000	0.000000	1.000000	-0.464991	1.600000	2.000000	121.000000
75%	0.242058	1.000000	3.000000	0.180160	3.166667	5.000000	242.250000
max	1.000000	4828.000000	8667.000000	3.681959	170.000000	650.000000	221779.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	123.000000	123.000000	123.000000	47.000000	123.000000	123.000000	123.000000
mean	0.280473	12.983740	34.398374	-0.140989	3.851004	25.650407	1524.268293
std	0.277420	48.559915	105.940266	0.831035	5.975449	33.073436	4272.204505
min	0.000000	0.000000	0.000000	-1.372813	0.000000	0.000000	302.000000
25%	0.000000	0.000000	1.000000	-0.775685	1.264245	4.000000	363.500000
50%	0.250000	2.000000	5.000000	-0.190794	2.638298	14.000000	504.000000
75%	0.483160	8.000000	27.500000	0.184591	4.200341	34.000000	1341.500000
max	1.000000	508.000000	1057.000000	2.873922	51.000000	178.000000	45388.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	1683.000000	1683.000000	1683.000000	605.000000	1683.000000	1683.000000	1683.000000
mean	0.312377	38.579323	92.901961	-0.129997	4.761064	19.014854	3451.025550
std	0.224611	191.129956	396.505569	0.704042	9.673287	35.062562	12416.953799
min	0.000000	0.000000	0.000000	-1.837117	0.000000	0.000000	301.000000
25%	0.150000	1.000000	5.000000	-0.659126	1.500000	2.000000	421.500000
50%	0.323232	5.000000	15.000000	-0.228141	2.909091	8.000000	751.000000
75%	0.450000	17.000000	50.000000	0.275737	5.133333	22.000000	1965.000000
max	1.000000	4828.000000	8667.000000	2.468788	170.000000	650.000000	221779.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	510.000000	510.000000	510.000000	114.000000	510.000000	510.000000	510.000000
mean	0.092058	0.207843	0.870588	-0.380477	3.398413	6.049020	113.094118
std	0.256195	0.574454	1.963047	0.710585	16.146064	7.815879	50.373253
min	0.000000	0.000000	0.000000	-2.132227	-1.000000	0.000000	50.000000
25%	0.000000	0.000000	0.000000	-0.995145	0.643908	1.000000	74.000000
50%	0.000000	0.000000	0.000000	-0.500692	1.527778	3.000000	105.000000
75%	0.000000	0.000000	1.000000	0.000000	3.000000	8.000000	136.750000
max	1.000000	4.000000	16.000000	1.414214	356.000000	73.000000	298.000000

	accepted_rate	answers_accepted_total	answers_total	mean_utility	questions_avg	questions_total	reputation
count	5809.000000	5809.000000	5809.000000	1586.000000	5809.000000	5809.000000	5809.000000
mean	0.095497	0.240661	0.996729	-0.323545	2.203993	2.963849	120.888793
std	0.253963	0.657856	2.194680	0.801489	3.637619	4.747850	46.529120
min	0.000000	0.000000	0.000000	-1.857143	-12.000000	0.000000	50.000000
25%	0.000000	0.000000	0.000000	-1.000000	0.000000	1.000000	101.000000
50%	0.000000	0.000000	0.000000	-0.554213	1.166667	1.000000	111.000000
75%	0.000000	0.000000	1.000000	0.103371	2.750000	3.000000	136.000000
max	1.000000	7.000000	41.000000	3.681959	56.000000	77.000000	300.000000