In [2]:
from __future__ import division
import pymongo, pandas, random
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

In [3]:
# Connect to the MongoDB server on this machine; every database that is not
# explicitly excluded below is treated as one community.
connection = pymongo.MongoClient('localhost', 27017)

# PyMongo 3.6 deprecated MongoClient.database_names() and PyMongo 4.0 removed
# it in favour of list_database_names(). Probe the class, not the instance:
# attribute access on a MongoClient instance falls back to returning a
# Database object, so hasattr() on the instance cannot tell the two apart.
if hasattr(pymongo.MongoClient, 'list_database_names'):
    database_names = connection.list_database_names()
else:
    database_names = connection.database_names()

# Databases that must not be processed as communities.
excluded = ["gender", "admin", "local", "visualizations", "results"]
communities = [name for name in database_names if name not in excluded]

In [4]:
# Result schema: the community name, then for every contribution kind the
# overall, male and female totals (in that order).
columns = ['community'] + list(
    '{0}_{1}'.format(group, kind)
    for kind in ('contrib', 'questions', 'answers', 'comments')
    for group in ('total', 'males', 'females')
)

# Empty frame carrying only the schema; rows are added per community later.
results = pandas.DataFrame(columns=columns)

In [12]:
# Per-user counters read from each community's `statistics` collection,
# paired with the suffix used for them in the result column names.
counters = [('contributions_total', 'contrib'),
            ('questions_total', 'questions'),
            ('answers_total', 'answers'),
            ('comments_total', 'comments')]
counter_fields = [field for field, _ in counters]

# Collect one record per community and build the frame once at the end.
# Growing `results` with DataFrame.append() inside the loop is quadratic,
# duplicates rows whenever this cell is re-run, and no longer works on
# pandas >= 2.0 where DataFrame.append() was removed.
records = []

for community in communities:
    community_db = connection[community]['statistics']

    # Users with at least one contribution whose gender is not "Unknown";
    # only the gender and the four counters are fetched.
    cursor = community_db.find({'contributions_total': {'$gt': 0}, 'gender': {'$ne': "Unknown"}},
                               {u'_id': False, u'gender': True, 'comments_total': True,
                                'questions_total': True, 'answers_total': True,
                                'contributions_total': True})

    # Pinning the columns keeps the frame well-formed even when no document
    # matches; otherwise the frame has no `gender` column and the filters
    # below raise instead of producing zero totals.
    df = pandas.DataFrame(list(cursor), columns=['gender'] + counter_fields)

    males = df[df['gender'] == "Male"]
    females = df[df['gender'] == "Female"]

    # Sum only the numeric counters: `gender` is text and must not take part
    # in the reduction.
    sum_total = df[counter_fields].sum()
    sum_males = males[counter_fields].sum()
    sum_females = females[counter_fields].sum()

    result = {'community': community}
    for field, suffix in counters:
        result['total_' + suffix] = sum_total[field]
        result['males_' + suffix] = sum_males[field]
        result['females_' + suffix] = sum_females[field]

    records.append(result)

# Rebuild `results` from scratch so the cell is idempotent; `columns` fixes
# the column order and keeps the schema when there are no communities.
results = pandas.DataFrame(records, columns=columns)

In [13]:
# Display the `results` frame (one row per community) as the cell output.
results


Out[13]:
community total_contrib males_contrib females_contrib total_questions males_questions females_questions total_answers males_answers females_answers total_comments males_comments females_comments
0 academia 21771 20760 1011 1284 1155 129 5356 5100 256 15131 14505 626
1 android 34555 33451 1104 5243 4966 277 8435 8209 226 20877 20276 601
2 anime 4206 3249 957 590 503 87 1068 847 221 2548 1899 649
3 apple 72093 70252 1841 11205 10874 331 19366 18877 489 41523 40502 1021
4 arduino 2144 2078 66 217 205 12 461 455 6 1466 1418 48
5 askubuntu 195472 187254 8218 24377 23064 1313 52563 50426 2137 118533 113765 4768
6 astronomy 2340 2251 89 249 225 24 658 633 25 1433 1393 40
7 aviation 3844 3834 10 459 457 2 718 715 3 2667 2662 5
8 beer 563 517 46 83 76 7 175 166 9 305 275 30
9 bicycles 18444 18236 208 1370 1332 38 4277 4241 36 12797 12663 134
10 biology 8458 7751 707 1547 1409 138 2116 1925 191 4795 4417 378
11 bitcoin 14683 14379 304 1477 1435 42 4527 4447 80 8679 8497 182
12 blender 5128 4698 430 839 734 105 1169 1102 67 3120 2862 258
13 boardgames 10063 9836 227 1464 1411 53 2753 2705 48 5846 5720 126
14 bricks 1107 994 113 225 193 32 308 289 19 574 512 62
15 buddhism 784 733 51 84 83 1 273 246 27 427 404 23
16 chemistry 7580 7010 570 870 768 102 2026 1850 176 4684 4392 292
17 chess 4265 4228 37 416 402 14 1270 1264 6 2579 2562 17
18 chinese 3862 3524 338 411 385 26 986 870 116 2465 2269 196
19 christianity 26749 26054 695 1757 1694 63 5643 5555 88 19350 18806 544
20 codegolf 26757 26186 571 614 589 25 6513 6374 139 19630 19223 407
21 codereview 43193 41722 1471 4093 3939 154 10636 10248 388 28464 27535 929
22 cogsci 6078 5682 396 663 628 35 1270 1173 97 4145 3881 264
23 cooking 22438 18532 3906 2894 2404 490 6771 5519 1252 12773 10609 2164
24 craftcms 3296 2954 342 456 434 22 959 864 95 1881 1656 225
25 crypto 11082 10905 177 946 880 66 2248 2228 20 7888 7797 91
26 cs 19964 19621 343 1508 1415 93 3724 3688 36 14732 14518 214
27 cstheory 34534 33777 757 2445 2366 79 6284 6127 157 25805 25284 521
28 datascience 633 598 35 65 60 5 229 223 6 339 315 24
29 dba 65024 62739 2285 5672 5451 221 16497 15879 618 42855 41409 1446
... ... ... ... ... ... ... ... ... ... ... ... ... ...
95 scifi 55143 50768 4375 4472 3846 626 11368 10557 811 39303 36365 2938
96 security 41842 40996 846 3700 3507 193 13421 13276 145 24721 24213 508
97 serverfault 361823 349856 11967 47117 45350 1767 117128 113657 3471 197582 190853 6729
98 sharepoint 79604 73901 5703 10828 10072 756 27339 25188 2151 41437 38641 2796
99 skeptics 18475 17191 1284 1649 1475 174 1870 1717 153 14956 13999 957
100 softwarerecs 5610 5514 96 893 866 27 1386 1364 22 3331 3284 47
101 sound 18826 16735 2091 1738 1432 306 8041 7647 394 9047 7656 1391
102 space 3393 3323 70 370 359 11 749 731 18 2274 2233 41
103 spanish 4963 4567 396 502 483 19 1465 1345 120 2996 2739 257
104 sports 2725 2632 93 488 465 23 782 765 17 1455 1402 53
105 sqa 6420 5432 988 524 484 40 2547 2139 408 3349 2809 540
106 stackapps 6243 6106 137 705 681 24 1201 1179 22 4337 4246 91
107 stackoverflow 18312894 17424936 887958 1840501 1707503 132998 5325192 5112304 212888 11147249 10605176 542073
108 startups 446 438 8 59 59 0 167 164 3 220 215 5
109 stats 75669 71082 4587 7705 6994 711 18769 17940 829 49198 46151 3047
110 superuser 386044 372498 13546 54094 51691 2403 108369 104822 3547 223581 215985 7596
111 sustainability 1417 1360 57 149 145 4 341 312 29 927 903 24
112 tex 239231 228409 10822 19358 17904 1454 54507 52543 1964 165367 157963 7404
113 tor 1403 1139 264 160 125 35 474 418 56 769 596 173
114 travel 23800 22394 1406 2112 2016 96 6163 5654 509 15525 14724 801
115 tridion 9381 9201 180 967 916 51 2769 2750 19 5645 5535 110
116 unix 98138 95731 2407 11222 10813 409 25565 25003 562 61351 59915 1436
117 ux 52141 46822 5319 4320 3955 365 16662 15040 1622 31160 27828 3332
118 video 2279 2218 61 429 413 16 587 573 14 1263 1232 31
119 webapps 22663 21706 957 3913 3733 180 7240 6878 362 11511 11096 415
120 webmasters 30991 29475 1516 4528 4172 356 10700 10271 429 15768 15037 731
121 windowsphone 3533 3416 117 462 437 25 895 874 21 2176 2105 71
122 wordpress 68941 64267 4674 9463 8666 797 18023 17040 983 41456 38562 2894
123 workplace 18669 15334 3335 915 782 133 5504 4467 1037 12251 10086 2165
124 writers 11446 8289 3157 839 777 62 3546 2481 1065 7061 5031 2030

125 rows × 13 columns


In [14]:
# Persist the per-community totals as CSV.
# NOTE(review): this is an absolute, machine-specific location; adjust it
# before running the notebook anywhere else.
output_path = "/Users/milena/Desktop/proportions_contrib.csv"
results.to_csv(output_path)

In [ ]: