In [1]:
from __future__ import division

import os
import random

import numpy as np
import pandas
import pymongo
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats

%matplotlib inline
# Reset rcParams to matplotlib's defaults, then apply the ggplot style sheet.
plt.rcdefaults()
mpl.style.use('ggplot')

# MongoDB client; the plotting loop reads connection[community]['statistics'].
connection = pymongo.MongoClient('localhost', 27017)

# Top 5 communities by number of users at the time of the dump
# communities = ["stackoverflow", "superuser", "serverfault", "math", "programmers"]
communities = ['ux']

In [12]:
def plot_histogram(female_sample, male_sample, place, name):
    """Draw overlaid, normalised histograms of log(x + 1) for both samples.

    Parameters
    ----------
    female_sample, male_sample : array-like of numbers
        Raw values; each is converted with ``np.array`` and transformed with
        ``log(x + 1)`` before binning.
    place : matplotlib.axes.Axes
        Axes the histograms are drawn on.
    name : str
        Title of the panel.

    Notes
    -----
    Uses ``density=True`` and ``Axes.set_facecolor``; these replace the
    ``normed`` keyword and ``Axes.set_axis_bgcolor``, both of which have been
    removed from matplotlib (requires matplotlib >= 2.1).
    """
    # Fixed unit-width bins on the log scale: edges 0, 1, ..., 9.
    bins = np.arange(10)
    females_log = np.log(np.array(female_sample) + 1)
    males_log = np.log(np.array(male_sample) + 1)

    place.hist(females_log, bins, density=True, label="females", alpha=0.5, color="#000000")
    place.hist(males_log, bins, density=True, label="males", alpha=0.5)
    # Same upper y-limit on every panel so the panels share a vertical scale.
    place.set_ylim(top=1.2)

    legend = place.legend(loc='best')
    frame = legend.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('grey')

    place.set_title(name, fontsize=13, fontweight='bold')
    place.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.7, zorder=0, which="both")
    place.set_facecolor('white')

In [3]:
def plot_histogram2(female_sample, male_sample, place, name):
    """Draw overlaid, normalised histograms of log(x + 1) with data-driven bins.

    Both samples share one set of bin edges, computed from the combined
    log-transformed values, so the two histograms are directly comparable.

    Parameters
    ----------
    female_sample, male_sample : pandas.Series
        Raw values; missing entries are dropped before transforming.
    place : matplotlib.axes.Axes
        Axes the histograms are drawn on.
    name : str
        Title of the panel.

    Notes
    -----
    Uses ``density=True`` and ``Axes.set_facecolor``; these replace the
    ``normed`` keyword and ``Axes.set_axis_bgcolor``, both of which have been
    removed from matplotlib (requires matplotlib >= 2.1).
    """
    females_log = np.log(np.asarray(female_sample.dropna(), dtype=float) + 1)
    males_log = np.log(np.asarray(male_sample.dropna(), dtype=float) + 1)

    # The bin edges must live on the same (log) scale as the plotted values;
    # deriving them from the raw values would put almost everything in the
    # first bin.
    bins = np.histogram(np.hstack([females_log, males_log]))[1]

    place.hist(females_log, bins, density=True, label="females", alpha=0.5, color=mpl.cm.Dark2(0))
    place.hist(males_log, bins, density=True, label="males", alpha=0.5)

    legend = place.legend(loc='best')
    frame = legend.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('grey')

    place.set_title(name, fontsize=13, fontweight='bold')
    place.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.7, zorder=0, which="both")
    place.set_facecolor('white')

In [4]:
def plot_density(female_sample, male_sample, place, name):
    """Plot Gaussian kernel density estimates of both samples on one axes.

    Parameters
    ----------
    female_sample, male_sample : pandas.Series
        Raw values; missing entries are dropped before estimating the density.
    place : matplotlib.axes.Axes
        Axes the curves are drawn on.
    name : str
        Title of the panel.

    Notes
    -----
    ``scipy.stats.gaussian_kde`` is used through its public name (the
    ``scipy.stats.kde`` namespace is deprecated), and ``Axes.set_facecolor``
    replaces the removed ``Axes.set_axis_bgcolor``.
    """
    females_ = female_sample.dropna()
    males_ = male_sample.dropna()

    density_female = stats.gaussian_kde(females_)
    density_male = stats.gaussian_kde(males_)

    # Common evaluation grid: from one unit below the smallest observation up
    # to (but excluding) the largest one, in steps of 0.1.
    lowest = min(females_.min(), males_.min())
    highest = max(females_.max(), males_.max())
    x = np.arange(lowest - 1, highest, .1)

    place.plot(x, density_female(x), label="females", color=mpl.cm.Dark2(0))
    place.plot(x, density_male(x), label="males")

    legend = place.legend(loc='best')
    frame = legend.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('grey')

    place.set_title(name, fontsize=13, fontweight='bold')
    place.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.7, zorder=0, which="both")
    place.set_facecolor('white')

In [5]:
def plot_cumulative(female_sample, male_sample, place, name):
    """Plot cumulative histogram counts of both samples as step curves.

    Each sample is binned independently with ``np.histogram`` (default bin
    count) and the running sum of the bin counts is drawn with ``Axes.step``.

    Parameters
    ----------
    female_sample, male_sample : array-like of numbers
        Raw values; NaN entries are ignored.
    place : matplotlib.axes.Axes
        Axes the curves are drawn on.
    name : str
        Title of the panel.
    """
    # np.histogram cannot auto-detect a bin range when NaNs are present, so
    # missing values are removed first.
    females_ = np.asarray(female_sample, dtype=float)
    females_ = females_[~np.isnan(females_)]
    males_ = np.asarray(male_sample, dtype=float)
    males_ = males_[~np.isnan(males_)]

    values, base = np.histogram(females_)
    cumulative = np.cumsum(values)

    values2, base2 = np.histogram(males_)
    cumulative2 = np.cumsum(values2)

    place.set_xlim(left=-1, right=max(base.max(), base2.max()))
    # NOTE(review): the running counts are anchored at the left bin edges
    # (base[:-1]); confirm this is intended rather than the right edges.
    place.step(base[:-1], cumulative, label="females", color=mpl.cm.Dark2(0))
    place.step(base2[:-1], cumulative2, label="males")

    legend = place.legend(loc='best')
    frame = legend.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('grey')

    place.set_title(name, fontsize=13, fontweight='bold')
    place.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.7, zorder=0, which="both")
    place.set_facecolor('white')

In [6]:
def plot_regular_histogram(female_sample, male_sample, place, name):
    """Draw overlaid count histograms of the raw (untransformed) samples.

    Bins are left to matplotlib's defaults and are chosen independently for
    each sample; bar heights are absolute counts.

    Parameters
    ----------
    female_sample, male_sample : array-like of numbers
        Raw values, converted with ``np.array`` before plotting.
    place : matplotlib.axes.Axes
        Axes the histograms are drawn on.
    name : str
        Title of the panel.

    Notes
    -----
    ``Axes.set_facecolor`` replaces ``Axes.set_axis_bgcolor``, which has been
    removed from matplotlib.
    """
    place.hist(np.array(female_sample), label="females", alpha=0.5, color=mpl.cm.Dark2(0))
    place.hist(np.array(male_sample), label="males", alpha=0.5)

    legend = place.legend(loc='best')
    frame = legend.get_frame()
    frame.set_facecolor('white')
    frame.set_edgecolor('grey')

    place.set_title(name, fontsize=13, fontweight='bold')
    place.grid(color='grey', linestyle='-.', linewidth=0.5, alpha=0.7, zorder=0, which="both")
    place.set_facecolor('white')

In [13]:
# For every community: load the per-user statistics from MongoDB, split them
# by gender and save a 4x2 grid of female-vs-male comparison plots as a PDF.
for community in communities:

    community_db = connection[community]['statistics']
    # Users with at least one contribution and a gender other than "Unknown";
    # the projection limits the documents to the listed statistics fields.
    cursor = community_db.find({'contributions_total': {'$gt':0}, 'gender': {'$ne': "Unknown"}},
                       {u'_id': False, u'gender':True,
                       'questions_total':True,'answers_total':True,'comments_total':True,'contributions_total':True,
                       'accepted_rate':True,'mean_utility':True,'questions_avg':True,
                       'lifetime':True,'activity_freq':True,})

    df = pandas.DataFrame(list(cursor))
    if df.empty:
        # No matching users: the frame has no 'gender' column, so the
        # queries below would raise. Nothing to plot for this community.
        continue
    females = df.query("gender == 'Female'")
    males = df.query("gender == 'Male'")

    fig, axes = plt.subplots(nrows=4, ncols=2, figsize=(10, 15), dpi=300)
    fig.suptitle(community, fontsize=15, fontweight='bold')
    fig.tight_layout()
    # Extra spacing between panels and room at the top for the suptitle.
    fig.subplots_adjust(hspace=.4, wspace=0.3, top=0.93)

    # Count-like metrics: log-scaled histograms.
    plot_histogram(females['questions_total'], males['questions_total'], axes[0][0], u"Número de Perguntas")
    plot_histogram(females['answers_total'], males['answers_total'], axes[0][1], u"Número de Respostas")
    plot_histogram(females['comments_total'], males['comments_total'], axes[1][0], u"Número de Comentários")
    plot_histogram(females['contributions_total'], males['contributions_total'], axes[1][1], u"Número de Contribuições")
    # Rate/average metrics: kernel density estimates.
    plot_density(females['accepted_rate'], males['accepted_rate'], axes[2][0], u"Taxa de Aceitação")
    plot_density(females['mean_utility'], males['mean_utility'], axes[2][1], u"Utilidade Média")
    plot_density(females['questions_avg'], males['questions_avg'], axes[3][0], u"Média dos Votos das Perguntas")
    plot_density(females['activity_freq'], males['activity_freq'], axes[3][1], u"Frequência de Atividade")

    # Create the output directory on a fresh checkout instead of failing.
    output_dir = "images"
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    # Save and close this figure explicitly rather than pyplot's "current"
    # figure, so the loop cannot touch an unrelated figure or leak one.
    fig.savefig(os.path.join(output_dir, community + ".pdf"), format="pdf")
    plt.close(fig)

In [18]:
# Exploratory call: prints matplotlib's colormap reference (output below).
# Nothing else in the notebook uses its result.
help(plt.colormaps)


Help on function colormaps in module matplotlib.pyplot:

colormaps()
    Matplotlib provides a number of colormaps, and others can be added using
    :func:`~matplotlib.cm.register_cmap`.  This function documents the built-in
    colormaps, and will also return a list of all registered colormaps if called.
    
    You can set the colormap for an image, pcolor, scatter, etc,
    using a keyword argument::
    
      imshow(X, cmap=cm.hot)
    
    or using the :func:`set_cmap` function::
    
      imshow(X)
      pyplot.set_cmap('hot')
      pyplot.set_cmap('jet')
    
    In interactive mode, :func:`set_cmap` will update the colormap post-hoc,
    allowing you to see which one works best for your data.
    
    All built-in colormaps can be reversed by appending ``_r``: For instance,
    ``gray_r`` is the reverse of ``gray``.
    
    There are several common color schemes used in visualization:
    
    Sequential schemes
      for unipolar data that progresses from low to high
    Diverging schemes
      for bipolar data that emphasizes positive or negative deviations from a
      central value
    Cyclic schemes
      meant for plotting values that wrap around at the
      endpoints, such as phase angle, wind direction, or time of day
    Qualitative schemes
      for nominal data that has no inherent ordering, where color is used
      only to distinguish categories
    
    The base colormaps are derived from those of the same name provided
    with Matlab:
    
      =========   =======================================================
      Colormap    Description
      =========   =======================================================
      autumn      sequential linearly-increasing shades of red-orange-yellow
      bone        sequential increasing black-white color map with
                  a tinge of blue, to emulate X-ray film
      cool        linearly-decreasing shades of cyan-magenta
      copper      sequential increasing shades of black-copper
      flag        repetitive red-white-blue-black pattern (not cyclic at
                  endpoints)
      gray        sequential linearly-increasing black-to-white
                  grayscale
      hot         sequential black-red-yellow-white, to emulate blackbody
                  radiation from an object at increasing temperatures
      hsv         cyclic red-yellow-green-cyan-blue-magenta-red, formed
                  by changing the hue component in the HSV color space
      jet         a spectral map with dark endpoints, blue-cyan-yellow-red;
                  based on a fluid-jet simulation by NCSA [#]_
      pink        sequential increasing pastel black-pink-white, meant
                  for sepia tone colorization of photographs
      prism       repetitive red-yellow-green-blue-purple-...-green pattern
                  (not cyclic at endpoints)
      spring      linearly-increasing shades of magenta-yellow
      summer      sequential linearly-increasing shades of green-yellow
      winter      linearly-increasing shades of blue-green
      =========   =======================================================
    
    For the above list only, you can also set the colormap using the
    corresponding pylab shortcut interface function, similar to Matlab::
    
      imshow(X)
      hot()
      jet()
    
    The next set of palettes are from the `Yorick scientific visualisation
    package <http://yorick.sourceforge.net/index.php>`_, an evolution of
    the GIST package, both by David H. Munro:
    
      ============  =======================================================
      Colormap      Description
      ============  =======================================================
      gist_earth    mapmaker's colors from dark blue deep ocean to green
                    lowlands to brown highlands to white mountains
      gist_heat     sequential increasing black-red-orange-white, to emulate
                    blackbody radiation from an iron bar as it grows hotter
      gist_ncar     pseudo-spectral black-blue-green-yellow-red-purple-white
                    colormap from National Center for Atmospheric
                    Research [#]_
      gist_rainbow  runs through the colors in spectral order from red to
                    violet at full saturation (like *hsv* but not cyclic)
      gist_stern    "Stern special" color table from Interactive Data
                    Language software
      ============  =======================================================
    
    The following colormaps are based on the `ColorBrewer
    <http://colorbrewer.org>`_ color specifications and designs developed by
    Cynthia Brewer:
    
    ColorBrewer Diverging (luminance is highest at the midpoint, and
    decreases towards differently-colored endpoints):
    
      ========  ===================================
      Colormap  Description
      ========  ===================================
      BrBG      brown, white, blue-green
      PiYG      pink, white, yellow-green
      PRGn      purple, white, green
      PuOr      orange, white, purple
      RdBu      red, white, blue
      RdGy      red, white, gray
      RdYlBu    red, yellow, blue
      RdYlGn    red, yellow, green
      Spectral  red, orange, yellow, green, blue
      ========  ===================================
    
    ColorBrewer Sequential (luminance decreases monotonically):
    
      ========  ====================================
      Colormap  Description
      ========  ====================================
      Blues     white to dark blue
      BuGn      white, light blue, dark green
      BuPu      white, light blue, dark purple
      GnBu      white, light green, dark blue
      Greens    white to dark green
      Greys     white to black (not linear)
      Oranges   white, orange, dark brown
      OrRd      white, orange, dark red
      PuBu      white, light purple, dark blue
      PuBuGn    white, light purple, dark green
      PuRd      white, light purple, dark red
      Purples   white to dark purple
      RdPu      white, pink, dark purple
      Reds      white to dark red
      YlGn      light yellow, dark green
      YlGnBu    light yellow, light green, dark blue
      YlOrBr    light yellow, orange, dark brown
      YlOrRd    light yellow, orange, dark red
      ========  ====================================
    
    ColorBrewer Qualitative:
    
    (For plotting nominal data, :class:`ListedColormap` should be used,
    not :class:`LinearSegmentedColormap`.  Different sets of colors are
    recommended for different numbers of categories.  These continuous
    versions of the qualitative schemes may be removed or converted in the
    future.)
    
    * Accent
    * Dark2
    * Paired
    * Pastel1
    * Pastel2
    * Set1
    * Set2
    * Set3
    
    Other miscellaneous schemes:
    
      ============= =======================================================
      Colormap      Description
      ============= =======================================================
      afmhot        sequential black-orange-yellow-white blackbody
                    spectrum, commonly used in atomic force microscopy
      brg           blue-red-green
      bwr           diverging blue-white-red
      coolwarm      diverging blue-gray-red, meant to avoid issues with 3D
                    shading, color blindness, and ordering of colors [#]_
      CMRmap        "Default colormaps on color images often reproduce to
                    confusing grayscale images. The proposed colormap
                    maintains an aesthetically pleasing color image that
                    automatically reproduces to a monotonic grayscale with
                    discrete, quantifiable saturation levels." [#]_
      cubehelix     Unlike most other color schemes cubehelix was designed
                    by D.A. Green to be monotonically increasing in terms
                    of perceived brightness. Also, when printed on a black
                    and white postscript printer, the scheme results in a
                    greyscale with monotonically increasing brightness.
                    This color scheme is named cubehelix because the r,g,b
                    values produced can be visualised as a squashed helix
                    around the diagonal in the r,g,b color cube.
      gnuplot       gnuplot's traditional pm3d scheme
                    (black-blue-red-yellow)
      gnuplot2      sequential color printable as gray
                    (black-blue-violet-yellow-white)
      ocean         green-blue-white
      rainbow       spectral purple-blue-green-yellow-orange-red colormap
                    with diverging luminance
      seismic       diverging blue-white-red
      nipy_spectral black-purple-blue-green-yellow-red-white spectrum,
                    originally from the Neuroimaging in Python project
      terrain       mapmaker's colors, blue-green-yellow-brown-white,
                    originally from IGOR Pro
      ============= =======================================================
    
    The following colormaps are redundant and may be removed in future
    versions.  It's recommended to use the names in the descriptions
    instead, which produce identical output:
    
      =========  =======================================================
      Colormap   Description
      =========  =======================================================
      gist_gray  identical to *gray*
      gist_yarg  identical to *gray_r*
      binary     identical to *gray_r*
      spectral   identical to *nipy_spectral* [#]_
      =========  =======================================================
    
    .. rubric:: Footnotes
    
    .. [#] Rainbow colormaps, ``jet`` in particular, are considered a poor
      choice for scientific visualization by many researchers: `Rainbow Color
      Map (Still) Considered Harmful
      <http://www.jwave.vt.edu/%7Erkriz/Projects/create_color_table/color_07.pdf>`_
    
    .. [#] Resembles "BkBlAqGrYeOrReViWh200" from NCAR Command
      Language. See `Color Table Gallery
      <http://www.ncl.ucar.edu/Document/Graphics/color_table_gallery.shtml>`_
    
    .. [#] See `Diverging Color Maps for Scientific Visualization
      <http://www.cs.unm.edu/~kmorel/documents/ColorMaps/>`_ by Kenneth
      Moreland.
    
    .. [#] See `A Color Map for Effective Black-and-White Rendering of
      Color-Scale Images
      <http://www.mathworks.com/matlabcentral/fileexchange/2662-cmrmap-m>`_
      by Carey Rappaport
    
    .. [#] Changed to distinguish from ColorBrewer's *Spectral* map.
      :func:`spectral` still works, but
      ``set_cmap('nipy_spectral')`` is recommended for clarity.


In [ ]: