Import Needed Modules


In [1]:
from __future__ import division, absolute_import

%matplotlib inline
import astropy.stats
import glob
from math import exp
import matplotlib.pyplot as plt 
from matplotlib import ticker
from matplotlib.ticker import FormatStrFormatter
import numpy as np 
import os
import pandas as pd
from scipy import optimize,spatial

Define Needed Functions

Index Function

Creates an array with each element referring to a specific file, in this case, the different mock catalogs.


In [2]:
__author__     =['Victor Calderon']
__copyright__  =["Copyright 2016 Victor Calderon, Index function"]
__email__      =['victor.calderon@vanderbilt.edu']
__maintainer__ =['Victor Calderon']

def Index(directory, datatype):
    """
    Indexes the files in a directory `directory' with a
    specific data type.

    Parameters
    ----------
    directory: str
            Absolute path to the folder that is indexed.

    datatype: str
            Data type of the files to be indexed in the folder.

    Returns
    -------
    file_array: array_like 
            np.array of indexed files in the folder 'directory' 
            with specific datatype.

    Examples
    --------
    >>> Index('~/data', '.txt')
    >>> array(['A.txt', 'Z'.txt', ...])
    """
    assert(os.path.exists(directory))
    files = np.array(glob.glob('{0}/*{1}'.format(directory, datatype)))

    return files

Covert Spherical Coordinates into Cartestian

Assuming the mock catalogs give the right ascension and declination values in degrees, this converts them to radians and then gives values in 3D Cartesian space. Assuming cz values are given in km/s, this divides by the Hubble constant to give the distance in Mpc, and then converts them to 3D Cartesian space.


In [3]:
def sph_to_cart(ra,dec,cz):
    """
    Converts spherical coordinates to Cartesian coordinates.

    Parameters
    ----------
    ra: array-like
        right-ascension of galaxies in degrees
    dec: array-like
        declination of galaxies in degrees
    cz: array-like
        velocity of galaxies in km/s

    Returns
    -------
    coords: array-like, shape = N by 3
        x, y, and z coordinates
    """
    cz_dist = cz/70. #converts velocity into distance
    x_arr   = cz_dist*np.cos(np.radians(ra))*np.cos(np.radians(dec))
    y_arr   = cz_dist*np.sin(np.radians(ra))*np.cos(np.radians(dec))
    z_arr   = cz_dist*np.sin(np.radians(dec))
    coords  = np.column_stack((x_arr,y_arr,z_arr))

    return coords

Calculating Nth Nearest Neighbor Sphere Density

This returns the densities, which will eventually be ranked and used to determine the density-cut, mass histogram ratios.


In [4]:
def calc_dens(n_val,r_val):
    """
    Returns densities of spheres with radius being the distance to the 
        nth nearest neighbor.

    Parameters
    ----------
    n_val = integer
        The 'N' from Nth nearest neighbor
    r_val = array-like
        An array with the distances to the Nth nearest neighbor for
        each galaxy

    Returns
    -------
    dens: array-like
        An array with the densities of the spheres created with radii
        to the Nth nearest neighbor.
    """
    dens = np.array([(3.*(n_val+1)/(4.*np.pi*r_val[hh]**3)) \
                     for hh in range(len(r_val))])

    return dens

Calculating Stellar Mass Function and Density-Based, Mass Ratios

This returns the values needed to plot the stellar mass function, with errorbars for ECO, and the three mass ratios corresponding to each NN value, also with errors for ECO.


In [5]:
def plot_calcs(mass,bins,dlogM,mass_err=False,ratio_err=False):
    """
    Returns values for plotting the stellar mass function and 
        mass ratios

    Parameters
    ----------
    mass: array-like
        A 1D array with mass values
    bins: array=like
        A 1D array with the values which will be used as the bin edges
        by the histogram function
    dlogM: float-like
        The log difference between bin edges
    
    Optional
    --------
    mass_err  == True
        Calculates the Poisson errors on the stellar mass function.
        Returns mass_freq as a list with 2 array elements, the first being
        the stellar mass function values, the second being the errors.
    ratio_err == True
        Calculates the Poisson errors on the density-based, mass ratios.
        Creates empty list and appends ratio error arrays to it as they 
        are generated. Returns ratio_dict as a list. The first element is 
        a dictionary with the ratio values to be plotted. The second is a
        three element list. Each element is an array with the error values 
        for each of the three density-cut ratios.

    Returns
    -------
    bin_centers: array-like
        An array with the medians mass values of the mass bins
    mass-freq: array-like
        Contains the number density values of each mass bin
    ratio_dict: dictionary-like
        A dictionary with three keys, corresponding to the divisors
        2,4, and 10 (as the percentile cuts are based on these 
        divisions). Each key has the density-cut, mass ratios for
        that specific cut (50/50 for 2; 25/75 for 4; 10/90 for 10).
    """

    mass_counts, edges = np.histogram(mass,bins)
    bin_centers        = 0.5*(edges[:-1]+edges[1:])

    mass_freq  = mass_counts/float(len(mass))/dlogM

    ratio_dict = {}
    frac_val   = [2,4,10]
    
    if ratio_err == True:
        yerr = []

    for ii in frac_val:
        ratio_dict[ii] = {}

        # Calculations for the lower density cut
        frac_data      = int(len(mass)/ii)
        frac_mass      = mass[0:frac_data]
        counts, edges  = np.histogram(frac_mass,bins)

        # Calculations for the higher density cut
        frac_mass_2       = mass[-frac_data:]
        counts_2, edges_2 = np.histogram(frac_mass_2,bins)

        # Ratio determination
        ratio_counts   = (1.*counts_2)/(1.*counts)
        ratio_dict[ii] = ratio_counts

        if ratio_err == True:
            yerr.append((counts_2*1.)/(counts*1.)*np.sqrt(1./counts + 1./counts_2))

    if mass_err == True:
        mass_freq_list     = [[] for xx in xrange(2)]
        mass_freq_list[0]  = mass_freq
        mass_freq_list[1]  = np.sqrt(mass_counts)/float(len(mass))/dlogM
        mass_freq          = np.array(mass_freq_list)

    if ratio_err == True:
        ratio_dict_list    = [[] for xx in range(2)]
        ratio_dict_list[0] = ratio_dict
        ratio_dict_list[1] = yerr
        ratio_dict         = ratio_dict_list

    return bin_centers, mass_freq, ratio_dict

This returns the median distance to the Nth nearest neighbor considering all the galaxies in each bin. Bootstrap errors are also calculated if specified (for ECO). They will not always calculate correctly, as some of the mocks do not have galaxies in every bin.


In [6]:
def bin_func(mass_dist,bins,kk,bootstrap=False):
    """
    Returns median distance to Nth nearest neighbor

    Parameters
    ----------
    mass_dist: array-like
        An array with mass values in at index 0 (when transformed) and distance to the Nth 
        nearest neighbor in the others
        Example: 6239 by 7
            Has mass values and distances to 6 Nth nearest neighbors   
    bins: array=like
        A 1D array with the values which will be used as the bin edges
        by the histogram function
    kk: integer-like
        The index of mass_dist (transformed) where the appropriate distance array may be found

    Optional
    --------
    bootstrap == True
        Calculates the bootstrap errors associated with each median distance value.
        Creates an array housing arrays containing the actual distance values
        associated with every galaxy in a specific bin. Bootstrap error is then 
        performed using astropy, and upper and lower one sigma values are found
        for each median value.  These are added to a list with the median distances, 
        and then converted to an array and returned in place of just 'medians.'

    Returns
    -------
    medians: array-like
        An array with the median distance to the Nth nearest neighbor from 
        all the galaxies in each of the bins
    """
    edges        = bins
    digitized    = np.digitize(mass_dist.T[0],edges)
    digitized   -= int(1)

    bin_nums     = np.unique(digitized)
    bin_nums     = list(bin_nums)
#necessary measure, as some catalogs have galaxies beyond the upper edge
#of the mass bins I chose. need to keep the dimensions the same for plotting
    if 14 in bin_nums:
        bin_nums.remove(14)
        bin_nums = np.array(bin_nums)
    
    medians      = np.array([np.median(mass_dist.T[kk][digitized==ii]) for ii in bin_nums])

    if bootstrap == True:
        dist_in_bin    = np.array([(mass_dist.T[kk][digitized==ii]) for ii in bin_nums])
#the bootstrap will not work if there are no samples in a bin
#don't have this problem currently, based on the binning, but ECO is sensitive to this
#this populates the empty bin with zeros for where each expected value would have been
        for vv in range(len(dist_in_bin)):
            if len(dist_in_bin[vv]) == 0:
                dist_in_bin_list = list(dist_in_bin[vv])
                dist_in_bin[vv]  = np.zeros(len(dist_in_bin[0]))
        low_err_test   = np.array([np.percentile(astropy.stats.bootstrap\
                        (dist_in_bin[vv],bootnum=1000,bootfunc=np.median),16) for vv\
                                     in range(len(dist_in_bin))])
        high_err_test  = np.array([np.percentile(astropy.stats.bootstrap\
                        (dist_in_bin[vv],bootnum=1000,bootfunc=np.median),84) for vv\
                                     in range(len(dist_in_bin))])

        med_list       = [[] for yy in range(len(frac_vals))]
        med_list[0]    = medians
        med_list[1]    = low_err_test
        med_list[2]    = high_err_test
        medians        = np.array(med_list)

    return medians

Calculating Histogram Values, From Which the Ratios Are Calculated

This function is for after the fact creating dictionaries with the counts used in the histogram plots. If I were more resourceful and less scared of breaking my code, I could incorporate this into the plot_calc function. Incidentally, this is not for plotting directly, but for eventually figuring out the max and min values for the fill_between.


In [7]:
def hist_calcs(mass,bins,dlogM,eco=False):
    """
    Returns dictionaries with the counts for the upper
        and lower density portions; calculates the 
        three different percentile cuts for each mass
        array given
    
    Parameters
    ----------
    mass: array-like
        A 1D array with log stellar mass values, assumed
        to be an order which corresponds to the ascending 
        densities; (necessary, as the index cuts are based 
        on this)
    bins: array-like
        A 1D array with the values which will be used as the bin edges   
    dlogM: float-like
        The log difference between bin edges
        
    Returns
    -------
    hist_dict_low: dictionary-like
        A dictionary with three keys (the frac vals), with arrays
        as values. The values for the lower density cut
    hist_dict_high: dictionary like
        A dictionary with three keys (the frac vals), with arrays
        as values. The values for the higher density cut
    """
    hist_dict_low  = {}
    hist_dict_high = {}
    frac_val  = np.array([2,4,10])
    frac_dict = {2:0,4:1,10:2}
    
    if eco == True:
        low_err   = [[] for xx in xrange(len(frac_val))]
        high_err  = [[] for xx in xrange(len(frac_val))]
    
    for ii in frac_val:
        hist_dict_low[ii]  = {}
        hist_dict_high[ii] = {}
    
        frac_data     = int(len(mass)/ii)
        
        frac_mass     = mass[0:frac_data]
        counts, edges = np.histogram(frac_mass,bins)
        low_counts    = (counts/float(len(frac_mass))/dlogM)
        
        if eco == True:
            low_err[frac_dict[ii]]    = np.sqrt(counts)/len(frac_mass)/dlogM
        
        hist_dict_low[ii]  = low_counts

        frac_mass_2        = mass[-frac_data:]
        counts_2, edges_2  = np.histogram(frac_mass_2,bins)
        high_counts        = (counts_2/float(len(frac_mass_2))/dlogM)
        
        if eco == True:
            high_err[frac_dict[ii]]   = np.sqrt(counts_2)/len(frac_mass_2)/dlogM
        
        hist_dict_high[ii] = high_counts
        
    if eco == True:
        hist_dict_low['low_err']   = low_err
        hist_dict_high['high_err'] = high_err
    
    return hist_dict_low, hist_dict_high

Plotting Density-Cut, Mass Ratios

Plots ratios to a previously initialized figure. Works optimally in a strong for-loop.


In [8]:
def plot_all_rats(bin_centers,y_vals,neigh_val,ax,col_num,plot_idx):
    """
    Returns a plot showing the density-cut, mass ratio.  Optimally
        used with a well-initiated for-loop
        
    Parameters
    ----------
    bin_centers: array-like
        An array with the medians mass values of the mass bins
    y_vals: array-like
        An array containing the ratio values for each mass bin
    neigh_val: integer-like
        Value which will be inserted into the text label of each plot
    ax: axis-like
        A value which specifies which axis each subplot is to be 
        plotted to
    col_num: integer-like
        Integer which specifies which column is currently being 
        plotted. Used for labelling subplots
    plot_idx: integer-like
        Specifies which subplot the figure is plotted to. Used for
        labeling the x-axis
        
    Returns
    -------
    Figure with three subplots showing appropriate ratios
    """
    if plot_idx     ==16:
        ax.set_xlabel('$\log\ M_{*}$',fontsize=18)
    if col_num      ==0:
        title_label = 'Mass Ratio 50/50, {0} NN'.format(neigh_val)
        frac_val    = 10
        ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                verticalalignment='top',transform=ax.transAxes,fontsize=12)
    elif col_num    ==1:
        title_label = 'Mass Ratio 25/75, {0} NN'.format(neigh_val)
        frac_val    = 4
        ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                verticalalignment='top',transform=ax.transAxes,fontsize=12)
    elif col_num    ==2:
        title_label = 'Mass Ratio 10/90, {0} NN'.format(neigh_val)
        frac_val    = 2
        ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                verticalalignment='top',transform=ax.transAxes,fontsize=12)
    ax.set_xlim(9.1,11.9)
    ax.set_ylim([0,10])
    ax.set_xticks(np.arange(9.5, 12., 0.5))
    ax.set_yticks([1.,3.,5.,7.])
    ax.tick_params(axis='both', labelsize=12)
    ax.axhline(y=1,c="darkorchid",linewidth=0.5,zorder=0)
    ax.plot(bin_centers,y_vals,color='silver')

Plotting ECO Ratios

This can plot either the eighteen ECO ratios by themselves or on top of previously plotted subplots.


In [9]:
def plot_eco_rats(bin_centers,y_vals,neigh_val,ax,col_num,plot_idx,only=False):
    """
    Returns subplots of ECO density-cut,mass ratios
    
    Parameters
    ----------
    bin_centers: array-like
        An array with the medians mass values of the mass bins
    y_vals: array-like
        An array containing the ratio values for each mass bin
    neigh_val: integer-like
        Value which will be inserted into the text label of each plot
    ax: axis-like
        A value which specifies which axis each subplot is to be 
        plotted to
    col_num: integer-like
        Integer which specifies which column is currently being 
        plotted. Used for labelling subplots
    plot_idx: integer-like
        Specifies which subplot the figure is plotted to. Used for
        labeling the x-axis
    
    Optional
    --------
    only == True
        To be used when only plotting the ECO ratios, no mocks.
        Will add in the additional plotting specifications that
        would have been taken care of previously in a for-loop
        which plotted the mocks as well
        
    Returns
    -------
    ECO ratios plotted to any previously initialized figure
    """
    if only == True:
        if plot_idx     ==16:
            ax.set_xlabel('$\log\ M_{*}$',fontsize=18)
        if col_num      ==0:
            title_label = 'Mass Ratio 50/50, {0} NN'.format(neigh_val)
            frac_val    = 10
            ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                    verticalalignment='top',transform=ax.transAxes,fontsize=12)
        elif col_num    ==1:
            title_label = 'Mass Ratio 25/75, {0} NN'.format(neigh_val)
            frac_val    = 4
            ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                    verticalalignment='top',transform=ax.transAxes,fontsize=12)
        elif col_num    ==2:
            title_label = 'Mass Ratio 10/90, {0} NN'.format(neigh_val)
            frac_val    = 2
            ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                    verticalalignment='top',transform=ax.transAxes,fontsize=12)
        ax.set_xlim(9.1,11.9)
        ax.set_ylim([0,5])
        ax.set_xticks(np.arange(9.5, 12., 0.5))
        ax.set_yticks([1.,3.])
        ax.tick_params(axis='both', labelsize=12)
        ax.axhline(y=1,c="darkorchid",linewidth=0.5,zorder=0)
    frac_vals = np.array([2,4,10])
    y_vals_2 = y_vals[0][frac_vals[hh]]
    ax.errorbar(bin_centers,y_vals_2,yerr=y_vals[1][hh],\
                color='deeppink',linewidth=2)
#for some reason, whenever I give this frac_vals as an argument, it won't recognize it as an array. 
#And then it will begin with 10, rather than 2; even when I have it tell me what frac val it's using, 
#and then have the for loop do so as well, they do not line up
#also, it plots only ECO differently than when there are multiple curves, and it changes 
#the xlims. So I set them. And then, it made the 17th indexed plot slightly off, so I changed the xticks
#as well

Plotting Histograms (From Which the Ratios are Derived)


In [10]:
def plot_hists(mass,neigh_val,bins,dlogM,ax,col_num,plot_idx):
    """
    Returns a plot showing the density-cut, mass counts.
    
    Parameters
    ----------
    mass: array-like
        A 1D array with log stellar mass values
    neigh_val: integer-like
        Value which will be inserted into the text label of each plot
    bins: array-like
        A 1D array with the values which will be used as the bin edges   
    dlogM: float-like
        The log difference between bin edges
    ax: axis-like
        A value which specifies which axis each subplot is to be 
        plotted to
    col_num: integer-like
        Integer which specifies which column is currently being 
        plotted. Used for labelling subplots
    plot_idx: integer-like
        Specifies which subplot the figure is plotted to. Used for
        labeling the x-axis
        
    Returns
    -------
    Figure with two curves, optionally (if uncommented) plotted in step
    
    """
    ax.set_yscale('log')
    if col_num==0:
        title_label = 'Mass 50/50, {0} NN'.format(neigh_val)
        frac_val    = 2
        ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                verticalalignment='top',transform=ax.transAxes,fontsize=12)
    elif col_num==1:
        title_label = 'Mass 25/75, {0} NN'.format(neigh_val)
        frac_val    = 4
        ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                verticalalignment='top',transform=ax.transAxes,fontsize=12)
    elif col_num==2:
        title_label = 'Mass 10/90, {0} NN'.format(neigh_val)
        frac_val    = 10
        ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
                verticalalignment='top',transform=ax.transAxes,fontsize=12)
    ax.set_xlim(9.1,11.9)
    ax.set_ylim([10**-3,10**1])
    ax.set_xticks(np.arange(9.5, 12., 0.5))
    ax.set_yticks([10**-2,10**0])
    
    frac_data     = (len(mass)/frac_val)

    frac_mass     = mass[0:frac_data]
    counts, edges = np.histogram(frac_mass,bins)
    low_counts    = (counts/float(len(frac_mass))/dlogM)
    
    bins_cens     = .5*(edges[:-1]+edges[1:])
#     ax.step(bins_cens, low_counts, color='lightslategrey',where='mid',alpha=0.1)
    ax.plot(bins_cens, low_counts, color='lightslategrey',alpha=0.1)

    frac_mass_2       =  mass[-frac_data:]
    counts_2, edges_2 =  np.histogram(frac_mass_2,bins)
    high_counts       = (counts_2/float(len(frac_mass_2))/dlogM)
#     ax.step(bins_cens, high_counts, color='lightslategray',where='mid',alpha=0.1)
    ax.plot(bins_cens, high_counts, color='lightslategray',alpha=0.1)

Plotting ECO Histograms


In [11]:
def plot_eco_hists(mass,bins,dlogM,ax,col):
    if   col==0:
        frac_val    = 2
    elif col==1:
        frac_val    = 4
    elif col==2:
        frac_val    = 10
    
    frac_data     = (len(mass)/frac_val)

    frac_mass     = mass[0:frac_data]
    counts, edges = np.histogram(frac_mass,bins)
    bins_cens     = .5*(edges[:-1]+edges[1:])
    ax.step(bins_cens, (counts/float(len(frac_mass))/dlogM), color='lime',where='mid')

    frac_mass_2       = mass[-frac_data:]
    counts_2, edges_2 = np.histogram(frac_mass_2,bins)
    ax.step(bins_cens, (counts_2/float(len(frac_mass_2))/dlogM), color='deeppink',where='mid')

Plots six subplots to a previously initialized figure.


In [12]:
def plot_all_meds(bin_centers,y_vals,ax,plot_idx):
    """
    Returns six subplots showing the median distance to 
        the Nth nearest neighbor for each mass bin.  Assumes a
        previously defined figure.  Best used in a for-loop
    
    Parameters
    ----------
    bin_centers: array-like
        An array with the medians mass values of the mass bins
    y_vals: array-like
        An array containing the median distance values for each mass bin
    ax: axis-like
        A value which specifies which axis each subplot is to be 
        plotted to
    plot_idx: integer-like
        Specifies which subplot the figure is plotted to. Used for
        the text label in each subplot
        
    Returns
    -------
    Subplots displaying the median distance to Nth nearest neighbor
    trends for each mass bin
    
    """
    titles = [1,2,3,5,10,20]
    ax.set_ylim(0,10**1.5)
    ax.set_xlim(9.1,11.9)
    ax.set_yscale('symlog')
    ax.set_xticks(np.arange(9.5,12.,0.5))  
    ax.tick_params(axis='both', which='major', labelsize=16)
    title_here = 'n = {0}'.format(titles[plot_idx])
    ax.text(0.05, 0.95, title_here,horizontalalignment='left',\
            verticalalignment='top',transform=ax.transAxes,fontsize=18)
    if plot_idx == 4:
        ax.set_xlabel('$\log\ M_{*}$',fontsize=18)
    ax.plot(bin_centers,y_vals,color='silver')

Plotting ECO Median Distance Trends, with Bootstrap

Plots six ECO subplots by themselves or on top of a previously initialized figure.


In [13]:
def plot_eco_meds(bin_centers,y_vals,low_lim,up_lim,ax,plot_idx,only=False):
    """
    Returns six subplots showing the median Nth nearest neighbor distance for ECO
        galaxies in each mass bin
        
    Parameters
    ----------
    bin_centers: array-like
        An array with the medians mass values of the mass bins
    y_vals: array-like
        An array containing the median distance values for each mass bin
    low_lim: array-like
        An array with the lower cut-off of the bootstrap errors for each median
    up_lim: array-like
        An array with the upper cut-off of the bootstrap errors for each median
    ax: axis-like
        A value which specifies which axis each subplot is to be 
        plotted to
    plot_idx: integer-like
        Specifies which subplot the figure is plotted to. Used for
        the text label in each subplot
    
    Optional
    --------
    only == False
        To be used when only plotting the ECO median trends, 
        no mocks.  Will add in the additional plotting 
        specifications that would have been taken care of 
        previously in a for-loop which plotted the mocks as well
    
    Returns
    -------
    Subplots displaying the median distance to Nth nearest neighbor
    trends for each mass bin, with the bootstrap errors
    
    """
    if only == True:
        titles = [1,2,3,5,10,20]
        ax.set_ylim(0,10**1.5)
        ax.set_xlim(9.1,11.9)
        ax.set_yscale('symlog')
        ax.set_xticks(np.arange(9.5,12.,0.5))  
        ax.tick_params(axis='both', which='major', labelsize=16)
        title_here = 'n = {0}'.format(titles[plot_idx])
        ax.text(0.05, 0.95, title_here,horizontalalignment='left',\
                verticalalignment='top',transform=ax.transAxes,fontsize=18)
        if plot_idx == 4:
            ax.set_xlabel('$\log\ M_{*}$',fontsize=18)
    ax.errorbar(bin_centers,y_vals,yerr=0.1,lolims=low_lim,\
        uplims=up_lim,color='deeppink')

Plotting Mock Catalog Bands

Plots over the area enclosed by the mock catalog curves, which is also to be used as error on the ECO curves.


In [14]:
def plot_bands(bin_centers,upper,lower,ax):
    """
    Returns an overlayed, fill-between plot, creating a band
        between the different mock catalog values plotted
    
    Parameters
    ----------
    bin_centers: array-like
        An array with the medians mass values of the mass bins
    upper: array-like
        Array with the max y-values among all the mocks
        for each mass bin
    lower: array-like
        Array with the min y-values among all the mocks
        for each mass bin
    ax: axis-like
        A value which specifies which axis each subplot is to be 
        plotted to
    
    Returns
    -------
    A semi-transparent band overlaying the area of the plot 
    bordered by the mocks
    """
    ax.fill_between(bin_centers,upper,lower,color='silver',alpha=0.1)

Plotting Range of Distances to Nth Nearest Neighbor


In [15]:
def plot_med_range(bin_centers,low_lim,up_lim,ax,alpha,color='gray'):
    """
    Returns a plot with a transparent band highlighting a range of
        values.
    
    Parameters
    ----------
    bin_centers: array-like
        An array with the medians mass values of the mass bins
    low_lim: array-like
        Array with the min y-values among all the mocks
        for each mass bin
    up_lim: array-like
        Array with the max y-values among all the mocks
        for each mass bin 
    ax: axis-like
        A value which specifies which axis each subplot is to be 
        plotted to
    alpha: float-like
        A value which will determine the tranparency of the band
    color: str
        Any color which Python recognizes; sets the color of the band
        
    Returns
    -------
    A colored band spanning from the max y-values to the minimum.
    
    """
    ax.fill_between(bin_centers,low_lim,up_lim,color=color,alpha=alpha)

Running through the Calculations

The below line should be the only one which needs to be changed. The rest should be dynamic and work based off of this.


In [16]:
dirpath  = r"C:\Users\Hannah\Desktop\Vanderbilt_REU\Stellar_mass_env_density\Catalogs\Resolve_plk_5001_so_mvir_hod1_Hannah_newdens_ECO_Mocks\Resolve_plk_5001_so_mvir_hod1_Hannah_newdens_ECO_Mocks"
usecols  = (0,1,8,13)
bins     = np.linspace(9.1,11.9,15)
dlogM    = (11.9-9.1)/14

The below loads the mock catalogs and imports them into arrays. First, it loads in information concerning centrals and satellites. This will only be used once. The next steps are essential. It sorts out any galaxies which have a mass less than 9.1 (although there shouldn't be any in the new mocks), to meet the stellar mass completeness limit for ECO. One should note, when making the four np.arrays, they are relying on the columns being in a set order (ra, dec, cz, and logMstar). If they are not in this order, the arrays will not house the correct information. The Cartesian coordinates are calculated and a cKD-Tree is made. The twenty nearest neighbors and their distances for each galaxy are also calculated. These are then placed in an array along with the galaxy masses.


In [17]:
ECO_cats    = (Index(dirpath,'.dat'))
  
sat_cols    = (13,25)
  
sat_names   = ['logMstar','cent_sat_flag']
SF_PD       = [(pd.read_csv(ECO_cats[ii],sep="\s+", usecols= sat_cols,header=None,\
                     skiprows=2,names=sat_names)) for ii in range(8)]
SF_PD_comp  = [(SF_PD[ii][SF_PD[ii].logMstar >= 9.1]) for ii in range(len(ECO_cats))]

names       = ['ra','dec','cz','logMstar']
PD          = [(pd.read_csv(ECO_cats[ii],sep="\s+", usecols= usecols,header=None,\
                       skiprows=2,names=names)) for ii in range(len(ECO_cats))]
PD_comp     = [(PD[ii][PD[ii].logMstar >= 9.1]) for ii in range(len(ECO_cats))]
    
ra_arr      = np.array([(np.array(PD_comp[ii])).T[0] for ii in range(len(PD_comp))])
dec_arr     = np.array([(np.array(PD_comp[ii])).T[1] for ii in range(len(PD_comp))])
cz_arr      = np.array([(np.array(PD_comp[ii])).T[2] for ii in range(len(PD_comp))])
mass_arr    = np.array([(np.array(PD_comp[ii])).T[3] for ii in range(len(PD_comp))])

coords_test = np.array([sph_to_cart(ra_arr[vv],dec_arr[vv],cz_arr[vv]) for vv in range(len(ECO_cats))])

neigh_vals  = np.array([1,2,3,5,10,20])

nn_arr      = [[] for xx in xrange(len(coords_test))]
nn_arr_nn   = [[] for yy in xrange(len(neigh_vals))]

for vv in range(len(coords_test)):
    nn_arr[vv] = spatial.cKDTree(coords_test[vv])
    nn_arr[vv] = np.array(nn_arr[vv].query(coords_test[vv],21)[0])

nn_specs       = [(np.array(nn_arr).T[ii].T[neigh_vals].T) for ii in range(len(coords_test))]
nn_mass_dist   = np.array([(np.column_stack((mass_arr[qq],nn_specs[qq]))) for qq in range(len(coords_test))])

Here, I am determining the satellite fractions of the mocks, so the fraction of the galaxies in each mock which are satellites. According to Victor, this should generally be around one-third.


In [18]:
sats_num  = np.array([(len(SF_PD_comp[ii][SF_PD_comp[ii].cent_sat_flag==0])) for ii in range(len(SF_PD_comp))])
cents_num = np.array([(len(SF_PD_comp[ii][SF_PD_comp[ii].cent_sat_flag==1])) for ii in range(len(SF_PD_comp))])
gal_tot   = np.array([(len(SF_PD_comp[ii])) for ii in range(len(SF_PD_comp))])
print sats_num/gal_tot


[ 0.38107796  0.3992126   0.37983524  0.40653291  0.37801259  0.37425113
  0.37279687  0.37240558]

In [19]:
############to see how many galaxies are in each mock
# print gal_tot

In [20]:
########to see how many galaxies would be lost if I set my max bin edge to 11.9
# test = [(PD[ii][PD[ii].logMstar >= 11.9]) for ii in range(len(ECO_cats))]
# print test

In [21]:
#######to see what the maximum/min stellar mass was in each mock
# max_mass = [(max(mass_arr[ii])) for ii in range(len(mass_arr))] 
# print max_mass
# min_mass = [(min(mass_arr[ii])) for ii in range(len(mass_arr))]
# print min_mass

Creating empty dictionaries and populating them with empty dictionaries with keys, one key for each mock. Also adds a key based on mass. These dictionaries house six smaller dictionaries, one for each Nth nearest neighbor, and are then filled with data values. For each Nth nn specified, the galaxies are sorted by distance, so there are six different orderings. These are used when taking the percentiles for the ratio plots. Within this section, bin_centers is defined. This should be the only place this variable is set. The max and min stellar mass function values for each bin are also determined.


In [22]:
nn_dist    = {}
nn_dens    = {}
mass_dat   = {}
ratio_info = {}

mass_freq  = [[] for xx in xrange(len(coords_test))]

for ii in range(len(coords_test)):
    nn_dist[ii]    = {}
    nn_dens[ii]    = {}
    mass_dat[ii]   = {}
    ratio_info[ii] = {}

    nn_dist[ii]['mass'] = nn_mass_dist[ii].T[0]

    for jj in range(len(neigh_vals)):
        nn_dist[ii][(neigh_vals[jj])]  = np.array(nn_mass_dist[ii].T[range(1,len(neigh_vals)+1)[jj]])
        nn_dens[ii][(neigh_vals[jj])]  = np.column_stack((nn_mass_dist[ii].T[0],calc_dens\
                                            (neigh_vals[jj],nn_mass_dist[ii].T[range(1,len(neigh_vals)+1)[jj]])))

        idx = np.array([nn_dens[ii][neigh_vals[jj]].T[1].argsort()])
        mass_dat[ii][(neigh_vals[jj])] = (nn_dens[ii][neigh_vals[jj]][idx].T[0])

        bin_centers, mass_freq[ii], ratio_info[ii][neigh_vals[jj]] = plot_calcs(mass_dat[ii][neigh_vals[jj]],bins,dlogM)

all_mock_meds = [[] for xx in range(len(nn_mass_dist))]

for vv in range(len(nn_mass_dist)):
    all_mock_meds[vv] = np.array([bin_func(nn_mass_dist[vv],bins,(jj+1)) for jj in range(len(nn_mass_dist[vv].T)-1)])
    
med_plot_arr = [([[] for yy in xrange(len(nn_mass_dist))]) for xx in xrange(len(neigh_vals))]

for ii in range(len(neigh_vals)):
    for jj in range(len(nn_mass_dist)):
        med_plot_arr[ii][jj] = all_mock_meds[jj][ii]    

mass_freq_plot  = (np.array(mass_freq))
max_lim = [[] for xx in range(len(mass_freq_plot.T))]
min_lim = [[] for xx in range(len(mass_freq_plot.T))]
for jj in range(len(mass_freq_plot.T)):
    max_lim[jj] = max(mass_freq_plot.T[jj])
    min_lim[jj] = min(mass_freq_plot.T[jj])

The next two cells determine the range of distances from nearest neighbors that the galaxies in the mocks actually span. In the median trends plot, it appeared as if the medians were all about the same value, so we wanted to see whether the range was extremely small or the median normal for the distribution. Had to wrangle the data around to then find the percentile cutoffs. The ordered_mass is an array which can be used to put the galaxies in order by stellar mass.


In [23]:
# ordered_mass = nn_mass_dist[0].T[0][(nn_mass_dist[0].T[0].argsort())]
dist_cont = [[[[] for zz in xrange(len(bins)-1)] for yy in \
              xrange(len(nn_mass_dist))] for xx in xrange(1,len(nn_mass_dist[0].T))]

for ii in xrange(len(nn_mass_dist)):
    sorting_test  = np.digitize(nn_mass_dist[ii].T[0],bins)
    bin_nums      = np.unique(sorting_test)
    bin_nums_list = list(bin_nums)
    if 15 in bin_nums_list:
        bin_nums_list.remove(15)
        bin_nums  = np.array(bin_nums_list)
    for jj in xrange(1,len(nn_mass_dist[ii].T)):
        for hh in bin_nums:
            dist_cont[jj-1][ii][hh-1] = (nn_mass_dist[ii].T[jj][sorting_test==hh])

Below, in a very not-straightforward manner, I am creating arrays that will be used to illustrate the 68 and 95 spread of the distance to the nth nearest neighbor. This was of concern because the medians seemed to all be extremely close together, and we wanted to make sure there was not anything awry with the mocks.


In [24]:
top_68 = [[[[]for ll in xrange(len(bin_nums))]for yy in xrange(len(nn_mass_dist))] for xx in xrange(len(neigh_vals))]
low_68 = [[[[]for ll in xrange(len(bin_nums))]for yy in xrange(len(nn_mass_dist))] for xx in xrange(len(neigh_vals))]
top_95 = [[[[]for ll in xrange(len(bin_nums))]for yy in xrange(len(nn_mass_dist))] for xx in xrange(len(neigh_vals))]
low_95 = [[[[]for ll in xrange(len(bin_nums))]for yy in xrange(len(nn_mass_dist))] for xx in xrange(len(neigh_vals))]
med_50 = [[[[]for ll in xrange(len(bin_nums))]for yy in xrange(len(nn_mass_dist))] for xx in xrange(len(neigh_vals))]

for aa in xrange(len(neigh_vals)):
    for bb in xrange(len(nn_mass_dist)):
        for cc in xrange(len(dist_cont[aa][bb])):
            top_68[aa][bb][cc] = np.percentile(dist_cont[aa][bb][cc],84)
            low_68[aa][bb][cc] = np.percentile(dist_cont[aa][bb][cc],16)
            top_95[aa][bb][cc] = np.percentile(dist_cont[aa][bb][cc],97.5)
            low_95[aa][bb][cc] = np.percentile(dist_cont[aa][bb][cc],2.5)
            med_50[aa][bb][cc] = np.median((dist_cont[aa][bb][cc]))
            
top_68 = np.array(top_68)
low_68 = np.array(low_68)
top_95 = np.array(top_95)
low_95 = np.array(low_95)
med_50 = np.array(med_50)

In [25]:
#############to make sure the correct number of medians was being calculated
# for jj in range(len(all_mock_meds)):
#     print [len(all_mock_meds[jj][ii]) for ii in range(len(all_mock_meds[0]))]

Converting ratio information into a more accessible format. Ratio_info is a dictionary with 8 keys, one for each mock catalog. Each key has another dictionary, with 6 keys, one for each Nth nearest neighbor being considered. Each of these sub-dictionaries has three keys, one for each "frac_val," or divisor which will yield the desired percentiles. First, a list of six elements is created. Each elements corresponds to a nearest neighbor value. These six elements are lists which contain 8 dictionaries, one for each mock. These dictionaries each have three keys, one for each frac_val. Then, another list is made. It has six elements, each housing three sublists. Each of the sublists corresponds to a frac_val. They house 8 np.arrays. *Note, 8 is just used as a placeholder for any number. This number corresponds to the number of mocks being considered.


In [26]:
frac_vals   = [2,4,10]
nn_plot_arr = [[[] for yy in xrange(len(nn_mass_dist))] for xx in xrange(len(neigh_vals))]

for ii in range(len(neigh_vals)):
    for jj in range(len(nn_mass_dist)):
        nn_plot_arr[ii][jj] = (ratio_info[jj][neigh_vals[ii]])
        plot_frac_arr = [[[[] for yy in xrange(len(nn_mass_dist))] \
                         for zz in xrange(len(frac_vals))] for xx in xrange(len(nn_plot_arr))]

for jj in range(len(nn_mass_dist)):
    for hh in range(len(frac_vals)):
        for ii in range(len(neigh_vals)):
            plot_frac_arr[ii][hh][jj] = nn_plot_arr[ii][jj][frac_vals[hh]]

ECO Calculations, Streamlined


In [27]:
eco_path = r"C:\Users\Hannah\Desktop\Vanderbilt_REU\Stellar_mass_env_density\Catalogs\ECO_true"
eco_cols = np.array([0,1,2,4])

Importing original ECO data and running through all the necessary calculations. In order, converting to the Cartesian coordinate system, creating a cKD-Tree, and finding the distances to the 20 nearest neighbors. These distances are added to an array with the stellar masses. The densities of the nearest neighbor spheres are calculated, and a list with six arrays is created. Each array has two columns. One the mass and one density. These arrays are then sorted to return a list of indices based on the densities, low to high, of the spheres. These indices are used to put the mass values in order so when cuts by density are made, the percentiles are correct. The density is calculated for the sole purpose of ordering the masses so they can be fed to the histogram, in this case. (Although,now that I think of it, I could have just sorted by the distances, since the numerator stays the same. The smaller the distance, the larger the density of the sphere. Still, it could be useful to have the densities...at some point. They were definitely more useful with the density in a sphere analysis). The information for the stellar mass function, counts ratios, and median distance trends are calculated. With these, errors are also calculated, using some of the optional keywords.


In [28]:
ECO_true = (Index(eco_path,'.txt'))
names    = ['ra','dec','cz','logMstar']
PD_eco   = pd.read_csv(ECO_true[0],sep="\s+", usecols=(eco_cols),header=None,\
                skiprows=1,names=names)
eco_comp = PD_eco[PD_eco.logMstar >= 9.1]

ra_eco   = (np.array(eco_comp)).T[0]
dec_eco  = (np.array(eco_comp)).T[1] 
cz_eco   = (np.array(eco_comp)).T[2] 
mass_eco = (np.array(eco_comp)).T[3]

coords_eco        = sph_to_cart(ra_eco,dec_eco,cz_eco)
eco_neighbor_tree = spatial.cKDTree(coords_eco)
eco_tree_dist     = np.array(eco_neighbor_tree.query(coords_eco,(neigh_vals[-1]+1))[0])

eco_mass_dist = np.column_stack((mass_eco,eco_tree_dist.T[neigh_vals].T))
eco_dens = ([calc_dens(neigh_vals[jj],\
            (eco_mass_dist.T[range(1,7)[jj]])) for jj in range(len(neigh_vals))])

eco_mass_dens = [(np.column_stack((mass_eco,eco_dens[ii]))) for ii in range(len(neigh_vals))]
eco_idx  = [(eco_mass_dens[jj].T[1].argsort()) for jj in range(len(neigh_vals))]
eco_mass_dat  = [(eco_mass_dens[jj][eco_idx[jj]].T[0]) for jj in range(len(neigh_vals))]

eco_ratio_info    = [[] for xx in xrange(len(eco_mass_dat))]


for qq in range(len(eco_mass_dat)):
    bin_centers, eco_freq, eco_ratio_info[qq] = plot_calcs(eco_mass_dat[qq],bins,dlogM,mass_err=True,ratio_err=True)

eco_medians   = [[] for xx in xrange(len(eco_mass_dat))]    

for jj in (range(len(eco_mass_dat))):
    eco_medians[jj] = np.array(bin_func(eco_mass_dist,bins,(jj+1),bootstrap=True))

Plotting Stellar Mass Function

There was no need for a stand-alone function, as only one plot is being made. The mocks are plotted within the for-loop, but ECO is outside of this, as it need only be plotted once. The fill between is also quite straightforward, as long as the two arrays with maxes and mins are previosuly calculated.


In [29]:
fig,ax  = plt.subplots(figsize=(8,8))
ax.set_title('Mass Distribution',fontsize=18)
ax.set_xlabel('$\log\ M_{*}$',fontsize=17)
ax.set_yscale('log')
ax.set_xlim(9.1,12.1)
ax.tick_params(axis='both', labelsize=14)

for ii in range(len(mass_freq)):
    ax.plot(bin_centers,mass_freq[ii],color='silver')
    ax.fill_between(bin_centers,max_lim,min_lim,color='silver',alpha=0.1)
ax.errorbar(bin_centers,eco_freq[0],yerr=eco_freq[1],color='deeppink',\
            linewidth=2,label='ECO')
ax.legend(loc='best')

plt.subplots_adjust(left=0.08, bottom=0.1, right=0.98, top=0.94,\
                    hspace=0.2,wspace=0.2)
plt.show()


Plotting the 18 Density-Ranked, Mass Ratios

Code for finding the upper and lower limits of the band formed by the minimum and maximum mock values. Rather intense, as information must be extracted, put into an array by column, and then compared across rows. Also difficult, as there are a number of infinities, which must be dealt with before one can properly find the max. These are replaced with NaN values to keep the number of values consistent. A dictionary is initialized,named, and given keys based on the nearest neighbor and frac_val. Each key is eventually assigned a list with two np.arrays housing the min and max information.


In [30]:
A = {}
nn_dict   = {1:0,2:1,3:2,5:3,10:4,20:5}
coln_dict = {2:0,4:1,10:2}

nn_keys  = np.sort(nn_dict.keys())
col_keys = np.sort(coln_dict.keys())
zz_num   = len(plot_frac_arr[nn_dict[1]][coln_dict[2]])

for nn in nn_keys:
    for coln in col_keys:
        bin_str    = '{0}_{1}'.format(nn,coln)
        for cc in range(zz_num):
            zz_arr = np.array(plot_frac_arr[nn_dict[nn]][coln_dict[coln]][cc])
            n_elem = len(zz_arr)
            if cc == 0:
                zz_tot = np.zeros((n_elem,1))
            zz_tot = np.insert(zz_tot,len(zz_tot.T),zz_arr,1)
        zz_tot = np.array(np.delete(zz_tot,0,axis=1))
        for kk in xrange(len(zz_tot)):
            zz_tot[kk][zz_tot[kk] == np.inf] = np.nan
        zz_tot_max = [np.nanmax(zz_tot[kk]) for kk in xrange(len(zz_tot))]
        zz_tot_min = [np.nanmin(zz_tot[kk]) for kk in xrange(len(zz_tot))]
        A[bin_str] = [zz_tot_max,zz_tot_min]


C:\Users\Hannah\Anaconda2\lib\site-packages\numpy\lib\nanfunctions.py:326: RuntimeWarning: All-NaN slice encountered
  warnings.warn("All-NaN slice encountered", RuntimeWarning)
C:\Users\Hannah\Anaconda2\lib\site-packages\numpy\lib\nanfunctions.py:227: RuntimeWarning: All-NaN axis encountered
  warnings.warn("All-NaN axis encountered", RuntimeWarning)

In [31]:
np.seterr(divide='ignore',invalid='ignore')

nrow_num  = int(6)
ncol_num  = int(3)
zz        = int(0)

fig, axes = plt.subplots(nrows=nrow_num, ncols=ncol_num, \
            figsize=(150,200), sharex= True,sharey=True)
axes_flat = axes.flatten()
# fig.suptitle("Percentile Trends", fontsize=18)

while zz <= 16:
    for ii in range(len(eco_ratio_info)):
        for hh in range(len(eco_ratio_info[0][1])):
            for jj in range(len(nn_mass_dist)):
                upper = A['{0}_{1}'.format(neigh_vals[ii],frac_vals[hh])][0]
                lower  = A['{0}_{1}'.format(neigh_vals[ii],frac_vals[hh])][1]
                plot_bands(bin_centers,upper,lower,axes_flat[zz])
                plot_all_rats(bin_centers,(plot_frac_arr[ii][hh][jj]),\
                              neigh_vals[ii],axes_flat[zz],hh,zz)
            plot_eco_rats(bin_centers,(eco_ratio_info[ii]),neigh_vals[ii],axes_flat[zz],hh,zz)
            zz += 1

plt.subplots_adjust(left=0.02, bottom=0.09, right=0.98, top=0.98,\
                    hspace=0,wspace=0)

plt.show()



In [32]:
# #######################PLOTTING ONLY ECO RATIOS
# np.seterr(divide='ignore',invalid='ignore')

# nrow_num = int(6)
# ncol_num = int(3)
# zz       = int(0)

# fig, axes = plt.subplots(nrows=nrow_num, ncols=ncol_num, \
#             figsize=(100,200), sharex= True,sharey=True)
# axes_flat = axes.flatten()
# # fig.suptitle("Percentile Trends", fontsize=18)

# while zz <= 16:
#     for ii in range(len(eco_ratio_info)):
#         for hh in range(len(eco_ratio_info[0][1])):
#             plot_eco_rats(bin_centers,(eco_ratio_info[ii]),neigh_vals[ii],axes_flat[zz],hh,zz,only=True)
#             zz += 1

# #plt.subplots_adjust(left=0.02, bottom=0.09, right=1.00, top=1.00,\
#                     #hspace=0,wspace=0)

# #plt.show()

Another dictionary is made for the max and min values of the mock median distance trends. The mock trends, ECO trends, and bands are then plotted.


In [33]:
# fig,ax = plt.subplots(figsize=(10,5))
# ax.set_yscale('symlog')
# ax.set_ylim([0,10])
# for ii in range(len(med_50[0])):
#     ax.plot(bin_centers,med_50[0][ii])
#     ax.fill_between(bin_centers,top_68[0][ii],low_68[0][ii],color='silver',alpha=0.1,zorder=1)
#     ax.fill_between(bin_centers,top_95[0][ii],low_95[0][ii],color='gainsboro',alpha=0.2,zorder=0)
# #plt.show()

In [34]:
B = {}
yy_num = len(med_plot_arr[0])

for nn in range(len(med_plot_arr)):
    for ii in range(yy_num):
        med_str  = '{0}'.format(nn)
        yy_arr   = med_plot_arr[nn][ii]
        n_y_elem = len(yy_arr)
        if ii == 0:
            yy_tot = np.zeros((n_y_elem,1))
        yy_tot = np.insert(yy_tot,len(yy_tot.T),yy_arr,1)
    yy_tot = np.array(np.delete(yy_tot,0,axis=1))
    yy_tot_max = [np.nanmax(yy_tot[kk]) for kk in xrange(len(yy_tot))]
    yy_tot_min = [np.nanmin(yy_tot[kk]) for kk in xrange(len(yy_tot))]
    B[med_str] = [yy_tot_max,yy_tot_min]

In [35]:
# %matplotlib qt

nrow_num_mass = int(2)
ncol_num_mass = int(3)

fig, axes = plt.subplots(nrows=nrow_num_mass, ncols=ncol_num_mass, \
        figsize=(100,200), sharex= True, sharey = True)
axes_flat = axes.flatten()

zz = int(0)
while zz <=4:
    for ii in range(len(med_plot_arr)):
        for vv in range(len(nn_mass_dist)):
                lower_m  = B['{0}'.format(ii)][0]
                upper_m  = B['{0}'.format(ii)][1]
                plot_med_range(bin_centers,top_95[ii][vv],low_95[ii][vv],axes_flat[zz],0.05,color='lightsteelblue')
                plot_med_range(bin_centers,top_68[ii][vv],low_68[ii][vv],axes_flat[zz],0.15,color='gainsboro')
                plot_bands(bin_centers,upper_m,lower_m,axes_flat[zz])
                plot_all_meds(bin_centers,med_plot_arr[ii][vv],axes_flat[zz],zz)
                plot_eco_meds(bin_centers,eco_medians[ii][0],eco_medians[ii][1],eco_medians[ii][2],axes_flat[zz],zz)
        zz   += 1       
        
#plt.subplots_adjust(left=0.04, bottom=0.08, right=0.96, top=0.98,\
            #hspace=0,wspace=0)
#plt.show()



In [36]:
# ######PLOTTING ONLY ECO MED TRENDS

# nrow_num_mass = int(2)
# ncol_num_mass = int(3)

# fig, axes = plt.subplots(nrows=nrow_num_mass, ncols=ncol_num_mass, \
#         figsize=(100,200), sharex= True, sharey = True)
# axes_flat = axes.flatten()

# zz = int(0)
# while zz <=4:
#     for ii in range(len(med_plot_arr)):
#         plot_eco_meds(bin_centers,eco_medians[ii][0],\
#                         eco_medians[ii][1],eco_medians[ii][2],axes_flat[zz],zz,only=True)
#         zz   += 1
        
# #plt.subplots_adjust(left=0.05, bottom=0.09, right=1.00, top=1.00,\
#             #hspace=0,wspace=0)
# #plt.show()

Plotting the 18 Density-Cut, Stellar Mass Histograms

First, it was necessary to create dictionaries, run the calculations, and then extract the information from them so it could be used. This was clearly modeled off previous code used with the ratios. It worked well enough there and to disentangle that code would have taken a commensurate amount of time, so I chose to just mirror it.


In [37]:
hist_low_info  = {}
hist_high_info = {}

for ii in xrange(len(coords_test)):
    hist_low_info[ii]  = {}
    hist_high_info[ii] = {}
    
    for jj in range(len(neigh_vals)):
        hist_low_info[ii][neigh_vals[jj]], \
        hist_high_info[ii][neigh_vals[jj]] = \
        hist_calcs(mass_dat[ii][neigh_vals[jj]],bins,dlogM)
        
frac_vals     = [2,4,10]
hist_low_arr  = [[[] for yy in xrange(len(nn_mass_dist))] for xx in \
    xrange(len(neigh_vals))]
hist_high_arr = [[[] for yy in xrange(len(nn_mass_dist))] for xx in \
    xrange(len(neigh_vals))]

for ii in range(len(neigh_vals)):
    for jj in range(len(nn_mass_dist)):
        hist_low_arr[ii][jj]  = (hist_low_info[jj][neigh_vals[ii]])
        hist_high_arr[ii][jj] = (hist_high_info[jj][neigh_vals[ii]])
        
        
        plot_low_hist  = [[[[] for yy in xrange(len(nn_mass_dist))] \
                         for zz in xrange(len(frac_vals))] for xx in \
                         xrange(len(hist_low_arr))]
        
        plot_high_hist = [[[[] for yy in xrange(len(nn_mass_dist))] \
                 for zz in xrange(len(frac_vals))] for xx in \
                 xrange(len(hist_high_arr))]

for jj in range(len(nn_mass_dist)):
    for hh in range(len(frac_vals)):
        for ii in range(len(neigh_vals)):
            plot_low_hist[ii][hh][jj]  = hist_low_arr[ii][jj][frac_vals[hh]]        
            plot_high_hist[ii][hh][jj] = hist_high_arr[ii][jj][frac_vals[hh]]

Creating dictionaries with the max and min values of the mock histograms so that fill_between can be used. Note: for some reason, it seems to plot correctly, but when plotting the histograms themseleves, it seems like it's missing some information. I believe this is because of the way the histogram steps work.


In [38]:
C = {}
D = {}
nn_dict   = {1:0,2:1,3:2,5:3,10:4,20:5}
coln_dict = {2:0,4:1,10:2}

nn_keys   = np.sort(nn_dict.keys())
col_keys  = np.sort(coln_dict.keys())

vv_num    = len(plot_low_hist[nn_dict[1]][coln_dict[2]])

for nn in nn_keys:
    for coln in col_keys:
        bin_str    = '{0}_{1}'.format(nn,coln)
        for cc in range(vv_num):
            vv_arr = np.array(plot_low_hist[nn_dict[nn]][coln_dict[coln]][cc])
            n_elem = len(vv_arr)
            if cc == 0:
                vv_tot = np.zeros((n_elem,1))
            vv_tot = np.insert(vv_tot,len(vv_tot.T),vv_arr,1)
        vv_tot = np.array(np.delete(vv_tot,0,axis=1))
        for kk in xrange(len(vv_tot)):
            vv_tot[kk][vv_tot[kk] == np.inf] = np.nan
        vv_tot_max = [np.nanmax(vv_tot[kk]) for kk in xrange(len(vv_tot))]
        vv_tot_min = [np.nanmin(vv_tot[kk]) for kk in xrange(len(vv_tot))]
        C[bin_str] = [vv_tot_max,vv_tot_min]
        
hh_num   = len(plot_high_hist[nn_dict[1]][coln_dict[2]])

for nn in nn_keys:
    for coln in col_keys:
        bin_str    = '{0}_{1}'.format(nn,coln)
        for cc in range(hh_num):
            hh_arr = np.array(plot_high_hist[nn_dict[nn]][coln_dict[coln]][cc])
            n_elem = len(hh_arr)
            if cc == 0:
                hh_tot = np.zeros((n_elem,1))
            hh_tot = np.insert(hh_tot,len(hh_tot.T),hh_arr,1)
        hh_tot     = np.array(np.delete(hh_tot,0,axis=1))
        for kk in xrange(len(hh_tot)):
            hh_tot[kk][hh_tot[kk] == np.inf] = np.nan
        hh_tot_max = [np.nanmax(hh_tot[kk]) for kk in xrange(len(hh_tot))]
        hh_tot_min = [np.nanmin(hh_tot[kk]) for kk in xrange(len(hh_tot))]
        D[bin_str] = [hh_tot_max,hh_tot_min]

In [39]:
nrow_num  = int(6)
ncol_num  = int(3)

fig, axes = plt.subplots(nrows=nrow_num, ncols=ncol_num, \
            figsize=(150,200), sharex= True,sharey=True)
axes_flat = axes.flatten()

for ii in range(len(mass_dat)):
    zz = 0
    for jj in range(len(neigh_vals)):
        for hh in range(3):
            upper    = C['{0}_{1}'.format(neigh_vals[jj],frac_vals[hh])][0]
            lower    = C['{0}_{1}'.format(neigh_vals[jj],frac_vals[hh])][1]
            upper_2  = D['{0}_{1}'.format(neigh_vals[jj],frac_vals[hh])][0]
            lower_2  = D['{0}_{1}'.format(neigh_vals[jj],frac_vals[hh])][1]
            plot_bands(bin_centers,upper,lower,axes_flat[zz])
            plot_bands(bin_centers,upper_2,lower_2,axes_flat[zz])
            plot_hists(mass_dat[ii][neigh_vals[jj]],neigh_vals[jj],bins,dlogM,\
                axes_flat[zz], hh, zz)
            if ii == 0:
                plot_eco_hists(eco_mass_dat[jj],bins,dlogM,\
                axes_flat[zz],hh)
            zz += int(1)         

plt.subplots_adjust(left=0.02, bottom=0.09, right=0.98, top=0.98,\
                    hspace=0, wspace=0)            

plt.show()


C:\Users\Hannah\Anaconda2\lib\site-packages\ipykernel\__main__.py:50: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
C:\Users\Hannah\Anaconda2\lib\site-packages\ipykernel\__main__.py:58: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
C:\Users\Hannah\Anaconda2\lib\site-packages\ipykernel\__main__.py:11: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
C:\Users\Hannah\Anaconda2\lib\site-packages\ipykernel\__main__.py:16: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future

Schechter Functions!


In [40]:
def schechter_log_func(stellar_mass,phi_star,alpha,m_star):
    """
    Returns a plottable Schechter function for the 
        stellar mass functions of galaxies
    
    Parameters
    ----------
    stellar_mass: array-like
        An array of unlogged stellar mass values which 
        will eventually be the x-axis values the function
        is plotted against
    phi_star: float-like
        A constant which normalizes (?) the function;
        Moves the graph up and down
    alpha: negative integer-like
        The faint-end, or in this case, low-mass slope;
        Describes the power-law portion of the curve
    m_star: float-like
        Unlogged value of the characteristic (?) stellar
        mass; the "knee" of the function, where the 
        power-law gives way to the exponential portion
        
    Returns
    -------
    res: array-like
        Array of values prepared to be plotted on a log
        scale to display the Schechter function
        
    """
    constant = np.log(10) * phi_star
    log_M_Mstar = np.log10(stellar_mass/m_star)
    res = constant * 10**(log_M_Mstar * (alpha+1)) * \
        np.exp(-10**log_M_Mstar)
        
    return res

http://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.optimize.curve_fit.html The above url is where one can find documentation on the curve_fit function which is utilized to find the correct parameters.


In [50]:
xdata = 10**bin_centers
p0    = (1,-1.05,10**10.64)
param_arr = [[] for ii in range(len(mass_freq)+1)]

fig,axes  = plt.subplots(nrows=3,ncols=3,sharex=True,sharey=True,\
                        figsize=(150,200))
axes_flat = axes.flatten()

for ii in range(len(mass_freq)+1):
    if ii == 8:
        ydata = eco_freq[0]
        opt_v, est_cov = optimize.curve_fit(schechter_log_func,xdata,ydata,p0=p0,sigma=eco_freq[1])
    else:
        ydata = (mass_freq[ii])
        opt_v, est_cov = optimize.curve_fit(schechter_log_func,xdata,ydata,p0=p0)
    schech_vals   = schechter_log_func(10**bin_centers,opt_v[0],opt_v[1],opt_v[2])
    param_arr[ii] = opt_v
    param_arr = np.array(param_arr)
    
    ax = axes_flat[ii]
    ax.set_yscale('log')
    ax.set_ylim([10**-3,10**0])
    ax.set_xlim([9.1,11.9])
    ax.set_yticks([10**-2,10**-1,10**0])
    ax.plot(bin_centers,schech_vals,label='Schechter',color='silver')
    if ii == 8:
        ax.errorbar(bin_centers,ydata,yerr=eco_freq[1],color='deeppink',label='ECO')
    else:
        ax.plot(bin_centers,ydata,label='Mock',color='darkorchid')
    if ii == 0 or ii == 8:
        ax.legend(loc='best')
    if ii == 7:
        ax.set_xlabel('$\log\ M_{*}$',fontsize=18)
    
plt.subplots_adjust(left=0.03, bottom=0.08, right=0.99, top=0.99,\
                    hspace=0,wspace=0)                
    
plt.show()



In [42]:
# alpha_arr  = [[] for xx in xrange(len(param_arr))]
# m_star_arr = [[] for xx in xrange(len(param_arr))]

# for ii in range(len(param_arr)):
#     alpha_arr[ii] = param_arr[ii][1] 
#     m_star_arr[ii] = param_arr[ii][2]

Plot the best fit parameters as function of density (Nth distance and percentile) -find the schechter function for the 18 different combos? -then plot the parameters?

-find the parameters for each mock and ECO separately
    -the six nearest neighbors, so six parameters, plot versus the distance or density... which neighbor?
    -this sounds most reasonable for Nth distance
    -Not sure what the x-axis will be... Could just be... 0-5?

-as far as percentiles, maybe just do it on the 18 ECO histograms?


-the Schechter function of the density splits? (10/90,25/75,50/50)
-could base off of the number of galaxies in each mock
-same volume, but differing number of galaxies, so number density is different?
-general percentile of the entire mass function?
    -so split the stellar mass function into the 10/90,etc. cuts
    -does that even work? pick the galaxies in the 10% densest and least dense? 
    -But I can't do that overall... I'd need to specificy a nearest neighbor

In [43]:
eco_low  = {}
eco_high = {}
for jj in range(len(neigh_vals)):
    eco_low[neigh_vals[jj]]  = {}
    eco_high[neigh_vals[jj]] = {}
    eco_low[neigh_vals[jj]], eco_high[neigh_vals[jj]] = hist_calcs\
    (eco_mass_dat[jj],bins,dlogM,eco=True)

In [44]:
eco_low


Out[44]:
{1: {2: array([ 0.86711409,  0.78389262,  0.65503356,  0.6442953 ,  0.5409396 ,
          0.52751678,  0.39731544,  0.27919463,  0.19060403,  0.08456376,
          0.02550336,  0.00402685,  0.        ,  0.        ]),
  4: array([ 0.87540279,  0.84049409,  0.66863588,  0.66058002,  0.56122449,
          0.55585392,  0.38668099,  0.23093448,  0.15843179,  0.04296455,
          0.01611171,  0.00268528,  0.        ,  0.        ]),
  10: array([ 0.95302013,  0.87919463,  0.72483221,  0.68456376,  0.55704698,
          0.59060403,  0.29530201,  0.18120805,  0.10738255,  0.02013423,
          0.00671141,  0.        ,  0.        ,  0.        ]),
  'low_err': [array([ 0.03411615,  0.03243771,  0.02965198,  0.02940792,  0.02694612,
           0.0266097 ,  0.02309349,  0.01935866,  0.01599513,  0.01065403,
           0.00585087,  0.0023249 ,  0.        ,  0.        ]),
   array([ 0.04848408,  0.04750753,  0.04237308,  0.04211704,  0.03882071,
           0.03863452,  0.03222342,  0.02490231,  0.02062606,  0.01074114,
           0.00657758,  0.00268528,  0.        ,  0.        ]),
   array([ 0.07997567,  0.07681559,  0.06974701,  0.06778191,  0.06114385,
           0.0629586 ,  0.04451845,  0.03487351,  0.02684564,  0.0116245 ,
           0.00671141,  0.        ,  0.        ,  0.        ])]},
 2: {2: array([ 0.84697987,  0.79060403,  0.65771812,  0.63892617,  0.54228188,
          0.54899329,  0.41073826,  0.26308725,  0.19463087,  0.07516779,
          0.02818792,  0.00268456,  0.        ,  0.        ]),
  4: array([ 0.93984962,  0.82706767,  0.6820623 ,  0.67132116,  0.52900107,
          0.5424275 ,  0.37325456,  0.23361976,  0.14769066,  0.04027927,
          0.01342642,  0.        ,  0.        ,  0.        ]),
  10: array([ 0.91275168,  0.95302013,  0.77852349,  0.67785235,  0.4966443 ,
          0.55704698,  0.30872483,  0.18120805,  0.11409396,  0.02013423,
          0.        ,  0.        ,  0.        ,  0.        ]),
  'low_err': [array([ 0.03371774,  0.03257627,  0.02971268,  0.02928513,  0.02697953,
           0.02714597,  0.02348034,  0.01879195,  0.01616321,  0.01004472,
           0.00615111,  0.00189827,  0.        ,  0.        ]),
   array([ 0.05023708,  0.04712655,  0.04279639,  0.04245808,  0.03768977,
           0.03816507,  0.03165904,  0.02504667,  0.0199146 ,  0.01040006,
           0.00600448,  0.        ,  0.        ,  0.        ]),
   array([ 0.07826781,  0.07997567,  0.07228409,  0.06744883,  0.05773373,
           0.06114385,  0.04551899,  0.03487351,  0.02767185,  0.0116245 ,
           0.        ,  0.        ,  0.        ,  0.        ])]},
 3: {2: array([ 0.85234899,  0.78791946,  0.67651007,  0.61342282,  0.54362416,
          0.55033557,  0.40536913,  0.28053691,  0.17449664,  0.08053691,
          0.03221477,  0.00268456,  0.        ,  0.        ]),
  4: array([ 0.88077336,  0.79752954,  0.73308271,  0.67669173,  0.55048335,
          0.54779807,  0.349087  ,  0.2443609 ,  0.15574651,  0.04296455,
          0.02148228,  0.        ,  0.        ,  0.        ]),
  10: array([ 0.95973154,  0.91275168,  0.75838926,  0.63087248,  0.53691275,
          0.55033557,  0.31543624,  0.18120805,  0.12751678,  0.01342282,
          0.01342282,  0.        ,  0.        ,  0.        ]),
  'low_err': [array([ 0.03382444,  0.03252092,  0.03013415,  0.02869471,  0.0270129 ,
           0.02717914,  0.02332637,  0.01940514,  0.01530437,  0.01039727,
           0.00657581,  0.00189827,  0.        ,  0.        ]),
   array([ 0.04863257,  0.04627736,  0.04436818,  0.04262757,  0.03844742,
           0.03835354,  0.03061696,  0.02561598,  0.02045052,  0.01074114,
           0.00759513,  0.        ,  0.        ,  0.        ]),
   array([ 0.08025678,  0.07826781,  0.07134326,  0.06506953,  0.06002867,
           0.0607744 ,  0.0460111 ,  0.03487351,  0.02925436,  0.00949137,
           0.00949137,  0.        ,  0.        ,  0.        ])]},
 5: {2: array([ 0.86174497,  0.77718121,  0.64697987,  0.62013423,  0.55167785,
          0.55973154,  0.39597315,  0.29530201,  0.17583893,  0.08322148,
          0.03087248,  0.00134228,  0.        ,  0.        ]),
  4: array([ 0.8915145 ,  0.79752954,  0.71965628,  0.64178303,  0.5424275 ,
          0.56390977,  0.37325456,  0.26852846,  0.1396348 ,  0.04027927,
          0.02148228,  0.        ,  0.        ,  0.        ]),
  10: array([ 0.93959732,  0.87919463,  0.72483221,  0.60402685,  0.57718121,
          0.59060403,  0.3557047 ,  0.20805369,  0.10067114,  0.01342282,
          0.00671141,  0.        ,  0.        ,  0.        ]),
  'low_err': [array([ 0.03401036,  0.03229855,  0.02946913,  0.02885126,  0.02721226,
           0.02741017,  0.02305445,  0.01990926,  0.01536312,  0.01056914,
           0.00643736,  0.00134228,  0.        ,  0.        ]),
   array([ 0.04892821,  0.04627736,  0.04396   ,  0.04151349,  0.03816507,
           0.03891347,  0.03165904,  0.02685285,  0.01936386,  0.01040006,
           0.00759513,  0.        ,  0.        ,  0.        ]),
   array([ 0.07941047,  0.07681559,  0.06974701,  0.06367002,  0.06223905,
           0.0629586 ,  0.0488598 ,  0.03736755,  0.02599318,  0.00949137,
           0.00671141,  0.        ,  0.        ,  0.        ])]},
 10: {2: array([ 0.85771812,  0.76912752,  0.65100671,  0.61073826,  0.53288591,
          0.56241611,  0.40134228,  0.31006711,  0.18926174,  0.07651007,
          0.03624161,  0.00268456,  0.        ,  0.        ]),
  4: array([ 0.92105263,  0.77873255,  0.72234157,  0.6471536 ,  0.52094522,
          0.55048335,  0.35982814,  0.28464017,  0.14769066,  0.04296455,
          0.02416756,  0.        ,  0.        ,  0.        ]),
  10: array([ 0.97986577,  0.84563758,  0.73825503,  0.61744966,  0.55033557,
          0.54362416,  0.30872483,  0.26845638,  0.10067114,  0.03355705,
          0.01342282,  0.        ,  0.        ,  0.        ]),
  'low_err': [array([ 0.0339308 ,  0.03213076,  0.02956069,  0.02863185,  0.02674478,
           0.02747582,  0.02321022,  0.02040092,  0.01593871,  0.01013401,
           0.0069747 ,  0.00189827,  0.        ,  0.        ]),
   array([ 0.04973217,  0.04572875,  0.04404194,  0.04168683,  0.03740169,
           0.03844742,  0.03108442,  0.0276467 ,  0.0199146 ,  0.01074114,
           0.00805585,  0.        ,  0.        ,  0.        ]),
   array([ 0.08109427,  0.07533538,  0.07038986,  0.06437358,  0.0607744 ,
           0.06040268,  0.04551899,  0.04244668,  0.02599318,  0.01500717,
           0.00949137,  0.        ,  0.        ,  0.        ])]},
 20: {2: array([ 0.84563758,  0.75838926,  0.63221477,  0.60536913,  0.54228188,
          0.57583893,  0.40402685,  0.30872483,  0.20805369,  0.07651007,
          0.03892617,  0.00402685,  0.        ,  0.        ]),
  4: array([ 0.9264232 ,  0.76799141,  0.67132116,  0.60687433,  0.53168636,
          0.53437164,  0.37862513,  0.30343716,  0.19065521,  0.06176155,
          0.02416756,  0.00268528,  0.        ,  0.        ]),
  10: array([ 1.        ,  0.87248322,  0.71812081,  0.59731544,  0.52348993,
          0.53691275,  0.34228188,  0.24161074,  0.12751678,  0.03355705,
          0.00671141,  0.        ,  0.        ,  0.        ]),
  'low_err': [array([ 0.03369101,  0.03190568,  0.02913092,  0.02850572,  0.02697953,
           0.02780177,  0.02328772,  0.02035671,  0.01671127,  0.01013401,
           0.00722841,  0.0023249 ,  0.        ,  0.        ]),
   array([ 0.04987695,  0.04541228,  0.04245808,  0.04036868,  0.0377853 ,
           0.0378806 ,  0.03188599,  0.02854497,  0.02262661,  0.01287817,
           0.00805585,  0.00268528,  0.        ,  0.        ]),
   array([ 0.08192319,  0.07652184,  0.06942336,  0.06331531,  0.05927356,
           0.06002867,  0.04792905,  0.04026846,  0.02925436,  0.01500717,
           0.00671141,  0.        ,  0.        ,  0.        ])]}}

In [45]:
def param_finder(hist_counts,bin_centers,hist_err):
    xdata = 10**bin_centers
    p0    = (1,-1.05,10**10.64)
    opt_v,est_cov = optimize.curve_fit(schechter_log_func,xdata,\
                            hist_counts,p0=p0)
    alpha   = opt_v[1]
    m_star  = opt_v[2]
    res_arr = np.array([alpha,m_star])
   
    return res_arr

In [46]:
print eco_low[1][2]


[ 0.86711409  0.78389262  0.65503356  0.6442953   0.5409396   0.52751678
  0.39731544  0.27919463  0.19060403  0.08456376  0.02550336  0.00402685
  0.          0.        ]

In [47]:
# fig,ax = plt.subplots()

# ax.set_yscale('log')
# ax.plot(bin_centers,eco_low[1][2])
# plt.show()

In [48]:
test_arr = param_finder(eco_low[1][2],bin_centers,eco_low[1]['low_err'][0])