Besides the fact most of the plots refuse to show up inline, all of this code works. And I've made the eco_functions so that they will work completely separately from the mock plotting functions. As far as in the fully functioning code, I don't think that's necessary... Well, it will just overplot the titles a lot, I believe. I suppose I could add some conditional statements. So they won't plot the titles or initialize anything unless they are the only ones being plotted.
In [1]:
%matplotlib inline
import astropy.stats
import matplotlib.pyplot as plt
from matplotlib import ticker
from matplotlib.ticker import FormatStrFormatter
import numpy as np
import os
import pandas as pd
from scipy import spatial
In [2]:
#! /usr/bin/env python
# Victor Calderon
# June 29, 2016
# Vanderbilt University
from __future__ import print_function, division, absolute_import
__author__ =['Victor Calderon']
__copyright__ =["Copyright 2016 Victor Calderon, Index function"]
__email__ =['victor.calderon@vanderbilt.edu']
__maintainer__ =['Victor Calderon']
import glob
"""
"""
def Index(directory, datatype):
"""
Indexes the files in a directory `directory' with a
specific data type.
Parameters
----------
directory: str
Absolute path to the folder that is indexed.
datatype: str
Data type of the files to be indexed in the folder.
Returns
-------
file_array: array_like
num.array of indexed files in the folder 'directory'
with specific datatype.
Examples
--------
>>> Index('~/data', '.txt')
>>> array(['A.txt', 'Z'.txt', ...])
"""
assert(os.path.exists(directory))
files = np.array(glob.glob('{0}/*{1}'.format(directory, datatype)))
return files
In [3]:
def sph_to_cart(ra,dec,cz):
"""
Converts spherical coordinates to Cartesian coordinates.
Parameters
----------
ra: array-like
right-ascension of galaxies in degrees
dec: array-like
declination of galaxies in degrees
cz: array-like
velocity of galaxies in km/s
Returns
-------
coords: array-like, shape = N by 3
x, y, and z coordinates
"""
cz_dist = cz/70. #converts velocity into distance
x_arr = cz_dist*np.cos(np.radians(ra))*np.cos(np.radians(dec))
y_arr = cz_dist*np.sin(np.radians(ra))*np.cos(np.radians(dec))
z_arr = cz_dist*np.sin(np.radians(dec))
coords = np.column_stack((x_arr,y_arr,z_arr))
return coords
In [4]:
def calc_dens(n_val,r_val):
"""
Returns densities of spheres with radius being the distance to the
nth nearest neighbor.
Parameters
----------
n_val = integer
The 'N' from Nth nearest neighbor
r_val = array-like
An array with the distances to the Nth nearest neighbor for
each galaxy
Returns
-------
dens: array-like
An array with the densities of the spheres created with radii
to the Nth nearest neighbor.
"""
dens = np.array([(3.*(n_val+1)/(4.*np.pi*r_val[hh]**3)) \
for hh in range(len(r_val))])
return dens
In [5]:
def plot_calcs(mass,mass_err=False,ratio_err=False):
"""
Returns values for plotting the stellar mass function and
mass ratios
Parameters
----------
mass: array-like
A 1D array with mass values
Returns
-------
bin_centers: array-like
An array with the medians of the mass bins
"""
bins = np.linspace(9.2,11.8,14)
dlogM = 0.2
mass_counts, edges = np.histogram(mass,bins)
bin_centers = 0.5*(edges[:-1]+edges[1:])
mass_freq = mass_counts/float(len(mass))/dlogM
ratio_dict = {}
frac_val = [2,4,10]
yerr = []
for ii in frac_val:
ratio_dict[ii] = {}
# Calculations for the lower density cut
frac_data = int(len(mass)/ii)
frac_mass = mass[0:frac_data]
counts, edges = np.histogram(frac_mass,bins)
# Calculations for the higher density cut
frac_mass_2 = mass[-frac_data:]
counts_2, edges_2 = np.histogram(frac_mass_2,bins)
# Ratio determination
ratio_counts = (1.*counts_2)/(1.*counts)
ratio_dict[ii] = ratio_counts
yerr.append((counts_2*1.)/(counts*1.)*np.sqrt(1./counts + 1./counts_2))
if ratio_err == True:
ratio_dict_list = [[] for xx in range(2)]
ratio_dict_list[0] = ratio_dict
ratio_dict_list[1] = yerr
ratio_dict = ratio_dict_list
if mass_err == True:
mass_freq_list = [[] for xx in xrange(2)]
mass_freq_list[0] = mass_freq
mass_freq_list[1] = np.sqrt(mass_counts)/float(len(mass))/dlogM
mass_freq = np.array(mass_freq_list)
return bin_centers, mass_freq, ratio_dict
In [6]:
def bin_func(mass_dist,kk,bootstrap=False):
"""
Returns median distance to Nth nearest neighbor, the x-value at which to
plot the median distance, and the distances that correspond
to the galaxies in each bin.
Parameters
----------
mass_dist: array-like
An array with mass values in at index 0 and distance to the Nth
nearest neighbor in the other 6
kk: integer-like
The index of mass_dist where the appropriate distance array may be found
Returns
-------
medians: array-like
An array with the median distance to the Nth nearest neighbor for
all the galaxies in each of the bins
"""
edges = np.linspace(9.2,11.8,14)
digitized = np.digitize(mass_dist.T[0],edges)
# digitized = np.digitize(nn_mass_dist[1].T[0],edges)
digitized -= int(1)
bin_nums = np.unique(digitized)
bin_nums = list(bin_nums)
if 12 not in bin_nums:
bin_nums.append(12)
bin_nums= np.array(bin_nums)
# print bin_nums
medians = np.array([np.median(mass_dist.T[kk][digitized==ii]) for ii in bin_nums])
if bootstrap == True:
# col = np.arange(1,7)
bin_nums = np.unique(digitized)
# for kk in col:
dist_in_bin = np.array([(mass_dist.T[kk][digitized == ii]) \
for ii in range(1,len(edges))])
dist_in_bin = np.array([(mass_dist.T[kk][digitized==ii]) for ii in bin_nums])
low_err_test = np.array([np.percentile(astropy.stats.bootstrap\
(dist_in_bin[vv],bootnum=1000,\
bootfunc=np.median),16) for vv \
in range(len(dist_in_bin))])
high_err_test = np.array([np.percentile(astropy.stats.bootstrap\
(dist_in_bin[vv],bootnum=1000,\
bootfunc=np.median),84) for vv \
in range(len(dist_in_bin))])
med_list = [[] for yy in range(3)]
med_list[0] = medians
med_list[1] = low_err_test
med_list[2] = high_err_test
medians = np.array(med_list)
return medians
In [7]:
def plot_eco_rats(bin_centers,y_vals,neigh_val,ax,col_num,plot_idx,only=False):
"""
Returns subplots of ECO density-cut,mass ratios
Parameters
----------
bin_centers: array-like
An array with the medians mass values of the mass bins
y_vals: array-like
An array containing the ratio values for each mass bin
neigh_val: integer-like
Value which will be inserted into the text label of each plot
ax: axis-like
A value which specifies which axis each subplot is to be
plotted to
col_num: integer-like
Integer which specifies which column is currently being
plotted. Used for labelling subplots
plot_idx: integer-like
Specifies which subplot the figure is plotted to. Used for
labeling the x-axis
Optional
--------
only == True
To be used when only plotting the ECO ratios, no mocks.
Will add in the additional plotting specifications that
would have been taken care of previously in a for-loop
which plotted the mocks as well
Returns
-------
ECO ratios plotted to any previously initialized figure
"""
if only == True:
if plot_idx ==16:
ax.set_xlabel('$\log\ M_{*}$',fontsize=18)
if col_num ==0:
title_label = 'Mass Ratio 50/50, {0} NN'.format(neigh_val)
frac_val = 10
ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
verticalalignment='top',transform=ax.transAxes,fontsize=12)
elif col_num ==1:
title_label = 'Mass Ratio 25/75, {0} NN'.format(neigh_val)
frac_val = 4
ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
verticalalignment='top',transform=ax.transAxes,fontsize=12)
elif col_num ==2:
title_label = 'Mass Ratio 10/90, {0} NN'.format(neigh_val)
frac_val = 2
ax.text(0.05, 0.95, title_label,horizontalalignment='left',\
verticalalignment='top',transform=ax.transAxes,fontsize=12)
ax.set_xlim(9.2,11.8)
ax.set_ylim([0,5])
ax.set_xticks(np.arange(9.5, 12., 0.5))
ax.set_yticks([1.,3.])
ax.tick_params(axis='both', labelsize=12)
ax.axhline(y=1,c="darkorchid",linewidth=0.5,zorder=0)
frac_vals = np.array([2,4,10])
y_vals_2 = y_vals[0][frac_vals[hh]]
ax.errorbar(bin_centers,y_vals_2,yerr=y_vals[1][hh],\
color='limegreen',linewidth=2)
#for some reason, whenever I give this frac_vals as an argument, it won't recognize it as an array.
#And then it will begin with 10, rather than 2; even when I have it tell me what frac val it's using,
#and then have the for loop do so as well, they do not line up
#also, it plots only ECO differently than when there are multiple curves, and it changes
#the xlims. So I set them. And then, it made the 17th indexed plot slightly off, so I changed the xticks
#as well
In [8]:
def plot_eco_meds(bin_centers,y_vals,low_lim,up_lim,neigh_val,ax,plot_idx):
titles = [1,2,3,5,10,20]
ax.set_ylim(0,10**1.5)
ax.set_xlim(9.2,11.8)
ax.set_yscale('symlog')
ax.set_xticks(np.arange(9.5,12.,0.5))
ax.tick_params(axis='both', which='major', labelsize=16)
title_here = 'n = {0}'.format(titles[plot_idx])
ax.text(0.05, 0.95, title_here,horizontalalignment='left',\
verticalalignment='top',transform=ax.transAxes,fontsize=18)
ax.errorbar(bin_centers,y_vals,yerr=0.1,lolims=low_lim[neigh_val],\
uplims=up_lim[neigh_val],color='deeppink')
In [9]:
#assumed variables
neigh_vals = np.array([1,2,3,5,10,20])
In [10]:
eco_path = r"C:\Users\Hannah\Desktop\Vanderbilt_REU\Stellar_mass_env_density\Catalogs\ECO_true"
ECO_true = (Index(eco_path,'.txt'))
names = ['ra','dec','cz','logMstar']
PD_eco = pd.read_csv(ECO_true[0],sep="\s+", usecols=(0,1,2,4),header=None,\
skiprows=1,names=names)
eco_comp = PD_eco[PD_eco.logMstar >= 9.3]
ra_eco = (np.array(eco_comp)).T[0]
dec_eco = (np.array(eco_comp)).T[1]
cz_eco = (np.array(eco_comp)).T[2]
mass_eco = (np.array(eco_comp)).T[3]
coords_eco = sph_to_cart(ra_eco,dec_eco,cz_eco)
eco_neighbor_tree = spatial.cKDTree(coords_eco)
eco_tree_dist = np.array(eco_neighbor_tree.query(coords_eco,21)[0])
eco_mass_dist = np.column_stack((mass_eco,eco_tree_dist.T[neigh_vals].T))
eco_dens = ([calc_dens(neigh_vals[jj],\
(eco_mass_dist.T[range(1,7)[jj]])) for jj in range(len(neigh_vals))])
eco_mass_dens = [(np.column_stack((mass_eco,eco_dens[ii]))) for ii in range(len(neigh_vals))]
eco_idx = [(eco_mass_dens[jj].T[1].argsort()) for jj in range(len(neigh_vals))]
eco_mass_dat = [(eco_mass_dens[jj][eco_idx[jj]].T[0]) for jj in range(len(neigh_vals))]
eco_ratio_info = [[] for xx in xrange(len(eco_mass_dat))]
for qq in range(len(eco_mass_dat)):
bin_centers, eco_freq, eco_ratio_info[qq] = plot_calcs(eco_mass_dat[qq],mass_err=True,ratio_err=True)
eco_medians = [[] for xx in xrange(len(eco_mass_dat))]
for jj in (range(len(eco_mass_dat))):
eco_medians[jj] = np.array(bin_func(eco_mass_dist,(jj+1),bootstrap=True))
In [11]:
plt.plot(bin_centers,eco_freq[0])
plt.show()
In [12]:
###stellar mass function
fig,ax = plt.subplots(figsize=(8,8))
ax.set_title('Mass Distribution',fontsize=18)
ax.set_xlabel('$\log\ M_{*}$',fontsize=17)
ax.set_yscale('log')
ax.set_xlim(9.2,11.8)
ax.tick_params(axis='both', labelsize=14)
ax.errorbar(bin_centers,eco_freq[0],yerr=eco_freq[1],color='deeppink')
plt.show()
In [13]:
%matplotlib qt
np.seterr(divide='ignore',invalid='ignore')
frac_vals = [2,4,10]
nrow_num = int(6)
ncol_num = int(3)
zz = int(0)
fig, axes = plt.subplots(nrows=nrow_num, ncols=ncol_num, \
figsize=(100,200), sharex= True,sharey=True)
axes_flat = axes.flatten()
# fig.suptitle("Percentile Trends", fontsize=18)
while zz <= 16:
for ii in range(len(eco_ratio_info)):
for hh in range(len(eco_ratio_info[0][1])):
plot_eco_rats(bin_centers,(eco_ratio_info[ii]),neigh_vals[ii],axes_flat[zz],hh,zz,only=True)
zz += 1
plt.subplots_adjust(left=0.02, bottom=0.09, right=1.00, top=1.00,\
hspace=0,wspace=0)
plt.show()
In [14]:
print (range(len(eco_ratio_info)))
In [15]:
%matplotlib qt
nrow_num_mass = 2
ncol_num_mass = 3
zz = int(0)
fig, axes = plt.subplots(nrows=nrow_num_mass, ncols=ncol_num_mass, \
figsize=(100,200), sharex= True, sharey = True)
axes_flat = axes.flatten()
zz = int(0)
while zz <=4:
for ii in range(len(eco_medians)):
plot_eco_meds(bin_centers,eco_medians[ii][0],\
eco_medians[ii][1],eco_medians[ii][2],ii,axes_flat[zz],zz)
zz += 1
plt.subplots_adjust(left=0.05, bottom=0.09, right=1.00, top=1.00,\
hspace=0,wspace=0)
plt.show()