In [1]:
%pylab inline
from astropy.io import fits
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib as mpl #%pylab does not bind matplotlib to the name mpl, so import it explicitly
from astropy.table import Table,Column
import os
import warnings
import requests
mpl.rcParams['text.usetex']=True
mpl.rcParams['axes.linewidth'] = 3
warnings.filterwarnings('ignore', category=RuntimeWarning, append=True)
warnings.filterwarnings('ignore', category=UserWarning, append=True);
In [2]:
# Load data from Dropbox folder instead of clogging up Github
def download_from_dropbox(url):
local_filename = "../data/{:}".format(url.split("/")[-1].split("?")[0])
r = requests.get(url, stream=True)
with open(local_filename, 'wb') as f:
for chunk in r.iter_content(chunk_size=1024):
if chunk: # filter out keep-alive new chunks
f.write(chunk)
f.flush()
return local_filename
In [3]:
#Load Data!
ferengi_filename = download_from_dropbox("https://www.dropbox.com/s/2dsnro4r5sa48gb/ferengi_all_weighted_and_meta.fits?dl=1")
data = Table.read(ferengi_filename)
In [4]:
#Defining surface brightness bins
yedges=np.linspace(np.min(data['mu_max_i']),np.max(data['mu_max_i']),10)
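The loops below locate each galaxy's surface-brightness bin by scanning edge pairs; as an aside, np.digitize returns the same bin index in one call. A minimal sketch (the test value is just the midpoint of the range, chosen for illustration):
In [ ]:
#Sketch: after subtracting 1, np.digitize gives the y such that yedges[y] < mu <= yedges[y+1]
mu = 0.5*(yedges[0] + yedges[-1]) #illustrative value, guaranteed to lie inside the binning range
y = np.digitize([mu], yedges, right=True)[0] - 1
print 'mu = %.2f falls in bin %i: (%.2f, %.2f]' %(mu, y, yedges[y], yedges[y+1])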
In [5]:
p_x = 't00_smooth_or_features_a1_features_frac_weighted_2' #p: weighted vote fraction for the question of interest
N_x = 't00_smooth_or_features_count_weighted_2' #N: weighted number of people who answered the question
N_cut = 0 #minimum number of people required to have answered the question
In [6]:
#Pick out unique galaxies
galaxies = set(data['objid'])
In [7]:
#simulated redshifts of ferengi data:
reds=[.3,.4,.5,.6,.7,.8,.9,1]
#Define lists pairing p_features at high redshift with p_features at low redshift (z=0.3), for each SB bin and redshift.
scatter_dct={}
for z in reds:
for edge in yedges:
scatter_dct[z,edge,'hi']=[]
scatter_dct[z,edge,'lo']=[]
scatter_dct[z,edge,'subj_id']=[]
for i,g in enumerate(galaxies):
this_gal=(data['objid']==g)
evos = set(data[this_gal]['sim_evolution'])
# if 0 in evos:
# evos.remove(0) #remove this line if including 0 evolution data
# evos=[0]
for e in evos:
this_evo=(data[this_gal]['sim_evolution']==e)
if len(set(data[this_gal][this_evo]['sim_redshift']))==8: #only want stuff where we have data down to 0.3
p_at_3=(data[this_gal][this_evo]['sim_redshift']==.3)
p_bar_at_3 = data[this_gal][this_evo][p_at_3][p_x][0]#value of p_x at redshift 0.3
            n_bar_at_3 = data[this_gal][this_evo][p_at_3][N_x][0] #number of people who answered the question for the z=0.3 image
if n_bar_at_3 >= N_cut: #only care about galaxies with at least N people answering question at 0.3
for row in data[this_gal][this_evo]:
for y in range(0,len(yedges)-1):
for j,hi_z in enumerate(reds):
                            if round(row['sim_redshift'],2)==hi_z and row['mu_max_i'] > yedges[y] and row['mu_max_i'] < yedges[y+1] and row[N_x]>=N_cut: #now look at high-redshift data; only keep rows where at least N_cut people answered the question
scatter_dct[hi_z,yedges[y],'hi'].append(row[p_x]) # slap p_x in high list
scatter_dct[hi_z,yedges[y],'lo'].append(p_bar_at_3) # put z=0.3 value in low list
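As a quick sanity check on the dictionary built above, this sketch counts how many (low-z, high-z) pairs landed in each surface-brightness bin at one simulated redshift (z = 0.5, picked arbitrarily from reds):
In [ ]:
#Sketch: occupancy of each SB bin at z = 0.5
for y in range(0,len(yedges)-1):
    print 'SB bin (%.2f, %.2f]: %i points' %(yedges[y], yedges[y+1], len(scatter_dct[.5, yedges[y], 'hi']))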
In [8]:
#Just evolution 0:
#Define lists pairing p_features at high redshift with p_features at low redshift (z=0.3), for each SB bin and redshift.
scatter_dct_0={}
for z in reds:
for edge in yedges:
scatter_dct_0[z,edge,'hi']=[]
scatter_dct_0[z,edge,'lo']=[]
scatter_dct_0[z,edge,'subj_id']=[]
for i,g in enumerate(galaxies):
this_gal=(data['objid']==g)
# evos = set(data[this_gal]['sim_evolution'])
# if 0 in evos:
# evos.remove(0) #remove this line if including 0 evolution data
evos=[0]
for e in evos:
this_evo=(data[this_gal]['sim_evolution']==e)
if len(set(data[this_gal][this_evo]['sim_redshift']))==8: #only want stuff where we have data down to 0.3
p_at_3=(data[this_gal][this_evo]['sim_redshift']==.3)
p_bar_at_3 = data[this_gal][this_evo][p_at_3][p_x][0]#value of p_x at redshift 0.3
            n_bar_at_3 = data[this_gal][this_evo][p_at_3][N_x][0] #number of people who answered the question for the z=0.3 image
if n_bar_at_3 >= N_cut: #only care about galaxies with at least N people answering question at 0.3
for row in data[this_gal][this_evo]:
for y in range(0,len(yedges)-1):
for j,hi_z in enumerate(reds):
                            if round(row['sim_redshift'],2)==hi_z and row['mu_max_i'] > yedges[y] and row['mu_max_i'] < yedges[y+1] and row[N_x]>=N_cut: #now look at high-redshift data; only keep rows where at least N_cut people answered the question
scatter_dct_0[hi_z,yedges[y],'hi'].append(row[p_x]) # slap p_x in high list
scatter_dct_0[hi_z,yedges[y],'lo'].append(p_bar_at_3) # put z=0.3 value in low list
In [8]:
def derivative_of_poly(x_list,fit_cos,deg):
    #analytic derivative of a polynomial whose coefficients are in ascending order (fit_cos[k] multiplies x**k), as returned by np.polynomial.polynomial.polyfit
    if deg == 3:
        derivative=3*fit_cos[3]*x_list**2+2*fit_cos[2]*x_list+fit_cos[1]
    elif deg == 2:
        derivative=2*fit_cos[2]*x_list+fit_cos[1]
    elif deg == 1:
        derivative=fit_cos[1]
    return derivative
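derivative_of_poly assumes the ascending coefficient order that np.polynomial.polynomial.polyfit returns; a quick cross-check against numpy's own polynomial derivative, using made-up coefficients:
In [ ]:
#Sketch: derivative_of_poly should agree with np.polynomial.polynomial.polyder
c = np.array([0.1, 0.5, -0.2, 0.05]) #illustrative ascending-order coefficients
x = np.linspace(0, 1, 5)
manual = derivative_of_poly(x, c, 3)
auto = np.polynomial.polynomial.polyval(x, np.polynomial.polynomial.polyder(c))
print np.allclose(manual, auto)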
In [9]:
#Plot p_features at a given redshift vs p_features at z=0.3 at each redshift, binned by surface brightness
f=figure(figsize=(20,18))
#track 'uncorrectable' regions of z/mu/p space (shaded)
p_range_uncorrectable_dct={}
#track 'correctable' regions of z/mu/p space (unshaded *and* at least 5 points in bin)
p_range_correctable_dct={}
#store range of spread of data in each bin
interval_dct={}
x_new = np.linspace(0,.95,40)
slope_cut=.35
gs=gridspec.GridSpec(9,10)
gs.update(wspace=0)
gs.update(hspace=0)
yedge_int=[8,7,6,5,4,3,2,1,0]
y_label=[]
facecolors=['#af8dc3','#999999','#7fbf7b']
for i in range(0,len(yedges)-1):
y_label.append(round((yedges[i]+yedges[i+1])/2,2))
#y_label.append(round(yedges[i],1))
y_label=y_label[::-1]
for j,y in enumerate(yedge_int):
for i,z in enumerate(reds):
ax=plt.subplot(gs[j,i])
xs=scatter_dct[z,yedges[y],'lo']
ys=scatter_dct[z,yedges[y],'hi']
# xs_0=scatter_dct_0[z,yedges[y],'lo']
# ys_0=scatter_dct_0[z,yedges[y],'hi']
plt.plot(x_new,x_new,c='k')
flat_list=[]
p_range_uncorrectable_dct[z,yedges[y]]=[]
p_range_correctable_dct[z,yedges[y]]=[]
plt.scatter(xs,ys)
# plt.scatter(xs_0,ys_0,c='r',s=30)
if len(xs)>5:
poly_params=np.polynomial.polynomial.polyfit(xs,ys,3)
poly = np.polynomial.Polynomial(poly_params)
drv=derivative_of_poly(x_new,poly_params,3)
if np.min(drv)>-1: #fit okay, go ahead and plot
plt.plot(x_new,np.polyval(poly_params[::-1],x_new),c='k',lw=3,ls='dashed')
for d,val in enumerate(drv):
if val < slope_cut:
#plt.axvline(x_new[d],lw=3,alpha=.1) #shade vertically
flat_list.append(x_new[d]) #list of x-values for shaded region
if len(flat_list)>0: # if there are bad regions, record stuff:
min_p_at_3=np.min(flat_list) #minimum x value of shaded region
max_p_at_3=np.max(flat_list) #maximum x value of shaded region
#get list of y values in shaded region
bad_p_at_z_list=[]
for p_int,p in enumerate(xs):
if p>=min_p_at_3 and p < max_p_at_3:
bad_p_at_z_list.append(ys[p_int])
                    #shade out 1.5 standard deviations above the mean of that region
                    #min_val = np.mean(bad_p_at_z_list)-1.5*np.std(bad_p_at_z_list)
                    min_val = 0 #floor the shaded region at 0, since vote fractions cannot be negative
                    if len(bad_p_at_z_list)>0: #need at least 1 point to get mean and std
max_val = np.mean(bad_p_at_z_list)+1.5*np.std(bad_p_at_z_list)
else:
max_val=0
plt.axhspan(0,max_val,alpha=.1)
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_uncorrectable_dct[z,yedges[y]].append(max_val)
p_range_correctable_dct[z,yedges[y]].append(max_val)
p_range_correctable_dct[z,yedges[y]].append(1)
else: #if no bad regions, whole square is correctable.
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_correctable_dct[z,yedges[y]].append(0)
p_range_correctable_dct[z,yedges[y]].append(1)
            else: #cubic fit is unstable (slope dips below -1); try 2nd order instead
poly_params=np.polynomial.polynomial.polyfit(xs,ys,2)
poly = np.polynomial.Polynomial(poly_params)
drv=derivative_of_poly(x_new,poly_params,2)
if np.min(drv)>-1: #fit okay, go ahead and plot
plt.plot(x_new,np.polyval(poly_params[::-1],x_new),c='k',lw=3,ls='dashed')
for d,val in enumerate(drv):
if val < slope_cut:
#plt.axvline(x_new[d],lw=3,alpha=.1)
flat_list.append(x_new[d]) #list of x-values for shaded region
if len(flat_list)>0: # if there are bad regions, record stuff:
min_p_at_3=np.min(flat_list) #minimum x value of shaded region
max_p_at_3=np.max(flat_list) #maximum x value of shaded region
#get list of y values in shaded region
bad_p_at_z_list=[]
for p_int,p in enumerate(xs):
if p>=min_p_at_3 and p < max_p_at_3:
bad_p_at_z_list.append(ys[p_int])
                        #shade out 1.5 standard deviations above the mean of that region
                        min_val=0 #floor the shaded region at 0, since vote fractions cannot be negative
                        #min_val = np.mean(bad_p_at_z_list)-1.5*np.std(bad_p_at_z_list)
                        if len(bad_p_at_z_list)>0: #need at least 1 point to get mean and std
max_val = np.mean(bad_p_at_z_list)+1.5*np.std(bad_p_at_z_list)
else:
max_val=0
plt.axhspan(min_val,max_val,alpha=.1)
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_uncorrectable_dct[z,yedges[y]].append(max_val)
p_range_correctable_dct[z,yedges[y]].append(max_val)
p_range_correctable_dct[z,yedges[y]].append(1)
else: #if no bad regions, whole square is correctable.
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_correctable_dct[z,yedges[y]].append(0)
p_range_correctable_dct[z,yedges[y]].append(1)
                else: #quadratic fit still unstable; fall back to linear
poly_params=np.polynomial.polynomial.polyfit(xs,ys,1)
poly = np.polynomial.Polynomial(poly_params)
plt.plot(x_new,np.polyval(poly_params[::-1],x_new),c='k',lw=3,ls='dashed')
drv=derivative_of_poly(x_new,poly_params,1)
if drv < slope_cut:
#plt.axvline(x_new[d],lw=3,alpha=.1)
flat_list=x_new #list of x-values for shaded region
min_p_at_3=np.min(flat_list) #minimum x value of shaded region
max_p_at_3=np.max(flat_list) #maximum x value of shaded region
#get list of y values in shaded region
bad_p_at_z_list=ys
min_val=0
#min_val = np.mean(bad_p_at_z_list)-1.5*np.std(bad_p_at_z_list)
                        if len(bad_p_at_z_list)>0: #need at least 1 point to get mean and std
max_val = np.mean(bad_p_at_z_list)+1.5*np.std(bad_p_at_z_list)
else:
max_val=0
plt.axhspan(min_val,max_val,alpha=.1)
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_uncorrectable_dct[z,yedges[y]].append(max_val)
p_range_correctable_dct[z,yedges[y]].append(max_val)
p_range_correctable_dct[z,yedges[y]].append(1)
else: #if no bad regions, whole square is correctable.
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_correctable_dct[z,yedges[y]].append(0)
p_range_correctable_dct[z,yedges[y]].append(1)
plt.xlim(0,1)
plt.ylim(0,1)
        else: #5 or fewer points in this bin, so shade the whole square as unusable (NEI)
plt.axhspan(0,1,alpha=.1,color='k')
plt.xlim(0,1)
plt.ylim(0,1)
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_uncorrectable_dct[z,yedges[y]].append(0)
p_range_correctable_dct[z,yedges[y]].append(0)
p_range_correctable_dct[z,yedges[y]].append(0)
plt.tick_params(labelleft='off')
plt.tick_params(top='off',right='off',labelbottom='off')
if j==8:
plt.xlabel('%s'%reds[i],fontsize=16)
if i==7:
ax.yaxis.set_label_position("right")
plt.ylabel('%s'%str(y_label[j]).rjust(500),fontsize=16,rotation=270,labelpad=20)
f.text(.78,.5,r'$\mathrm{\mu_{max,i}~(mag/arcsec^2)}$',rotation=270,fontsize=30,va='center')
f.text(.45,.91,r'$\mathrm{redshift}$',fontsize=30,ha='center')
f.text(.1,.5,r'$\mathrm{p_{features}}$',rotation=90,fontsize=30,va='center')
f.text(.45,.08,r'$\mathrm{p_{features},z=0.3}$',fontsize=30,ha='center');
#plt.savefig('/home/mel/Documents/GZ_HUBBLE/ferengi_debias/finding_matches/images/p_vs_p_SB_redshift_slope_method_evs_highlighted.pdf')
Out[9]:
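Each (redshift, SB-edge) key in the dictionaries built above now holds a two-element [low, high] interval in p_features; a short sketch of how to query one cell (z = 0.5 and the fourth SB edge are arbitrary choices):
In [ ]:
#Sketch: query the correctable p_features interval for one (redshift, SB) cell
lo, hi = p_range_correctable_dct[.5, yedges[3]]
print 'correctable at z=0.5, SB edge %.2f: %.2f < p <= %.2f' %(yedges[3], lo, hi)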
In [10]:
#Here we'll compute the scatter in the points for different bins of measured p_values, so we can provide upper limits
#on p_features for the difficult galaxies.
f=figure(figsize=(20,18))
#store range of spread of data in each bin
interval_dct={}
gs=gridspec.GridSpec(9,10)
gs.update(wspace=0)
gs.update(hspace=0)
yedge_int=[8,7,6,5,4,3,2,1,0]
y_label=[]
facecolors=['#af8dc3','#999999','#7fbf7b']
for i in range(0,len(yedges)-1):
y_label.append(round((yedges[i]+yedges[i+1])/2,1))
y_label=y_label[::-1]
for j,y in enumerate(yedge_int):
for i,z in enumerate(reds):
ax=plt.subplot(gs[j,i])
xs=scatter_dct[z,yedges[y],'lo']
ys=scatter_dct[z,yedges[y],'hi']
plt.scatter(xs,ys)
interval_dct[z,yedges[y]]=[]
plt.xlim(0,1)
plt.ylim(0,1)
this_dct={} #stores p_features at z=0.3 value for each bin (xs); use to compute spread in each bin
#stored like: this_dct[lower_bin_edge,higher_bin_edge] = [distribution of xs in bin]
bins_list=np.linspace(0,1,6) #bin measured p_features from 0 to 1
for b in range(0,len(bins_list)-1):
bin_bottom = round(bins_list[b],1)
bin_top = round(bins_list[b+1],1)
this_dct[bin_bottom,bin_top]=[]
            for l,val in enumerate(ys): #check each p_features value measured at redshift z
if val >= bin_bottom and val < bin_top: # if it falls inside bin in question:
this_dct[bin_bottom,bin_top].append(xs[l]) #then put corresponding p_features,z=0.3 value in list
#Now compute the mean and spread of p_features,z=0.3 for each bin
p_features_means=[]
x_error_lo=[]
x_error_hi=[]
bin_centers=[]
for b in range(0,len(bins_list)-1):
bin_bottom = round(bins_list[b],3)
bin_top = round(bins_list[b+1],3)
plt.axhline(bin_bottom,c='k',lw=1,alpha=.1)
p_features_means.append(np.median(this_dct[bin_bottom,bin_top]))
try:
x_error_lo.append(p_features_means[b] - np.percentile(this_dct[bin_bottom,bin_top],10))
except ValueError:
x_error_lo.append(0)
try:
x_error_hi.append(np.percentile(this_dct[bin_bottom,bin_top],90) - p_features_means[b])
except ValueError:
x_error_hi.append(0)
bin_centers.append((bin_top-bin_bottom)/2.+bin_bottom)
try:
interval_dct[z,yedges[y]].append({'bin_bottom':bin_bottom,'bin_top':bin_top,'low_limit':np.percentile(this_dct[bin_bottom,bin_top],25),'hi_limit':np.percentile(this_dct[bin_bottom,bin_top],75)})
except ValueError:
pass
plt.errorbar(p_features_means,bin_centers,xerr=[x_error_lo,x_error_hi],fmt='o',c='#d95f02',markersize=1,elinewidth=5)
plt.tick_params(labelleft='off')
plt.tick_params(top='off',right='off',labelbottom='off')
if j==8:
plt.xlabel('%s'%reds[i],fontsize=16)
if i==7:
ax.yaxis.set_label_position("right")
plt.ylabel('%s'%y_label[j],fontsize=16,rotation=270,labelpad=20)
f.text(.78,.5,r'$\mathrm{\mu_{max,i}~(mag/arcsec^2)}$',rotation=270,fontsize=30,va='center')
f.text(.45,.91,r'$\mathrm{redshift}$',fontsize=30,ha='center')
f.text(.1,.5,r'$\mathrm{p_{features}}$',rotation=90,fontsize=30,va='center')
f.text(.45,.08,r'$\mathrm{p_{features},z=0.3}$',fontsize=30,ha='center')
#plt.savefig('images/p_vs_p_SB_redshift_bar_Kyle_bins0.pdf')
Out[10]:
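interval_dct maps each (redshift, SB-edge) key to a list of dicts, one per measured-p bin, carrying the 25th/75th percentile limits on p_features at z=0.3; a sketch of how to read one cell back (same arbitrary cell as above):
In [ ]:
#Sketch: percentile limits stored for one (redshift, SB) cell
for bin_range in interval_dct[.5, yedges[3]]:
    print 'measured p in [%.1f, %.1f): 25th-75th percentile of p(z=0.3) is [%.2f, %.2f]' %(bin_range['bin_bottom'], bin_range['bin_top'], bin_range['low_limit'], bin_range['hi_limit'])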
In [12]:
#get list of debiasable ferengi galaxies
unique_galaxies = set(data['objid'])
z0ind = np.zeros(len(data),dtype=bool)
for ug in unique_galaxies:
ind = (data['objid'] == ug)
if data[ind]['sim_redshift'].min() < 0.301:
z0ind[ind] = True
data_z0 = data[z0ind]
category_list_ferengi=[]
for row in data_z0:
if row['mu_max_i'] > yedges[0] and row['mu_max_i'] <= yedges[len(yedges)-1] and row['sim_redshift'] > reds[0]-.05 and row['sim_redshift'] <= reds[len(reds)-1] + .05: # if within ferengi space, check where it is. else, consider NEI or uncorrectable.
for y in range(0,len(yedges)-1):
if row['mu_max_i'] > yedges[y] and row['mu_max_i'] <= yedges[y+1]:
for i,z in enumerate(reds):
if row['sim_redshift'] > reds[i]-.05 and row['sim_redshift'] <= reds[i] + .05: # pick out where it is in SB/z and check color
if row[p_x] > p_range_correctable_dct[z,yedges[y]][0] and row[p_x] <= p_range_correctable_dct[z,yedges[y]][1]:# if it's in correctable range::
category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'correctable','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
elif row[p_x] > p_range_uncorrectable_dct[z,yedges[y]][0] and row[p_x] <= p_range_uncorrectable_dct[z,yedges[y]][1]:# if it's in uncorrectable range::
category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'uncorrectable','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
else: #not in correctable or uncorrectable range, so nei
category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'nei','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
    else: #galaxies outside ferengi SB and z limits - still need measurable z and SB to possibly correct.
if row['sim_redshift'] > 0 and row['sim_redshift'] < 9 and row['mu_max_i'] >0: #these have measurements for z and SB, put in NEI
category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'nei','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
else: #these have nan or infinite values of z or mu, put in need_redshift_list
category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'nei_needs_redshift','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
In [13]:
#create fits file of galaxies with Correctable_Category Label for FERENGI
#astropy table Columns infer dtype from the input lists; FITS-style format codes (e.g. 'A24', 'D') belong to fits.Column and are omitted here
c0 = Column([x['sdss_id'] for x in category_list_ferengi], name='sdss_id')
c00 = Column([x['subject_id'] for x in category_list_ferengi], name='subject_id')
c01 = Column([x['Correctable_Category'] for x in category_list_ferengi], name='Correctable_Category')
c02 = Column([x['p_features'] for x in category_list_ferengi], name='p_features')
c03 = Column([x['sim_redshift'] for x in category_list_ferengi], name='sim_redshift')
c04 = Column([x['sim_evolution'] for x in category_list_ferengi], name='sim_evolution')
c05 = Column([x['mu_max_i'] for x in category_list_ferengi], name='mu_max_i')
category_table_ferengi = Table()
category_table_ferengi.add_columns([c0,c00,c01,c02,c03,c04,c05])
fname = 'ferengi_data_with_categories_slope_method.fits'
if os.path.exists(fname):
os.remove(fname)
category_table_ferengi.write(fname,format='fits')
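A quick round-trip check (sketch) that the FITS file just written reads back cleanly:
In [ ]:
#Sketch: read the category table back and confirm its size and columns
check = Table.read(fname)
print len(check), check.colnames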
In [14]:
print 'The number of correctable ferengi galaxies is %i' %((category_table_ferengi['Correctable_Category'] == 'correctable')).sum()
print 'The number of uncorrectable ferengi galaxies is %i' %((category_table_ferengi['Correctable_Category'] == 'uncorrectable')).sum()
print 'The number of nei galaxies is %i' %((category_table_ferengi['Correctable_Category'] == 'nei')).sum()
print 'The number of nei_needs_redshift galaxies is %i' %((category_table_ferengi['Correctable_Category'] == 'nei_needs_redshift')).sum()
print 'Total: %i' %len(category_table_ferengi)
In [15]:
#hubble file - not on dropbox because it's huge.
#hubble_data=fits.getdata('../data/Hubble_t01_data.fits',1)
hubble_data=Table.read('/home/mel/Documents/GZ_HUBBLE/gz_hst_votes_and_meta_acs_11_19_2015.fits')
In [104]:
#determine which parts of hubble sample are correctable, uncorrectable, or nei
p_x_hubble = 't01_smooth_or_features_a02_features_or_disk_weighted_fraction' #task
category_list=[]
z='Z_BEST_COMBINED'
for row in hubble_data:
    if row['MU_HI'] > yedges[0] and row['MU_HI'] <= yedges[len(yedges)-1] and row[z] > reds[0] and row[z] <= reds[len(reds)-1] + .05: # if within ferengi space, check where it is. else, consider NEI or uncorrectable.
        for y in range(0,len(yedges)-1):
            if row['MU_HI'] > yedges[y] and row['MU_HI'] <= yedges[y+1]:
                for i,red in enumerate(reds): #loop variable renamed to 'red' so it doesn't shadow the redshift column name z
                    if row[z] > reds[i]-.05 and row[z] <= reds[i] + .05: # pick out where it is in SB/z and check color
                        if row[p_x_hubble] > p_range_correctable_dct[red,yedges[y]][0] and row[p_x_hubble] <= p_range_correctable_dct[red,yedges[y]][1]:# if it's in correctable range:
                            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'correctable','Imaging':row['IMAGING']})
                        elif row[p_x_hubble] > p_range_uncorrectable_dct[red,yedges[y]][0] and row[p_x_hubble] <= p_range_uncorrectable_dct[red,yedges[y]][1]:# if it's in uncorrectable range:
                            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'uncorrectable','Imaging':row['IMAGING']})
                        else: #not in correctable or uncorrectable range, so nei
                            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'nei','Imaging':row['IMAGING']})
    else: #galaxies outside ferengi SB and z limits - still need measurable z and SB to possibly correct.
        if row[z] >=.3 and row[z] < 9 and row['MU_HI'] >0: #these have measurements for z and SB, put in NEI
            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'nei','Imaging':row['IMAGING']})
        elif row[z] > 0 and row[z] < .3: #don't need to be corrected, z < z0
            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'z_lt_3','Imaging':row['IMAGING']})
        else: #these have nan or infinite values of z or mu, put in need_redshift_list
            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'nei_needs_redshift','Imaging':row['IMAGING']})
In [105]:
imaging_list=set(hubble_data['IMAGING'])
total_correctable=0
total_uncorrectable=0
total_z_lt_3=0
total_nei=0
total_nr=0
for survey in imaging_list:
c=0
u=0
z_lt_3=0
nei=0
nr=0
for row in category_list:
if row['Correctable_Category']=='correctable' and row['Imaging']==survey:
c+=1
if row['Correctable_Category']=='uncorrectable' and row['Imaging']==survey:
u+=1
if row['Correctable_Category']=='z_lt_3' and row['Imaging']==survey:
z_lt_3+=1
if row['Correctable_Category']=='nei' and row['Imaging']==survey:
nei+=1
if row['Correctable_Category']=='nei_needs_redshift' and row['Imaging']==survey:
nr+=1
total_correctable+=c
total_uncorrectable+=u
total_z_lt_3+=z_lt_3
total_nei+=nei
total_nr+=nr
print 'the number of correctable galaxies in %s is %i' %(survey,c)
print 'the number of uncorrectable galaxies in %s is %i' %(survey,u)
print 'the number of galaxies with z < 0.3 in %s is %i' %(survey,z_lt_3)
    print 'the number of NEI galaxies (not enough FERENGI galaxies in bin) in %s is %i' %(survey,nei)
    print 'the number of NEI galaxies (needing redshift measurements) in %s is %i' %(survey,nr)
print 'total correctable: %i' %total_correctable
print 'total uncorrectable: %i' %total_uncorrectable
print 'total z less than .3: %i' %total_z_lt_3
print 'total nei: %i' %total_nei
print 'total nr: %i' %total_nr
print 'total: %i' %len(category_list)
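The per-survey tallies above can also be computed in a single pass; a hedged alternative using collections.Counter over (survey, category) pairs:
In [ ]:
#Sketch: one-pass tally of (survey, category) pairs
from collections import Counter
counts = Counter((r['Imaging'], r['Correctable_Category']) for r in category_list)
for (survey, cat), n in sorted(counts.items()):
    print '%s / %s: %i' %(survey, cat, n)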
In [ ]:
#create fits file of galaxies with Correctable_Category Label
c0 = Column([x['objid'] for x in category_list], name='objid')
c01 = Column([x['Correctable_Category'] for x in category_list], name='Correctable_Category')
category_table = Table()
category_table.add_columns([c0,c01])
fname = 'category_table.fits'
if os.path.exists(fname):
os.remove(fname)
category_table.write(fname,format='fits')
In [25]:
low_hi_limit_list=[]
p_features='t01_smooth_or_features_a02_features_or_disk_weighted_fraction'
z='Z_BEST_COMBINED'
for row in hubble_data:
if row['MU_HI'] > yedges[0] and row['MU_HI'] <= yedges[len(yedges)-1] and row[z] > reds[0]-.05 and row[z] <= reds[len(reds)-1] + .05:
for y in range(0,len(yedges)-1):
if row['MU_HI'] > yedges[y] and row['MU_HI'] <= yedges[y+1]:
for i,red in enumerate(reds):
if row[z] > reds[i]-.05 and row[z] <= reds[i] + .05: #now we have mu,z info:
for bin_range in interval_dct[red,yedges[y]]:
if row[p_features] >= bin_range['bin_bottom'] and row[p_features] < bin_range['bin_top']:
low_hi_limit_list.append({'objid':row['OBJNO'],'low_limit':bin_range['low_limit'],'hi_limit':bin_range['hi_limit']})
In [26]:
#create fits file of lower and upper limits for p_features
c2 = Column([x['objid'] for x in low_hi_limit_list], name='objid')
c3 = Column([x['low_limit'] for x in low_hi_limit_list], name='low_limit')
c4 = Column([x['hi_limit'] for x in low_hi_limit_list], name='hi_limit')
limit_table = Table()
limit_table.add_columns([c2,c3,c4])
fname = 'limits_table_%i_%i_%i.fits'%(datetime.now().month,datetime.now().day,datetime.now().year)
if os.path.exists(fname):
os.remove(fname)
limit_table.write(fname,format='fits')
In [25]:
def best_p_features(category,p_features,p_features_debiased,low_limit):
    #pick the best available estimate of p_features given the galaxy's correctability category
    if category == 'correctable':
        return p_features_debiased
    elif category == 'uncorrectable':
        return max(p_features,low_limit)
    else:
        return p_features
def fhat_mel(f,z,z0,zeta_hat):
    #debiased vote fraction: fhat = 1 - (1 - f)*exp(-(z - z0)/zeta_hat)
    val = 1. - (1. - f)*np.exp(-(z - z0)/ zeta_hat)
    return val
def zeta_hat_lin(SB):
    #e-folding scale zeta_hat from a linear fit of log10(zeta_hat) vs surface brightness
    val = 10.**((SB*-0.0619) + 1.0837)
    return val
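A worked example of the correction chain (all input values illustrative, not from the data): zeta_hat_lin sets the e-folding scale from surface brightness, fhat_mel pushes the observed fraction back up toward its z = 0.3 value, and best_p_features picks between the raw, debiased, and lower-limit values by category:
In [ ]:
#Sketch: debias one illustrative galaxy (f = 0.4 observed at z = 0.8, mu_max_i = 22.0)
zeta = zeta_hat_lin(22.0)
f_debiased = fhat_mel(0.4, 0.8, 0.3, zeta) #>= 0.4 here, since z > z0 and zeta > 0
print zeta, f_debiased
print best_p_features('correctable', 0.4, f_debiased, 0.55) #returns f_debiased
print best_p_features('uncorrectable', 0.4, f_debiased, 0.55) #returns max(0.4, 0.55)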
In [34]:
#create low,hi limit list for FERENGI
low_hi_limit_list=[]
p_features='t00_smooth_or_features_a1_features_frac'
z='sim_redshift'
for row in data:
p_features_debiased = fhat_mel(row[p_features],row[z],.3,zeta_hat_lin(row['mu_max_i']))
if row['mu_max_i'] > yedges[0] and row['mu_max_i'] <= yedges[len(yedges)-1] and row[z] > reds[0]-.05 and row[z] <= reds[len(reds)-1] + .05:
for y in range(0,len(yedges)-1):
if row['mu_max_i'] > yedges[y] and row['mu_max_i'] <= yedges[y+1]:
for i,red in enumerate(reds):
if row[z] > reds[i]-.05 and row[z] <= reds[i] + .05: #now we have mu,z info:
for bin_range in interval_dct[red,yedges[y]]:
if row[p_features] >= bin_range['bin_bottom'] and row[p_features] < bin_range['bin_top']:
p_features_best = best_p_features(row['Correctable_Category'],row[p_features],p_features_debiased,bin_range['low_limit'])
low_hi_limit_list.append({'subject_id':row['subject_id_1'],'low_limit':bin_range['low_limit'],'hi_limit':bin_range['hi_limit'],'t00_smooth_or_features_a1_features_frac':row['t00_smooth_or_features_a1_features_frac'],'p_features_debiased':p_features_debiased,'p_features_best':p_features_best,'Correctable_Category':row['Correctable_Category']})
In [35]:
#create fits file of lower and upper limits for p_features
c2 = Column([x['subject_id'] for x in low_hi_limit_list], name='subject_id')
c3 = Column([x['low_limit'] for x in low_hi_limit_list], name='low_limit')
c4 = Column([x['hi_limit'] for x in low_hi_limit_list], name='hi_limit')
c5 = Column([x['t00_smooth_or_features_a1_features_frac'] for x in low_hi_limit_list], name='t00_smooth_or_features_a1_features_frac')
c6 = Column([x['p_features_debiased'] for x in low_hi_limit_list], name='p_features_debiased')
c7 = Column([x['p_features_best'] for x in low_hi_limit_list], name='p_features_best')
c8 = Column([x['Correctable_Category'] for x in low_hi_limit_list], name='Correctable_Category')
limit_table = Table()
limit_table.add_columns([c2,c3,c4,c5,c6,c7,c8])
fname = 'limits_table_ferengi%i_%i_%i.fits'%(datetime.now().month,datetime.now().day,datetime.now().year)
if os.path.exists(fname):
os.remove(fname)
limit_table.write(fname,format='fits')
In [36]:
row['Correctable_Category']=='uncorrectable' #spot-check: is the last row processed in the loop above uncorrectable?
Out[36]:
In [37]:
best_p_features(row['Correctable_Category'],row[p_features],p_features_debiased,bin_range['low_limit']) #spot-check best_p_features on that same row
Out[37]:
In [38]:
row[p_features] #spot-check: the raw p_features value of that row
Out[38]:
In [ ]: