In [1]:
%pylab inline
from astropy.io import fits
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from astropy.io import fits
from astropy.table import Table,Column
import os
import warnings
import requests

mpl.rcParams['text.usetex']=True
mpl.rcParams['axes.linewidth'] = 3

warnings.filterwarnings('ignore', category=RuntimeWarning, append=True)
warnings.filterwarnings('ignore', category=UserWarning, append=True);


Populating the interactive namespace from numpy and matplotlib

In [2]:
# Load data from Dropbox folder instead of clogging up Github

def download_from_dropbox(url):
    
    local_filename = "../data/{:}".format(url.split("/")[-1].split("?")[0])
    r = requests.get(url, stream=True)
    with open(local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024): 
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()
            
    return local_filename

In [3]:
#Load Data!
ferengi_filename = download_from_dropbox("https://www.dropbox.com/s/2dsnro4r5sa48gb/ferengi_all_weighted_and_meta.fits?dl=1")
data = Table.read(ferengi_filename)

In [4]:
#Defining surface brightness bins
yedges=np.linspace(np.min(data['mu_max_i']),np.max(data['mu_max_i']),10)

In [5]:
p_x = 't00_smooth_or_features_a1_features_frac_weighted_2' #p for question in question (lol)
N_x = 't00_smooth_or_features_count_weighted_2' #N for question in question (number of people who answered question) 
N_cut = 0 #number of people we require to get to question

In [6]:
#Pick out unique galaxies
galaxies = set(data['objid'])

In [7]:
#simulated redshifts of ferengi data:
reds=[.3,.4,.5,.6,.7,.8,.9,1]

#Defining lists of p_features at high and low (z=0.3) redshifts at given SB and redshifts. 
scatter_dct={}
for z in reds:
    for edge in yedges:
        scatter_dct[z,edge,'hi']=[]
        scatter_dct[z,edge,'lo']=[]
        scatter_dct[z,edge,'subj_id']=[]
        
for i,g in enumerate(galaxies):
    this_gal=(data['objid']==g)
    evos = set(data[this_gal]['sim_evolution'])
#    if 0 in evos:
#        evos.remove(0) #remove this line if including 0 evolution data
#    evos=[0]
    for e in evos:
        this_evo=(data[this_gal]['sim_evolution']==e)
        if len(set(data[this_gal][this_evo]['sim_redshift']))==8: #only want stuff where we have data down to 0.3
            p_at_3=(data[this_gal][this_evo]['sim_redshift']==.3)
            p_bar_at_3 = data[this_gal][this_evo][p_at_3][p_x][0]#value of p_x at redshift 0.3
            n_bar_at_3 = data[this_gal][this_evo][p_at_3][N_x][0] #number of people who answeredquestion for the 0.3 image
            if n_bar_at_3 >= N_cut: #only care about galaxies with at least N people answering question at 0.3
                for row in data[this_gal][this_evo]:
                    for y in range(0,len(yedges)-1):
                        for j,hi_z in enumerate(reds):
                            if round(row['sim_redshift'],2)==hi_z and row['mu_max_i'] > yedges[y] and row['mu_max_i'] < yedges[y+1] and row[N_x]>=N_cut: #now look at high redshift data ; only care if 5 people answerwed question
                                scatter_dct[hi_z,yedges[y],'hi'].append(row[p_x]) # slap p_x in high list 
                                scatter_dct[hi_z,yedges[y],'lo'].append(p_bar_at_3) # put z=0.3 value in low list

In [8]:
#Just evolution 0:

#Defining lists of p_features at high and low (z=0.3) redshifts at given SB and redshifts. 
scatter_dct_0={}
for z in reds:
    for edge in yedges:
        scatter_dct_0[z,edge,'hi']=[]
        scatter_dct_0[z,edge,'lo']=[]
        scatter_dct_0[z,edge,'subj_id']=[]
        
for i,g in enumerate(galaxies):
    this_gal=(data['objid']==g)
#    evos = set(data[this_gal]['sim_evolution'])
#    if 0 in evos:
#        evos.remove(0) #remove this line if including 0 evolution data
    evos=[0]
    for e in evos:
        this_evo=(data[this_gal]['sim_evolution']==e)
        if len(set(data[this_gal][this_evo]['sim_redshift']))==8: #only want stuff where we have data down to 0.3
            p_at_3=(data[this_gal][this_evo]['sim_redshift']==.3)
            p_bar_at_3 = data[this_gal][this_evo][p_at_3][p_x][0]#value of p_x at redshift 0.3
            n_bar_at_3 = data[this_gal][this_evo][p_at_3][N_x][0] #number of people who answeredquestion for the 0.3 image
            if n_bar_at_3 >= N_cut: #only care about galaxies with at least N people answering question at 0.3
                for row in data[this_gal][this_evo]:
                    for y in range(0,len(yedges)-1):
                        for j,hi_z in enumerate(reds):
                            if round(row['sim_redshift'],2)==hi_z and row['mu_max_i'] > yedges[y] and row['mu_max_i'] < yedges[y+1] and row[N_x]>=N_cut: #now look at high redshift data ; only care if 5 people answerwed question
                                scatter_dct_0[hi_z,yedges[y],'hi'].append(row[p_x]) # slap p_x in high list 
                                scatter_dct_0[hi_z,yedges[y],'lo'].append(p_bar_at_3) # put z=0.3 value in low list

In [8]:
def derivative_of_poly(x_list,fit_cos,deg):
    if deg == 3:
        derivative=3*fit_cos[3]*x_list**2+2*fit_cos[2]*x_list+fit_cos[1]
    if deg == 2:
        derivative=2*fit_cos[2]*x_list+fit_cos[1]
    if deg ==1:
        derivative=fit_cos[1]


    return derivative

In [9]:
#Plot p_features at a given redshift vs p_features at z=0.3 at each redshift, binned by surface brightness
f=figure(figsize=(20,18))
#track 'uncorrectable' regions of z/mu/p space (shaded)
p_range_uncorrectable_dct={}
#track 'correctable regions of z/mu/p space (unshaded *and* at least 5 points in bin) 
p_range_correctable_dct={}
#store range of spread of data in each bin
interval_dct={}
x_new = np.linspace(0,.95,40)
slope_cut=.35

gs=gridspec.GridSpec(9,10)
gs.update(wspace=0)
gs.update(hspace=0)
yedge_int=[8,7,6,5,4,3,2,1,0]
y_label=[]
facecolors=['#af8dc3','#999999','#7fbf7b']
for i in range(0,len(yedges)-1):
    y_label.append(round((yedges[i]+yedges[i+1])/2,2))
    #y_label.append(round(yedges[i],1))
y_label=y_label[::-1]
for j,y in enumerate(yedge_int):
    for i,z in enumerate(reds):
        ax=plt.subplot(gs[j,i])
        xs=scatter_dct[z,yedges[y],'lo']
        ys=scatter_dct[z,yedges[y],'hi']
    #    xs_0=scatter_dct_0[z,yedges[y],'lo']
    #   ys_0=scatter_dct_0[z,yedges[y],'hi']
        plt.plot(x_new,x_new,c='k')
        flat_list=[]
        p_range_uncorrectable_dct[z,yedges[y]]=[]
        p_range_correctable_dct[z,yedges[y]]=[]
        plt.scatter(xs,ys)
    #    plt.scatter(xs_0,ys_0,c='r',s=30)
        if len(xs)>5:
            poly_params=np.polynomial.polynomial.polyfit(xs,ys,3)
            poly = np.polynomial.Polynomial(poly_params)
            drv=derivative_of_poly(x_new,poly_params,3)
            if np.min(drv)>-1: #fit okay, go ahead and plot
                plt.plot(x_new,np.polyval(poly_params[::-1],x_new),c='k',lw=3,ls='dashed')
                for d,val in enumerate(drv):
                    if val < slope_cut:
                        #plt.axvline(x_new[d],lw=3,alpha=.1) #shade vertically 
                        flat_list.append(x_new[d]) #list of x-values for shaded region
                if len(flat_list)>0: # if there are bad regions, record stuff:
                    min_p_at_3=np.min(flat_list) #minimum x value of shaded region 
                    max_p_at_3=np.max(flat_list) #maximum x value of shaded region
                    
                    #get list of y values in shaded region
                    bad_p_at_z_list=[] 
                    for p_int,p in enumerate(xs): 
                        if p>=min_p_at_3 and p < max_p_at_3:
                            bad_p_at_z_list.append(ys[p_int])
                    #shade out 3 standard deviations above and below the mean of that area        
                    #min_val = np.mean(bad_p_at_z_list)-1.5*np.std(bad_p_at_z_list)
                    min_val = 0 #I think the minimum should be 0...because... 
                    if len(bad_p_at_z_list)>0: #need at least 1 pts to get mean and std
                        max_val = np.mean(bad_p_at_z_list)+1.5*np.std(bad_p_at_z_list)
                    else:
                        max_val=0
                    plt.axhspan(0,max_val,alpha=.1)
                    p_range_uncorrectable_dct[z,yedges[y]].append(0)
                    p_range_uncorrectable_dct[z,yedges[y]].append(max_val) 
                    p_range_correctable_dct[z,yedges[y]].append(max_val)
                    p_range_correctable_dct[z,yedges[y]].append(1)
                else: #if no bad regions, whole square is correctable.    
                    p_range_uncorrectable_dct[z,yedges[y]].append(0)
                    p_range_uncorrectable_dct[z,yedges[y]].append(0) 
                    p_range_correctable_dct[z,yedges[y]].append(0)
                    p_range_correctable_dct[z,yedges[y]].append(1)



            else: #if < 0, try 2nd order instead 
                poly_params=np.polynomial.polynomial.polyfit(xs,ys,2)
                poly = np.polynomial.Polynomial(poly_params)
                drv=derivative_of_poly(x_new,poly_params,2)
                if np.min(drv)>-1: #fit okay, go ahead and plot
                    plt.plot(x_new,np.polyval(poly_params[::-1],x_new),c='k',lw=3,ls='dashed')
                    for d,val in enumerate(drv):
                        if val < slope_cut:
                            #plt.axvline(x_new[d],lw=3,alpha=.1)
                            flat_list.append(x_new[d]) #list of x-values for shaded region
                    if len(flat_list)>0: # if there are bad regions, record stuff:
                        min_p_at_3=np.min(flat_list) #minimum x value of shaded region 
                        max_p_at_3=np.max(flat_list) #maximum x value of shaded region
                    
                        #get list of y values in shaded region
                        bad_p_at_z_list=[] 
                        for p_int,p in enumerate(xs): 
                            if p>=min_p_at_3 and p < max_p_at_3:
                                bad_p_at_z_list.append(ys[p_int])
                        #shade out 3 standard deviations around the mean of that area        
                        min_val=0
                        #min_val = np.mean(bad_p_at_z_list)-1.5*np.std(bad_p_at_z_list)
                        if len(bad_p_at_z_list)>0: #need at least 1 pts to get mean and std
                            max_val = np.mean(bad_p_at_z_list)+1.5*np.std(bad_p_at_z_list)
                        else:
                            max_val=0
                        plt.axhspan(min_val,max_val,alpha=.1)
                        p_range_uncorrectable_dct[z,yedges[y]].append(0)
                        p_range_uncorrectable_dct[z,yedges[y]].append(max_val) 
                        p_range_correctable_dct[z,yedges[y]].append(max_val)
                        p_range_correctable_dct[z,yedges[y]].append(1)
                    else: #if no bad regions, whole square is correctable.    
                        p_range_uncorrectable_dct[z,yedges[y]].append(0)
                        p_range_uncorrectable_dct[z,yedges[y]].append(0) 
                        p_range_correctable_dct[z,yedges[y]].append(0)
                        p_range_correctable_dct[z,yedges[y]].append(1)
                else: #fit still bad, do a linear
                    poly_params=np.polynomial.polynomial.polyfit(xs,ys,1)
                    poly = np.polynomial.Polynomial(poly_params)
                    plt.plot(x_new,np.polyval(poly_params[::-1],x_new),c='k',lw=3,ls='dashed')
                    drv=derivative_of_poly(x_new,poly_params,1)
                    if drv < slope_cut:
                            #plt.axvline(x_new[d],lw=3,alpha=.1)
                        flat_list=x_new #list of x-values for shaded region
                        min_p_at_3=np.min(flat_list) #minimum x value of shaded region 
                        max_p_at_3=np.max(flat_list) #maximum x value of shaded region
                    
                        #get list of y values in shaded region
                        bad_p_at_z_list=ys
                        min_val=0
                        #min_val = np.mean(bad_p_at_z_list)-1.5*np.std(bad_p_at_z_list)
                        if len(bad_p_at_z_list)>0: #need at least 1 pts to get mean and std
                            max_val = np.mean(bad_p_at_z_list)+1.5*np.std(bad_p_at_z_list)
                        else:
                            max_val=0
                        plt.axhspan(min_val,max_val,alpha=.1)
                        p_range_uncorrectable_dct[z,yedges[y]].append(0)
                        p_range_uncorrectable_dct[z,yedges[y]].append(max_val) 
                        p_range_correctable_dct[z,yedges[y]].append(max_val)
                        p_range_correctable_dct[z,yedges[y]].append(1)
                    else: #if no bad regions, whole square is correctable.    
                        p_range_uncorrectable_dct[z,yedges[y]].append(0)
                        p_range_uncorrectable_dct[z,yedges[y]].append(0) 
                        p_range_correctable_dct[z,yedges[y]].append(0)
                        p_range_correctable_dct[z,yedges[y]].append(1)

        
            plt.xlim(0,1)
            plt.ylim(0,1)
   
        else: #fewer than 5 points in square, so fuck it
            plt.axhspan(0,1,alpha=.1,color='k')
            plt.xlim(0,1)
            plt.ylim(0,1)
            p_range_uncorrectable_dct[z,yedges[y]].append(0)
            p_range_uncorrectable_dct[z,yedges[y]].append(0) 
            p_range_correctable_dct[z,yedges[y]].append(0)
            p_range_correctable_dct[z,yedges[y]].append(0)
            
        plt.tick_params(labelleft='off')
        plt.tick_params(top='off',right='off',labelbottom='off')

                
        if j==8:
            plt.xlabel('%s'%reds[i],fontsize=16)
        if i==7:
            ax.yaxis.set_label_position("right")
            plt.ylabel('%s'%str(y_label[j]).rjust(500),fontsize=16,rotation=270,labelpad=20)

f.text(.78,.5,r'$\mathrm{\mu_{max,i}~(mag/arcsec^2)}$',rotation=270,fontsize=30,va='center')
f.text(.45,.91,r'$\mathrm{redshift}$',fontsize=30,ha='center')
f.text(.1,.5,r'$\mathrm{p_{features}}$',rotation=90,fontsize=30,va='center')
f.text(.45,.08,r'$\mathrm{p_{features},z=0.3}$',fontsize=30,ha='center');

#plt.savefig('/home/mel/Documents/GZ_HUBBLE/ferengi_debias/finding_matches/images/p_vs_p_SB_redshift_slope_method_evs_highlighted.pdf')


Out[9]:
<matplotlib.text.Text at 0x7f0cc649c390>

In [10]:
#Here we'll compute the scatter in the points for different bins of measured p_values, so we can provide upper limits
#on p_features for the difficult galaxies. 

f=figure(figsize=(20,18))
#store range of spread of data in each bin
interval_dct={}


gs=gridspec.GridSpec(9,10)
gs.update(wspace=0)
gs.update(hspace=0)
yedge_int=[8,7,6,5,4,3,2,1,0]
y_label=[]
facecolors=['#af8dc3','#999999','#7fbf7b']
for i in range(0,len(yedges)-1):
    y_label.append(round((yedges[i]+yedges[i+1])/2,1))
y_label=y_label[::-1]
for j,y in enumerate(yedge_int):
    for i,z in enumerate(reds):
        ax=plt.subplot(gs[j,i])
        xs=scatter_dct[z,yedges[y],'lo']
        ys=scatter_dct[z,yedges[y],'hi']
        plt.scatter(xs,ys)
        interval_dct[z,yedges[y]]=[]
        plt.xlim(0,1)
        plt.ylim(0,1)
        this_dct={}  #stores p_features at z=0.3 value for each bin (xs); use to compute spread in each bin
                    #stored like: this_dct[lower_bin_edge,higher_bin_edge] = [distribution of xs in bin]

        bins_list=np.linspace(0,1,6) #bin measured p_features from 0 to 1
        for b in range(0,len(bins_list)-1):
            bin_bottom = round(bins_list[b],1)
            bin_top = round(bins_list[b+1],1)
            this_dct[bin_bottom,bin_top]=[]
            for l,val in enumerate(ys): #check p_features at z values
                if val >= bin_bottom and val < bin_top: # if it falls inside bin in question:
                    this_dct[bin_bottom,bin_top].append(xs[l]) #then put corresponding p_features,z=0.3 value in list
        
        #Now compute the mean and spread of p_features,z=0.3 for each bin
        p_features_means=[]
        x_error_lo=[]
        x_error_hi=[]
        bin_centers=[]

        for b in range(0,len(bins_list)-1):
            bin_bottom = round(bins_list[b],3)
            bin_top = round(bins_list[b+1],3)
            plt.axhline(bin_bottom,c='k',lw=1,alpha=.1)
            p_features_means.append(np.median(this_dct[bin_bottom,bin_top]))
            try:
                x_error_lo.append(p_features_means[b] - np.percentile(this_dct[bin_bottom,bin_top],10))
            except ValueError:
                x_error_lo.append(0)
            try:
                x_error_hi.append(np.percentile(this_dct[bin_bottom,bin_top],90) - p_features_means[b])
            except ValueError:
                x_error_hi.append(0)
            bin_centers.append((bin_top-bin_bottom)/2.+bin_bottom)
            try:
                interval_dct[z,yedges[y]].append({'bin_bottom':bin_bottom,'bin_top':bin_top,'low_limit':np.percentile(this_dct[bin_bottom,bin_top],25),'hi_limit':np.percentile(this_dct[bin_bottom,bin_top],75)})
            except ValueError:
                pass
        plt.errorbar(p_features_means,bin_centers,xerr=[x_error_lo,x_error_hi],fmt='o',c='#d95f02',markersize=1,elinewidth=5)

        

        plt.tick_params(labelleft='off')
        plt.tick_params(top='off',right='off',labelbottom='off')

        if j==8:
            plt.xlabel('%s'%reds[i],fontsize=16)
        if i==7:
            ax.yaxis.set_label_position("right")
            plt.ylabel('%s'%y_label[j],fontsize=16,rotation=270,labelpad=20)

f.text(.78,.5,r'$\mathrm{\mu_{max,i}~(mag/arcsec^2)}$',rotation=270,fontsize=30,va='center')
f.text(.45,.91,r'$\mathrm{redshift}$',fontsize=30,ha='center')
f.text(.1,.5,r'$\mathrm{p_{bar}}$',rotation=90,fontsize=30,va='center')
f.text(.45,.08,r'$\mathrm{p_{bar},z=0.3}$',fontsize=30,ha='center')

#plt.savefig('images/p_vs_p_SB_redshift_bar_Kyle_bins0.pdf')


Out[10]:
<matplotlib.text.Text at 0x7f0cbd3925d0>

In [12]:
#get list of debiasable ferengi galaxies 
unique_galaxies = set(data['objid'])
z0ind = np.zeros(len(data),dtype=bool)
for ug in unique_galaxies:
    ind = (data['objid'] == ug)
    if data[ind]['sim_redshift'].min() < 0.301:
        z0ind[ind] = True
        
data_z0 = data[z0ind]
category_list_ferengi=[]
for row in data_z0:
    if row['mu_max_i'] > yedges[0] and row['mu_max_i'] <= yedges[len(yedges)-1] and  row['sim_redshift'] > reds[0]-.05 and row['sim_redshift'] <= reds[len(reds)-1] + .05: # if within ferengi space, check where it is. else, consider NEI or uncorrectable.
        for y in range(0,len(yedges)-1):
            if row['mu_max_i'] > yedges[y] and row['mu_max_i'] <= yedges[y+1]:  
                for i,z in enumerate(reds): 
                    if row['sim_redshift'] > reds[i]-.05 and row['sim_redshift'] <= reds[i] + .05: # pick out where it is in SB/z and check color
                        if row[p_x] > p_range_correctable_dct[z,yedges[y]][0] and row[p_x] <= p_range_correctable_dct[z,yedges[y]][1]:# if it's in correctable range::
                            category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'correctable','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
                        elif row[p_x] > p_range_uncorrectable_dct[z,yedges[y]][0] and row[p_x] <= p_range_uncorrectable_dct[z,yedges[y]][1]:# if it's in uncorrectable range::
                            category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'uncorrectable','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
                        else: #not in correctable or uncorrectable range, so nei
                            category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'nei','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
    else: #galaxies outside ferengi SB and z limits - still need to have meaasureable z and SB to possibly correct. 
        if row['sim_redshift'] > 0 and row['sim_redshift'] < 9 and row['mu_max_i'] >0: #these have measurements for z and SB, put in NEI
                            category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'nei','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})
        else: #these have nan or infinite values of z or mu, put in need_redshift_list
                            category_list_ferengi.append({'sdss_id':row['objid'],'subject_id':row['subject_id_1'],'Correctable_Category':'nei_needs_redshift','sim_redshift':row['sim_redshift'],'p_features':row[p_x],'sim_evolution':row['sim_evolution'],'mu_max_i':row['mu_max_i']})

In [13]:
#create fits file of galaxies with Correctable_Category Label for FERENGI
c0 = Column([x['sdss_id'] for x in category_list_ferengi], name='sdss_id', format='A24') 
c00 = Column([x['subject_id'] for x in category_list_ferengi], name='subject_id', format='A24') 
c01 = Column([x['Correctable_Category'] for x in category_list_ferengi], name='Correctable_Category',format='A24')
c02 = Column([x['p_features'] for x in category_list_ferengi], name='p_features',format='D')
c03 = Column([x['sim_redshift'] for x in category_list_ferengi], name='sim_redshift',format='D')
c04 = Column([x['sim_evolution'] for x in category_list_ferengi], name='sim_evolution',format='D')
c05 = Column([x['mu_max_i'] for x in category_list_ferengi], name='mu_max_i',format='D')

category_table_ferengi = Table()  
category_table_ferengi.add_columns([c0,c00,c01,c02,c03,c04,c05])

fname = 'ferengi_data_with_categories_slope_method.fits'
if os.path.exists(fname):
    os.remove(fname)
category_table_ferengi.write(fname,format='fits')

In [14]:
print 'The number of correctable ferengi galaxies is %i' %((category_table_ferengi['Correctable_Category'] == 'correctable')).sum()
print 'The number of uncorrectable ferengi galaxies is %i' %((category_table_ferengi['Correctable_Category'] == 'uncorrectable')).sum()
print 'The number of nei galaxies is %i' %((category_table_ferengi['Correctable_Category'] == 'nei')).sum()
print 'Total: %i' %len(category_table_ferengi)


The number of correctable ferengi galaxies is 2051
The number of uncorrectable ferengi galaxies is 1826
The number of nei galaxies is 82
Total: 3959

In [15]:
#hubble file - not on dropbox because it's huge. 
#hubble_data=fits.getdata('../data/Hubble_t01_data.fits',1)
hubble_data=Table.read('/home/mel/Documents/GZ_HUBBLE/gz_hst_votes_and_meta_acs_11_19_2015.fits')

In [104]:
#determine which parts of hubble sample are correctable, uncorrectable, or nei  
p_x_hubble = 't01_smooth_or_features_a02_features_or_disk_weighted_fraction' #task 
category_list=[]
z='Z_BEST_COMBINED'
for row in hubble_data:
    if row['MU_HI'] > yedges[0] and row['MU_HI'] <= yedges[len(yedges)-1] and  row['Z'] > reds[0] and row['Z'] <= reds[len(reds)-1] + .05: # if within ferengi space, check where it is. else, consider NEI or uncorrectable.
        for y in range(0,len(yedges)-1):
            if row['MU_HI'] > yedges[y] and row['MU_HI'] <= yedges[y+1]:  
                for i,z in enumerate(reds): 
                    if row['Z'] > reds[i]-.05 and row[z] <= reds[i] + .05: # pick out where it is in SB/z and check color
                        if row[p_x_hubble] > p_range_correctable_dct[z,yedges[y]][0] and row[p_x_hubble] <= p_range_correctable_dct[z,yedges[y]][1]:# if it's in correctable range::
                            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'correctable','Imaging':row['IMAGING']})
                        elif row[p_x_hubble] > p_range_uncorrectable_dct[z,yedges[y]][0] and row[p_x_hubble] <= p_range_uncorrectable_dct[z,yedges[y]][1]:# if it's in uncorrectable range::
                            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'uncorrectable','Imaging':row['IMAGING']})
                        else: #not in correctable or uncorrectable range, so nei
                            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'nei','Imaging':row['IMAGING']})
    else: #galaxies outside ferengi SB and z limits - still need to have meaasureable z and SB to possibly correct. 
        if row[z] >=.3 and row[z] < 9 and row['MU_HI'] >0: #these have measurements for z and SB, put in NEI
            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'nei','Imaging':row['IMAGING']})
        elif row[z] > 0 and row[z] < .3: #don't need to be corrected, z < z0
            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'z_lt_3','Imaging':row['IMAGING']})
        else: #these have nan or infinite values of z or mu, put in need_redshift_list
            category_list.append({'objid':row['OBJNO'],'Correctable_Category':'nei_needs_redshift','Imaging':row['IMAGING']})

In [105]:
imaging_list=set(hubble_data['IMAGING'])
total_correctable=0
total_uncorrectable=0
total_z_lt_3=0
total_nei=0
total_nr=0
for survey in imaging_list:
    c=0
    u=0
    z_lt_3=0
    nei=0
    nr=0
    for row in category_list:
        if row['Correctable_Category']=='correctable' and row['Imaging']==survey:
            c+=1
        if row['Correctable_Category']=='uncorrectable' and row['Imaging']==survey:
            u+=1
        if row['Correctable_Category']=='z_lt_3' and row['Imaging']==survey:
            z_lt_3+=1
        if row['Correctable_Category']=='nei' and row['Imaging']==survey:
            nei+=1
        if row['Correctable_Category']=='nei_needs_redshift' and row['Imaging']==survey:
            nr+=1
    total_correctable+=c
    total_uncorrectable+=u
    total_z_lt_3+=z_lt_3
    total_nei+=nei
    total_nr+=nr
    print 'the number of correctable galaxies in %s is %i' %(survey,c)
    print 'the number of uncorrectable galaxies in %s is %i' %(survey,u)
    print 'the number of galaxies with z < 0.3 in %s is %i' %(survey,z_lt_3)
    print 'the number of NEI galaxies in %s is (due to not enough Ferengi galaxies in bin) is %i' %(survey,nei)
    print 'the number of NEI galaxies in %s is (due to needing redshift measurements) is %i' %(survey,nr)
print 'total correctable: %i' %total_correctable
print 'total uncorrectable: %i' %total_uncorrectable
print 'total z less than .3: %i' %total_z_lt_3
print 'total nei: %i' %total_nei
print 'total nr: %i' %total_nr
print 'total: %i' %len(category_list)


the number of correctable galaxies in GOODS-N is 1298
the number of uncorrectable galaxies in GOODS-N is 271
the number of galaxies with z < 0.3 in GOODS-N is 280
the number of NEI galaxies in GOODS-N is (due to not enough Ferengi galaxies in bin) is 467
the number of NEI galaxies in GOODS-N is (due to needing redshift measurements) is 235
the number of correctable galaxies in GOODS-S is 1299
the number of uncorrectable galaxies in GOODS-S is 1005
the number of galaxies with z < 0.3 in GOODS-S is 247
the number of NEI galaxies in GOODS-S is (due to not enough Ferengi galaxies in bin) is 1721
the number of NEI galaxies in GOODS-S is (due to needing redshift measurements) is 641
the number of correctable galaxies in COSMOS  is 24368
the number of uncorrectable galaxies in COSMOS  is 22263
the number of galaxies with z < 0.3 in COSMOS  is 11693
the number of NEI galaxies in COSMOS  is (due to not enough Ferengi galaxies in bin) is 27391
the number of NEI galaxies in COSMOS  is (due to needing redshift measurements) is 7093
the number of correctable galaxies in GEMS    is 3398
the number of uncorrectable galaxies in GEMS    is 1788
the number of galaxies with z < 0.3 in GEMS    is 1190
the number of NEI galaxies in GEMS    is (due to not enough Ferengi galaxies in bin) is 1280
the number of NEI galaxies in GEMS    is (due to needing redshift measurements) is 1648
the number of correctable galaxies in AEGIS   is 3115
the number of uncorrectable galaxies in AEGIS   is 1411
the number of galaxies with z < 0.3 in AEGIS   is 967
the number of NEI galaxies in AEGIS   is (due to not enough Ferengi galaxies in bin) is 1667
the number of NEI galaxies in AEGIS   is (due to needing redshift measurements) is 1347
total correctable: 33478
total uncorrectable: 26738
total z less than .3: 14377
total nei: 32526
total nr: 10964
total: 118083

In [ ]:
#create fits file of galaxies with Correctable_Category Label 
c0 = Column([x['objid'] for x in category_list], name='objid') 
c01 = Column([x['Correctable_Category'] for x in category_list], name='Correctable_Category')
category_table = Table()  
category_table.add_columns([c0,c01])

fname = 'category_table.fits'
if os.path.exists(fname):
    os.remove(fname)
category_table.write(fname,format='fits')

In [25]:
low_hi_limit_list=[]
p_features='t01_smooth_or_features_a02_features_or_disk_weighted_fraction'
z='Z_BEST_COMBINED'
for row in hubble_data:
    if row['MU_HI'] > yedges[0] and row['MU_HI'] <= yedges[len(yedges)-1] and  row[z] > reds[0]-.05 and row[z] <= reds[len(reds)-1] + .05: 
        for y in range(0,len(yedges)-1):
            if row['MU_HI'] > yedges[y] and row['MU_HI'] <= yedges[y+1]:  
                for i,red in enumerate(reds): 
                    if row[z] > reds[i]-.05 and row[z] <= reds[i] + .05: #now we have mu,z info:
                        for bin_range in interval_dct[red,yedges[y]]:
                            if row[p_features] >= bin_range['bin_bottom'] and row[p_features] < bin_range['bin_top']:
                                low_hi_limit_list.append({'objid':row['OBJNO'],'low_limit':bin_range['low_limit'],'hi_limit':bin_range['hi_limit']})

In [26]:
#create fits file of lower and upper limits for p_features
c2 = Column([x['objid'] for x in low_hi_limit_list], name='objid') 
c3 = Column([x['low_limit'] for x in low_hi_limit_list], name='low_limit') 
c4 = Column([x['hi_limit'] for x in low_hi_limit_list], name='hi_limit') 
limit_table = Table()  
limit_table.add_columns([c2,c3,c4])

fname = 'limits_table_%i_%i_%i.fits'%(datetime.now().month,datetime.now().day,datetime.now().year)
if os.path.exists(fname):
    os.remove(fname)
limit_table.write(fname,format='fits')

In [25]:
def best_p_features(category,p_features,p_features_debiased,low_limit):
    if category == 'correctable':
        return p_features_debiased
    if category == 'uncorrectable':
        return max(p_features,low_limit)
    else:
        return p_features
    
def fhat_mel(f,z,z0,zeta_hat):
    
    val = 1. - (1. - f)*np.exp(-(z - z0)/ zeta_hat)
    
    return val
def zeta_hat_lin(SB):
    
    val = 10.**((SB*-0.0619) + 1.0837)
    return val

In [34]:
#create low,hi limit list for FERENGI
low_hi_limit_list=[]
p_features='t00_smooth_or_features_a1_features_frac'
z='sim_redshift'
for row in data:
    p_features_debiased = fhat_mel(row[p_features],row[z],.3,zeta_hat_lin(row['mu_max_i']))
    if row['mu_max_i'] > yedges[0] and row['mu_max_i'] <= yedges[len(yedges)-1] and  row[z] > reds[0]-.05 and row[z] <= reds[len(reds)-1] + .05: 
        for y in range(0,len(yedges)-1):
            if row['mu_max_i'] > yedges[y] and row['mu_max_i'] <= yedges[y+1]:  
                for i,red in enumerate(reds): 
                    if row[z] > reds[i]-.05 and row[z] <= reds[i] + .05: #now we have mu,z info:
                        for bin_range in interval_dct[red,yedges[y]]:
                            if row[p_features] >= bin_range['bin_bottom'] and row[p_features] < bin_range['bin_top']:
                                p_features_best = best_p_features(row['Correctable_Category'],row[p_features],p_features_debiased,bin_range['low_limit'])
                                low_hi_limit_list.append({'subject_id':row['subject_id_1'],'low_limit':bin_range['low_limit'],'hi_limit':bin_range['hi_limit'],'t00_smooth_or_features_a1_features_frac':row['t00_smooth_or_features_a1_features_frac'],'p_features_debiased':p_features_debiased,'p_features_best':p_features_best,'Correctable_Category':row['Correctable_Category']})

In [35]:
#create fits file of lower and upper limits for p_features
c2 = Column([x['subject_id'] for x in low_hi_limit_list], name='objid') 
c3 = Column([x['low_limit'] for x in low_hi_limit_list], name='low_limit') 
c4 = Column([x['hi_limit'] for x in low_hi_limit_list], name='hi_limit')
c5 = Column([x['t00_smooth_or_features_a1_features_frac'] for x in low_hi_limit_list], name='t00_smooth_or_features_a1_features_frac') 
c6 = Column([x['p_features_debiased'] for x in low_hi_limit_list], name='p_features_debiased') 
c7 = Column([x['p_features_best'] for x in low_hi_limit_list], name='p_features_best') 
c8 = Column([x['Correctable_Category'] for x in low_hi_limit_list], name='Correctable_Category') 

limit_table = Table()  
limit_table.add_columns([c2,c3,c4,c5,c6,c7,c8])

fname = 'limits_table_ferengi%i_%i_%i.fits'%(datetime.now().month,datetime.now().day,datetime.now().year)
if os.path.exists(fname):
    os.remove(fname)
limit_table.write(fname,format='fits')

In [36]:
row['Correctable_Category']=='uncorrectable'


Out[36]:
True

In [37]:
best_p_features(row['Correctable_Category'],row[p_features],p_features_debiased,bin_range['low_limit'])


Out[37]:
0.80815018377729519

In [38]:
row[p_features]


Out[38]:
0.25

In [ ]: