In [1]:
    
%matplotlib inline
from astropy.io import fits as pyfits
from astropy.table import Table
import numpy as np
from astropy.io.fits import Column
from datetime import datetime
import matplotlib.pyplot as plt
import os
import warnings
import requests
    
In [2]:
    
def download_from_dropbox(url):
    """Download a file from a Dropbox share URL into the working directory.

    Parameters
    ----------
    url : str
        Dropbox direct-download URL (e.g. ending in '?dl=1').

    Returns
    -------
    str
        The local filename: the last path component of the URL with any
        query string stripped.
    """
    # Derive the local name from the URL: last path segment, minus '?dl=1' etc.
    local_filename = url.split("/")[-1].split("?")[0]
    r = requests.get(url, stream=True)
    # Fail loudly on HTTP errors instead of silently saving an error page
    # to disk as if it were the requested FITS file.
    r.raise_for_status()
    with open(local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
                f.flush()

    return local_filename
    
In [3]:
    
# zeta parameters
# NOTE(review): the download call is commented out in favor of a hardcoded
# absolute local path -- this cell is not portable to other machines;
# re-enable the download_from_dropbox line to reproduce elsewhere.
#zeta_fname = download_from_dropbox("https://www.dropbox.com/s/4xoryra28lxhlrm/zeta_parameters.fits?dl=1")
zeta_fname = '/home/mel/Dropbox/gzhubble/hubble_files/catalog_debiasing_files/new_sb_method/zeta_parameters.fits'
zeta_params=Table.read(zeta_fname)
# Module-level fit parameters consumed later by zeta_hat_lin().
slope = zeta_params['slope'][0]
intercept=zeta_params['intercept'][0]
    
In [ ]:
    
# Fetch the vote-fraction input catalog from Dropbox and load it as an
# astropy Table. `votes_data` is the module-level table used by every
# subsequent cell (debiased_table, copy_table, and the check plots).
hubble_fname = download_from_dropbox("https://www.dropbox.com/s/dfmipz5jfzl9lja/input_for_hubble_debiased_catalog.fits?dl=1")
votes_data=Table.read(hubble_fname)
    
In [5]:
    
# Zeta-hat function for computing debiased values
z0 = 0.3  # reference redshift: all vote fractions are corrected to z = 0.3

def fhat_mel(f, z, z0, zeta_hat):
    """Debias a features vote fraction from redshift z back to z0.

    Applies f_hat = 1 - (1 - f) * exp(-(z - z0) / zeta_hat), so f is
    unchanged at z == z0 and f == 1 maps to 1 at any redshift.
    """
    decay = np.exp(-(z - z0) / zeta_hat)
    return 1.0 - (1.0 - f) * decay
# Assume zeta_hat is a linear function of surface brightness
def zeta_hat_lin(SB):
    """Model zeta-hat as log-linear in surface brightness SB.

    Uses the module-level fitted `slope` and `intercept`:
    zeta_hat = 10 ** (slope * SB + intercept).
    """
    log10_zeta = slope * SB + intercept
    return 10.0 ** log10_zeta
    
In [ ]:
    
    
In [6]:
    
# Debiased table:
def debiased_table(tablename):
    """Build and write the debiased catalog for one imaging table.

    Selects the rows of the module-level `votes_data` whose 'Table' column
    equals `tablename`, adds debiased / lower-limit / upper-limit / "best"
    vote-fraction columns for smooth and features, sets boolean
    smooth / features / artifact flags, and writes the result to a
    date-stamped FITS file.

    Parameters
    ----------
    tablename : str
        Name matched against votes_data['Table'] (e.g. 'hubble').

    Side effects
    ------------
    Writes a FITS file under a hardcoded directory and prints progress.
    Relies on module-level `votes_data`, `fhat_mel`, `zeta_hat_lin`, `z0`.
    """
    n_corrected = 0     # galaxies whose best fractions needed re-adjustment
    total_shift = 0.0   # cumulative |change| in best features fraction
    in_table = (votes_data['Table'] == tablename)
    data = votes_data[in_table]
    subjects = set(data['zooniverse_id'])

    # Template array for the six new float columns.
    # NOTE(review): sized by the number of *unique* zooniverse_ids, while the
    # flag columns use len(data); add_columns assumes these are equal
    # (no duplicate ids within a table) -- confirm upstream guarantees this.
    floatcolumn = np.zeros(len(subjects), dtype=float)

    # Local import: the module-level `Column` is astropy.io.fits.Column,
    # a different class from the table Column needed here.
    from astropy.table import Column as TableColumn
    ex1 = TableColumn(floatcolumn, name='t01_smooth_or_features_a01_smooth_debiased_fraction', format='D')
    ex2 = TableColumn(floatcolumn, name='t01_smooth_or_features_a01_smooth_lower_limit', format='D')
    ex3 = TableColumn(floatcolumn, name='t01_smooth_or_features_a01_smooth_upper_limit', format='D')
    ex4 = TableColumn(floatcolumn, name='t01_smooth_or_features_a01_smooth_best_fraction', format='D')
    ex5 = TableColumn(floatcolumn, name='t01_smooth_or_features_a02_features_or_disk_debiased_fraction', format='D')
    ex6 = TableColumn(floatcolumn, name='t01_smooth_or_features_a02_features_or_disk_best_fraction', format='D')
    ex7 = Table.Column(name='t01_smooth_or_features_a01_smooth_flag', length=len(data), dtype=bool)
    ex8 = Table.Column(name='t01_smooth_or_features_a02_features_or_disk_flag', length=len(data), dtype=bool)
    ex9 = Table.Column(name='t01_smooth_or_features_a03_star_or_artifact_flag', length=len(data), dtype=bool)

    newtable = data.copy(copy_data=True)
    newtable.add_columns((ex1, ex2, ex3, ex4, ex5, ex6, ex7, ex8, ex9))

    z = 'Z_BEST'    # redshift column
    SB = 'GZ_MU_I'  # surface-brightness column
    p_cut = 0.8     # flag threshold shared by all three categories

    print('Writing file...')
    for i, gal in enumerate(data):
        if i % 10000 == 0:
            t = datetime.now().time().isoformat()
            print('Writing %ith row of %s at time %s' % (i, tablename, t))

        # New data: debiased features fraction, corrected to z0 with the
        # surface-brightness-dependent zeta-hat.
        p_features_debiased = fhat_mel(gal['t01_smooth_or_features_a02_features_or_disk_weighted_fraction'],
                                       gal[z], z0, zeta_hat_lin(gal[SB]))
        newtable.field('t01_smooth_or_features_a02_features_or_disk_debiased_fraction')[i] = p_features_debiased

        # "Best" features fraction depends on the correction class.
        correction = newtable.field('correction_type')[i]
        if correction == 0:    # correctable: trust the debiased value
            p_features_best = p_features_debiased
        elif correction == 1:  # uncorrectable: larger of lower limit and raw fraction
            p_features_best = np.nanmax([newtable.field('t01_smooth_or_features_a02_features_or_disk_lower_limit')[i],
                                         newtable.field('t01_smooth_or_features_a02_features_or_disk_weighted_fraction')[i]])
        else:                  # other classes: keep the raw weighted fraction
            p_features_best = newtable.field('t01_smooth_or_features_a02_features_or_disk_weighted_fraction')[i]

        newtable.field('t01_smooth_or_features_a02_features_or_disk_best_fraction')[i] = p_features_best

        # Debiased, lower, upper and best smooth fractions all derive from
        # 1 - p_artifact - p_features.
        p_artifact = gal['t01_smooth_or_features_a03_star_or_artifact_weighted_fraction']
        newtable.field('t01_smooth_or_features_a01_smooth_debiased_fraction')[i] = 1 - p_artifact - p_features_debiased
        newtable.field('t01_smooth_or_features_a01_smooth_lower_limit')[i] = 1 - p_artifact - gal['t01_smooth_or_features_a02_features_or_disk_upper_limit']
        newtable.field('t01_smooth_or_features_a01_smooth_upper_limit')[i] = 1 - p_artifact - gal['t01_smooth_or_features_a02_features_or_disk_lower_limit']
        p_smooth_best = 1 - p_artifact - p_features_best
        newtable.field('t01_smooth_or_features_a01_smooth_best_fraction')[i] = p_smooth_best

        # Error correction: a very high p_artifact can push p_smooth negative.
        # Clamp p_smooth to 0 and rescale p_features so that
        # p_features_best + p_smooth_best + p_artifact = 1.
        if p_smooth_best < 0:
            p_smooth_best = 0
            old_p_features = p_features_best
            p_features_best = 1 - p_smooth_best - p_artifact
            newtable.field('t01_smooth_or_features_a01_smooth_best_fraction')[i] = 0
            newtable.field('t01_smooth_or_features_a02_features_or_disk_best_fraction')[i] = p_features_best
            n_corrected += 1
            total_shift += abs(old_p_features - p_features_best)

        # Last - set smooth, features, artifact flags (smooth additionally
        # requires a usable correction type, i.e. not 3 or 4).
        newtable.field('t01_smooth_or_features_a01_smooth_flag')[i] = bool(
            p_smooth_best > p_cut and gal['correction_type'] != 3 and gal['correction_type'] != 4)
        newtable.field('t01_smooth_or_features_a02_features_or_disk_flag')[i] = bool(p_features_best > p_cut)
        newtable.field('t01_smooth_or_features_a03_star_or_artifact_flag')[i] = bool(p_artifact > p_cut)

    # Write to a date-stamped file.
    # NOTE(review): hardcoded absolute output path -- consider a configurable
    # output directory for portability.
    fname = '/home/mel/Dropbox/gzhubble/hubble_files/catalog_debiasing_files/new_sb_method/catalog_%i_%i_%i_%s.fits' % (datetime.now().month, datetime.now().day, datetime.now().year, tablename)
    newtable.write(fname, overwrite=True)

    print('Debiased %s catalog includes %i subjects.' % (tablename, len(newtable)))
    # Guard against ZeroDivisionError when no galaxy needed the negative-smooth
    # correction (the original divided by n_corrected unconditionally), and
    # report an actual percentage (the original printed a raw fraction
    # labelled with '%').
    if n_corrected > 0:
        pct = round(100. * n_corrected / len(subjects), 3)
        print('Best features fractions were adjusted for %s %% of subjects for having a negative smooth best fractions, with an average correction of $\Delta$ f=%s.' % (pct, round(total_shift / n_corrected, 2)))
    else:
        print('No best features fractions needed adjusting for negative smooth best fractions.')
    
In [7]:
    
#Write debiased catalogs for 4 tables:
# Each call writes one date-stamped FITS catalog and prints progress.
debiased_tables = ('hubble','goods_shallow','recoloured','faded')
for t in debiased_tables:
    debiased_table(t)
    
    
In [60]:
    
# Unaltered table (but add smooth/features/artifact flags):
def copy_table(tablename):
    """Copy one table of `votes_data` unchanged, adding only the flag columns.

    Unlike `debiased_table`, no vote fractions are debiased here; the raw
    weighted fractions are thresholded to set boolean smooth / features /
    artifact flags, then the table is written to a date-stamped FITS file.

    Parameters
    ----------
    tablename : str
        Name matched against votes_data['Table'] (e.g. 'sdss_single').

    Side effects
    ------------
    Writes a FITS file under a hardcoded directory and prints a summary.
    Relies on the module-level `votes_data` table.
    """
    in_table = (votes_data['Table'] == tablename)
    data = votes_data[in_table]

    ex7 = Table.Column(name='t01_smooth_or_features_a01_smooth_flag', length=len(data), dtype=bool)
    ex8 = Table.Column(name='t01_smooth_or_features_a02_features_or_disk_flag', length=len(data), dtype=bool)
    ex9 = Table.Column(name='t01_smooth_or_features_a03_star_or_artifact_flag', length=len(data), dtype=bool)
    newtable = data.copy(copy_data=True)
    newtable.add_columns((ex7, ex8, ex9))

    p_cut = 0.8  # flag threshold shared by all three categories

    # Last - set smooth, features, artifact flags.
    for i, gal in enumerate(data):
        p_smooth = gal['t01_smooth_or_features_a01_smooth_weighted_fraction']
        p_features = gal['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']
        p_star = gal['t01_smooth_or_features_a03_star_or_artifact_weighted_fraction']

        # Smooth flag additionally requires a usable correction type (not 3/4).
        newtable.field('t01_smooth_or_features_a01_smooth_flag')[i] = bool(
            p_smooth > p_cut and gal['correction_type'] != 3 and gal['correction_type'] != 4)
        newtable.field('t01_smooth_or_features_a02_features_or_disk_flag')[i] = bool(p_features > p_cut)
        # Consistency fix: use p_cut here too (was a literal 0.8).
        newtable.field('t01_smooth_or_features_a03_star_or_artifact_flag')[i] = bool(p_star > p_cut)

    # Write to a date-stamped file.
    # NOTE(review): hardcoded absolute output path.
    fname = '/home/mel/Dropbox/gzhubble/hubble_files/catalog_debiasing_files/new_sb_method/catalog_%i_%i_%i_%s.fits' % (datetime.now().month, datetime.now().day, datetime.now().year, tablename)
    newtable.write(fname, overwrite=True)

    print(' %s catalog includes %i subjects.\n' % (tablename, len(newtable)))
    
In [61]:
    
#Write debiased catalogs for 2 tables:
# These SDSS tables are copied unaltered (flags only, no debiasing).
copied_tables = ('sdss_single','sdss_coadd')
for t in copied_tables:
    copy_table(t)
    
    
In [ ]:
    
    
In [8]:
    
#check corrections in hubble table
# Recompute debiased fractions directly from the input catalog so they can be
# compared against the raw weighted fractions in the plots below.
z = 'Z_BEST'
SB = 'GZ_MU_I'
h = (votes_data['Table']=='hubble')
hubble_data=votes_data[h]
#hubble_data = Table.read('/home/mel/Dropbox/gzhubble/hubble_files/catalog_debiasing_files/new_sb_method/catalog_5_26_2016_hubble.fits')
# Masks for the three correction classes inspected here
# (0 = correctable, 1 = uncorrectable, 3 = NEI -- see plot titles below).
correctable=(hubble_data['correction_type']==0)
uncorrectable=(hubble_data['correction_type']==1)
nei=(hubble_data['correction_type']==3)
old_votes=hubble_data['t01_smooth_or_features_a02_features_or_disk_weighted_fraction']
old_z=hubble_data[z]
old_mu=hubble_data[SB]
# Debiased fractions per class, using the SB-dependent zeta-hat model.
new_c=fhat_mel(old_votes[correctable],old_z[correctable],z0,zeta_hat_lin(old_mu[correctable]))
new_unc=fhat_mel(old_votes[uncorrectable],old_z[uncorrectable],z0,zeta_hat_lin(old_mu[uncorrectable])) #debiased value
#new_unc = old_lower_limit[uncorrectable] #lower limit
new_nei=fhat_mel(old_votes[nei],old_z[nei],z0,zeta_hat_lin(old_mu[nei]))
    
In [11]:
    
# 2D histogram of new p_features distribution
# Three hexbin panels: correctable (left), lower limit / uncorrectable
# (middle), NEI (right), each comparing raw f_features (x) against the
# debiased f-hat (y).
fig = plt.figure(figsize=(20,5))
ax1 = fig.add_subplot(131)
hex1 = ax1.hexbin(old_votes[correctable],new_c, cmap=plt.cm.YlOrRd_r,gridsize=50,vmin =0,vmax=100)
ax1.set_xlabel(r'$f_{features}$',fontsize=20)
ax1.set_ylabel(r'$\hat f_{features}$',fontsize=20)
cb1 = plt.colorbar(hex1)
ax1.set_title('correctable',fontsize=20)
# NOTE(review): Axes.set_axis_bgcolor was deprecated and later removed from
# matplotlib (replaced by set_facecolor) -- confirm the pinned mpl version.
ax1.set_axis_bgcolor('#800000')
# Add the one-to-one line for comparision.
# Upper left = boosted p_features for z > 0.3
# Lower right = depressed p_features for z < 0.3
ax1.plot([0,1],[0,1],color='k',lw=2,ls='--')
# Try it with Mel's new function
ax2 = fig.add_subplot(133)
hex2 = ax2.hexbin(old_votes[nei],new_nei, cmap=plt.cm.YlOrRd_r,gridsize=50,vmax=100)
ax2.set_xlabel(r'$f_{features}$',fontsize=20)
ax2.set_ylabel(r'$\hat f_{features}$',fontsize=20)
ax2.set_xlim(0,1)
ax2.set_ylim(0,1)
cb2 = plt.colorbar(hex2)
ax2.plot([0,1],[0,1],color='k',lw=2,ls='--')
ax2.set_title('NEI',fontsize=20)
ax3 = fig.add_subplot(132)
hex3 = ax3.hexbin(old_votes[uncorrectable],new_unc, cmap=plt.cm.YlOrRd_r,gridsize=50,vmin =0, vmax=100)
ax3.set_xlabel(r'$f_{features}$',fontsize=20)
ax3.set_ylabel(r'$\hat f_{features}$',fontsize=20)
ax3.set_xlim(0,1)
ax3.set_ylim(0,1)
cb3 = plt.colorbar(hex3)
ax3.plot([0,1],[0,1],color='k',lw=2,ls='--')
ax3.set_title('lower limit',fontsize=20)
ax3.set_axis_bgcolor('#800000')
# Saves relative to the notebook's working directory; requires the
# ../../writeup/figures directory to exist.
plt.savefig('../../writeup/figures/debiased_corrections.pdf')
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]: