In [1]:
%pylab inline
from astropy.io import fits
from sklearn.ensemble import ExtraTreesRegressor
import pickle


Populating the interactive namespace from numpy and matplotlib
/home/rybizki/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
/home/rybizki/anaconda3/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88
  return f(*args, **kwds)
/home/rybizki/anaconda3/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d

In [2]:
# Read a second FITS table into `gdr2val` (presumably the validation sample, going by the file name).
# NOTE(review): `gdr2val` is not referenced by any other cell visible in this notebook — confirm it is still needed.
gdr2val = fits.getdata('../output/GDR2_207/GDR2_207_cleaned_0.0025sampling_validation.fits')

In [3]:
# Read the sampled catalogue and keep only the rows whose parallax error is not NaN
gdr2 = fits.getdata("../output/GDR2_207/GDR2_207_cleaned_0.0025sampling.fits")
print(len(gdr2))  # number of rows before cleaning
pe = gdr2.parallax_error
clean = np.logical_not(np.isnan(pe))  # True where parallax_error is defined
gdr2 = gdr2[clean]
print(len(gdr2))  # number of rows after cleaning
print(gdr2.dtype.names)


3134770
2925796
('solution_id', 'designation', 'source_id', 'random_index', 'ref_epoch', 'ra', 'ra_error', 'dec', 'dec_error', 'parallax', 'parallax_error', 'parallax_over_error', 'pmra', 'pmra_error', 'pmdec', 'pmdec_error', 'ra_dec_corr', 'ra_parallax_corr', 'ra_pmra_corr', 'ra_pmdec_corr', 'dec_parallax_corr', 'dec_pmra_corr', 'dec_pmdec_corr', 'parallax_pmra_corr', 'parallax_pmdec_corr', 'pmra_pmdec_corr', 'astrometric_n_obs_al', 'astrometric_n_obs_ac', 'astrometric_n_good_obs_al', 'astrometric_n_bad_obs_al', 'astrometric_gof_al', 'astrometric_chi2_al', 'astrometric_excess_noise', 'astrometric_excess_noise_sig', 'astrometric_params_solved', 'astrometric_primary_flag', 'astrometric_weight_al', 'astrometric_pseudo_colour', 'astrometric_pseudo_colour_error', 'mean_varpi_factor_al', 'astrometric_matched_observations', 'visibility_periods_used', 'astrometric_sigma5d_max', 'frame_rotator_object_type', 'matched_observations', 'duplicated_source', 'phot_g_n_obs', 'phot_g_mean_flux', 'phot_g_mean_flux_error', 'phot_g_mean_flux_over_error', 'phot_g_mean_mag', 'phot_bp_n_obs', 'phot_bp_mean_flux', 'phot_bp_mean_flux_error', 'phot_bp_mean_flux_over_error', 'phot_bp_mean_mag', 'phot_rp_n_obs', 'phot_rp_mean_flux', 'phot_rp_mean_flux_error', 'phot_rp_mean_flux_over_error', 'phot_rp_mean_mag', 'phot_bp_rp_excess_factor', 'phot_proc_mode', 'bp_rp', 'bp_g', 'g_rp', 'radial_velocity', 'radial_velocity_error', 'rv_nb_transits', 'rv_template_teff', 'rv_template_logg', 'rv_template_fe_h', 'phot_variable_flag', 'l', 'b', 'ecl_lon', 'ecl_lat', 'priam_flags', 'teff_val', 'teff_percentile_lower', 'teff_percentile_upper', 'a_g_val', 'a_g_percentile_lower', 'a_g_percentile_upper', 'e_bp_min_rp_val', 'e_bp_min_rp_percentile_lower', 'e_bp_min_rp_percentile_upper', 'flame_flags', 'radius_val', 'radius_percentile_lower', 'radius_percentile_upper', 'lum_val', 'lum_percentile_lower', 'lum_percentile_upper', 'datalink_url', 'epoch_photometry_url')

In [4]:
def gmagerror(flux,fluxerror):
    """
    Symmetric magnitude error derived from a flux and its uncertainty.

    The flux is shifted up and down by ``fluxerror``, each shifted flux is
    converted to a magnitude, and half of the difference between the two
    magnitudes is returned. Only a good approximation for low values.
    """
    mag_offset = 25.688365
    # magnitude of the flux increased by its error (numerically smaller)
    mag_upper_flux = -2.5*np.log10(flux + fluxerror)+mag_offset
    # magnitude of the flux decreased by its error (numerically larger)
    mag_lower_flux = -2.5*np.log10(flux - fluxerror)+mag_offset
    return(np.divide(mag_lower_flux-mag_upper_flux,2))

In [6]:
# Predict visibility_periods_used (vp) and phot_g_n_obs (gn) from l and b
# with a previously stored model. Nothing is trained in this cell.

g = gdr2.phot_g_mean_mag
bprp = gdr2.phot_bp_mean_mag - gdr2.phot_rp_mean_mag
l = gdr2.l
b = gdr2.b
pe = gdr2.parallax_error
vp = gdr2.visibility_periods_used
gn = gdr2.phot_g_n_obs
f = gdr2.phot_g_mean_flux
fe = gdr2.phot_g_mean_flux_error
ge = gmagerror(f,fe)
# Feature matrix (one row per source: l, b) and the catalogue values (vp, gn)
# the prediction can be compared against.
X = np.vstack((l,b)).T
y = np.vstack((vp,gn)).T


filename = "errors/lb2vpunobs_model_bigger"
# NOTE: unpickling runs arbitrary code stored in the file; only load model
# files that were produced locally / come from a trusted source.
# The context manager closes the file handle once the model is loaded.
with open(filename,'rb') as model_file:
    model = pickle.load(model_file)
y_pred = model.predict(X)

In [10]:
# Display column 0 of the prediction, rounded to the nearest integer.
# NOTE(review): presumably this column is visibility_periods_used, matching the (vp, gn) order of `y` — confirm.
np.round(y_pred[:,0])


Out[10]:
array([ 8., 10., 10., ..., 18., 16., 17.])

In [8]:
# Display the current `y` array for visual comparison.
# NOTE(review): which `y` is shown depends on the cell that ran last — presumably the (vp, gn) stack; confirm.
y


Out[8]:
array([[  8,  95],
       [ 10, 140],
       [ 10, 112],
       ...,
       [ 18, 317],
       [ 18, 260],
       [ 18, 744]], dtype=int32)

In [ ]:
# `break` outside a loop is a SyntaxError, so executing this cell fails and stops a "Run All" here.
# NOTE(review): presumably a deliberate guard so the training cells below are not re-run by accident — confirm.
break

In [8]:
# training parallax_error and gmagnitude error on vpu, gnobs, g, bp-rp
g = gdr2.phot_g_mean_mag
bprp = gdr2.phot_bp_mean_mag - gdr2.phot_rp_mean_mag
l = gdr2.ecl_lon
b = gdr2.ecl_lat
pe = gdr2.parallax_error
vp = gdr2.visibility_periods_used
gn = gdr2.phot_g_n_obs
f = gdr2.phot_g_mean_flux
fe = gdr2.phot_g_mean_flux_error
ge = gmagerror(f,fe)
# Features: one row per source (g, bp-rp, vp, gn); targets: (pe, ge)
X = np.vstack((g,bprp,vp,gn)).T
y = np.vstack((pe,ge)).T
model = ExtraTreesRegressor(n_estimators=10, criterion='mse', max_depth=None,
                            min_samples_split=5, min_samples_leaf=1, 
                            min_weight_fraction_leaf=0.0, max_features='auto', 
                            max_leaf_nodes=None, min_impurity_decrease=0.0, 
                            min_impurity_split=None, bootstrap=True, oob_score=True,
                            n_jobs=1, random_state=None, verbose=0, warm_start=False)
model.fit(X,y)
filename = "gbprpvpunobs2pege_model_bigger"
# Write through a context manager so the file is flushed and closed
# deterministically; a bare open() left the handle dangling.
with open(filename,'wb') as model_file:
    pickle.dump(model,model_file)


/home/rybizki/anaconda3/lib/python3.6/site-packages/sklearn/ensemble/forest.py:724: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable oob estimates.
  warn("Some inputs do not have OOB scores. "

In [9]:
# radial_velocity_error on g, bp-rp, teff
# NOTE: this rebinds `gdr2` to the RVS training table, so every later cell
# that reads `gdr2` sees this table rather than the sampled catalogue.
gdr2 = fits.getdata('../output/GDR2_207/GDR2_207_rvs_error_training.fits')
g = gdr2.phot_g_mean_mag
bprp = gdr2.phot_bp_mean_mag - gdr2.phot_rp_mean_mag
te = gdr2.teff_val
rve = gdr2.radial_velocity_error
# Features: one row per source (g, bp-rp, teff); target: radial_velocity_error
X = np.vstack((g,bprp,te)).T
y = rve
model = ExtraTreesRegressor(n_estimators=10, criterion='mse', max_depth=None,
                            min_samples_split=5, min_samples_leaf=1, 
                            min_weight_fraction_leaf=0.0, max_features='auto', 
                            max_leaf_nodes=None, min_impurity_decrease=0.0, 
                            min_impurity_split=None, bootstrap=True, oob_score=True,
                            n_jobs=1, random_state=None, verbose=0, warm_start=False)
model.fit(X,y)
filename = "gbprpteff2rvse_model"
# Write through a context manager so the file is flushed and closed
# deterministically; a bare open() left the handle dangling.
with open(filename,'wb') as model_file:
    pickle.dump(model,model_file)


/home/rybizki/anaconda3/lib/python3.6/site-packages/sklearn/ensemble/forest.py:724: UserWarning: Some inputs do not have OOB scores. This probably means too few trees were used to compute any reliable oob estimates.
  warn("Some inputs do not have OOB scores. "

In [16]:
# Scratch arithmetic: square root of the ratio 22/34.
# NOTE(review): where 22 and 34 come from is not shown in this notebook — document or remove.
np.sqrt(22/34)


Out[16]:
0.8043996665398437

In [15]:
# Scratch arithmetic: the ratio 34/22.
# NOTE(review): where 34 and 22 come from is not shown in this notebook — document or remove.
34/22


Out[15]:
1.5454545454545454

In [ ]:
# BP band: compare the magnitude shifts obtained by adding / subtracting the
# flux error, and derive a symmetric magnitude error `bpme`.
# NOTE: `g` holds the BP magnitude here, not the G magnitude.
g = gdr2.phot_bp_mean_mag
f = gdr2.phot_bp_mean_flux
fe = gdr2.phot_bp_mean_flux_error
# -2.5*log10(flux) without offset; t-g shows the offset between it and the
# catalogue magnitude.
t = -2.5*np.log10(f)
print(t)
print(g)
print(t-g)
def flux2mag(flux):
    """Convert a BP flux to a magnitude using the offset 25.351388."""
    return(-2.5*np.log10(flux)+25.351388)
gp = flux2mag(f + fe)
gm = flux2mag(f - fe)
d1 = gp-g
d2 = g-gm
# asymmetry in magnitude error
plt.plot(d1,d2,'.', alpha = 0.1)
# one-to-one line: points on it have symmetric magnitude shifts
plt.plot([-0.14,0],[-0.14,0])
plt.xlim((-0.03,0.0))
plt.ylim((-0.03,0.0))
plt.xlabel('mag(f + fe) - mag')
plt.ylabel('mag - mag(f - fe)')
# symmetric error: half the spread between the two shifted magnitudes
bpme = np.divide(gm-gp,2)

In [ ]:
# RP band: compare the magnitude shifts obtained by adding / subtracting the
# flux error, and derive a symmetric magnitude error `rpme`.
# NOTE: `g` holds the RP magnitude here, not the G magnitude.
g = gdr2.phot_rp_mean_mag
f = gdr2.phot_rp_mean_flux
fe = gdr2.phot_rp_mean_flux_error
# -2.5*log10(flux) without offset; t-g shows the offset between it and the
# catalogue magnitude.
t = -2.5*np.log10(f)
print(t)
print(g)
print(t-g)
def flux2mag(flux):
    """Convert an RP flux to a magnitude using the offset 24.7619."""
    return(-2.5*np.log10(flux)+24.7619)
gp = flux2mag(f + fe)
gm = flux2mag(f - fe)
d1 = gp-g
d2 = g-gm
# asymmetry in magnitude error
plt.plot(d1,d2,'.', alpha = 0.1)
# one-to-one line: points on it have symmetric magnitude shifts
plt.plot([-0.14,0],[-0.14,0])
plt.xlim((-0.03,0.0))
plt.ylim((-0.03,0.0))
plt.xlabel('mag(f + fe) - mag')
plt.ylabel('mag - mag(f - fe)')
# symmetric error: half the spread between the two shifted magnitudes
rpme = np.divide(gm-gp,2)

In [ ]:
# Plot `g` against `rpme` as pixel markers, with both axes on a log scale.
# NOTE(review): relies on `rpme` and `g` left in the kernel by an earlier cell — presumably the RP cell, where `g` is the RP magnitude; confirm.
plt.plot(rpme,g,',', alpha = 0.1)
plt.yscale('log')
plt.xscale('log')

In [ ]:
# Compare an ecliptic-latitude-based estimate of the number of observations
# with the catalogue's phot_g_n_obs.
nobs = np.genfromtxt('errors/nobs.txt', names = True)
scaling_factor_dr2 = 0.37
# Gaia's ecl_lat is tabulated in degrees, while np.sin expects radians;
# convert first so |sin(beta)| matches the 'sinbeta' grid of the table.
sin_beta = np.abs(np.sin(np.deg2rad(gdr2.ecl_lat)))
number_obs = np.round(scaling_factor_dr2*np.interp(sin_beta,nobs['sinbeta'],nobs['N_obs']))
plt.plot(number_obs,gdr2.phot_g_n_obs,'.',alpha = 0.01)
# one-to-one reference line
plt.plot([6,22],[6,22],)
# x carries the interpolated estimate, y the catalogue value
plt.xlabel('_predicted')
plt.ylabel('_real')
plt.yscale("log")
plt.xscale("log")
plt.show()
plt.close()