In [1]:
import numpy as np
from astropy.io import fits
from sklearn.ensemble import ExtraTreesRegressor
import pickle


/home/rybizki/anaconda3/lib/python3.6/site-packages/astropy/extern/bundled/six.py:60: ResourceWarning: unclosed file <_io.TextIOWrapper name='/home/rybizki/anaconda3/lib/python3.6/site-packages/astropy/extern/bundled/six.py' mode='r' encoding='utf-8'>
  class X(object):
/home/rybizki/anaconda3/lib/python3.6/importlib/_bootstrap.py:205: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__
  return f(*args, **kwds)
/home/rybizki/anaconda3/lib/python3.6/importlib/_bootstrap.py:205: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__
  return f(*args, **kwds)
/home/rybizki/anaconda3/lib/python3.6/importlib/_bootstrap.py:205: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__
  return f(*args, **kwds)
/home/rybizki/anaconda3/lib/python3.6/importlib/_bootstrap.py:205: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__
  return f(*args, **kwds)

In [2]:
# Read the GDR2 table from disk (the file name suggests a 0.0025 sampling of
# the cleaned catalogue -- confirm against the script that produced it).
gdr2 = fits.getdata("../output/GDR2_207/GDR2_207_cleaned_0.0025sampling.fits")
print(len(gdr2))  # number of rows before cleaning

# Keep only the rows whose parallax_error is not NaN.
pe = gdr2.parallax_error
clean = np.logical_not(np.isnan(pe))
gdr2 = gdr2[clean]
print(len(gdr2))  # number of rows after cleaning


/home/rybizki/anaconda3/lib/python3.6/importlib/_bootstrap.py:205: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__
  return f(*args, **kwds)
/home/rybizki/anaconda3/lib/python3.6/importlib/_bootstrap.py:205: ImportWarning: can't resolve package from __spec__ or __package__, falling back on __name__ and __path__
  return f(*args, **kwds)
3134770
2925796

In [3]:
# Feature columns: G magnitude, BP-RP colour, and the `l` / `b` columns
# (presumably Galactic longitude / latitude -- confirm against the catalogue).
g = gdr2.phot_g_mean_mag
bprp = gdr2.phot_bp_mean_mag - gdr2.phot_rp_mean_mag
l, b = gdr2.l, gdr2.b

# Target columns: parallax error and number of visibility periods used.
pe = gdr2.parallax_error
vp = gdr2.visibility_periods_used

# One row per source, one column per quantity.
X = np.column_stack((g, bprp, l, b))
y = np.column_stack((pe, vp))

In [4]:
# Fit a single multi-output extra-trees regressor mapping the feature matrix X
# onto the two target columns in y.
model = ExtraTreesRegressor(
    # Pin the ensemble size to the value recorded in this notebook's output;
    # the library default changed from 10 to 100 in scikit-learn 0.22, so
    # relying on it would silently train a different (and much larger) model.
    n_estimators=10,
    # Extra-trees draws random split thresholds; a fixed seed makes
    # "Restart & Run All" reproduce the same model.
    random_state=0,
)
model.fit(X, y)


Out[4]:
ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
          max_features='auto', max_leaf_nodes=None,
          min_impurity_decrease=0.0, min_impurity_split=None,
          min_samples_leaf=1, min_samples_split=2,
          min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
          oob_score=False, random_state=None, verbose=0, warm_start=False)

In [8]:
# Persist the fitted model to disk so it can be reloaded without retraining.
filename = 'pu_and_vpu_model.pck'
# Use a context manager so the file is flushed and closed deterministically;
# a bare open() here leaves the handle open and triggers a ResourceWarning.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)


/home/rybizki/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: ResourceWarning: unclosed file <_io.BufferedWriter name='pu_and_vpu_model.pck'>
  

In [ ]:
# Reload the pickled model from `filename`.
# NOTE: pickle.load can execute arbitrary code; only load files you created
# yourself or otherwise trust.
# The context manager closes the file handle instead of leaking it.
with open(filename, 'rb') as model_file:
    model = pickle.load(model_file)