Gaia

Real data!

gully
Sept 14, 2016

Outline:

Download the data
Estimate how much data it will be
Batch download more

1. Download the data



In [7]:

    
! wget 'http://cdn.gea.esac.esa.int/Gaia/gaia_source/csv/GaiaSource_000-000-001.csv.gz'









    



--2016-09-14 18:42:56--  http://cdn.gea.esac.esa.int/Gaia/gaia_source/csv/GaiaSource_000-000-001.csv.gz
Resolving cdn.gea.esac.esa.int... 43.245.63.28
Connecting to cdn.gea.esac.esa.int|43.245.63.28|:80... connected.
HTTP request sent, awaiting response... 
  HTTP/1.1 200 OK
  Date: Wed, 14 Sep 2016 10:42:57 GMT
  Content-Type: application/octet-stream
  Content-Length: 41602593
  Connection: keep-alive
  Last-Modified: Wed, 07 Sep 2016 16:47:56 GMT
  ETag: "57d044bc-27ace21"
  Access-Control-Allow-Origin: *
  Server: CDN77-Turbo
  X-Edge-IP: 43.245.63.27
  X-Edge-Location: hongkongHK
  X-Cache: MISS
  Accept-Ranges: bytes
Length: 41602593 (40M) [application/octet-stream]
Saving to: 'GaiaSource_000-000-001.csv.gz'

GaiaSource_000-000- 100%[===================>]  39.67M   430KB/s    in 47s     

2016-09-14 18:43:44 (870 KB/s) - 'GaiaSource_000-000-001.csv.gz' saved [41602593/41602593]



In [9]:

    
# List the working directory to confirm which Gaia files are present.
! ls









    



GaiaSource_000-000-000.csv
GaiaSource_000-000-001.csv.gz
Untitled.ipynb
ad_overview.ipynb
adrasteia_01-gum_mw_sample.ipynb
adrasteia_02-03_get_real_gaia_data.ipynb
asrasteia_02_01_sept2016_fake_data.ipynb
asrasteia_02_02_sept2016_fake_data_postage_stamps.ipynb



In [5]:

    
! gzip -d GaiaSource_000-000-000.csv.gz

2. Estimate how much data it will be



In [10]:

    
# Report the on-disk size of every Gaia file in the working directory
# (here: one uncompressed chunk and one still-gzipped chunk).
! du -hs GaiaSource_*









    



 98M	GaiaSource_000-000-000.csv
 40M	GaiaSource_000-000-001.csv.gz

Wow, a ~100 MB CSV file once uncompressed... There are 20 groups of 256 files plus 111 extra files.



In [11]:

    
# Total number of catalogue files: 20 groups of 256 files plus 111 extra files.
n_groups = 20
files_per_group = 256
n_extra_files = 111
n_files = n_groups * files_per_group + n_extra_files
n_files









    Out[11]:





5231



In [13]:

    
# Rough size of the full uncompressed catalogue, assuming every file is about
# as large as the 98 MB chunk measured with `du` above.
n_files = 20 * 256 + 111      # 20 groups of 256 files plus 111 extra files
mb_per_file = 98              # uncompressed size of GaiaSource_000-000-000.csv
total_gb = n_files * mb_per_file / 1000.0   # MB -> GB
total_gb









    Out[13]:





512.638



In [15]:

    
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns



In [19]:

    
# Parse the uncompressed chunk into a DataFrame; %time prints how long the load takes.
%time g000 = pd.read_csv('GaiaSource_000-000-000.csv')









    



CPU times: user 2.02 s, sys: 144 ms, total: 2.17 s
Wall time: 2.17 s



In [22]:

    
# Inspect which columns this chunk provides.
g000.columns









    Out[22]:





Index(['solution_id', 'source_id', 'random_index', 'ref_epoch', 'ra',
       'ra_error', 'dec', 'dec_error', 'parallax', 'parallax_error', 'pmra',
       'pmra_error', 'pmdec', 'pmdec_error', 'ra_dec_corr', 'ra_parallax_corr',
       'ra_pmra_corr', 'ra_pmdec_corr', 'dec_parallax_corr', 'dec_pmra_corr',
       'dec_pmdec_corr', 'parallax_pmra_corr', 'parallax_pmdec_corr',
       'pmra_pmdec_corr', 'astrometric_n_obs_al', 'astrometric_n_obs_ac',
       'astrometric_n_good_obs_al', 'astrometric_n_good_obs_ac',
       'astrometric_n_bad_obs_al', 'astrometric_n_bad_obs_ac',
       'astrometric_delta_q', 'astrometric_excess_noise',
       'astrometric_excess_noise_sig', 'astrometric_primary_flag',
       'astrometric_relegation_factor', 'astrometric_weight_al',
       'astrometric_weight_ac', 'astrometric_priors_used',
       'matched_observations', 'duplicated_source',
       'scan_direction_strength_k1', 'scan_direction_strength_k2',
       'scan_direction_strength_k3', 'scan_direction_strength_k4',
       'scan_direction_mean_k1', 'scan_direction_mean_k2',
       'scan_direction_mean_k3', 'scan_direction_mean_k4', 'phot_g_n_obs',
       'phot_g_mean_flux', 'phot_g_mean_flux_error', 'phot_g_mean_mag',
       'phot_variable_flag', 'l', 'b', 'ecl_lon', 'ecl_lat'],
      dtype='object')



In [23]:

    
# Number of rows loaded from the file.
g000.shape[0]









    Out[23]:





218453



In [27]:

    
# Keep only the rows that carry a parallax value. `notnull()` states the intent
# directly; the earlier `parallax == parallax` form relied on NaN != NaN to
# build the same boolean mask.
p_i = g000.parallax.notnull()
p000 = g000[p_i]



In [31]:

    
# Positions (ra vs. dec columns) of the sources that have a parallax.
# Axis labels make the figure readable on its own; the trailing `;` hides
# the matplotlib object repr.
plt.plot(p000.ra, p000.dec, '.')
plt.xlabel('ra')
plt.ylabel('dec');









    Out[31]:





[<matplotlib.lines.Line2D at 0x11d241048>]



In [34]:

    
# Parallax against its reported error, with parallax on a logarithmic axis.
# NOTE(review): a log axis cannot show zero or negative parallax values, so any
# such rows are absent from this view; confirm that is acceptable.
plt.plot(p000.parallax, p000.parallax_error, '.')
plt.xscale('log')
plt.xlabel('parallax')
plt.ylabel('parallax_error');



In [35]:

    
# Distribution of the parallax column for the parallax subset.
sns.distplot(p000.parallax)









    



//anaconda/lib/python3.4/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j






    Out[35]:





<matplotlib.axes._subplots.AxesSubplot at 0x11e005048>



In [36]:

    
# Distribution of the parallax_error column for the parallax subset.
sns.distplot(p000.parallax_error)









    



//anaconda/lib/python3.4/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j






    Out[36]:





<matplotlib.axes._subplots.AxesSubplot at 0x11f2a4630>



In [43]:

    
# Overlay the histograms of the three `*_obs_ac` count columns on shared bins.
# Each series gets a legend label; without one the three overlaid histograms
# are indistinguishable and the x label names only the last column plotted.
bins = np.arange(0, 160, 10)
sns.distplot(p000.astrometric_n_obs_ac, bins=bins, kde=False,
             label='astrometric_n_obs_ac')
sns.distplot(p000.astrometric_n_bad_obs_ac, bins=bins, kde=False,
             label='astrometric_n_bad_obs_ac')
sns.distplot(p000.astrometric_n_good_obs_ac, bins=bins, kde=False,
             label='astrometric_n_good_obs_ac')
plt.xlabel('astrometric_n_*_obs_ac')
plt.legend();









    Out[43]:





<matplotlib.axes._subplots.AxesSubplot at 0x11ee639e8>



In [54]:

    
# Disabled cell: the same three histograms as the `_ac` cell above, but for the
# `_al` columns. Uncomment to run.
#bins = np.arange(0, 160, 10)
#sns.distplot(p000.astrometric_n_obs_al, bins=bins, kde=False)
#sns.distplot(p000.astrometric_n_bad_obs_al, bins=bins, kde=False)
#sns.distplot(p000.astrometric_n_good_obs_al, bins=bins, kde=False)



In [48]:

    
# Distribution of the phot_g_mean_mag column for the parallax subset.
sns.distplot(p000.phot_g_mean_mag)









    



//anaconda/lib/python3.4/site-packages/statsmodels/nonparametric/kdetools.py:20: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
  y = X[:m/2+1] + np.r_[0,X[m/2+1:],0]*1j






    Out[48]:





<matplotlib.axes._subplots.AxesSubplot at 0x12190ba20>



In [53]:

    
# Histogram of matched_observations using unit-width bins from 0 to 39
# (counts only, KDE turned off).
bins = np.arange(0,40,1)
sns.distplot(p000.matched_observations, bins=bins,kde=False)









    Out[53]:





<matplotlib.axes._subplots.AxesSubplot at 0x120fcce80>



In [29]:

    
# Non-null count per column for the parallax subset. In the recorded output
# every column has 1651 values except astrometric_delta_q, which has 181.
p000.count()









    Out[29]:





solution_id                      1651
source_id                        1651
random_index                     1651
ref_epoch                        1651
ra                               1651
ra_error                         1651
dec                              1651
dec_error                        1651
parallax                         1651
parallax_error                   1651
pmra                             1651
pmra_error                       1651
pmdec                            1651
pmdec_error                      1651
ra_dec_corr                      1651
ra_parallax_corr                 1651
ra_pmra_corr                     1651
ra_pmdec_corr                    1651
dec_parallax_corr                1651
dec_pmra_corr                    1651
dec_pmdec_corr                   1651
parallax_pmra_corr               1651
parallax_pmdec_corr              1651
pmra_pmdec_corr                  1651
astrometric_n_obs_al             1651
astrometric_n_obs_ac             1651
astrometric_n_good_obs_al        1651
astrometric_n_good_obs_ac        1651
astrometric_n_bad_obs_al         1651
astrometric_n_bad_obs_ac         1651
astrometric_delta_q               181
astrometric_excess_noise         1651
astrometric_excess_noise_sig     1651
astrometric_primary_flag         1651
astrometric_relegation_factor    1651
astrometric_weight_al            1651
astrometric_weight_ac            1651
astrometric_priors_used          1651
matched_observations             1651
duplicated_source                1651
scan_direction_strength_k1       1651
scan_direction_strength_k2       1651
scan_direction_strength_k3       1651
scan_direction_strength_k4       1651
scan_direction_mean_k1           1651
scan_direction_mean_k2           1651
scan_direction_mean_k3           1651
scan_direction_mean_k4           1651
phot_g_n_obs                     1651
phot_g_mean_flux                 1651
phot_g_mean_flux_error           1651
phot_g_mean_mag                  1651
phot_variable_flag               1651
l                                1651
b                                1651
ecl_lon                          1651
ecl_lat                          1651
dtype: int64



In [57]:

    
# Show the first row of the parallax subset, one field per line.
p000.iloc[0]









    Out[57]:





solution_id                      1635378410781933568
source_id                              7627862074752
random_index                               883950605
ref_epoch                                       2015
ra                                           45.0343
ra_error                                    0.305989
dec                                         0.235392
dec_error                                   0.218802
parallax                                     6.35295
parallax_error                               0.30791
pmra                                         43.7523
pmra_error                                 0.0705422
pmdec                                       -7.64199
pmdec_error                                0.0874018
ra_dec_corr                                -0.414972
ra_parallax_corr                            0.179966
ra_pmra_corr                                0.159207
ra_pmdec_corr                             -0.0857597
dec_parallax_corr                          -0.407338
dec_pmra_corr                             -0.0994513
dec_pmdec_corr                             0.0606588
parallax_pmra_corr                       -0.00157679
parallax_pmdec_corr                        -0.101957
pmra_pmdec_corr                             0.214677
astrometric_n_obs_al                              79
astrometric_n_obs_ac                              79
astrometric_n_good_obs_al                         79
astrometric_n_good_obs_ac                         78
astrometric_n_bad_obs_al                           0
astrometric_n_bad_obs_ac                           1
astrometric_delta_q                          1.91906
astrometric_excess_noise                    0.717101
astrometric_excess_noise_sig                 412.606
astrometric_primary_flag                        True
astrometric_relegation_factor                 2.9361
astrometric_weight_al                        1.81816
astrometric_weight_ac                    1.26696e-05
astrometric_priors_used                            3
matched_observations                               9
duplicated_source                              False
scan_direction_strength_k1                  0.382348
scan_direction_strength_k2                  0.538266
scan_direction_strength_k3                  0.392378
scan_direction_strength_k4                  0.916306
scan_direction_mean_k1                       -113.76
scan_direction_mean_k2                       21.3929
scan_direction_mean_k3                      -41.6784
scan_direction_mean_k4                       26.2018
phot_g_n_obs                                      77
phot_g_mean_flux                         1.03123e+07
phot_g_mean_flux_error                       10577.4
phot_g_mean_mag                              7.99138
phot_variable_flag                     NOT_AVAILABLE
l                                             176.74
b                                           -48.7144
ecl_lon                                      42.6418
ecl_lat                                     -16.1211
Name: 91, dtype: object



In [ ]: