Import these first (I auto-import them every time):
In [4]:
    
#! cat /Users/gully/.ipython/profile_default/startup/start.ipy
    
In [5]:
    
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
    
In [6]:
    
import os
    
In [ ]:
    
for i in range(16):
    fn = 'http://cdn.gea.esac.esa.int/Gaia/tgas_source/csv/TgasSource_000-000-{:03d}.csv.gz'.format(i)
    executable = 'wget '+fn
    print(executable)
    # os.system(executable)  ## Uncomment to actually download
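
If wget isn't on your system (for example on a stock Mac), the same loop can be written with the standard library instead. A minimal sketch using urllib; the destination filenames here are just the last part of each URL:

    from urllib.request import urlretrieve

    for i in range(16):
        fn = 'http://cdn.gea.esac.esa.int/Gaia/tgas_source/csv/TgasSource_000-000-{:03d}.csv.gz'.format(i)
        print(fn)
        # urlretrieve(fn, fn.split('/')[-1])  # uncomment to actually download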
    
    
In [5]:
    
#! mv Tgas* ../data
    
In [6]:
    
#! gzip -d ../data/Tgas*
    
In [7]:
    
! ls ../data/Tgas*
    
    
Compare to a file from the full Gaia source catalog (downloaded in the previous notebook, or manually):
In [8]:
    
#! wget http://cdn.gea.esac.esa.int/Gaia/gaia_source/csv/GaiaSource_000-000-000.csv.gz
#! mv GaiaSource_000-000-000.csv.gz ../data/
    
In [9]:
    
! ls ../data/GaiaSource*
    
    
In [10]:
    
import pandas as pd
    
In [11]:
    
%time t000 = pd.read_csv('../data/TgasSource_000-000-000.csv')
    
    
In [12]:
    
%time g000 = pd.read_csv('../data/GaiaSource_000-000-000.csv')
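
Incidentally, the gzip -d step above isn't strictly required: pandas infers compression from the file extension, so the .csv.gz files can be read directly (at the cost of a slower parse). This only works if you skipped the decompression step and still have the .gz files:

    # pandas infers compression from the .gz extension
    t000 = pd.read_csv('../data/TgasSource_000-000-000.csv.gz')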
    
    
In [13]:
    
set(t000.columns) - set(g000.columns)
    
    Out[13]:
TGAS is just the subset with parallaxes available, while the full Gaia source catalog has only positions and magnitudes, but for over a billion sources:
#GaiaDR1 details: 1billion stars w/ position+magnitude; 2million stars w/ pos+mag+parallax+proper motion; 3194 variable stars; 2152 quasars
— ESA Science (@esascience) September 14, 2016
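
You can see the same thing directly in the two files we just loaded: every TGAS row has a parallax, while only a small fraction of the rows in the plain Gaia source file do. A quick spot check, using the fact that missing values are stored as NaN:

    # fraction of rows with a missing parallax in each file
    t000.parallax.isnull().mean(), g000.parallax.isnull().mean()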
In [14]:
    
len(t000), len(g000)
    
    Out[14]:
In [15]:
    
p_i = t000.parallax == t000.parallax  # NaN != NaN, so this keeps only rows with a measured parallax
tp000 = t000[p_i]
p_i.sum()
    
    Out[15]:
In [16]:
    
p_i = g000.parallax == g000.parallax
gp000 = g000[p_i]
p_i.sum()
    
    Out[16]:
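The self-comparison above works because NaN is the only value that is not equal to itself; pandas' notnull() is an equivalent, more explicit spelling:

    p_i = g000.parallax.notnull()  # same boolean mask as g000.parallax == g000.parallax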
In [17]:
    
sns.set_color_codes()
    
For a single file, TGAS covers much more sky area; the file sizes are capped at about 40 MB each.
In [18]:
    
plt.plot(tp000.ra[0:2], tp000.dec[0:2], 'b.', label='TGAS')  # Hack: a few opaque points first, so the legend markers aren't faint
plt.plot(gp000.ra[0:2], gp000.dec[0:2], 'r.', label='Gaia Source')
plt.plot(tp000.ra.values, tp000.dec.values, 'b.', alpha=0.1)
plt.plot(gp000.ra.values, gp000.dec.values, 'r.', alpha=0.1)
plt.xlabel('RA (deg)')
plt.ylabel('Dec (deg)')
plt.legend(loc='lower left')
    
    Out[18]:
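An alternative to the two-opaque-points hack is to plot everything faint and then bump the alpha on the legend handles afterwards. A sketch, relying on matplotlib's Legend.legendHandles attribute:

    plt.plot(tp000.ra.values, tp000.dec.values, 'b.', alpha=0.1, label='TGAS')
    plt.plot(gp000.ra.values, gp000.dec.values, 'r.', alpha=0.1, label='Gaia Source')
    leg = plt.legend(loc='lower left')
    for handle in leg.legendHandles:
        handle.set_alpha(1.0)  # opaque markers in the legend only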
    
In [19]:
    
df_list = []
    
Reading in all 16 files takes a nontrivial amount of RAM, but should be fine on a modern laptop.
In [20]:
    
for i in range(16):
    df_list.append(pd.read_csv('../data/TgasSource_000-000-{:03d}.csv'.format(i)))
    
In [21]:
    
tt = pd.concat(df_list, ignore_index=True)
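
Equivalently, you can glob the files instead of hard-coding the count of 16; a minimal sketch:

    import glob

    files = sorted(glob.glob('../data/TgasSource_000-000-*.csv'))
    tt = pd.concat((pd.read_csv(f) for f in files), ignore_index=True)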
    
In [22]:
    
t000.shape
    
    Out[22]:
In [24]:
    
tt.shape
    
    Out[24]:
In [25]:
    
len(tt.source_id.unique())
    
    Out[25]:
So 2.05+ million sources with 59 "features" or columns of metadata.
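A quick way to see what "fine for modern laptops" means in practice is to ask pandas for the deep memory footprint of the concatenated table:

    tt.memory_usage(deep=True).sum() / 1e9  # approximate in-memory size in GB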
In [26]:
    
plt.plot(tt.parallax, tt.parallax_error, '.', alpha=0.005)
plt.xscale('log')  # note: the log axis silently drops the negative-parallax sources
    
    
In [27]:
    
bins = np.arange(-50, 200, 3)
sns.distplot(tt.parallax, bins=bins, kde=False)
plt.yscale('log')
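
For physical intuition, the parallaxes are in milliarcseconds, so a naive distance estimate is just d[pc] = 1000 / parallax[mas]. Inverting noisy parallaxes like this is biased, especially at low signal-to-noise, but it's fine for a quick look at the positive-parallax sources:

    pos = tt.parallax > 0
    d_pc = 1000.0 / tt.parallax[pos]            # naive distance in parsecs
    sns.distplot(d_pc[d_pc < 2000], kde=False)  # clip the tail so the histogram stays readable
    plt.xlabel('naive distance (pc)')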
    
    
In [28]:
    
sns.distplot(tt.parallax_error)
    
    
    Out[28]:
    
In [30]:
    
bins = np.arange(0, 160, 2)
sns.distplot(tt.astrometric_n_obs_ac, bins=bins, kde=False, label='n_obs_ac')
sns.distplot(tt.astrometric_n_bad_obs_ac, bins=bins, kde=False, label='n_bad_obs_ac')
sns.distplot(tt.astrometric_n_good_obs_ac, bins=bins, kde=False, label='n_good_obs_ac')
plt.legend()
    
    Out[30]:
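If these columns follow the obvious convention that good plus bad observations sum to the total, a one-line sanity check should come back True. That convention is an assumption about the Gaia data model, not something verified above:

    ((tt.astrometric_n_good_obs_ac + tt.astrometric_n_bad_obs_ac) == tt.astrometric_n_obs_ac).all()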
    
In [31]:
    
sns.distplot(tt.phot_g_mean_mag)
    
    
    Out[31]:
    
In [32]:
    
bins = np.arange(0, 40, 1)
sns.distplot(tt.matched_observations, bins=bins, kde=False)
    
    Out[32]:
    
In [33]:
    
tt.iloc[0]
    
    Out[33]:
In [34]:
    
gi = tt.astrometric_delta_q == tt.astrometric_delta_q  # same non-NaN trick as before
bins = np.arange(0, 500, 5)
sns.distplot(tt.astrometric_delta_q[gi], bins=bins, kde=False)
plt.yscale('log')
    
    
In [35]:
    
tt.phot_variable_flag.unique()
    
    Out[35]:
In [36]:
    
vi = tt.phot_variable_flag == 'VARIABLE'
    
In [37]:
    
vi.sum(), len(vi)
    
    Out[37]:
Only one variable star in the entire TGAS sample, which is about what you'd expect.
In [38]:
    
tt[vi]
    
    Out[38]: