Import these first (I auto-import them every time):
In [4]:
#! cat /Users/gully/.ipython/profile_default/startup/start.ipy
In [5]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
In [6]:
import os
In [ ]:
for i in range(16):
    fn = 'http://cdn.gea.esac.esa.int/Gaia/tgas_source/csv/TgasSource_000-000-{:03d}.csv.gz'.format(i)
    executable = 'wget ' + fn
    print(executable)
    # os.system(executable)  # Uncomment to actually download
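If wget isn't available on your system, a pure-Python sketch using only the standard library does the same thing:
In [ ]:
from urllib.request import urlretrieve

for i in range(16):
    url = 'http://cdn.gea.esac.esa.int/Gaia/tgas_source/csv/TgasSource_000-000-{:03d}.csv.gz'.format(i)
    # urlretrieve(url, url.split('/')[-1])  # Uncomment to actually download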
In [5]:
#! mv Tgas* ../data
In [6]:
#! gzip -d ../data/Tgas*
In [7]:
! ls ../data/Tgas*
Compare to a file from the full Gaia source catalog (downloaded in the previous notebook, or manually):
In [8]:
#! wget http://cdn.gea.esac.esa.int/Gaia/gaia_source/csv/GaiaSource_000-000-000.csv.gz
#! mv GaiaSource_000-000-000.csv.gz ../data/
In [9]:
! ls ../data/GaiaSource*
In [10]:
import pandas as pd
In [11]:
%time t000 = pd.read_csv('../data/TgasSource_000-000-000.csv')
In [12]:
%time g000 = pd.read_csv('../data/GaiaSource_000-000-000.csv')
In [13]:
set(t000.columns) - set(g000.columns)
Out[13]:
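That set difference lists the columns unique to TGAS. As a quick sanity check, the reverse difference shows whether the full catalog has any columns that TGAS lacks:
In [ ]:
set(g000.columns) - set(t000.columns)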
TGAS is just the subset of sources with parallaxes and proper motions available; the full Gaia source catalog has only positions and magnitudes, but for over a billion sources:
#GaiaDR1 details: 1 billion stars w/ position + magnitude; 2 million stars w/ pos + mag + parallax + proper motion; 3194 variable stars; 2152 quasars
— ESA Science (@esascience) September 14, 2016
In [14]:
len(t000), len(g000)
Out[14]:
In [15]:
p_i = t000.parallax == t000.parallax  # NaN != NaN, so this keeps only rows with a measured parallax
tp000 = t000[p_i]
p_i.sum()
Out[15]:
In [16]:
p_i = g000.parallax == g000.parallax  # same NaN-dropping trick for the full Gaia source file
gp000 = g000[p_i]
p_i.sum()
Out[16]:
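The parallax == parallax comparison works because NaN != NaN. If you prefer something more self-documenting, pandas' built-in null check gives the same mask (a sketch for the TGAS file):
In [ ]:
p_i = t000.parallax.notnull()  # True wherever parallax is not NaN
tp000 = t000[p_i]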
In [17]:
sns.set_color_codes()
For a single file, TGAS covers much more sky area than the full catalog does. The individual files are capped at roughly 40 MB.
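A quick check of the on-disk sizes (a sketch; the gunzipped .csv files will be larger than the compressed downloads):
In [ ]:
(os.path.getsize('../data/TgasSource_000-000-000.csv') / 1e6,
 os.path.getsize('../data/GaiaSource_000-000-000.csv') / 1e6)  # sizes in MB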
In [18]:
plt.plot(tp000.ra[0:2], tp000.dec[0:2], 'b.', label='TGAS')  # plot two opaque points first so the legend markers aren't faint
plt.plot(gp000.ra[0:2], gp000.dec[0:2], 'r.', label='Gaia Source')
plt.plot(tp000.ra.values, tp000.dec.values, 'b.', alpha=0.1)
plt.plot(gp000.ra.values, gp000.dec.values, 'r.', alpha=0.1)
plt.legend(loc='lower left')
Out[18]:
In [19]:
df_list = []
This takes a non-negligible amount of RAM, but should be fine on a modern laptop.
In [20]:
for i in range(16):
    df_list.append(pd.read_csv('../data/TgasSource_000-000-{:03d}.csv'.format(i)))
In [21]:
tt = pd.concat(df_list, ignore_index=True)
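To put that RAM usage in numbers, pandas can report the concatenated table's in-memory footprint (deep=True also counts the string columns):
In [ ]:
tt.memory_usage(deep=True).sum() / 1e9  # total size in GB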
In [22]:
t000.shape
Out[22]:
In [24]:
tt.shape
Out[24]:
In [25]:
len(tt.source_id.unique())
Out[25]:
So: 2.05+ million sources, each with 59 "features", or columns of metadata.
In [26]:
plt.plot(tt.parallax, tt.parallax_error, '.', alpha=0.005)
plt.xscale('log')
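Note that the log-scaled x axis silently drops sources with non-positive parallaxes, which are legitimate measurement outcomes given the noise. A quick check of how big that fraction is:
In [ ]:
(tt.parallax <= 0).sum() / len(tt)  # fraction of sources with non-positive parallax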
In [27]:
bins = np.arange(-50, 200, 3)
sns.distplot(tt.parallax, bins=bins, kde=False)
plt.yscale('log')
In [28]:
sns.distplot(tt.parallax_error)
Out[28]:
In [30]:
bins = np.arange(0, 160, 2)
sns.distplot(tt.astrometric_n_obs_ac, bins=bins, kde=False, label='n_obs_ac')
sns.distplot(tt.astrometric_n_bad_obs_ac, bins=bins, kde=False, label='n_bad_obs_ac')
sns.distplot(tt.astrometric_n_good_obs_ac, bins=bins, kde=False, label='n_good_obs_ac')
plt.legend()
Out[30]:
In [31]:
sns.distplot(tt.phot_g_mean_mag)
Out[31]:
In [32]:
bins = np.arange(0, 40, 1)
sns.distplot(tt.matched_observations, bins=bins, kde=False)
Out[32]:
In [33]:
tt.iloc[0]
Out[33]:
In [34]:
gi = tt.astrometric_delta_q == tt.astrometric_delta_q  # NaN != NaN, so this keeps rows where delta_q is defined
bins = np.arange(0, 500, 5)
sns.distplot(tt.astrometric_delta_q[gi], bins=bins, kde=False)
plt.yscale('log')
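As with the parallax filter above, it's worth knowing what fraction of TGAS sources have astrometric_delta_q defined at all:
In [ ]:
gi.sum(), len(gi)  # sources with a finite astrometric_delta_q vs. total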
In [35]:
tt.phot_variable_flag.unique()
Out[35]:
In [36]:
vi = tt.phot_variable_flag == 'VARIABLE'
In [37]:
vi.sum(), len(vi)
Out[37]:
Only one variable star in the entire TGAS sample, which is about what you'd expect.
In [38]:
tt[vi]
Out[38]: