In [1]:
!curl -OJ https://girder.hub.yt/api/v1/file/57fcf27bb8805f000164ab40/download
# Windows 10
In [2]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
In [3]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
In [4]:
# Load every top-level entry of the HDF5 file into memory, keyed by its name,
# so that the file can be closed as soon as the cell finishes.
# The file is opened explicitly read-only: this cell never writes, and the
# default mode used when none is given has differed between h5py releases.
with h5py.File("gaia_validp.h5", "r") as f:
    # `[:]` reads the whole dataset, so the values stay usable after the
    # file is closed.
    data = {name: f[name][:] for name in f}
In [5]:
# List the names under which the file's entries were stored in `data`.
print(data.keys())
In [6]:
# Check the container type of `data` (it is built as a plain dict when the file is loaded).
type(data)
Out[6]:
In [7]:
# Summarise each column by shape and dtype rather than echoing the raw arrays:
# dumping every full array floods the output and hides the overall structure.
{name: (values.shape, values.dtype) for name, values in data.items()}
Out[7]:
https://gaia.esac.esa.int/documentation/GDR1/datamodel/Ch1/gaia_source.html
Barycentric declination δ of the source in ICRS at the reference epoch ref_epoch
Absolute barycentric stellar parallax ϖ of the source at the reference epoch ref_epoch
Mean magnitude in the G band. This is computed from the G-band mean flux applying the magnitude zero-point in the Vega scale.
Proper motion in declination μδ of the source at the reference epoch ref_epoch. This is the projection of the proper motion vector in the direction of increasing declination.
Proper motion in right ascension μα* of the source in ICRS at the reference epoch ref_epoch. This is the projection of the proper motion vector in the direction of increasing right ascension.
Barycentric right ascension α of the source in ICRS at the reference epoch ref_epoch
In [8]:
# Histogram of dec to show its distribution.
# The number of values falling in each interval gives a general picture of
# the dec data.
fig, ax = plt.subplots()
ax.hist(data['dec'])
ax.set_title('Histogram of dec')
ax.set_xlabel('dec')
# Label the y axis too, so the figure can be read on its own.
ax.set_ylabel('count')
ax.grid(True)
plt.show()
# Observation: most values lie roughly between -75 and 75.
# Within that range, the intervals (-75, -18) and (18, 55) hold the most values.
In [9]:
# Since dec is distributed fairly centrally, a boxplot is an intuitive second view:
# its quantiles are meaningful, and (author's observation) it shows no outliers.
fig, ax = plt.subplots()
ax.boxplot(data['dec'])
ax.set_title('Boxplot of dec')
plt.show()
In [10]:
# Line plot of the raw parallax values.
# Every value is drawn as-is, with no binning or approximation, so detailed
# patterns such as a repeating cycle remain visible.
fig, ax = plt.subplots()
ax.plot(data['parallax'], "g")
ax.set_title('parallax')
# Only one array is passed, so the x axis is the element index; both axes are
# labelled so the figure can be read on its own.
ax.set_xlabel('index')
ax.set_ylabel('parallax')
ax.grid(True)
plt.show()
# Observation: the values seem to follow a rough cycle.
# Looking more closely at the largest values would help to understand this better.
In [11]:
# Histogram of phot_g_mean_mag to show its distribution.
# This is meant as an overview of the data rather than a look at each
# individual value.
fig, ax = plt.subplots()
ax.hist(data['phot_g_mean_mag'], bins=30)
ax.set_title('Histogram of phot_g_mean_mag')
ax.set_xlabel('phot_g_mean_mag')
# Label the y axis too, so the figure can be read on its own.
ax.set_ylabel('count')
ax.grid(True)
plt.show()
# Observation: most values lie between 8 and 13.
# The histogram also suggests a left-skewed distribution.
In [12]:
# Hexbin plot of dec (y axis) against ra (x axis).
# This cell is mainly an experiment with hexbin: among the columns tried,
# ra and dec gave the most appealing picture. More background knowledge is
# needed before the pattern can be interpreted.
fig, ax = plt.subplots()
hexes = ax.hexbin(data['ra'], data['dec'], cmap='plasma')
ax.set_title("Hexbin plot of ra vs dec")
# Axis labels and a colorbar make the figure readable on its own; with no
# weights passed, each hexagon's colour encodes how many points fall in it.
ax.set_xlabel('ra')
ax.set_ylabel('dec')
cbar = fig.colorbar(hexes, ax=ax)
cbar.set_label('count')
plt.show()
In [13]:
# Number of elements stored under 'pmra'.
data['pmra'].size
Out[13]:
In [14]:
# Number of elements stored under 'ra'.
data['ra'].size
Out[14]:
In [15]:
# Scatter plot of pmra against ra, to look at how the proper motion relates to
# the position values.
ra = data['ra']
fig, ax = plt.subplots()
# Each point is coloured by its position in the array. The index may carry
# some meaning, but that is only a guess. Dropping the marker edges
# (edgecolors='none') makes it easier to see most of the values.
points = ax.scatter(
    ra,
    data['pmra'],
    c=np.arange(ra.size),
    alpha=0.5,
    edgecolors='none',
)
# The colorbar therefore shows the array index of the points.
fig.colorbar(points, ax=ax)
ax.set_xlabel('ra')
ax.set_ylabel('pmra')
ax.set_title('ra vs pmra')
plt.show()
In [ ]: