This notebook downloads all FITS data.
The list of files is in the file ondrejov-labeled-spectra.csv
.
These spectra have been classified with the
Spectral View tool.
In [ ]:
%matplotlib inline
In [ ]:
import urllib.request
import urllib.parse
import io
import os
import csv
import glob
from functools import partial
from itertools import count
import numpy as np
from astropy.io import fits
import matplotlib.pyplot as plt
In [ ]:
# Path of the CSV file from which the spectra identifiers are read.
LABELS_FILE = 'data/ondrejov-dataset.csv'
# IPython shell escape: show the first lines of the file for a quick look.
!head $LABELS_FILE
In [ ]:
with open(LABELS_FILE, newline='') as f:
    reader = csv.DictReader(f)
    # Public ids are unique, so nothing is lost by keeping them in a set,
    # and the set difference with already downloaded files is easy later.
    spectra_idents = {row['id'] for row in reader}
# number of distinct spectra listed in the labels file
len(spectra_idents)
This is not very relevant now, since only datalink is used to download normalized spectra.
SSAP (SSA) defines a uniform interface to remotely discover and access one-dimensional spectra. Spectral data access may involve active transformation of data. SSA also defines complete metadata to describe the available datasets. It makes use of VOTable for metadata exchange.
A query is used for data discovery and to negotiate the details of the static or dynamically created dataset to be retrieved. SSA allows mediating not only dataset metadata but also the actual dataset itself. Direct access to data is also provided.
A single service may support multiple operations to perform various functions. The current interface uses an HTTP GET request to submit parametrized requests, with responses being returned as, for example, FITS or VOTable. Defined operations are the following:
In [ ]:
def request_url(url, timeout=60):
    '''Make HTTP request and return response data.

    Parameters
    ----------
    url : str
        Address to fetch.
    timeout : float or None, optional
        Timeout in seconds for blocking network operations, handed to
        ``urllib.request.urlopen``. ``None`` means wait without a limit.

    Returns
    -------
    bytes or None
        The raw response body, or ``None`` when the request failed.
        Failures are printed instead of raised, so callers must check
        for ``None``.
    '''
    try:
        # Without a timeout a single unresponsive server would block
        # the whole download run forever.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            return response.read()
    except Exception as e:
        # Deliberately best effort: report the problem and let the caller
        # decide what to do with the missing data.
        print(e)
        return None
Datalink is a service for working with spectra. For information about the one which is used here see http://voarchive.asu.cas.cz/ccd700/q/sdl/info.
In [ ]:
datalink_service = 'http://voarchive.asu.cas.cz/ccd700/q/sdl/dlget'


def make_datalink_url(
        pub_id, fluxcalib=None, wave_min=None, wave_max=None,
        file_format='application/fits', url=datalink_service
):
    '''Build the URL of a datalink request for one spectrum.

    Parameters
    ----------
    pub_id : str
        Public identifier of the spectrum, sent as ``ID``.
    fluxcalib : str, optional
        Sent as ``FLUXCALIB`` when given and non-empty.
    wave_min, wave_max : number, optional
        Bounds sent together as ``BAND`` ("min max"). The parameter is
        only added when both bounds are given.
    file_format : str, optional
        Sent as ``FORMAT`` when given and non-empty.
    url : str, optional
        Base address of the service.

    Returns
    -------
    str
        ``url`` followed by ``?`` and the URL-encoded parameters.
    '''
    url_parameters = {'ID': pub_id}
    if fluxcalib:
        url_parameters['FLUXCALIB'] = fluxcalib
    # Compare with None instead of relying on truthiness: a bound of 0
    # is a number the caller asked for and must not drop the BAND.
    if wave_min is not None and wave_max is not None:
        url_parameters['BAND'] = '{} {}'.format(wave_min, wave_max)
    if file_format:
        url_parameters['FORMAT'] = file_format
    return url + '?' + urllib.parse.urlencode(url_parameters)


make_datalink_url(
    'ivo://asu.cas.cz/stel/ccd700/sh270028',
    fluxcalib='normalized',
    wave_min=6500e-10, wave_max=6600e-10
)
In [ ]:
def plot_fluxcalib(fluxcalib, ax,
                   pub_id='ivo://asu.cas.cz/stel/ccd700/sh270028'):
    '''Download one spectrum with the given flux calibration and plot it.

    Parameters
    ----------
    fluxcalib : str or None
        Value passed to ``make_datalink_url`` as ``fluxcalib``.
    ax : matplotlib axes
        Axes that receive the title and the plotted curve.
    pub_id : str, optional
        Public identifier of the spectrum to plot.
    '''
    # create the datalink service URL
    datalink_url = make_datalink_url(pub_id, fluxcalib=fluxcalib)
    # download the data
    fits_data = request_url(datalink_url)
    ax.set_title('fluxcalib is ' + str(fluxcalib))
    if fits_data is None:
        # request_url reports a failed download by returning None (it has
        # already printed the error); leave the panel empty instead of
        # handing None to the FITS reader.
        return
    # open the data as file; the context manager closes the HDU list again
    with fits.open(io.BytesIO(fits_data)) as hdulist:
        table = hdulist[1].data
        # plot it
        ax.plot(table['spectral'], table['flux'])
# Flux calibration modes to compare, one subplot for each of them.
fluxcalibs = [None, 'normalized', 'relative', 'UNCALIBRATED']
# The figure has to be bound to the same name that tight_layout() is
# called on below (it used to be bound to a misspelled name).
fig, axs = plt.subplots(len(fluxcalibs), 1)
for fluxcalib, ax in zip(fluxcalibs, axs):
    plot_fluxcalib(fluxcalib, ax)
fig.tight_layout()
In [ ]:
def download_spectrum(pub_id, n, directory, fluxcalib, minimum=None, maximum=None):
    '''Download one spectrum and store it as ``<name>.fits`` in a directory.

    Parameters
    ----------
    pub_id : str
        Public identifier; the part after the last ``/`` is the file name.
    n : int
        Running number, only used in the progress message.
    directory : str
        Target directory (with or without a trailing separator).
    fluxcalib : str or None
        Flux calibration passed on to ``make_datalink_url``.
    minimum, maximum : number, optional
        Bounds passed on to ``make_datalink_url``.

    Returns
    -------
    str or None
        The spectrum name when the download failed, ``None`` on success.
    '''
    # get the name from public id
    name = pub_id.split('/')[-1]
    path = os.path.join(directory, name + '.fits')
    url = make_datalink_url(pub_id, fluxcalib, minimum, maximum)
    print('{:5} downloading {}'.format(n, name))
    data = request_url(url)
    if data is None:
        # request_url signals failure by returning None, not by raising.
        # Bail out before the file is opened so that no empty .fits file
        # is left behind to be mistaken for a finished download.
        return name
    with open(path, 'wb') as f:
        f.write(data)
    return None
In [ ]:
# Directory where the downloaded FITS files are stored.
FITS_DIR = 'data/ondrejov/'
# Create the directory together with any missing parents. An existing
# directory is fine, any other problem is raised instead of being hidden.
os.makedirs(FITS_DIR, exist_ok=True)
# Downloader preset for this dataset: only the public id and the running
# number remain to be supplied.
ondrejov_downloader = partial(
    download_spectrum,
    directory=FITS_DIR,
    fluxcalib='normalized'
)
In [ ]:
ccd700_prefix = 'ivo://asu.cas.cz/stel/ccd700/'


def get_pub_id(path, prefix=ccd700_prefix):
    '''Return ``prefix`` followed by the file name of ``path`` without
    its directory part and without its last extension.'''
    filename = os.path.basename(path)
    stem, _extension = os.path.splitext(filename)
    return prefix + stem


get_pub_id('ssap/uh260033.fits')
In [ ]:
# Skip every spectrum whose FITS file is already present in FITS_DIR.
spectra_idents -= set(map(get_pub_id, glob.glob(FITS_DIR + '*.fits')))
# The downloader returns the spectrum name on failure and None on success.
# (The variable name is kept as it was in case later cells refer to it.)
donwload_info = [
    ondrejov_downloader(pub_id, n)
    for n, pub_id in enumerate(spectra_idents, start=1)
]
failed = [name for name in donwload_info if name is not None]
if failed:
    # do not claim success when some downloads did not go through
    print('{} spectra failed to download: {}'.format(
        len(failed), ', '.join(failed)
    ))
else:
    print('All spectra downloaded.')