In [1]:
from __future__ import print_function # to use Python 3 features
%matplotlib inline
from matplotlib import pyplot as plt
# for prettier plots; you can comment this out if you don't have seaborn
import seaborn as sns; sns.set(context="poster")
import numpy as np
See: https://docs.google.com/document/d/1COdCXs4K6kAXLcVvYxG3fqS53l2gzbkDvbbTmm8ZF1U/edit?usp=sharing
In [2]:
# Example draw: 20 event counts from a Poisson distribution with rate 1.5.
poisson_samples = np.random.poisson(size=20, lam=1.5)
print(poisson_samples)
In [3]:
# Example draw: 20 values from a normal distribution centred on -5.0
# with standard deviation 2.0.
gaussian_samples = np.random.normal(size=20, loc=-5.0, scale=2.0)
print(gaussian_samples)
In [4]:
def add_samples(dictionary_of_samples, key, samples):
    """Append `samples` to the list stored under `key`, creating it if needed.

    Parameters
    ----------
    dictionary_of_samples : dict
        Maps each key to a list of samples. Updated in place.
    key : hashable
        Entry to extend. A new, empty list is created the first time a key
        is seen.
    samples : iterable
        Values to append. A list is the expected input, but any iterable
        (tuple, numpy array, ...) is appended element by element.

    Returns
    -------
    None
    """
    # The dictionary always owns its own list: storing the caller's `samples`
    # object directly would alias it, so a later call with the same key would
    # silently grow the caller's list as well.
    dictionary_of_samples.setdefault(key, []).extend(samples)
In [5]:
# Sanity check for add_samples: adding the same three values twice under one
# key should leave six values stored under that key.
test_dictionary_of_samples = {}
key = "test"
samples = [1,2,3]
for _repeat in range(2):
    add_samples(test_dictionary_of_samples, key, samples)

is_ok = test_dictionary_of_samples[key] == [1,2,3,1,2,3]
print("OK" if is_ok else "Error: add_samples isn't behaving right")
In [6]:
# Build a dictionary of random samples. Each key records the distribution
# name followed by the parameters used to draw the samples stored under it:
#   ("gaussian", loc, scale)   or   ("poisson", lam)
dictionary_of_samples = {}
N_samples = 10

key = ("gaussian", -5.0, 2.0)
gaussian_samples = list(np.random.normal(loc=key[1], scale=key[2], size=N_samples))
add_samples(dictionary_of_samples, key, gaussian_samples)

key = ("gaussian", 10, 1.0)
gaussian_samples = list(np.random.normal(loc=key[1], scale=key[2], size=N_samples))
add_samples(dictionary_of_samples, key, gaussian_samples)

key = ("poisson", 1.5)
poisson_samples = list(np.random.poisson(lam=key[1], size=N_samples))
add_samples(dictionary_of_samples, key, poisson_samples)

# These samples are drawn from a Poisson distribution, so the key must be
# labelled "poisson"; labelling it "gaussian" would misdescribe the entry.
key = ("poisson", 2.0)
poisson_samples = list(np.random.poisson(lam=key[1], size=N_samples))
add_samples(dictionary_of_samples, key, poisson_samples)
In [7]:
# Display the accumulated samples; keys are tuples of the distribution name
# followed by its parameters.
dictionary_of_samples
Out[7]:
In [8]:
# Load the SDSS corrected-spectra data set through astroML's dataset helpers.
# NOTE(review): presumably this downloads and caches the data on first use — confirm.
from astroML import datasets
data = datasets.fetch_sdss_corrected_spectra()
In [9]:
# columns available within the data structure
# (the "lineindex_cln" and "z" entries are the ones used further down)
data.keys()
Out[9]:
In [10]:
def separate_redshifts_of_galaxies(data):
    """
    Split the SDSS redshifts into two lists according to line classification.

    Parameters
    ----------
    data : npz file
        Must be the data structure returned from fetch_sdss_corrected_spectra().
        Only the "lineindex_cln" and "z" entries are read.

    Returns
    -------
    star_formation_dominated_redshifts : list
        Redshifts of the entries whose "lineindex_cln" value equals 4.
    agn_dominated_redshifts : list
        Redshifts of the entries whose "lineindex_cln" value equals 5.
    """
    line_class = data["lineindex_cln"]
    redshifts = data["z"]

    def redshifts_with_class(class_code):
        # Boolean-mask the redshift column down to a single classification.
        return list(redshifts[line_class == class_code])

    # 4 selects star-formation-dominated galaxies, 5 selects AGN-dominated ones.
    return redshifts_with_class(4), redshifts_with_class(5)
# Split the loaded data into the two redshift lists used by the histogram cell.
star_formation_dominated_redshifts, agn_dominated_redshifts = separate_redshifts_of_galaxies(data)
In [11]:
# Compare the redshift distributions of the two galaxy classes.
# `density=True` normalizes each histogram to unit area so the two samples
# can be compared despite different sizes. It replaces the `normed=True`
# keyword, which was deprecated in matplotlib 2.1 and removed in 3.1.
plt.hist(star_formation_dominated_redshifts,
         density=True, label="Star Forming")
plt.hist(agn_dominated_redshifts,
         density=True, label="AGN")
plt.title("Galaxy redshifts by classification")
plt.xlabel("Redshift")
plt.ylabel("Counts (Normalized)")
plt.legend(loc="best")
Out[11]: