Here, we look at statistical distributions of bubbles pulled from the full search


In [2]:
%matplotlib inline

import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib
from scipy.stats import ks_2samp


from bubbly.dr1 import bubble_params
from bubbly.cluster import xmatch

matplotlib.rc_file('matplotlibrc')
matplotlib.rcParams['interactive'] = True
matplotlib.rcParams['axes.grid'] = False
matplotlib.rcParams['axes.facecolor'] = 'white'
matplotlib.rcParams['font.size'] = 13
matplotlib.rcParams['figure.facecolor'] = 'white'
figures = {}

def remove_border(axes=None, top=False, right=False, left=True, bottom=True):
    """
    Minimize chartjunk by stripping out unnecesasry plot borders and axis ticks
    
    The top/right/left/bottom keywords toggle whether the corresponding plot border is drawn
    """
    ax = axes or plt.gca()
    ax.spines['top'].set_visible(top)
    ax.spines['right'].set_visible(right)
    ax.spines['left'].set_visible(left)
    ax.spines['bottom'].set_visible(bottom)
    
    #turn off all ticks
    ax.yaxis.set_ticks_position('none')
    ax.xaxis.set_ticks_position('none')
    
    #now re-enable visibles
    if top:
        ax.xaxis.tick_top()
    if bottom:
        ax.xaxis.tick_bottom()
    if left:
        ax.yaxis.tick_left()
    if right:
        ax.yaxis.tick_right()


DEBUG:Cloud:Log file (/Users/beaumont/.picloud/cloud.log) opened

In [3]:
from sklearn.neighbors import NearestNeighbors
from astropy.table import Table

def _xmatch_hii(lon, lat):
    hrds = Table.read('../data/hrds_distances.txt', format='cds')
    xy = np.column_stack((hrds['GLON'], hrds['GLAT']))
    nn = NearestNeighbors(n_neighbors=1).fit(xy)
    d, ind = nn.kneighbors(np.column_stack((lon, lat)))
    d = d.ravel()
    ind = ind.ravel()
    return hrds[ind], d
    
def bubble_distances(lon, lat, size):
    hrds, d = _xmatch_hii(lon, lat)
    result = np.array(hrds['D'])
    result[d > size] = np.nan
    return result

def matched_hii(lon, lat, size):
    hrds, d = _xmatch_hii(lon, lat)
    return d <= size

def xmatch_mwp(lon, lat, size):
    lon = np.atleast_1d(lon)
    lat = np.atleast_1d(lat)
    size = np.atleast_1d(size)
    
    _mwp = np.array(bubble_params())
    _mwp[_mwp[:, 1] > 270, 1] -= 360
    nn_mwp = NearestNeighbors().fit(_mwp[:, 1:3])
    result = np.zeros(lon.size, dtype=np.int) - 1
    
    for i in range(lon.size):
        l, b, s = lon[i], lat[i], size[i]
        ind = nn_mwp.radius_neighbors([[l, b]], radius=s, return_distance=False)[0]
        size_ratio = _mwp[ind, 3] / s
        size_ratio = np.maximum(size_ratio, 1. / size_ratio)
        good = size_ratio < 2.5
        if good.any():
            result[i] = ind[good][0]                
            
    return result

In [4]:
data = pd.read_csv('../data/full_search.csv', delimiter=',')
data.lon[data.lon > 270] -= 360
data['rad'] *= 60
data['diam'] = data['rad'] * 2
data['dist'] = bubble_distances(data.lon.values, data.lat.values, data.rad.values)
data['has_hii'] = matched_hii(data.lon.values, data.lat.values, data.rad.values / 60.)
data['prob'] = data['score']

In [5]:
ind = xmatch_mwp(data.lon.values, data.lat.values, data.rad.values / 60)

mwp_match = data[ind >= 0]
mwp_unmatch = data[ind < 0]

In [6]:
mwp_match.shape, mwp_unmatch.shape


Out[6]:
((1500, 8), (907, 8))

In [7]:
data['prob'].sum()


Out[7]:
1243.8549999999966

In [8]:
def compare(c, **kwargs):
    kwargs.setdefault('lw', 2)
    kwargs.setdefault('histtype', 'step')
    
    labels = ['Unmatched', 'Matched to MWP']
    color = ['#7570B3', '#E6AB02']
    color = [ '#FE9929', '#1D91C0']
    result = plt.hist([mwp_unmatch[c], mwp_match[c]], 
                      color = color,
                      label=labels, **kwargs)
    plt.ylabel("Number of Bubbles")
    return result

Longitude Distribution


In [9]:
figures['lon'] = plt.figure(figsize=(5.3, 3.5), tight_layout=True)
bins, num, artists = compare('lon', bins=np.arange(-67, 67, 1), 
                             lw=2, histtype='step', stacked=False)

def arm(l1, l2, name):
    opts = dict(color='#cccccf', zorder=0)
    plt.axvspan(l1, l2, **opts)
    plt.annotate(name, xy=((l1 + l2) / 2, 110), 
                 rotation=20, ha='left', va='bottom')
    
#add offset to each histogram
for offset, artist in zip([0, 30], artists):
    artist = artist[0]
    xy = artist.get_xy()
    xy[:, 1] += offset
    baseline = np.array([[xy[:, 0].min(), offset], [xy[:, 0].max(), offset]])
    xy = np.vstack((xy, baseline))
    artist.set_xy(xy)
    artist.set_fill(True)
    artist.set_facecolor(artist.get_edgecolor())
    
plt.xlabel("Galactic Longitude (Degrees)")
plt.ylabel("Number of Bubbles + Offset")

plt.legend(loc='upper left', frameon=False)
plt.xlim(65, -65)
plt.ylim(0, 80)


Out[9]:
(0, 80)
/Users/beaumont/anaconda/lib/python2.7/site-packages/matplotlib/figure.py:1595: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "

In [10]:
figures['lat'] = plt.figure(tight_layout=True)
compare('lat', bins=np.linspace(-1.1, 1.1, 30))
plt.xlabel("Galactic Latitude (Degrees)")
plt.legend(loc='upper left', frameon=False, fontsize=11)

cuts = pd.cut(data.prob, [0, .5, .8, 1])
print data.lat.mean(), data.lat.std()
data.groupby(cuts).lat.agg([np.mean, np.std])
plt.xlim(-1.1, 1.1)
plt.ylim(0, 150)


-0.0635594165268 0.395608958137
Out[10]:
(0, 150)

In [11]:
figures['diameter'] = plt.figure(tight_layout=True)
compare('diam', bins=np.arange(0, 12, .3))
plt.xlabel("Bubble Angular Diameter (arcmin)")
plt.legend(loc='upper right', frameon=False)

#plt.yscale('log')


Out[11]:
<matplotlib.legend.Legend at 0x10d9e1a10>

In [12]:
figures['dist'] = plt.figure(tight_layout=True)
compare('dist', bins = np.arange(20), normed=True, cumulative=True)
plt.xlabel("Bubble Distance (kpc)")
plt.ylabel("Cumulative Fraction of Bubbles")
plt.legend(loc='upper left', frameon=False)
plt.ylim(0, 1.05)


Out[12]:
(0, 1.05)

In [13]:
figures['hii_score'] = plt.figure(tight_layout=True)
plt.hist([data[data.has_hii].prob, data[~data.has_hii].prob], 
         bins=np.linspace(0, 1, 15), histtype='step', lw=2, normed=True,
         label=['H$_{II}$ Region', 'No H$_{II}$ Region'],
         color = ['#DE2D26', '#252525'])
plt.xlabel("Score")
plt.yticks([])
plt.ylabel("Normalized Fraction")
plt.legend(loc='upper left')


Out[13]:
<matplotlib.legend.Legend at 0x10ccfd7d0>

In [14]:
figures['score'] = plt.figure(tight_layout=True)
compare('score', bins=np.arange(0, 1.2, .1))
plt.xlabel("Brut Score")
plt.legend(loc='upper right', frameon=False, fontsize=12)
plt.ylim(0, 500)
plt.xlim(0, 1.01)
remove_border()


Probable new bubbles


In [15]:
_mwp = np.array(bubble_params())
nn_mwp = NearestNeighbors(radius=1).fit(_mwp[:, 1:3])
def nearby_mwp(l, b):    
    dist, ind = nn_mwp.radius_neighbors([l, b])
    return _mwp[ind[0]]

In [16]:
ind = np.argsort(mwp_unmatch.score.values)[::-1]
thresh = mwp_unmatch.score > .7

import bubbly.field, bubbly.extractors
reload(bubbly.field)
reload(bubbly.extractors)

from bubbly.extractors import RGBExtractor
ex = RGBExtractor()
ex.shp = (200, 200)

In [17]:
base = 'http://milkywayproject.org/tools/coordinates'

#for i in range(0):
#for i in range(50):
#    item = mwp_unmatch.irow(ind[i])
#for item in [mwp_unmatch.irow(868)]:
#for y in yes:
for _ in []:
    item = mwp_unmatch.irow(y)
    lon, lat, rad = item['lon'], item['lat'], item['rad'] / 60.
    if lon < 0:
        lon += 360
    rgb = ex.extract(np.round(lon), lon, lat, rad)
    extent = [lon + rad, lon - rad, lat - rad, lat + rad]
    plt.imshow(rgb, extent=extent)
    m = nearby_mwp(lon, lat)
    
    print ind[i]
    
    print "%s/%0.4f/%0.4f" % (base, lon, lat)
    
    print "Overplot %i MWP bubbles" % len(m)
    for bub in m:
        c = plt.Circle((bub[1], bub[2]), radius=bub[3], fc='#0000ff', ec='r', lw=3, alpha=.5)
        plt.gca().add_patch(c)
        
    plt.title("Score=%0.3f" % item['score'])
    plt.show()

In [18]:
yes = 45, 708, 1, 526, 773, 162, 37, 868

In [19]:
fig, axes = plt.subplots(ncols=4, nrows=2, figsize=(6, 3.5), tight_layout=True)
figures['new_gallery'] = fig

for y, a in zip(yes, axes.ravel()):
    item = mwp_unmatch.irow(y)
    lon, lat, rad = item['lon'], item['lat'], item['rad'] / 60.
    if lon < 0:
        lon += 360
    rad *= 1.5
    rgb = ex.extract(np.round(lon), lon, lat, rad)
    extent = [lon + rad, lon - rad, lat - rad, lat + rad]    
    a.imshow(rgb, extent=extent)
    rgb[-70:, :140] = (rgb[-70:, :140] * .8).astype(np.uint8)
    
    a.plot( [lon + .9 * rad, lon + .9 * rad - 1 / 60.],
            [lat - .9 * rad, lat - .9 * rad], color='w', lw=2)
    a.annotate("1'", xy=(lon + .9 * rad - .5 / 60, lat - .8 * rad), 
               color='white', ha='center')
    a.annotate("$\ell=%0.2f^\circ$" % lon,
               xy = (lon + .9 * rad, lat + .75 * rad), 
               color = 'white', fontsize=11)
    a.annotate("$b=%0.2f^\circ$" % lat,
               xy = (lon + .9 * rad, lat + .45 * rad), 
               color = 'white', fontsize=11)
    
    a.set_xticks([])
    a.set_yticks([])
    a.set_xlim(lon + rad, lon - rad)
    a.set_ylim(lat - rad, lat + rad)


Loading a new field at l=304
DEBUG:bubbly.field:Loading a new field at l=304
WARNING:astropy:FITSFixedWarning: RADECSYS= 'ICRS ' / Astrometric system 
RADECSYS is non-standard, use RADESYSa.
Loading a new field at l=34
DEBUG:bubbly.field:Loading a new field at l=34
Loading a new field at l=295
DEBUG:bubbly.field:Loading a new field at l=295
Loading a new field at l=10
DEBUG:bubbly.field:Loading a new field at l=10
Loading a new field at l=44
DEBUG:bubbly.field:Loading a new field at l=44
Loading a new field at l=324
DEBUG:bubbly.field:Loading a new field at l=324
Loading a new field at l=302
DEBUG:bubbly.field:Loading a new field at l=302
Loading a new field at l=60
DEBUG:bubbly.field:Loading a new field at l=60
WARNING: FITSFixedWarning: RADECSYS= 'ICRS ' / Astrometric system 
RADECSYS is non-standard, use RADESYSa. [astropy.wcs.wcs]

In [20]:
from bubbly.dr1 import get_catalog
c = get_catalog()
for i in range(70):
    item = mwp_unmatch.irow(ind[i])
    l, b= item[0], item[1]
    if l < 0:
        l += 360
    d = np.hypot(c[:, 0] - l, c[:, 1] - b)
    if 60 * np.min(d) < item[2]:
        continue
    print "%3.3i" % ind[i], "%5.1f" % (60 * np.min(d)), "%s/%0.3f/%0.3f" % (base, l, b)


405   5.8 http://milkywayproject.org/tools/coordinates/356.731/0.038
045   9.1 http://milkywayproject.org/tools/coordinates/304.033/0.291
708   4.0 http://milkywayproject.org/tools/coordinates/34.038/0.051
455  16.7 http://milkywayproject.org/tools/coordinates/359.695/0.060
001  37.9 http://milkywayproject.org/tools/coordinates/295.169/-0.654
526   6.9 http://milkywayproject.org/tools/coordinates/9.833/-0.718
805   3.7 http://milkywayproject.org/tools/coordinates/49.686/-0.335
258   2.7 http://milkywayproject.org/tools/coordinates/339.287/0.246
773   2.8 http://milkywayproject.org/tools/coordinates/43.891/-0.786
058   2.1 http://milkywayproject.org/tools/coordinates/306.704/-0.077
761   4.3 http://milkywayproject.org/tools/coordinates/41.660/-0.020
244   5.3 http://milkywayproject.org/tools/coordinates/337.184/-0.060
196   2.1 http://milkywayproject.org/tools/coordinates/331.043/-0.163
190   4.9 http://milkywayproject.org/tools/coordinates/329.522/0.083
424   9.8 http://milkywayproject.org/tools/coordinates/358.689/-0.089
172   6.1 http://milkywayproject.org/tools/coordinates/327.008/0.059
698  15.6 http://milkywayproject.org/tools/coordinates/32.580/-0.336
019  10.2 http://milkywayproject.org/tools/coordinates/298.254/0.746
524  15.5 http://milkywayproject.org/tools/coordinates/9.642/-0.398
528   2.4 http://milkywayproject.org/tools/coordinates/9.984/-0.753
355   5.9 http://milkywayproject.org/tools/coordinates/351.469/0.559
576   8.1 http://milkywayproject.org/tools/coordinates/17.442/-0.669
807   5.5 http://milkywayproject.org/tools/coordinates/50.042/0.269
266   1.4 http://milkywayproject.org/tools/coordinates/340.069/-0.135
315  18.1 http://milkywayproject.org/tools/coordinates/346.282/0.589
466   9.0 http://milkywayproject.org/tools/coordinates/0.677/0.083
603  13.4 http://milkywayproject.org/tools/coordinates/21.241/0.059
485   8.3 http://milkywayproject.org/tools/coordinates/2.603/0.136
868  11.6 http://milkywayproject.org/tools/coordinates/59.663/0.104
533   1.6 http://milkywayproject.org/tools/coordinates/10.260/0.078
628   4.8 http://milkywayproject.org/tools/coordinates/23.602/-0.198
349   6.7 http://milkywayproject.org/tools/coordinates/350.953/-0.566
026   6.7 http://milkywayproject.org/tools/coordinates/300.380/-0.282
810  27.3 http://milkywayproject.org/tools/coordinates/50.962/0.900
192   4.5 http://milkywayproject.org/tools/coordinates/329.895/0.145
889   4.1 http://milkywayproject.org/tools/coordinates/61.753/0.514
521  16.0 http://milkywayproject.org/tools/coordinates/9.069/-0.740
422   2.1 http://milkywayproject.org/tools/coordinates/358.614/0.092
302   6.0 http://milkywayproject.org/tools/coordinates/345.233/-0.793
225  18.7 http://milkywayproject.org/tools/coordinates/334.269/0.603
298  12.8 http://milkywayproject.org/tools/coordinates/343.625/-0.664
642   8.3 http://milkywayproject.org/tools/coordinates/25.398/0.562
162   7.8 http://milkywayproject.org/tools/coordinates/324.158/0.242
759   6.0 http://milkywayproject.org/tools/coordinates/41.129/-0.237
607   4.3 http://milkywayproject.org/tools/coordinates/21.442/-0.589
662   2.2 http://milkywayproject.org/tools/coordinates/27.851/-0.246
460   4.0 http://milkywayproject.org/tools/coordinates/0.540/-0.073
589   3.0 http://milkywayproject.org/tools/coordinates/18.942/-0.003
368  18.2 http://milkywayproject.org/tools/coordinates/352.456/0.473
039  14.4 http://milkywayproject.org/tools/coordinates/302.986/0.586
150   1.4 http://milkywayproject.org/tools/coordinates/321.931/-0.006
477  12.1 http://milkywayproject.org/tools/coordinates/1.480/-0.253
587   6.8 http://milkywayproject.org/tools/coordinates/18.749/-0.531
195   2.7 http://milkywayproject.org/tools/coordinates/330.957/-0.180
518  17.6 http://milkywayproject.org/tools/coordinates/8.593/0.385

In [ ]: