Find in data intersection between dispersions $\sigma$ and gas profiles $\Sigma(R)$


In [1]:
import requests
import time
from IPython.display import HTML
from IPython.display import Image
import pylab as plt
%pylab inline


Populating the interactive namespace from numpy and matplotlib
C:\Anaconda\lib\site-packages\IPython\core\magics\pylab.py:161: UserWarning: pylab import has clobbered these variables: ['plt']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

In [2]:
def get_ngc_page(number):
    url = 'http://leda.univ-lyon1.fr/G.cgi?n=9&c=o&o=n%s&a=html&ob=ra' % number
    response = requests.get(url)
    time.sleep(3)
    if response.ok:
        return response.content
#     else:
#         raise
        
#     try:
#         r = requests.get(url, params={'s': thing})
#     except requests.exceptions.RequestException as e:    # This is the correct syntax
#         print e
#         sys.exit(1)

In [3]:
HTML(get_ngc_page(3898))


Out[3]:
Kinematical profiles
   objname   |L|RA (B1950) Dec |  Ref  |  F  |angp |  x  |  y   |      vel      |    sig     |

NGC3898       L 114636.2+562141 HSM99   S OR  107.0   0.0    2.8    -38.0 ± 15.0 226.0 ± 20.0
HSM99 S OR 107.0 0.0 -1.8 17.0 ± 16.0 219.0 ± 19.0
HSM99 S OR 107.0 0.0 0.5 0.0 ± 14.0 226.0 ± 18.0
HSM99 S OR 107.0 0.0 -0.7 2.0 ± 18.0 219.0 ± 18.0
HSM99 S OR 107.0 0.0 6.2 -82.0 ± 16.0 194.0 ± 16.0
HSM99 S OR 107.0 0.0 -3.0 66.0 ± 17.0 224.0 ± 22.0
HSM99 S OR 107.0 0.0 -17.9 111.0 ± 18.0 141.0 ± 24.0
HSM99 S OR 107.0 0.0 24.6 -80.0 ± 30.0 235.0 ± 35.0
HSM99 S OR 107.0 0.0 20.0 103.0 ± 21.0 256.0 ± 26.0
HSM99 S OR 107.0 0.0 -13.3 110.0 ± 16.0 153.0 ± 17.0
HSM99 S OR 107.0 0.0 29.2 108.0 ± 34.0 187.0 ± 40.0
HSM99 S OR 107.0 0.0 15.4 116.0 ± 19.0 186.0 ± 21.0
HSM99 S OR 107.0 0.0 10.8 111.0 ± 15.0 166.0 ± 17.0
HSM99 S OR 107.0 0.0 -27.1 105.0 ± 21.0 146.0 ± 29.0
HSM99 S OR 107.0 0.0 -22.5 93.0 ± 23.0 169.0 ± 24.0
HSM99 S OR 107.0 0.0 1.6 -26.0 ± 17.0 217.0 ± 19.0


In [4]:
jobs = jobs_manager()

In [5]:
d1,d2,d3,d4 = [],[],[],[]

In [6]:
# %job [d1.append((x, get_ngc_page(x))) for x in log_progress(range(1, 2000))]
# %job [d2.append((x, get_ngc_page(x))) for x in log_progress(range(2000, 4000))]
# %job [d3.append((x, get_ngc_page(x))) for x in log_progress(range(4000, 6000))]
# %job [d4.append((x, get_ngc_page(x))) for x in log_progress(range(6000, 8000))]

In [7]:
# len(d1),len(d2),len(d3),len(d4)

In [8]:
# len(zip(d1,d2))

In [9]:
# data = []
# data.extend(d1)
# data.extend(d2)
# data.extend(d3)
# data.extend(d4)

In [10]:
# len(data)

In [11]:
# f = open('data.txt', 'a')
# for l in data:
#     f.write(str(l))

In [12]:
lines= None
with open('data.txt', 'r') as f:
    lines = f.readlines()[0]
    indices = []
    for ind, letter in enumerate(lines):
        if letter == '(':
            try:
                sub = lines[ind:]
                number = int(sub[1:sub.index(',')])
                indices.append(ind)
            except ValueError:
                pass

In [13]:
len(indices)


Out[13]:
7999

In [14]:
data = []
sub = None
for ind1, ind2 in zip(indices[:-1], indices[1:]):
    sub = lines[ind1:ind2]
    comma = sub.index(',')
    number = int(sub[1:comma])
    data.append((number, sub[comma:]))

In [15]:
len(data)


Out[15]:
7998

In [16]:
without = 0
for num, html in data:
    if 'Query Help' in html:
        without += 1
print len(data) - without


664

In [17]:
disp = [d for d in data if 'Query Help' not in d[1]]

In [18]:
len(disp)


Out[18]:
664

In [19]:
print disp[100][0]


1351

In [20]:
ddisp = []
for n, html in disp:
    for row in html.split('<br'):
        if row.count('&#177;') > 1: # &#177; == ±
            ddisp.append((n, html))
            break

In [21]:
len(ddisp)


Out[21]:
278

In [22]:
print ['NGC' + str(n) for (n,_) in ddisp]


['NGC80', 'NGC97', 'NGC194', 'NGC383', 'NGC410', 'NGC470', 'NGC499', 'NGC532', 'NGC584', 'NGC680', 'NGC691', 'NGC741', 'NGC742', 'NGC772', 'NGC813', 'NGC821', 'NGC890', 'NGC990', 'NGC1023', 'NGC1024', 'NGC1056', 'NGC1137', 'NGC1169', 'NGC1171', 'NGC1175', 'NGC1186', 'NGC1199', 'NGC1209', 'NGC1210', 'NGC1316', 'NGC1336', 'NGC1339', 'NGC1343', 'NGC1351', 'NGC1373', 'NGC1374', 'NGC1375', 'NGC1379', 'NGC1380', 'NGC1381', 'NGC1399', 'NGC1400', 'NGC1404', 'NGC1407', 'NGC1419', 'NGC1426', 'NGC1427', 'NGC1428', 'NGC1461', 'NGC1485', 'NGC1521', 'NGC1549', 'NGC1553', 'NGC1571', 'NGC1587', 'NGC1588', 'NGC1589', 'NGC1600', 'NGC1653', 'NGC1700', 'NGC1726', 'NGC2217', 'NGC2300', 'NGC2310', 'NGC2314', 'NGC2325', 'NGC2329', 'NGC2332', 'NGC2336', 'NGC2340', 'NGC2434', 'NGC2476', 'NGC2523', 'NGC2545', 'NGC2549', 'NGC2560', 'NGC2563', 'NGC2633', 'NGC2648', 'NGC2672', 'NGC2673', 'NGC2685', 'NGC2694', 'NGC2695', 'NGC2712', 'NGC2726', 'NGC2732', 'NGC2742', 'NGC2768', 'NGC2778', 'NGC2784', 'NGC2798', 'NGC2816', 'NGC2841', 'NGC2865', 'NGC2872', 'NGC2894', 'NGC2903', 'NGC2905', 'NGC2916', 'NGC2945', 'NGC2964', 'NGC2974', 'NGC2983', 'NGC2985', 'NGC2986', 'NGC3021', 'NGC3031', 'NGC3041', 'NGC3046', 'NGC3051', 'NGC3067', 'NGC3115', 'NGC3156', 'NGC3158', 'NGC3169', 'NGC3190', 'NGC3193', 'NGC3226', 'NGC3245', 'NGC3254', 'NGC3271', 'NGC3289', 'NGC3294', 'NGC3338', 'NGC3368', 'NGC3371', 'NGC3377', 'NGC3379', 'NGC3384', 'NGC3412', 'NGC3437', 'NGC3585', 'NGC3607', 'NGC3610', 'NGC3613', 'NGC3627', 'NGC3640', 'NGC3641', 'NGC3675', 'NGC3718', 'NGC3726', 'NGC3810', 'NGC3818', 'NGC3853', 'NGC3898', 'NGC3921', 'NGC3941', 'NGC3998', 'NGC4026', 'NGC4036', 'NGC4105', 'NGC4106', 'NGC4111', 'NGC4125', 'NGC4168', 'NGC4237', 'NGC4251', 'NGC4258', 'NGC4261', 'NGC4267', 'NGC4270', 'NGC4278', 'NGC4291', 'NGC4303', 'NGC4318', 'NGC4339', 'NGC4340', 'NGC4350', 'NGC4365', 'NGC4371', 'NGC4380', 'NGC4382', 'NGC4406', 'NGC4429', 'NGC4434', 'NGC4435', 'NGC4442', 'NGC4458', 'NGC4461', 'NGC4464', 'NGC4467', 'NGC4468', 'NGC4472', 'NGC4473', 'NGC4476', 'NGC4478', 'NGC4479', 'NGC4486', 'NGC4494', 'NGC4501', 'NGC4515', 'NGC4546', 'NGC4552', 'NGC4564', 'NGC4579', 'NGC4589', 'NGC4596', 'NGC4621', 'NGC4636', 'NGC4638', 'NGC4643', 'NGC4649', 'NGC4660', 'NGC4667', 'NGC4684', 'NGC4697', 'NGC4698', 'NGC4725', 'NGC4754', 'NGC4762', 'NGC4826', 'NGC4874', 'NGC4946', 'NGC5018', 'NGC5033', 'NGC5055', 'NGC5077', 'NGC5129', 'NGC5296', 'NGC5297', 'NGC5322', 'NGC5353', 'NGC5480', 'NGC5490', 'NGC5557', 'NGC5576', 'NGC5782', 'NGC5812', 'NGC5846', 'NGC5854', 'NGC5864', 'NGC5866', 'NGC5903', 'NGC5909', 'NGC5912', 'NGC5928', 'NGC5953', 'NGC5954', 'NGC5966', 'NGC6146', 'NGC6411', 'NGC6684', 'NGC6702', 'NGC6703', 'NGC6721', 'NGC6734', 'NGC6736', 'NGC6744', 'NGC6758', 'NGC6776', 'NGC6849', 'NGC6958', 'NGC6962', 'NGC6964', 'NGC7079', 'NGC7135', 'NGC7177', 'NGC7284', 'NGC7285', 'NGC7331', 'NGC7332', 'NGC7391', 'NGC7448', 'NGC7454', 'NGC7457', 'NGC7507', 'NGC7562', 'NGC7606', 'NGC7619', 'NGC7626', 'NGC7660', 'NGC7678', 'NGC7727', 'NGC7768', 'NGC7778', 'NGC7785', 'NGC7796']

In [23]:
ddisp[0][1]


Out[23]:
', \'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\\n<html>\\n <head>\\n <title>Kinematical profiles</title> <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />\\n <link rel="stylesheet" type="text/css" href="common.css" />\\n <base target="_top" />\\n <script>var pleinpot_version=\\\'8.12.2\\\'; var table_description=\\\'Kinematical profiles\\\';var table_name=\\\'a009\\\'</script>\\n </head>\\n <body>\\n<PRE><br /><a href="G.cgi?c=m&f=f&o=search&p=[cat.ident=NGC0080]">NGC0080</a>       <a href="javascript:L(\\\'NGC0080\\\')">L</a> <a href="javascript:D(\\\'001834.7+220448\\\')">001834.7+220448</a> <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0   -4.4    -13.0 &#177; 42.0 212.0 &#177; 22.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0   -9.0    -11.0 &#177; 37.0 249.0 &#177; 42.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0    8.2      7.0 &#177; 49.0 223.0 &#177; 24.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0   -1.0    -13.0 &#177; 17.0 220.0 &#177; 41.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0    1.3      0.0 &#177; 23.0 234.0 &#177; 17.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0    0.2      5.0 &#177; 22.0 221.0 &#177; 16.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0    3.6     14.0 &#177; 18.0 234.0 &#177; 17.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0  -18.2      1.0 &#177; 51.0 141.0 &#177; 77.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0  -13.6    -32.0 &#177; 45.0 231.0 &#177; 53.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0   -2.2     -9.0 &#177; 20.0 231.0 &#177; 21.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0   17.4     -5.0 &#177; 82.0 155.0 &#177; 70.0<br />                                <a href="B.cgi?n=9&b=SP98" target="kin02">SP98</a>    S OR  180.0   0.0   12.8    -20.0 &#177; 56.0 236.0 &#177; 66.0</PRE><hr class="frsep"/>\\n<h2>Further options on this selection</h2>\\n<ul>\\n<a href="fG.cgi?n=9&c=o&o=n80&a=html&z=d|r&ob=ra&d=$objname,$b1950,$link[dataset],param,angp,x,y,$field[vel],$field[sig]&z=s">Statistics</a><br><form name="f" action="./" onsubmit=return false>Search in a box of <input name="r" value="10" size="8" type="text">arcmin around <tt>n80</tt> <input type="button" value="submit" onclick=top.location="fG.cgi?n=9&c=o&p=n80&f="+document.f.r.value></form></ul>\\n<hr class="frsep"/>\\n<table width="100%">\\n<tr>\\n<td align="left">\\nKinematical profiles\\n</td>\\n<td align="right">Pleinpot: 8.12.2</td></tr>\\n<tr><td align="left" >Remember to <a href="acknowledge.html">acknowledge HyperLeda</a></td>\\n<td align="right"><i>Questions: <a href="mailto:leda@univ-lyon1.fr">leda@univ-lyon1.fr</a>\\n</i></td>\\n</tr>\\n</body>\\n</html>\\n\')'

In [24]:
troubled = []
for n, html in ddisp:
    good = True
    for r in html.split('<br'):
        if '</a>' in r and '&#177;' in r:
            row = ' '.join(r[r.rfind('>')+1:].replace('&#177;', ' ').split())
            if row.strip():
                if len(row.split(' ')) != 9:
                    good = False
#                     print len(row.split(' '))
#                     print row
    if not good:
        troubled.append(n)
        print 'NGC'+str(n)


NGC1461
NGC2336
NGC2648
NGC2672
NGC2673
NGC2742
NGC2816
NGC2964
NGC3169
NGC3294
NGC3437
NGC4237
NGC4303
NGC4380
NGC4435
NGC4501
NGC4579
NGC4698
NGC5480
NGC5782
NGC5866
NGC5909
NGC5912
NGC6962
NGC7448
NGC7606

In [25]:
len(troubled)


Out[25]:
26

In [26]:
html = ddisp[0][1]
xx, rr, sigs, esigs = [],[],[],[]
for r in html.split('<br'):
    if '</a>' in r and '&#177;' in r:
        row = ' '.join(r[r.rfind('>')+1:].replace('&#177;', ' ').split())
        if row.strip():
            _,_,_,x,y,vel,evel,sig,esig = row.split(' ')
            rr.append(float(y))
            xx.append(x)
            sigs.append(float(sig))
            esigs.append(float(esig))

In [27]:
dd = zip(rr, sigs, esigs)
dd = sorted(dd)
rr, sigs, esigs = zip(*dd)

In [28]:
plt.errorbar(rr, sigs, yerr=esigs)
plt.title('NGC x={}'.format(str(unique(xx))))
plt.show()



In [29]:
%%time
for n, html in ddisp:
    if n not in troubled:
        xx, rr, sigs, esigs = [],[],[],[]
        for r in html.split('<br'):
            if '</a>' in r and '&#177;' in r:
                row = ' '.join(r[r.rfind('>')+1:].replace('&#177;', ' ').split())
                if row.strip():
                    _,_,_,x,y,vel,evel,sig,esig = row.split(' ')
                    rr.append(float(y))
                    xx.append(x)
                    sigs.append(float(sig))
                    esigs.append(float(esig))
        dd = zip(rr, sigs, esigs)
        dd = sorted(dd)
        rr, sigs, esigs = zip(*dd)
        plt.errorbar(rr, sigs, yerr=esigs)
        plt.title('NGC{} x={}'.format(str(n), str(unique(xx))))
        plt.show()