In [1]:
import urllib.request
# Download the raw bytes of the air-quality monitoring page.
# An explicit timeout makes an unreachable or stalled host raise an error
# instead of blocking the kernel indefinitely (urlopen has no timeout unless
# a global socket default has been set).
with urllib.request.urlopen('http://162.105.166.202/Default.aspx', timeout=30) as response:
    html = response.read()  # bytes; later cells decode this as UTF-8

In [7]:
# Show the downloaded page as text (the page declares charset=utf-8).
print(html.decode('utf-8'))



<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="refresh" content="60";/>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>
	环境空气质量在线监测系统数据中心
</title><link href="Styles/Site.css" rel="stylesheet" type="text/css" />
    <style type="text/css">
        .style35
        {
            width: 97%;
            }
        .style36
        {
            height: 200px;
            width: 45%;
            margin-left: 120px;
        }
        .style43
        {
            height: 200px;
            width: 71%;
            margin-left: 120px;
        }
        .style45
        {
            margin-left: 120px;
            width: 661px;
        }
        .style47
        {
            height: 12px;
            margin-left: 120px;
            width: 661px;
        }
        .style48
        {
            height: 12px;
            width: 71%;
            margin-left: 120px;
        }
        .style49
        {
            height: 12px;
            width: 45%;
            margin-left: 120px;
        }
        .style50
        {
            width: 350px;
            height: 175px;
        }
        .style54
        {
    }
        .style73
        {
            height: 17px;
            }
        .style74
    {
        height: 28px;
    }
        </style>
    <script language="javascript" type="text/javascript">
// <![CDATA[

        function Button1_onclick() {

        }

// ]]>
    </script>

    <style type="text/css">

        .style19
        {
            height: 60px;
            width: 1900px;
        }
        .style25
        {
            width: 77%;
            }
        p.MsoNormal
	{margin-bottom:.0001pt;
	text-align:justify;
	text-justify:inter-ideograph;
	font-size:10.5pt;
	font-family:"Calibri","sans-serif";
	        margin-left: 0cm;
            margin-right: 0cm;
            margin-top: 0cm;
        }
        </style>
</head>
<body>
    <form method="post" action="Default.aspx" id="ctl01">
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUJOTg4NjI2NDYyD2QWAmYPZBYCAgMPZBYCAgUPZBYEAgEPPCsAEQIADxYEHgtfIURhdGFCb3VuZGceC18hSXRlbUNvdW50AgFkARAWABYAFgAWAmYPZBYGZg8PFgIeB1Zpc2libGVoZGQCAQ9kFgJmDw8WAh4EVGV4dAURMjAxNi0zLTQgMTY6NTA6MDJkZAICDw8WAh8CaGRkAgMPPCsADwIADxYEHwBnHwECAWQKEBYBAgwWATwrAAUBABYCHgpIZWFkZXJUZXh0BQrpo47lkJEowrApFgFmFgJmD2QWIGYPDxYCHwJoZGQCAQ9kFgICAQ8PFgIfAwUFMzA4LjFkZAICD2QWAgIBDw8WAh8DBQUxMTEuNmRkAgMPZBYCAgEPDxYCHwMFAzQuOWRkAgQPZBYCAgEPDxYCHwMFBDcuNDVkZAIFD2QWAgIBDw8WAh8DBQQxOC43ZGQCBg9kFgICAQ8PFgIfAwUCNTFkZAIHD2QWAgIBDw8WAh8DBQUxNDcuM2RkAggPZBYCAgEPDxYCHwMFAzEzMWRkAgkPZBYCAgEPDxYCHwMFBTcxOC4zZGQCCg9kFgICAQ8PFgIfAwUEMTAuNGRkAgsPZBYCAgEPDxYCHwMFBDQzLjJkZAIMD2QWAgIBDw8WAh8DBQMwLjVkZAIND2QWAgIBDw8WAh8DBQMyNDdkZAIOD2QWAgIBDw8WAh8DBQYxMDEwLjNkZAIPDw8WAh8CaGRkGAIFHmN0bDAwJE1haW5Db250ZW50JERldGFpbHNWaWV3MQ8UKwAHZGRkZGQWAAIBZAUbY3RsMDAkTWFpbkNvbnRlbnQkR3JpZFZpZXcyDzwrAAwBCAIBZCISVq53ymKZFarP/yhiulqivSCkWmnFDLIbg7wN534a" />


<script src="/WebResource.axd?d=ROSFvmW6qyKmAQxIXbZjiK16MlEuinEpShi22nVgOYQ1&amp;t=634723767737656250" type="text/javascript"></script>
    <div class="page">
        <div class="header">
            <div class="title">
                <h1 style="font-size: xx-large">
                    北京大学环境模拟与污染控制国家重点实验室</h1>
                <h1>
                    &nbsp;</h1>
            </div>
            <div class="loginDisplay">
                &nbsp;
                        [ <a href="Account/Login.aspx" id="HeadLoginView_HeadLoginStatus">登录</a> 
                        ]&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
                    
            </div>
            <div class="clear hideSkiplink" align="right">
                <a href="#NavigationMenu_SkipLink"><img alt="Skip Navigation Links" src="/WebResource.axd?d=AUI05ebu7lmOoZg_HfpJDg2&amp;t=634723767737656250" width="0" height="0" border="0" /></a><div class="menu" id="NavigationMenu">
	<ul class="level1">
		<li><a class="level1" href="Default.aspx">主页</a></li><li><a class="level1" href="CP_DATA.aspx">关于</a></li>
	</ul>
</div><a id="NavigationMenu_SkipLink"></a>
            </div>
        </div>
        <div class="main">
            

    <table class="style35" bgcolor="DarkRed" border="0" 
    cellpadding="0" cellspacing="1" frame="border">

            <tr>
                <td class="style47" bgcolor="White" style="font-family: 微软雅黑; font-size: x-large; color: #000000;" 
                    align="center">
                    环境监测系统平台数据中心</td>
                <td class="style47" bgcolor="White" style="font-family: 微软雅黑; font-size: x-large; color: #000000;" 
                    align="center">
                    环境污染气体浓度时间序列</td>
                <td class="style48" bgcolor="White" align="center" 
                    style="font-size: x-large; font-family: 微软雅黑; color: #000000;">
                    环境颗粒物浓度时间序列</td>
                <td class="style49" bgcolor="White" align="center" 
                    style="font-family: 微软雅黑; font-size: x-large; color: #000000;">
                    环境气象时间序列</td>
            </tr>
            
            <tr>
                <td class="style45" bgcolor="White" rowspan="3">
                    <table style="width:100%;">
                        <tr>
                            <td>
                    <img alt="" class="style50" src="Img/WL.JPG" /></td>
                        </tr>
                        <tr>
                            <td align="center" bgcolor="Maroon" class="style73" 
                                style="color: #FFFFFF; font-family: 黑体;">
                                <div>
	<table cellspacing="0" id="MainContent_GridView2">
		<tr>
			<td><font face="Times New Roman" size="6">2016-3-4 16:50:02</font></td>
		</tr>
	</table>
</div>
                            </td>
                        </tr>
                        <tr>
                            <td 
                                style="font-family: 黑体; color: #FFFFFF;" 
                                class="style54">
                                <div>
	<table cellspacing="0" cellpadding="4" rules="rows" bordercolor="Maroon" id="MainContent_DetailsView1" width="100%" height="50" bgcolor="#CCFFFF">
		<tr>
			<td><font face="Times New Roman" color="Black" size="6">PM2.5[30℃]</font></td><td><font face="Times New Roman" color="Black" size="6">308.1</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">PM2.5[50℃]</font></td><td><font face="Times New Roman" color="Black" size="6">111.6</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">O3(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">4.9</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">CO(ppm)</font></td><td><font face="Times New Roman" color="Black" size="6">7.45</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">SO2(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">18.7</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">NO(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">51</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">NOx(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">147.3</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">NOy(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">131</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">CO2(ppm)</font></td><td><font face="Times New Roman" color="Black" size="6">718.3</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">温度(℃)</font></td><td><font face="Times New Roman" color="Black" size="6">10.4</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">湿度(%)</font></td><td><font face="Times New Roman" color="Black" size="6">43.2</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">风速(m/s)</font></td><td><font face="Times New Roman" color="Black" size="6">0.5</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">风向(&#176;)</font></td><td><font face="Times New Roman" color="Black" size="6">247</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">大气压(mBar)</font></td><td><font face="Times New Roman" color="Black" size="6">1010.3</font></td>
		</tr>
	</table>
</div>
                                
                            </td>
                        </tr>
                        <tr>
                            <td bgcolor="Maroon" 
                                style="font-family: 微软雅黑; color: #FFFFFF; font-size: large;" 
                                align="center" class="style74">
                                本研究数据仅供参考</td>
                        </tr>
                    </table>
                </td>
                <td class="style45" bgcolor="White">
                    <img id="MainContent_Chart20" src="/ChartImg.axd?i=chart_0_35.png&amp;g=b170051d95314460a452782f3c6a53d7" alt="" height="275" width="500" style="border-width:0px;" />
                </td>
                <td class="style43" bgcolor="White">
                    <img id="MainContent_Chart13" src="/ChartImg.axd?i=chart_0_36.png&amp;g=b36acc153a4f43ceaecc9c97dc6221ff" alt="" height="275" width="500" style="border-width:0px;" />
                </td>
                <td class="style36" bgcolor="White">
                    <img id="MainContent_Chart14" src="/ChartImg.axd?i=chart_0_37.png&amp;g=d20becaf5d1c45af8b9965605554529e" alt="" height="275" width="500" style="border-width:0px;" />
                </td>
            </tr>
            
            <tr>
                <td class="style45" bgcolor="White">
                    <img id="MainContent_Chart1" src="/ChartImg.axd?i=chart_0_38.png&amp;g=eba8f84f92de449d93b23d76502d1616" alt="" height="275" width="500" style="border-width:0px;margin-top: 0px" />
                </td>
                <td class="style43" bgcolor="White">
            <img id="MainContent_Chart19" src="/ChartImg.axd?i=chart_0_39.png&amp;g=70ff77eb95d64565bab6ef9c81d4caa5" alt="" height="275" width="500" style="border-width:0px;margin-left: 0px" />
                </td>
                <td class="style36" bgcolor="White">
                    <img id="MainContent_Chart22" src="/ChartImg.axd?i=chart_0_40.png&amp;g=2361165ca18848d481c19df3dd104d3f" alt="" height="275" width="500" style="border-width:0px;" />
                    </td>
            </tr>
            
            <tr>
                <td class="style45" bgcolor="White">
                    <img id="MainContent_Chart21" src="/ChartImg.axd?i=chart_0_41.png&amp;g=4c4ae1e8fc7c4de9b4afd3586a00b238" alt="" height="275" width="500" style="border-width:0px;" />
                </td>
                <td class="style43" bgcolor="White">
            <img id="MainContent_Chart23" src="/ChartImg.axd?i=chart_0_42.png&amp;g=5fb4b56035304d889ce5f68b8e0bbe3e" alt="" height="275" width="500" style="border-width:0px;margin-left: 0px" />
                </td>
                <td class="style36" bgcolor="White">
                    <img alt="" src="Img/sp.png" style="height: 275px; width: 500px" /></td>
            </tr>
            
            </table>
    

            

            
        
                    
                        
                    
        </div>
        <div class="clear">
        </div>
    </div>
    <div class="footer" align="center">
        
        <table class="style19">
            <tr>
                <td class="style25" align="left" 
                    
                    style="color: #FFFFFF; font-size: x-large; font-family: 宋体, Arial, Helvetica, sans-serif" 
                    width="100%">
                                北京大学城市大气环境定位观测站由“211”、“985”和重点实验室专项经费等资助</td>
                <td class="style25" align="center" 
                    
                    style="color: #FFFFFF; font-size: large; font-family: 宋体, Arial, Helvetica, sans-serif" 
                    width="100%">
                    地址:北京大学老地学楼 邮编: 100871<br />
                    Copyright 2010 北京大学环境科学与工程学院</td>
            </tr>
            </table>
    </div>
    
<script type='text/javascript'>new Sys.WebForms.Menu({ element: 'NavigationMenu', disappearAfter: 500, orientation: 'horizontal', tabIndex: 0, disabled: false });</script></form>
    
</body>
</html>


In [10]:
import re

# Cut out the fragment that starts at the timestamp table id and ends at the
# disclaimer text; DOTALL lets the non-greedy ".*?" run across line breaks.
data = re.compile('MainContent_GridView2.*?本研究数据仅供参考', re.DOTALL).search(html.decode('utf-8'))
print(data.group(0))


MainContent_GridView2">
		<tr>
			<td><font face="Times New Roman" size="6">2016-3-4 16:50:02</font></td>
		</tr>
	</table>
</div>
                            </td>
                        </tr>
                        <tr>
                            <td 
                                style="font-family: 黑体; color: #FFFFFF;" 
                                class="style54">
                                <div>
	<table cellspacing="0" cellpadding="4" rules="rows" bordercolor="Maroon" id="MainContent_DetailsView1" width="100%" height="50" bgcolor="#CCFFFF">
		<tr>
			<td><font face="Times New Roman" color="Black" size="6">PM2.5[30℃]</font></td><td><font face="Times New Roman" color="Black" size="6">308.1</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">PM2.5[50℃]</font></td><td><font face="Times New Roman" color="Black" size="6">111.6</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">O3(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">4.9</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">CO(ppm)</font></td><td><font face="Times New Roman" color="Black" size="6">7.45</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">SO2(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">18.7</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">NO(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">51</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">NOx(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">147.3</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">NOy(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">131</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">CO2(ppm)</font></td><td><font face="Times New Roman" color="Black" size="6">718.3</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">温度(℃)</font></td><td><font face="Times New Roman" color="Black" size="6">10.4</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">湿度(%)</font></td><td><font face="Times New Roman" color="Black" size="6">43.2</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">风速(m/s)</font></td><td><font face="Times New Roman" color="Black" size="6">0.5</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">风向(&#176;)</font></td><td><font face="Times New Roman" color="Black" size="6">247</font></td>
		</tr><tr>
			<td><font face="Times New Roman" color="Black" size="6">大气压(mBar)</font></td><td><font face="Times New Roman" color="Black" size="6">1010.3</font></td>
		</tr>
	</table>
</div>
                                
                            </td>
                        </tr>
                        <tr>
                            <td bgcolor="Maroon" 
                                style="font-family: 微软雅黑; color: #FFFFFF; font-size: large;" 
                                align="center" class="style74">
                                本研究数据仅供参考

In [11]:
# Parse data: grab the text from the first "PM2.5" label to the end of its line
# ("." does not match newlines here, so the match stops at the line break).
# The pattern is a raw string so that "\." reaches the regex engine as an
# escaped dot instead of being an invalid Python string escape.
line_PM2_5 = re.search(r'PM2\.5.*', data.group())

In [13]:
# Display the whole matched PM2.5 line (group 0 is the entire match).
print(line_PM2_5.group(0))


PM2.5[30℃]</font></td><td><font face="Times New Roman" color="Black" size="6">308.1</font></td>

In [21]:
# Take the first run of digits/dots that directly follows a ">" on the PM2.5
# line, i.e. the text of the value cell. Raw string: avoids the invalid "\."
# string escape; inside a character class a bare "." is already literal.
read_PM2_5 = re.search(r'(?<=>)[0-9.]+', line_PM2_5.group())
print(read_PM2_5.group())


308.1

In [24]:
# Parse the PM2.5 reading in two steps:
#   1. capture from the first "PM2.5" label to the end of that line;
#   2. within it, take the first run of digits/dots directly after a ">".
# Both patterns are raw strings so the regex backslashes are not treated as
# (invalid) Python string escapes.
line_PM2_5 = re.search(r'PM2\.5.*', data.group())
read_PM2_5 = re.search(r'(?<=>)[0-9.]+', line_PM2_5.group())
print(read_PM2_5.group())


308.1

In [25]:
# Parse the timestamp: the lookbehind anchors on the literal ="6"> that
# precedes the first cell's text, and the character class then consumes
# digits, hyphens, spaces and colons, e.g.
#   ="6">2016-3-4 16:50:02</font></td>
# Raw string: "\-" and ":" reach the regex engine unchanged instead of relying
# on invalid Python string escapes ("\-", "\:").
# NOTE(review): the name `datetime` would shadow the stdlib module if it is
# ever imported in this notebook; kept as-is so existing references still work.
datetime = re.search(r'(?<=="6">)[0-9\- :]+', data.group())
print(datetime.group())


2016-3-4 16:50:02

In [ ]: