In [1]:
import urllib.request
# Download the monitoring station's landing page as raw bytes.
# An explicit timeout is passed because, without one, urlopen falls back to
# the global socket default (no timeout) and the cell could block forever
# if the host is unreachable.
with urllib.request.urlopen('http://162.105.166.202/Default.aspx', timeout=30) as response:
    html = response.read()  # bytes; later cells decode this as UTF-8
In [7]:
# Show the fetched page as text (the raw response is UTF-8 encoded bytes).
print(html.decode('utf-8'))
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="refresh" content="60";/>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><title>
环境空气质量在线监测系统数据中心
</title><link href="Styles/Site.css" rel="stylesheet" type="text/css" />
<style type="text/css">
.style35
{
width: 97%;
}
.style36
{
height: 200px;
width: 45%;
margin-left: 120px;
}
.style43
{
height: 200px;
width: 71%;
margin-left: 120px;
}
.style45
{
margin-left: 120px;
width: 661px;
}
.style47
{
height: 12px;
margin-left: 120px;
width: 661px;
}
.style48
{
height: 12px;
width: 71%;
margin-left: 120px;
}
.style49
{
height: 12px;
width: 45%;
margin-left: 120px;
}
.style50
{
width: 350px;
height: 175px;
}
.style54
{
}
.style73
{
height: 17px;
}
.style74
{
height: 28px;
}
</style>
<script language="javascript" type="text/javascript">
// <![CDATA[
function Button1_onclick() {
}
// ]]>
</script>
<style type="text/css">
.style19
{
height: 60px;
width: 1900px;
}
.style25
{
width: 77%;
}
p.MsoNormal
{margin-bottom:.0001pt;
text-align:justify;
text-justify:inter-ideograph;
font-size:10.5pt;
font-family:"Calibri","sans-serif";
margin-left: 0cm;
margin-right: 0cm;
margin-top: 0cm;
}
</style>
</head>
<body>
<form method="post" action="Default.aspx" id="ctl01">
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUJOTg4NjI2NDYyD2QWAmYPZBYCAgMPZBYCAgUPZBYEAgEPPCsAEQIADxYEHgtfIURhdGFCb3VuZGceC18hSXRlbUNvdW50AgFkARAWABYAFgAWAmYPZBYGZg8PFgIeB1Zpc2libGVoZGQCAQ9kFgJmDw8WAh4EVGV4dAURMjAxNi0zLTQgMTY6NTA6MDJkZAICDw8WAh8CaGRkAgMPPCsADwIADxYEHwBnHwECAWQKEBYBAgwWATwrAAUBABYCHgpIZWFkZXJUZXh0BQrpo47lkJEowrApFgFmFgJmD2QWIGYPDxYCHwJoZGQCAQ9kFgICAQ8PFgIfAwUFMzA4LjFkZAICD2QWAgIBDw8WAh8DBQUxMTEuNmRkAgMPZBYCAgEPDxYCHwMFAzQuOWRkAgQPZBYCAgEPDxYCHwMFBDcuNDVkZAIFD2QWAgIBDw8WAh8DBQQxOC43ZGQCBg9kFgICAQ8PFgIfAwUCNTFkZAIHD2QWAgIBDw8WAh8DBQUxNDcuM2RkAggPZBYCAgEPDxYCHwMFAzEzMWRkAgkPZBYCAgEPDxYCHwMFBTcxOC4zZGQCCg9kFgICAQ8PFgIfAwUEMTAuNGRkAgsPZBYCAgEPDxYCHwMFBDQzLjJkZAIMD2QWAgIBDw8WAh8DBQMwLjVkZAIND2QWAgIBDw8WAh8DBQMyNDdkZAIOD2QWAgIBDw8WAh8DBQYxMDEwLjNkZAIPDw8WAh8CaGRkGAIFHmN0bDAwJE1haW5Db250ZW50JERldGFpbHNWaWV3MQ8UKwAHZGRkZGQWAAIBZAUbY3RsMDAkTWFpbkNvbnRlbnQkR3JpZFZpZXcyDzwrAAwBCAIBZCISVq53ymKZFarP/yhiulqivSCkWmnFDLIbg7wN534a" />
<script src="/WebResource.axd?d=ROSFvmW6qyKmAQxIXbZjiK16MlEuinEpShi22nVgOYQ1&t=634723767737656250" type="text/javascript"></script>
<div class="page">
<div class="header">
<div class="title">
<h1 style="font-size: xx-large">
北京大学环境模拟与污染控制国家重点实验室</h1>
<h1>
</h1>
</div>
<div class="loginDisplay">
[ <a href="Account/Login.aspx" id="HeadLoginView_HeadLoginStatus">登录</a>
]
</div>
<div class="clear hideSkiplink" align="right">
<a href="#NavigationMenu_SkipLink"><img alt="Skip Navigation Links" src="/WebResource.axd?d=AUI05ebu7lmOoZg_HfpJDg2&t=634723767737656250" width="0" height="0" border="0" /></a><div class="menu" id="NavigationMenu">
<ul class="level1">
<li><a class="level1" href="Default.aspx">主页</a></li><li><a class="level1" href="CP_DATA.aspx">关于</a></li>
</ul>
</div><a id="NavigationMenu_SkipLink"></a>
</div>
</div>
<div class="main">
<table class="style35" bgcolor="DarkRed" border="0"
cellpadding="0" cellspacing="1" frame="border">
<tr>
<td class="style47" bgcolor="White" style="font-family: 微软雅黑; font-size: x-large; color: #000000;"
align="center">
环境监测系统平台数据中心</td>
<td class="style47" bgcolor="White" style="font-family: 微软雅黑; font-size: x-large; color: #000000;"
align="center">
环境污染气体浓度时间序列</td>
<td class="style48" bgcolor="White" align="center"
style="font-size: x-large; font-family: 微软雅黑; color: #000000;">
环境颗粒物浓度时间序列</td>
<td class="style49" bgcolor="White" align="center"
style="font-family: 微软雅黑; font-size: x-large; color: #000000;">
环境气象时间序列</td>
</tr>
<tr>
<td class="style45" bgcolor="White" rowspan="3">
<table style="width:100%;">
<tr>
<td>
<img alt="" class="style50" src="Img/WL.JPG" /></td>
</tr>
<tr>
<td align="center" bgcolor="Maroon" class="style73"
style="color: #FFFFFF; font-family: 黑体;">
<div>
<table cellspacing="0" id="MainContent_GridView2">
<tr>
<td><font face="Times New Roman" size="6">2016-3-4 16:50:02</font></td>
</tr>
</table>
</div>
</td>
</tr>
<tr>
<td
style="font-family: 黑体; color: #FFFFFF;"
class="style54">
<div>
<table cellspacing="0" cellpadding="4" rules="rows" bordercolor="Maroon" id="MainContent_DetailsView1" width="100%" height="50" bgcolor="#CCFFFF">
<tr>
<td><font face="Times New Roman" color="Black" size="6">PM2.5[30℃]</font></td><td><font face="Times New Roman" color="Black" size="6">308.1</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">PM2.5[50℃]</font></td><td><font face="Times New Roman" color="Black" size="6">111.6</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">O3(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">4.9</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">CO(ppm)</font></td><td><font face="Times New Roman" color="Black" size="6">7.45</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">SO2(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">18.7</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">NO(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">51</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">NOx(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">147.3</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">NOy(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">131</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">CO2(ppm)</font></td><td><font face="Times New Roman" color="Black" size="6">718.3</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">温度(℃)</font></td><td><font face="Times New Roman" color="Black" size="6">10.4</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">湿度(%)</font></td><td><font face="Times New Roman" color="Black" size="6">43.2</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">风速(m/s)</font></td><td><font face="Times New Roman" color="Black" size="6">0.5</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">风向(°)</font></td><td><font face="Times New Roman" color="Black" size="6">247</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">大气压(mBar)</font></td><td><font face="Times New Roman" color="Black" size="6">1010.3</font></td>
</tr>
</table>
</div>
</td>
</tr>
<tr>
<td bgcolor="Maroon"
style="font-family: 微软雅黑; color: #FFFFFF; font-size: large;"
align="center" class="style74">
本研究数据仅供参考</td>
</tr>
</table>
</td>
<td class="style45" bgcolor="White">
<img id="MainContent_Chart20" src="/ChartImg.axd?i=chart_0_35.png&g=b170051d95314460a452782f3c6a53d7" alt="" height="275" width="500" style="border-width:0px;" />
</td>
<td class="style43" bgcolor="White">
<img id="MainContent_Chart13" src="/ChartImg.axd?i=chart_0_36.png&g=b36acc153a4f43ceaecc9c97dc6221ff" alt="" height="275" width="500" style="border-width:0px;" />
</td>
<td class="style36" bgcolor="White">
<img id="MainContent_Chart14" src="/ChartImg.axd?i=chart_0_37.png&g=d20becaf5d1c45af8b9965605554529e" alt="" height="275" width="500" style="border-width:0px;" />
</td>
</tr>
<tr>
<td class="style45" bgcolor="White">
<img id="MainContent_Chart1" src="/ChartImg.axd?i=chart_0_38.png&g=eba8f84f92de449d93b23d76502d1616" alt="" height="275" width="500" style="border-width:0px;margin-top: 0px" />
</td>
<td class="style43" bgcolor="White">
<img id="MainContent_Chart19" src="/ChartImg.axd?i=chart_0_39.png&g=70ff77eb95d64565bab6ef9c81d4caa5" alt="" height="275" width="500" style="border-width:0px;margin-left: 0px" />
</td>
<td class="style36" bgcolor="White">
<img id="MainContent_Chart22" src="/ChartImg.axd?i=chart_0_40.png&g=2361165ca18848d481c19df3dd104d3f" alt="" height="275" width="500" style="border-width:0px;" />
</td>
</tr>
<tr>
<td class="style45" bgcolor="White">
<img id="MainContent_Chart21" src="/ChartImg.axd?i=chart_0_41.png&g=4c4ae1e8fc7c4de9b4afd3586a00b238" alt="" height="275" width="500" style="border-width:0px;" />
</td>
<td class="style43" bgcolor="White">
<img id="MainContent_Chart23" src="/ChartImg.axd?i=chart_0_42.png&g=5fb4b56035304d889ce5f68b8e0bbe3e" alt="" height="275" width="500" style="border-width:0px;margin-left: 0px" />
</td>
<td class="style36" bgcolor="White">
<img alt="" src="Img/sp.png" style="height: 275px; width: 500px" /></td>
</tr>
</table>
</div>
<div class="clear">
</div>
</div>
<div class="footer" align="center">
<table class="style19">
<tr>
<td class="style25" align="left"
style="color: #FFFFFF; font-size: x-large; font-family: 宋体, Arial, Helvetica, sans-serif"
width="100%">
北京大学城市大气环境定位观测站由“211”、“985”和重点实验室专项经费等资助</td>
<td class="style25" align="center"
style="color: #FFFFFF; font-size: large; font-family: 宋体, Arial, Helvetica, sans-serif"
width="100%">
地址:北京大学老地学楼 邮编: 100871<br />
Copyright 2010 北京大学环境科学与工程学院</td>
</tr>
</table>
</div>
<script type='text/javascript'>new Sys.WebForms.Menu({ element: 'NavigationMenu', disappearAfter: 500, orientation: 'horizontal', tabIndex: 0, disabled: false });</script></form>
</body>
</html>
In [10]:
import re
# Cut out the fragment of the page that holds the readings: a non-greedy match
# starting at the id of the timestamp table and ending at the disclaimer text
# that follows the measurements table. DOTALL lets '.' span line breaks.
data = re.compile('MainContent_GridView2.*?本研究数据仅供参考', re.DOTALL).search(html.decode('utf-8'))
print(data.group(0))
MainContent_GridView2">
<tr>
<td><font face="Times New Roman" size="6">2016-3-4 16:50:02</font></td>
</tr>
</table>
</div>
</td>
</tr>
<tr>
<td
style="font-family: 黑体; color: #FFFFFF;"
class="style54">
<div>
<table cellspacing="0" cellpadding="4" rules="rows" bordercolor="Maroon" id="MainContent_DetailsView1" width="100%" height="50" bgcolor="#CCFFFF">
<tr>
<td><font face="Times New Roman" color="Black" size="6">PM2.5[30℃]</font></td><td><font face="Times New Roman" color="Black" size="6">308.1</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">PM2.5[50℃]</font></td><td><font face="Times New Roman" color="Black" size="6">111.6</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">O3(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">4.9</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">CO(ppm)</font></td><td><font face="Times New Roman" color="Black" size="6">7.45</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">SO2(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">18.7</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">NO(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">51</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">NOx(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">147.3</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">NOy(ppb)</font></td><td><font face="Times New Roman" color="Black" size="6">131</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">CO2(ppm)</font></td><td><font face="Times New Roman" color="Black" size="6">718.3</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">温度(℃)</font></td><td><font face="Times New Roman" color="Black" size="6">10.4</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">湿度(%)</font></td><td><font face="Times New Roman" color="Black" size="6">43.2</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">风速(m/s)</font></td><td><font face="Times New Roman" color="Black" size="6">0.5</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">风向(°)</font></td><td><font face="Times New Roman" color="Black" size="6">247</font></td>
</tr><tr>
<td><font face="Times New Roman" color="Black" size="6">大气压(mBar)</font></td><td><font face="Times New Roman" color="Black" size="6">1010.3</font></td>
</tr>
</table>
</div>
</td>
</tr>
<tr>
<td bgcolor="Maroon"
style="font-family: 微软雅黑; color: #FFFFFF; font-size: large;"
align="center" class="style74">
本研究数据仅供参考
In [11]:
# Parse data: grab the first line of the fragment that mentions "PM2.5".
# '.' does not match newlines here (no DOTALL), so the match stops at end of line.
# The pattern is a raw string so that '\.' reaches the regex engine unchanged
# instead of being an invalid string escape (a warning on modern Python).
# Syntax reminder for the next step: (?<=abc) is a positive lookbehind.
line_PM2_5 = re.search(r'PM2\.5.*', data.group())
In [13]:
# Display the matched PM2.5 line (group 0 is the whole match).
print(line_PM2_5.group(0))
PM2.5[30℃]</font></td><td><font face="Times New Roman" color="Black" size="6">308.1</font></td>
In [21]:
# Extract the reading itself: the first run of digits/dots that directly
# follows a '>' (positive lookbehind), i.e. the text of the value cell.
# Raw string avoids the invalid '\.' string escape of the non-raw literal.
read_PM2_5 = re.search(r'(?<=>)[0-9\.]+', line_PM2_5.group())
print(read_PM2_5.group())
308.1
In [24]:
# Parse data: the two PM2.5 extraction steps combined in one cell.
# Step 1: take the first line of the fragment that mentions "PM2.5"
#         ('.' stops at the newline because DOTALL is not set).
# Step 2: within that line, take the first run of digits/dots that directly
#         follows a '>' -- (?<=...) is a positive lookbehind.
# Both patterns are raw strings so '\.' is passed to the regex engine as
# written instead of being an invalid string escape.
# Note: re.search returns None when nothing matches, so .group() would raise
# AttributeError if the page layout changes.
line_PM2_5 = re.search(r'PM2\.5.*', data.group())
read_PM2_5 = re.search(r'(?<=>)[0-9\.]+', line_PM2_5.group())
print(read_PM2_5.group())
308.1
In [25]:
# Parse datetime: the timestamp is the first run of digits, '-', ' ' and ':'
# that directly follows the literal text ="6"> in the fragment, e.g.
#   ="6">2016-3-4 16:50:02</font></td>
# Raw string avoids the invalid '\-' and '\:' string escapes of the non-raw
# literal; the regex itself is unchanged.
# NOTE(review): the name `datetime` would shadow the stdlib module if it is
# ever imported in this notebook; kept as-is for compatibility.
datetime = re.search(r'(?<=="6">)[0-9\- \:]+', data.group())
print(datetime.group())
2016-3-4 16:50:02
In [ ]:
Content source: mofhu/pkuairquality
Similar notebooks: