In [15]:
import requests
import bs4
In [25]:
# Request the site's `pagesnews.php` page. The query string (including the
# trailing quote after `nid=`) is kept exactly as originally written.
# An explicit timeout is passed because requests applies none by default and
# would otherwise block the kernel forever if the server stops responding.
r = requests.get(
    "http://www.liberianewsagency.org/pagesnews.php?nid='",
    timeout=30,
)
In [26]:
# Display the names of the HTTP headers present on the response.
list(r.headers)
Out[26]:
['content-length',
'content-encoding',
'vary',
'keep-alive',
'server',
'connection',
'date',
'content-type']
In [27]:
# Parse the raw response bytes into a BeautifulSoup tree.
# The parser is named explicitly: when it is omitted, bs4 picks whichever
# parser happens to be installed, so the resulting tree can differ from one
# machine to another. "html.parser" ships with Python and needs no extra package.
# NOTE(review): Out[39] contains \x91/\x92/\x94 characters, which suggests the
# page bytes are really Windows-1252 despite the declared utf-8 charset --
# consider passing from_encoding="windows-1252" once confirmed.
doc = r.content
soup = bs4.BeautifulSoup(doc, "html.parser")
In [28]:
# Dump the parsed document, re-indented one tag per line, to inspect the
# page structure. Written as a call with a single argument so the cell is
# valid on both Python 2 and Python 3 and prints the same text on either.
print(soup.prettify())
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<link href="images/favicon.ico" rel="shortcut icon"/>
<script src="script/contentslider.js" type="text/javascript">
</script>
<link href="script/contentslider.css" rel="stylesheet" type="text/css"/>
<script src="topmenu.js" type="text/javascript">
</script>
<script src="fadeimage.js" type="text/javascript">
</script>
<link href="css/moe.css" rel="stylesheet" type="text/css"/>
<link href="css/dropdown.css" rel="stylesheet" type="text/css"/>
<link href="css/chromestyle.css" rel="stylesheet" type="text/css"/>
<script src="css/chrome.js" type="text/javascript">
/***********************************************
* This site is developed by Me
***********************************************/
</script>
<title>
| Liberia News Agency- Republic of Liberia
</title>
</head>
<body>
<table align="center" cellspacing="0" class="whitebg" width="950">
<tr>
<td>
<table align="center" bgcolor="#FFFFFF" cellspacing="0" width="948">
<tr>
<td>
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td width="24%">
<img height="112" src="images/logo2.png" width="217"/>
</td>
<td width="51%">
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td>
<title>
- Search Results
</title>
<!-- start search results stylesheet -->
<link href="phpsearch_files/phpsearch.css" rel="stylesheet" type="text/css"/>
<!-- end search results stylesheet -->
<!-- PASTE YOUR HEADER CODE HERE -->
<div class="phpsearch_wrapper">
<form action="search2.php?" method="get">
<input autocomplete="on" class="phpsearch_input" name="search" size="27" type="text" value=""/>
<input class="phpsearch_button" name="go" type="submit" value="Search"/>
</form>
</div>
<!-- paste your footer code here -->
</td>
</tr>
<tr>
<td>
<img height="79" src="images/label.png" width="478"/>
</td>
</tr>
</table>
</td>
<td width="1%">
</td>
<td valign="top" width="24%">
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td>
</td>
</tr>
<tr>
<td valign="top">
<img height="53" src="images/socialmedia.png" width="154"/>
</td>
</tr>
</table>
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td width="22%">
</td>
<td width="78%">
<img height="64" src="images/LiberiaFlag.gif" width="110"/>
</td>
</tr>
</table>
</td>
</tr>
</table>
</td>
</tr>
<tr>
<td>
<div class="blackbarshort">
<ul class="solidblockmenu">
<div class="menu3">
<div class="chromestyle" id="chromemenu">
<ul>
<li>
<a href="index.php">
Home
</a>
</li>
<li>
<a href="#" rel="dropmenu1">
About LINA
</a>
</li>
<li>
<a href="#" rel="dropmenu2">
Management
</a>
</li>
<li>
<a href="#" rel="dropmenu3">
News by Sector
</a>
</li>
<li>
<a href="#" rel="dropmenu4">
County News
</a>
</li>
<li>
<a href="#" rel="dropmenu5">
Branches of Government
</a>
</li>
<li>
<a href="#" rel="dropmenu6">
Archives
</a>
</li>
<li>
<a href="pages1.php?pgID=17 ">
Contact Us
</a>
</li>
</ul>
</div>
<!--1st drop down menu -->
<div class="dropmenudiv" id="dropmenu1" style="width: 200px">
<a href="pages1.php?pgID=42">
History
</a>
<a href="pages1.php?pgID=41">
Goal & Mission Statement
</a>
</div>
<!--2nd drop down menu -->
<div class="dropmenudiv" id="dropmenu2" style="width: 200px">
<a href="pages1.php?pgID=53 ">
Director General
</a>
<a href="pages1.php?pgID=54 ">
Editorial Staff
</a>
</div>
<!--3rd drop down menu -->
<div class="dropmenudiv" id="dropmenu3" style="width: 200px">
<a href="pagesector.php?sId=14">
Health
</a>
<a href="pagesector.php?sId=1">
Politics
</a>
<a href="pagesector.php?sId=2">
Business & Finance
</a>
<a href="pagesector.php?sId=3">
Arts & Culture
</a>
<a href="pagesector.php?sId=4">
Economics
</a>
<a href="pagesector.php?sId=5">
Science & Technology
</a>
<a href="pagesector.php?sId=12">
Education
</a>
<a href="pagesector.php?sId=10">
Crime
</a>
<a href="pagesector.php?sId=7">
Sports
</a>
<a href="pagesector.php?sId=11">
Agriculture
</a>
<a href="pagesector.php?sId=9">
World
</a>
<a href="pagesector.php?sId=8">
Media
</a>
<a href="pagesector.php?sId=13">
Society/Obituary
</a>
<a href="pagesector.php?sId=6">
Others
</a>
</div>
<!--3rd drop down menu -->
<div class="dropmenudiv" id="dropmenu4" style="width: 200px;">
<a href="pagecounty.php?lId=1">
Montserrado
</a>
<a href="pagecounty.php?lId=2">
Lofa
</a>
<a href="pagecounty.php?lId=3">
Grand Bassa
</a>
<a href="pagecounty.php?lId=4">
Gbarpolu
</a>
<a href="pagecounty.php?lId=5">
Nimba
</a>
<a href="pagecounty.php?lId=6">
Maryland
</a>
<a href="pagecounty.php?lId=7">
Grand Kru
</a>
<a href="pagecounty.php?lId=8">
Bong
</a>
<a href="pagecounty.php?lId=9">
Bomi
</a>
<a href="pagecounty.php?lId=10">
Grand Gedeh
</a>
<a href="pagecounty.php?lId=11">
Margibi
</a>
<a href="pagecounty.php?lId=12">
Grand Cape Mount
</a>
<a href="pagecounty.php?lId=13">
River-Gee
</a>
<a href="pagecounty.php?lId=14">
Rivercess
</a>
<a href="pagecounty.php?lId=15">
Sinoe
</a>
</div>
<!--3rd drop down menu -->
<div class="dropmenudiv" id="dropmenu5" style="width: 200px;">
<a href="pagebrenches.php?bId=1">
Presidency
</a>
<a href="pagebrenches.php?bId=2">
Judiciary
</a>
<a href="pagebrenches.php?bId=3">
Legislative
</a>
<a href="pagebrenches.php?bId=6">
Others
</a>
</div>
<!--3rd drop down menu -->
<div class="dropmenudiv" id="dropmenu6" style="width: 200px;">
<a href="newsall.php">
News Archieves
</a>
</div>
<!--menu ends here -->
</div>
</ul>
</div>
<script type="text/javascript">
cssdropdown.startchrome("chromemenu")
</script>
</td>
</tr>
</table>
</td>
</tr>
</table>
<table align="center" border="0" cellpadding="0" cellspacing="0" class="whitebg" width="950">
<tr>
<td width="931">
<table cellspacing="0" height="183" width="100%">
<tr>
<td valign="top" width="656">
<table cellspacing="0" width="100%">
<tr>
<td valign="top" width="2%">
</td>
<td valign="top" width="98%">
<table cellspacing="0" width="100%">
<tr>
<td class="topnametitle" valign="top">
<br/>
<span class="projects">
Date Uploaded:
</span>
</td>
</tr>
<tr>
<td valign="top">
<img height="271" src="" width="637"/>
</td>
</tr>
<tr>
<td>
</td>
</tr>
</table>
</td>
</tr>
<tr>
<td colspan="2">
</td>
</tr>
</table>
</td>
<td valign="top" width="280">
<table cellspacing="0" width="100%">
<tr>
<td class="pupplecolor">
latest headlines
</td>
</tr>
<tr>
<td valign="top">
<table cellspacing="0" width="100%">
<tr>
<td bgcolor="#EFEFEF" class="latestnew" valign="top">
<a href="pagesnews.php?nid=4638">
<img class="imgnews" height="57" src="images/senatortaylor.jpg" width="92"/>
</a>
<span class="style1">
Taylor Wants Liberians Keep Focus......
</span>
<br/>
<span class="line">
Bong County Senator Jewel Taylor has emphasized the need for Liberians to recharge their energies and contribute positively to efforts by govern
</span>
<a href="pagesnews.php?nid=4638">
...more
</a>
</td>
</tr>
<tr>
<td valign="top">
<div class="redline">
</div>
<img height="8" src="images/placeholder.png" width="225"/>
</td>
</tr>
</table>
<table cellspacing="0" width="100%">
<tr>
<td bgcolor="#EFEFEF" class="latestnew" valign="top">
<a href="pagesnews.php?nid=4637">
<img class="imgnews" height="57" src="images/deaothellobomi.jpg" width="92"/>
</a>
<span class="style1">
DEA To Get US$25k Office In Bomi Soon
</span>
<br/>
<span class="line">
The Acting Commander of the Drug Enforcement Agency (DEA) in Bomi County, Othello Watson, has disclosed the construction of a US$25,000 DEA Offi
</span>
<a href="pagesnews.php?nid=4637">
...more
</a>
</td>
</tr>
<tr>
<td valign="top">
<div class="redline">
</div>
<img height="8" src="images/placeholder.png" width="225"/>
</td>
</tr>
</table>
<table cellspacing="0" width="100%">
<tr>
<td bgcolor="#EFEFEF" class="latestnew" valign="top">
<a href="pagesnews.php?nid=4636">
<img class="imgnews" height="57" src="images/bomiflag.jpg" width="92"/>
</a>
<span class="style1">
Dewey High Yearns For Decent Latrines, Safe Drinking Water
</span>
<br/>
<span class="line">
The Principal of the C. H. Dewey Central High School in Tubmanburg, Bomi County, Mr. Victor Freeman, has said the deplorable condition of latrine
</span>
<a href="pagesnews.php?nid=4636">
...more
</a>
</td>
</tr>
<tr>
<td valign="top">
<div class="redline">
</div>
<img height="8" src="images/placeholder.png" width="225"/>
</td>
</tr>
</table>
<table cellspacing="0" width="100%">
<tr>
<td bgcolor="#EFEFEF" class="latestnew" valign="top">
<a href="pagesnews.php?nid=4635">
<img class="imgnews" height="57" src="images/nimbaflag.jpg" width="92"/>
</a>
<span class="style1">
Nimba Farmers Urged To Invest In Cash Crops, Not Grow Marijuana
</span>
<br/>
<span class="line">
The Commander of the Drug Enforcement Agency (DEA) Nimba Office, Col. Julius Kanubah, has urged Liberians to desist from growing marijuana and in
</span>
<a href="pagesnews.php?nid=4635">
...more
</a>
</td>
</tr>
<tr>
<td valign="top">
<div class="redline">
</div>
<img height="8" src="images/placeholder.png" width="225"/>
</td>
</tr>
</table>
<table cellspacing="0" width="100%">
<tr>
<td bgcolor="#EFEFEF" class="latestnew" valign="top">
<a href="pagesnews.php?nid=4634">
<img class="imgnews" height="57" src="images/capemountflag.jpg" width="92"/>
</a>
<span class="style1">
Cape Mount Citizens Want Madina Contract Revisited
</span>
<br/>
<span class="line">
Citizens of Grand Cape Mount County have appealed to the county leadership and Legislative Caucus to revisit the concession agreement signed betw
</span>
<a href="pagesnews.php?nid=4634">
...more
</a>
</td>
</tr>
<tr>
<td valign="top">
<div class="redline">
</div>
<img height="8" src="images/placeholder.png" width="225"/>
</td>
</tr>
</table>
<table cellspacing="0" width="100%">
<tr>
<td bgcolor="#EFEFEF" class="latestnew" valign="top">
<a href="pagesnews.php?nid=4633">
<img class="imgnews" height="57" src="images/montserradoflag.jpg" width="92"/>
</a>
<span class="style1">
Group Outlines Achievements of Tolbert Administration
</span>
<br/>
<span class="line">
A group calling itself "The April 22 Memorial Group has praised the development initiatives of Liberia's 19th President, the late William R.Tolb
</span>
<a href="pagesnews.php?nid=4633">
...more
</a>
</td>
</tr>
<tr>
<td valign="top">
<div class="redline">
</div>
<img height="8" src="images/placeholder.png" width="225"/>
</td>
</tr>
</table>
</td>
</tr>
</table>
</td>
</tr>
</table>
</td>
<td width="19">
</td>
</tr>
<tr>
<td colspan="3">
<div class="footer">
</div>
</td>
</tr>
<tr>
<td colspan="3">
<table border="0" cellpadding="0" cellspacing="0" width="100%">
<tr>
<td align="center">
<div id="footer">
</div>
</td>
</tr>
<tr>
<td align="center">
All rights reserved © 2013 - 2014
<br/>
Liberia News Agency
</td>
</tr>
</table>
</td>
</tr>
</table>
<!--Below script is needed for the rotating image on the home page-->
<script type="text/javascript">
featuredcontentslider.init({
id: "slider4", //id of main slider DIV
contentsource: ["inline", ""], //Valid values: ["inline", ""] or ["ajax", "path_to_file"]
toc: "markup" ["test recond"], //Valid values: "#increment", "markup", ["label1", "label2", etc]
nextprev: ["", "Next"], //labels for "prev" and "next" links. Set to "" to hide.
revealtype: "mouseover", //Behavior of pagination links to reveal the slides: "click" or "mouseover"
enablefade: [true, 0.1], //[true/false, fadedegree]
autorotate: [true, 3000], //[true/false, pausetime]
onChange: function(previndex, curindex){ //event handler fired whenever script changes slide
//previndex holds index of last slide viewed b4 current (1=1st slide, 2nd=2nd etc)
//curindex holds index of currently shown slide (1=1st slide, 2nd=2nd etc)
}
})
</script>
</body>
</html>
In [39]:
# Show every text node of the document joined into one string. This also
# includes the contents of <script> elements, as the output below shows.
soup.text
Out[39]:
u'\n\n\n\n\n\n\n\n\n\n\n\n\n/***********************************************\n* This site is developed by Me\n***********************************************/\n\n\n | Liberia News Agency- Republic of Liberia\n\n\n\n\n\n\n\n\n\n\n\n\n\n - Search Results\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\n\n\n\n\n\n\n\n\n\n\n\n\nHome\n About LINA\nManagement\nNews by Sector\nCounty News\nBranches of Government\nArchives\nContact Us \n\n\n\n\nHistory\nGoal & Mission Statement\n\n\n\nDirector General\nEditorial Staff\n\n\n\nHealth\nPolitics\nBusiness & Finance\nArts & Culture\nEconomics\nScience & Technology\nEducation\nCrime\nSports\nAgriculture\nWorld\nMedia\nSociety/Obituary \nOthers\n\n\n\nMontserrado\nLofa\nGrand Bassa\nGbarpolu\nNimba\nMaryland\nGrand Kru\nBong\nBomi\nGrand Gedeh\nMargibi\nGrand Cape Mount\nRiver-Gee\nRivercess\nSinoe\n\n\n\nPresidency \nJudiciary \nLegislative \nOthers\n\n\n\nNews Archieves\n\n\n\n \n\n\ncssdropdown.startchrome("chromemenu")\n\n\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\n\n\nDate Uploaded: \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nlatest headlines \n\n\n\n\n\nTaylor Wants Liberians Keep Focus......\n Bong County Senator Jewel Taylor has emphasized the need for Liberians to recharge their energies and contribute positively to efforts by govern ...more\n\n\n\n\n\n\n\nDEA To Get US$25k Office In Bomi Soon\nThe Acting Commander of the Drug Enforcement Agency (DEA) in Bomi County, Othello Watson, has disclosed the construction of a US$25,000 DEA Offi ...more\n\n\n\n\n\n\n\nDewey High Yearns For Decent Latrines, Safe Drinking Water\nThe Principal of the C. H. Dewey Central High School in Tubmanburg, Bomi County, Mr. Victor Freeman, has said the deplorable condition of latrine ...more\n\n\n\n\n\n\n\nNimba Farmers Urged To Invest In Cash Crops, Not Grow Marijuana\nThe Commander of the Drug Enforcement Agency (DEA) Nimba Office, Col. 
Julius Kanubah, has urged Liberians to desist from growing marijuana and in ...more\n\n\n\n\n\n\n\nCape Mount Citizens Want \x91Madina Contract\x92 Revisited\nCitizens of Grand Cape Mount County have appealed to the county leadership and Legislative Caucus to revisit the concession agreement signed betw ...more\n\n\n\n\n\n\n\nGroup Outlines Achievements of Tolbert Administration\nA group calling itself "The April 22 Memorial Group\x94 has praised the development initiatives of Liberia\'s 19th President, the late William R.Tolb ...more\n\n\n\n\n\n\n\n\n\n\n\n\xa0\n\n\n\n \n\n\n\n\n\n\n\n\n\n\n\n \n All rights reserved \xa9 2013 - 2014\n Liberia News Agency \n\n \n\n\n\n\n\nfeaturedcontentslider.init({\nid: "slider4", //id of main slider DIV\ncontentsource: ["inline", ""], //Valid values: ["inline", ""] or ["ajax", "path_to_file"]\ntoc: "markup" ["test recond"], //Valid values: "#increment", "markup", ["label1", "label2", etc]\nnextprev: ["", "Next"], //labels for "prev" and "next" links. Set to "" to hide.\nrevealtype: "mouseover", //Behavior of pagination links to reveal the slides: "click" or "mouseover"\nenablefade: [true, 0.1], //[true/false, fadedegree]\nautorotate: [true, 3000], //[true/false, pausetime]\nonChange: function(previndex, curindex){ //event handler fired whenever script changes slide\n//previndex holds index of last slide viewed b4 current (1=1st slide, 2nd=2nd etc)\n//curindex holds index of currently shown slide (1=1st slide, 2nd=2nd etc)\n}\n})\n\n \n\n\n'
In [ ]:
Content source: ndanielsen/liberia-media
Similar notebooks: