In [1]:
import requests
from bs4 import BeautifulSoup
In [2]:
def geturl():
return put
In [3]:
urlReq = requests.get('http://tlcxpress.ac.nz/')
In [4]:
def dtlc(webaddr):
return requests.get(webaddr)
def
File "<ipython-input-4-7b9697606587>", line 4
def
^
SyntaxError: invalid syntax
In [5]:
dtlc('http://tlcxpress.ac.nz/').text
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-5-0e61e614d531> in <module>()
----> 1 dtlc('http://tlcxpress.ac.nz/').text
NameError: name 'dtlc' is not defined
In [6]:
print urlReq.text
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="description" content="TLC Xpress is The Learning Connexion's free online newsletter and notices hub." />
<title>A Trip to Patea | TLC Xpress | The Learning Connexion School of Art and Creativity Blog</title>
<link rel="profile" href="http://gmpg.org/xfn/11" />
<link rel="stylesheet" type="text/css" media="all" href="http://tlcxpress.ac.nz/wp-content/themes/xpress/style.css" />
<link rel="pingback" href="http://tlcxpress.ac.nz/xmlrpc.php" />
<link rel='stylesheet' id='gigya_css-css' href='http://tlcxpress.ac.nz/wp-content/plugins/gigya-socialize-for-wordpress/gigya.css?ver=3.9.2' type='text/css' media='all' />
<link rel='stylesheet' id='simple_mp3_style-css' href='http://tlcxpress.ac.nz/wp-content/plugins/mp3-player/style.css?ver=3.9.2' type='text/css' media='all' />
<link rel='stylesheet' id='mr_social_sharing-css' href='http://tlcxpress.ac.nz/wp-content/plugins/social-sharing-toolkit/style_2.1.2.css?ver=3.9.2' type='text/css' media='all' />
<link rel='stylesheet' id='mr_social_sharing_custom-css' href='http://tlcxpress.ac.nz/wp-content/uploads/social_sharing_custom.css?ver=3.9.2' type='text/css' media='all' />
<script type="text/javascript">
//<![CDATA[
var site_url= 'http://tlcxpress.ac.nz';
//]]>
</script><script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/jquery.js?ver=1.11.0'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/jquery-migrate.min.js?ver=1.2.1'></script>
<script type='text/javascript'>
/* <![CDATA[ */
var gigyaParams = {"ajaxurl":"http:\/\/tlcxpress.ac.nz\/wp-admin\/admin-ajax.php","logoutUrl":"http:\/\/tlcxpress.ac.nz\/wp-login.php?action=logout&_wpnonce=adbfc4af94","connectWithoutLoginBehavior":null,"jsonExampleURL":"http:\/\/tlcxpress.ac.nz\/wp-content\/plugins\/gigya-socialize-for-wordpress\/admin\/forms\/json\/advance_example.json","enabledProviders":null,"lang":null};
/* ]]> */
</script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-content/plugins/gigya-socialize-for-wordpress/gigya.js?ver=3.9.2'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-content/plugins/mp3-player/javascript.js?ver=3.9.2'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-content/plugins/mp3-player/js.js?ver=3.9.2'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-content/plugins/social-sharing-toolkit/includes/buttons/button.facebook.js?ver=3.9.2'></script>
<link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://tlcxpress.ac.nz/xmlrpc.php?rsd" />
<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tlcxpress.ac.nz/wp-includes/wlwmanifest.xml" />
<meta name="generator" content="WordPress 3.9.2" />
<meta name='description' content='On Thursday, Kate and I packed up The Learning Connexion van with our cameras and lights and headed up to the third largest South Taranaki town, Patea, to make a resource video about print-maker and South Taranaki District Council Arts Co-ordinator, Michaela Stoneman.' />
<meta name='keywords' content='Patea, Michaela Stoneman' />
<!-- Added by Richard Matthews on 26.07.2012 to allow for google analytics -->
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-3260176-4']);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
</head>
<body>
<!-- Page -->
<div id="page">
<!-- Header -->
<div id="header">
<!-- Navbar -->
<div id="navbar">
<ul>
<li><a href="/"><img src="/wp-content/themes/xpress/img/home.png" alt="TLC Xpress Home" /></a></li>
<li><a href="/category/out-and-about/"><img src="/wp-content/themes/xpress/img/out-and-about.png" alt="Out & About - What's going on in your creative community?" /></a></li>
<li><a href="/category/features/"><img src="/wp-content/themes/xpress/img/features.png" alt="Features - Coverage of what's happening" /></a></li>
<li><a href="/category/creativity-hub/"><img src="/wp-content/themes/xpress/img/miracle-department.png" alt="Creativity Hub - TLC's director Jonathan Milne imparts creative knowledge" /></a></li>
<li><a href="/category/tlc-people/"><img src="/wp-content/themes/xpress/img/tlc-people.png" alt="TLC People - TLC students & graduates - what are they getting up to" /></a></li>
<li><a href="/category/tips-and-tricks/"><img src="/wp-content/themes/xpress/img/tips-and-tricks.png" alt="Tips & Tricks - Art techniques & projects to fire your creative dynamo" /></a></li>
</ul>
</div>
</div>
<!-- Body -->
<div id="body">
<!-- Content -->
<div id="content">
<!-- Miracle Department -->
<div id="miracledept">
<img src="/wp-content/themes/xpress/img/miracledept-text.png" alt="Jonathan Milne's Miracle Department" />
<h2><a href="http://tlcxpress.ac.nz/2014/07/unobtainium-and-creativity/" title="Permalink to Unobtainium and Creativity" rel="bookmark"><b>Unobtainium and Creativity</b></a></h2>
<p>Throughout our history we humans have been easily lured by dangerous bling and the myth of living 'happily ever after'. The drama kicks in when we suspend moral and rational judgement for the sake of treasure. The movie 'Avatar' is the same parable told with beguiling special effects and the quest for a rare and valuable mineral called 'unobtainium'.
<a href="http://tlcxpress.ac.nz/2014/07/unobtainium-and-creativity/">Read more</a></p>
</div>
<div class="hr"><hr /></div>
<p><img src="/wp-content/themes/xpress/img/feature-articles.png" alt="Feature Articles" /></p>
<h2><a href="http://tlcxpress.ac.nz/2014/07/a-trip-to-patea/" title="Permalink to A Trip to Patea" rel="bookmark">A Trip to Patea</a></h2>
<p class="subheading">Wednesday, July 23rd 2014</p>
<img src="http://tlcxpress.ac.nz/wp-content/uploads/2014/07/Michaela-Stoneman.jpg" alt="" width="600" height="360" /> <p>On Thursday. Kate and I packed up The Learning Connexion van with our cameras and lights and headed up to the third largest South Taranaki town, Patea, to make a resource video about print-maker and South Taranaki District Council Arts Co-ordinator, Michaela Stoneman.</p>
<p><a href="http://tlcxpress.ac.nz/2014/07/a-trip-to-patea/">Read more</a></p>
<div class="hr"><hr /></div> <h2><a href="http://tlcxpress.ac.nz/2014/07/graduate-profile-jojo-jowett/" title="Permalink to Graduate Profile – Jojo Jowett" rel="bookmark">Graduate Profile – Jojo Jowett</a></h2>
<p class="subheading"></p>
<img src="http://tlcxpress.ac.nz/wp-content/uploads/2014/07/save-me-you-listen-4_Xpress_cover.jpg" alt="" width="600" height="360" /> <p>Jojo Jowett trained and worked as a naval engineer in the Philippines. When Jojo eventually emigrated to New Zealand, doing an art programme became a possibility for her. She jumped at the opportunity and hasn't looked back.
</p>
<p><a href="http://tlcxpress.ac.nz/2014/07/graduate-profile-jojo-jowett/">Read more</a></p>
<div class="hr"><hr /></div> <h2><a href="http://tlcxpress.ac.nz/2014/07/school-holidays-at-the-learning-connexion/" title="Permalink to School Holidays at the Learning Connexion" rel="bookmark">School Holidays at the Learning Connexion</a></h2>
<p class="subheading">Monday, July 21st 2014</p>
<img src="http://tlcxpress.ac.nz/wp-content/uploads/2014/07/KIDSHOLIDAYPROG_XPRESS.jpg" alt="" width="600" height="360" /> <p>When we popped in on the school holiday art programme at The Learning Connexion recently, we found twenty two enthusiastic and creatively charged children, ranging in age from 5-14 years.</p>
<p><a href="http://tlcxpress.ac.nz/2014/07/school-holidays-at-the-learning-connexion/">Read more</a></p>
<div class="hr"><hr /></div> <h2><a href="http://tlcxpress.ac.nz/2014/06/the-learning-connexion-student-exhibition/" title="Permalink to The Learning Connexion Student Exhibition" rel="bookmark">The Learning Connexion Student Exhibition</a></h2>
<p class="subheading">Friday, June 27th 2014</p>
<img src="http://tlcxpress.ac.nz/wp-content/uploads/2014/06/Jamie-Ngan.jpg" alt="" width="600" height="360" /> <p>See a selection of images from the Learning Connexion Term 2, 2014 student exhibition.</p>
<p><a href="http://tlcxpress.ac.nz/2014/06/the-learning-connexion-student-exhibition/">Read more</a></p>
</div>
<!-- Sidebar -->
<div id="sidebar">
<!-- Search -->
<img class="heading" src="/wp-content/themes/xpress/img/headings/search.png" alt="Search" />
<form action="/" method="get">
<label>Keywords:</label><br />
<input type="text" name="s" />
<input type="submit" value="Search" />
</form>
<p>Not sure where to begin?<br /><a href="/random/">Read a random article</a></p>
<p>TLC Xpress is The Learning Connexion's free online newsletter and notices hub.</p>
<!-- Subscribe -->
<img class="heading" src="/wp-content/themes/xpress/img/headings/subscribe.png" alt="Subscribe" />
<form action="http://sendout.tlcstudents.ac.nz/subscribe/" method="post">
<label>Email:</label><br />
<input type="text" name="email" />
<input type="submit" value="Submit" />
</form>
<!-- Notices -->
<a href="http://tlcxpress.ac.nz/tlc-website/notices/"><img class="heading" src="/wp-content/themes/xpress/img/headings/notices.png" alt="Notices" /></a>
<p><a href="http://tlcxpress.ac.nz/2014/07/new-block-week-classes-8th-12th-september/" class="notice">New Block Week Classes (8th – 12th September)</a></p><p>See all our new Block Week Classes on offer.</p><div class="hr"><hr /></div><p><a href="http://tlcxpress.ac.nz/2014/05/exhibiting-at-the-learning-connexion/" class="notice">Exhibiting at The Learning Connexion</a></p><p>Originally from the small town of Opotiki, Michael came to study with The Learning Connexion straight from college.</p><div class="hr"><hr /></div><p><a href="http://tlcxpress.ac.nz/2014/05/the-learning-connexion-student-art-exhibition/" class="notice">The Learning Connexion Student Art Exhibition</a></p><p>Each term The Learning Connexion hosts a large student-run exhibition at our campus nestled in the bush on the edge of the Hutt Valley in Taita.
Us...</p><div class="hr"><hr /></div><p><a href="http://tlcxpress.ac.nz/2014/05/the-parkin-drawing-prize/" class="notice">The Parkin Drawing Prize</a></p><p>The Parkin Drawing Prize is valued at $20,000 and is Aotearoa New Zealand's premier award for drawing. It promotes drawing in all its forms - as disco...</p><div class="hr"><hr /></div><p><a href="http://tlcxpress.ac.nz/2014/05/introducing-dane-divine/" class="notice">Introducing: Dane Divine</a></p><p>We are excited to introduce a new staff member who has taken on the newly formed role of Learning Support at the Learning Connexion. Dane is here to help students to create catch-up plans if they are falling behind with their work, to help with literacy, numeracy and organising skills as well as working towards students' employment goals and generally supporting their learning needs. Dane outlines her background and interests in creativity and education.</p> <script type="text/javascript">
// <![CDATA[
var disqus_shortname = 'tlcxpress';
(function () {
var nodes = document.getElementsByTagName('span');
for (var i = 0, url; i < nodes.length; i++) {
if (nodes[i].className.indexOf('dsq-postid') != -1) {
nodes[i].parentNode.setAttribute('data-disqus-identifier', nodes[i].getAttribute('rel'));
url = nodes[i].parentNode.href.split('#', 1);
if (url.length == 1) { url = url[0]; }
else { url = url[1]; }
nodes[i].parentNode.href = url + '#disqus_thread';
}
}
var s = document.createElement('script'); s.async = true;
s.type = 'text/javascript';
s.src = '//' + disqus_shortname + '.disqus.com/count.js';
(document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0]).appendChild(s);
}());
//]]>
</script>
<div id="moreNotices">
<br />
<a href="http://tlcxpress.ac.nz/tlc-website/notices/">More notices?</a>
</div>
<!-- Recent -->
<img class="heading" src="/wp-content/themes/xpress/img/headings/recent.png" alt="Recent" />
<ul>
<li><a href="http://tlcxpress.ac.nz/2014/07/a-trip-to-patea/">A Trip to Patea</a></li><li><a href="http://tlcxpress.ac.nz/2014/07/graduate-profile-jojo-jowett/">Graduate Profile – Jojo Jowett</a></li><li><a href="http://tlcxpress.ac.nz/2014/07/new-block-week-classes-8th-12th-september/">New Block Week Classes (8th – 12th September)</a></li><li><a href="http://tlcxpress.ac.nz/2014/07/school-holidays-at-the-learning-connexion/">School Holidays at the Learning Connexion</a></li><li><a href="http://tlcxpress.ac.nz/2014/07/unobtainium-and-creativity/">Unobtainium and Creativity</a></li></ul>
</div>
</div>
<!-- Original Footer -->
<!-- <div id="footer">
<iframe src="http://www.facebook.com/plugins/like.php?href=http%3A%2F%2Ftlcxpress.ac.nz%2F&layout=standard&show_faces=false&width=450&action=like&font=tahoma&colorscheme=light&height=35" scrolling="no" frameborder="0" style="border:none; overflow:hidden; width:350px; height:35px; float: left;"></iframe>
<p>
<a href="http://tlc.ac.nz/">TLC Home</a> | <a href="http://tlcstudents.ac.nz/">TLC Student Home</a> | © 2014 The Learning Connexion
</p>
</div>
-->
<!-- New Footer 26/04/2012-->
<div id="footer">
<div class="images">
<!-- /wp-content/themes/xpress/img/home.png -->
<a href="http://tlc.ac.nz"><img src="/wp-content/themes/xpress/img/footer/tlc.png" alt="TLC.AC.NZ" /></a>
<a href="http://tlcstudents.ac.nz/"><img src="/wp-content/themes/xpress/img/footer/student-site.png" alt="TLC Student Site" /></a>
<a href="http://www.flickr.com/thelearningconnexion/"><img src="/wp-content/themes/xpress/img/footer/flickr.png" alt="TLC's Flickr" /></a>
<a href="http://www.facebook.com/thelearningconnexion"><img src="/wp-content/themes/xpress/img/footer/facebook.png" alt="TLC's Facebook" /></a>
<a href="http://www.youtube.com/user/learningconnexion"><img src="/wp-content/themes/xpress/img/footer/youtube.png" alt="TLC's YouTube" /></a>
<a href="http://twitter.com/#!/tlcxpress"><img src="/wp-content/themes/xpress/img/footer/twitter.png" alt="TLC's Twitter" /></a>
</div>
<p>
<b>The Learning Connexion</b> - 182 Eastern Hutt Rd, Taita, Lower Hutt 5019
<br />
0800 ART POWER (278 769)
info@tlc.ac.nz
www.tlc.ac.nz
</p>
</div>
</div>
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-3260176-3']);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/ui/jquery.ui.core.min.js?ver=1.10.4'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/ui/jquery.ui.widget.min.js?ver=1.10.4'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/ui/jquery.ui.mouse.min.js?ver=1.10.4'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/ui/jquery.ui.resizable.min.js?ver=1.10.4'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/ui/jquery.ui.draggable.min.js?ver=1.10.4'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/ui/jquery.ui.button.min.js?ver=1.10.4'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/ui/jquery.ui.position.min.js?ver=1.10.4'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-includes/js/jquery/ui/jquery.ui.dialog.min.js?ver=1.10.4'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-content/plugins/social-sharing-toolkit/script_2.1.2.js?ver=3.9.2'></script>
<script type='text/javascript' src='//connect.facebook.net/en_US/all.js?ver=3.9.2#xfbml=1&appId=188707654478'></script>
<script type='text/javascript' src='http://tlcxpress.ac.nz/wp-content/plugins/social-sharing-toolkit/includes/buttons/button.googleplus.js?ver=3.9.2'></script>
<script type='text/javascript' src='https://platform.twitter.com/widgets.js?ver=3.9.2'></script>
</body>
</html>
In [7]:
f = open("myfile.txt", "a")
In [8]:
for data in urlReq:
#print data
endAte = BeautifulSoup(data)
#print >> f, '>', endAte
#laAte = endAte.get_text()
#print laAte
liAte = endAte.find_all('a')
print liAte
cntAte = len(liAte)
print cntAte
leAte = endAte.p
#print leAte
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[<a href="/"></a>]
1
[<a href="/category/out-and-about/"></a>]
1
[]
0
[]
0
[<a href="/category/creativity-hub/"></a>]
1
[<a href="/category/tlc-people/"></a>]
1
[]
0
[<a href="/category/tips-and-tricks/"></a>]
1
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[<a href="http://tlcxpress.ac.nz/2014/07/unobtainium-and-creativity/">Read more</a>]
1
[]
0
[<a href="http://tlcxpress.ac.nz/2014/07/a-trip-to-patea/" rel="bookmark" title="Permalink to A Trip to Patea">A Trip to Pat</a>]
1
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[<a href="http://tlcxpress.ac.nz/2014/07/graduate-profile-jojo-jowett/">Read more</a>]
1
[]
0
[]
0
[]
0
[]
0
[]
0
[<a href="http://tlcxpress.ac.nz/2014/07/school-holidays-at-the-learning-connexion/">Read more</a>]
1
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[<a href="/random/">Read a random article</a>]
1
[]
0
[]
0
[]
0
[<a href="http://tlcxpress.ac.nz/tlc-website/notices/"></a>]
1
[]
0
[]
0
[<a class="notice" href="http://tlcxpress.ac.nz/2014/05/exhibiting-at-the-learning-connexion/">Exhibiting at The L</a>]
1
[]
0
[]
0
[]
0
[]
0
[<a class="notice" href="http://tlcxpress.ac.nz/2014/05/the-parkin-drawing-prize/">The Parkin Drawing Prize</a>]
1
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[<a href="http://tlcxpress.ac.nz/tlc-website/notices/">More notices?</a>]
1
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[<a href="http://tlc.ac.nz/">TLC Home</a>]
1
[<a href="http://tlcstudents.ac.nz/">TLC Student Home</a>]
1
[]
0
[]
0
[]
0
[]
0
[]
0
[<a href="http://www.youtube.com/user/learningconnexion"></a>]
1
[<a href="http://twitter.com/#!/tlcxpress"></a>]
1
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
[]
0
That's alot of [] that are totally not needed.
In [33]:
%%bash
wget \
--recursive \
--no-clobber \
--page-requisites \
--html-extension \
--convert-links \
--restrict-file-names=windows \
--domains http://www.tlcstudents.ac.nz \
--no-parent \
http://www.tlcstudents.ac.nz/home
Both --no-clobber and --convert-links were specified,only --convert-links will be used.
--2014-08-15 07:30:42-- http://www.tlcstudents.ac.nz/home
Resolving www.tlcstudents.ac.nz (www.tlcstudents.ac.nz)... 203.167.233.6
Connecting to www.tlcstudents.ac.nz (www.tlcstudents.ac.nz)|203.167.233.6|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5386 (5.3K) [text/html]
Saving to: `www.tlcstudents.ac.nz/home.html'
0K ..... 100% 24.4K=0.2s
2014-08-15 07:30:42 (24.4 KB/s) - `www.tlcstudents.ac.nz/home.html' saved [5386/5386]
FINISHED --2014-08-15 07:30:42--
Total wall clock time: 0.7s
Downloaded: 1 files, 5.3K in 0.2s (24.4 KB/s)
Converting www.tlcstudents.ac.nz/home.html... 2-21
Converted 1 files in 0.02 seconds.
In [10]:
openSlc = open('home.html','r')
---------------------------------------------------------------------------
IOError Traceback (most recent call last)
<ipython-input-10-3076399d2bdb> in <module>()
----> 1 openSlc = open('home.html','r')
IOError: [Errno 2] No such file or directory: 'home.html'
In [11]:
opz = open('result','w')
for slc in openSlc:
print slc[0]
opz.write(slc)
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-11-a5a5a36693f1> in <module>()
1 opz = open('result','w')
----> 2 for slc in openSlc:
3 print slc[0]
4
5 opz.write(slc)
NameError: name 'openSlc' is not defined
In [12]:
endAte = BeautifulSoup(slc)
print endAte
laAte = endAte.get_text()
print laAte
liAte = endAte.find_all('a')
print liAte
cntAte = len(liAte)
#print cntAte
leAte = endAte.p
#print leAte
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-12-54fdcf37473b> in <module>()
----> 1 endAte = BeautifulSoup(slc)
2 print endAte
3
4 laAte = endAte.get_text()
5 print laAte
NameError: name 'slc' is not defined
In [14]:
filOpn = open('myfile.txt','r')
blehDat = filOpn.read()
In [15]:
print blehDat
In [16]:
blehDat = BeautifulSoup(blehDat)
In [17]:
classz = blehDat.find_all(["a"])
In [18]:
print classz.sort
<built-in method sort of ResultSet object at 0x2733d08>
In [19]:
classString = str(classz)
In [20]:
print classz[0]
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-20-2703ccb25781> in <module>()
----> 1 print classz[0]
IndexError: list index out of range
In [21]:
tlcSite = open('tlchome.html', 'w')
In [22]:
tlcSite.write(classString)
In [23]:
tlcSite.close()
In [24]:
openClass = open('tlchome.html', 'r')
In [24]:
In [25]:
openClass.readlines()
Out[25]:
['[]']
In [26]:
openClass.close()
In [27]:
last_link = blehDat.find("a")
In [28]:
print last_link
None
In [29]:
blehDat.title
In [30]:
blehDat.title.name
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-30-2d5093e0e9c9> in <module>()
----> 1 blehDat.title.name
AttributeError: 'NoneType' object has no attribute 'name'
In [31]:
blehDat.title.string
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-31-055eabcfcbeb> in <module>()
----> 1 blehDat.title.string
AttributeError: 'NoneType' object has no attribute 'string'
In [32]:
blehDat.text
Out[32]:
u''
In [41]:
In [41]:
In [41]:
In [ ]:
Content source: wcmckee/wcmckee-notebook
Similar notebooks: