In [1]:
import socket
# Create an IPv4 stream (TCP) socket and connect it to the web server on port 80.
mysock = socket.socket(family=socket.AF_INET, type=socket.SOCK_STREAM)
server_address = ('www.vahidmirjalili.com', 80)
mysock.connect(server_address)

# Bare last expression: the notebook shows the socket's repr as the cell output.
mysock
Out[1]:
In [ ]:
# Send a minimal HTTP/1.0 GET request over the already-connected `mysock`
# and print everything the server sends back.
#
# NOTE(review): the original URL read 'http://www/py4inf.com/...' (a '/' where
# the '.' of the host name belongs); that is corrected here. The file name
# 'romo.txt' is left untouched, and `mysock` was connected to a different host
# than the one named in this URL -- confirm both are intended.
request = 'GET http://www.py4inf.com/code/romo.txt HTTP/1.0\r\n\r\n'
try:
    # Sockets carry bytes in Python 3, so the str request must be encoded;
    # sendall() keeps writing until the whole request has been handed off.
    mysock.sendall(request.encode())

    chunks = []
    while True:
        data = mysock.recv(512)
        if len(data) < 1:  # empty read: the peer closed the connection
            break
        chunks.append(data)
finally:
    # Release the socket even if sending or receiving fails part-way.
    mysock.close()

# Decode once, after the loop, so a multi-byte character split across two
# 512-byte reads is not mangled; undecodable bytes are replaced, not fatal.
print(b''.join(chunks).decode(errors='replace'))
In [2]:
import urllib.request
import re
# Print every line of the page that contains 'href=' followed by a
# non-whitespace character.
# The `with` block closes the HTTP response even if decoding or printing fails.
with urllib.request.urlopen('http://vahidmirjalili.com/about.html') as fhand:
    for line in fhand:
        # Each line arrives as bytes; decode it and drop the trailing newline.
        line = line.decode().rstrip()
        # Raw string: '\S' inside a plain literal is an invalid escape sequence
        # and triggers a warning on current Python versions.
        if re.search(r'href=(\S)', line):
            print(line)
In [ ]:
import bs4
In [3]:
import urllib.request
import bs4
# Download the page and parse it with the lxml backend.
html = urllib.request.urlopen('http://vahidmirjalili.com/about.html').read()
soup = bs4.BeautifulSoup(html, 'lxml')

# Print the href of every <a> element (None when the attribute is absent).
tags = soup.find_all('a')
for anchor in tags:
    print(anchor.get('href'))
In [4]:
# Print the first seven links on the search page whose href ends in 'html'.
# After the loop, `reslink` holds the last link that was printed.
# NOTE(review): the flattened export lost its indentation; the counter and the
# break are assumed to sit inside the match branch -- confirm.
link = 'http://detroit.craigslist.org/search/apa'
html = urllib.request.urlopen(link).read()
soup = bs4.BeautifulSoup(html, 'lxml')
tags = soup('a')

reslink = None  # last matching href; stays None when nothing matches
n = 0
for tag in tags:
    href = tag.get('href', None)
    # Anchors without an href give None, and re.search() raises TypeError on
    # a non-string argument, so skip them before matching.
    if href is None or not re.search('html$', href):
        continue
    reslink = href
    print(reslink)
    n += 1
    if n > 6:
        break
In [5]:
# Local import keeps this cell runnable on its own; urljoin lives in urllib.parse.
import urllib.parse

# Fetch the page that `reslink` (left behind by an earlier cell) points to and
# print the src of every <img> on it (None for images without a src).
mainlink = 'http://detroit.craigslist.org'
# urljoin copes with every href form: a root-relative path is resolved against
# the site, an absolute URL is used as-is, and a path without a leading '/'
# still gets a separator. Plain string concatenation only handled the first.
link = urllib.parse.urljoin(mainlink, reslink)
print(link)
html = urllib.request.urlopen(link).read()
soup = bs4.BeautifulSoup(html, 'lxml')
tags = soup('img')
for tag in tags:
    print(tag.get('src', None))
In [16]:
import urllib.request
import bs4
url = 'http://python-data.dr-chuck.net/comments_219720.html'
html = urllib.request.urlopen(url).read()
soup = bs4.BeautifulSoup(html, "html.parser")

# Collect every <span> element on the page.
tags = soup.find_all('span')

# Parse each span's text as an integer, add them up and show the total.
x = sum(int(span.text) for span in tags)
print(x)
In [24]:
import urllib.request
import bs4
import ssl
# Starting from `url`, repeatedly fetch the page, pick one anchor by position
# and continue from its href, printing each link that is followed.
url = 'https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Huda.html'

# ssl.PROTOCOL_TLSv1 is deprecated and pins the connection to TLS 1.0, and a
# bare SSLContext built from it does not verify certificates. The default
# context negotiates the best protocol both sides support and checks the
# server certificate and host name.
scontext = ssl.create_default_context()

HOPS = 7          # number of links to follow in sequence
LINK_INDEX = 17   # zero-based position of the anchor to follow on each page

for _ in range(HOPS):
    html = urllib.request.urlopen(url, context=scontext).read()
    soup = bs4.BeautifulSoup(html, "html.parser")
    # Retrieve all of the anchor tags
    tags = soup('a')
    # The chosen anchor's href becomes the page fetched on the next pass.
    url = tags[LINK_INDEX].get('href', None)
    print(url)
In [ ]: