In [1]:
    
import socket
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
mysock.connect(('www.vahidmirjalili.com', 80))
mysock
    
    Out[1]:
In [ ]:
    
# In Python 3 the request must be sent as bytes, hence .encode()
mysock.send('GET http://www.py4inf.com/code/romeo.txt HTTP/1.0\r\n\r\n'.encode())
while True:
    data = mysock.recv(512)
    if len(data) < 1:
        break
    print(data.decode(), end='')
    
mysock.close()
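
A minimal variant of the cell above, as a sketch: it assumes www.py4inf.com still serves romeo.txt, accumulates the full response as bytes, and then splits the HTTP headers from the body at the first blank line.

In [ ]:
    
import socket

mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
mysock.connect(('www.py4inf.com', 80))
mysock.send('GET http://www.py4inf.com/code/romeo.txt HTTP/1.0\r\n\r\n'.encode())

# Collect every chunk, then split headers from body at the blank line
chunks = []
while True:
    data = mysock.recv(512)
    if len(data) < 1:
        break
    chunks.append(data)
mysock.close()

response = b''.join(chunks)
headers, _, body = response.partition(b'\r\n\r\n')
print(headers.decode())
print(body.decode())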
    
In [2]:
    
import urllib.request
import re
fhand = urllib.request.urlopen('http://vahidmirjalili.com/about.html')
for line in fhand:
    line = line.decode().rstrip()
    if re.search(r'href=(\S)', line):
        print(line)
fhand.close()
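
The loop above only prints the matching lines; here is a sketch that pulls out the href values themselves with re.findall (the quoted-attribute pattern href="..." is an assumption about how the page's links are written):

In [ ]:
    
import urllib.request
import re

fhand = urllib.request.urlopen('http://vahidmirjalili.com/about.html')
html = fhand.read().decode()
fhand.close()

# Capture everything between the quotes of href="..."
for link in re.findall(r'href="([^"]+)"', html):
    print(link)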
    
    
In [ ]:
    
import bs4
    
In [3]:
    
import urllib.request
import bs4
html = urllib.request.urlopen('http://vahidmirjalili.com/about.html').read()
soup = bs4.BeautifulSoup(html, 'lxml')
tags = soup('a')
for tag in tags:
    print(tag.get('href', None))
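
Some of the hrefs printed above may be relative paths; urllib.parse.urljoin resolves them against the page URL (a sketch; whether any links on this page are actually relative is an assumption):

In [ ]:
    
import urllib.request
import bs4
from urllib.parse import urljoin

base = 'http://vahidmirjalili.com/about.html'
html = urllib.request.urlopen(base).read()
soup = bs4.BeautifulSoup(html, 'lxml')
for tag in soup('a'):
    href = tag.get('href', None)
    if href is not None:
        # urljoin leaves absolute URLs alone and resolves relative paths
        print(urljoin(base, href))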
    
    
In [4]:
    
link = 'http://detroit.craigslist.org/search/apa'
html = urllib.request.urlopen(link).read()
soup = bs4.BeautifulSoup(html, 'lxml')
tags = soup('a')
n = 0
for tag in tags:
    reslink = tag.get('href', None)
    # Skip anchors without an href; keep only links ending in html
    if reslink and re.search('html$', reslink):
        print(reslink)
        n += 1
    if n > 6:
        break
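
The same filter can be collected into a list instead of printed as it goes (a sketch that reuses tags and re from the cells above; the slice to the first seven entries mirrors the n > 6 cutoff):

In [ ]:
    
# Same filter as above, gathered into a list
listing_links = [tag.get('href') for tag in tags
                 if tag.get('href') and re.search('html$', tag.get('href'))]
print(listing_links[:7])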
    
    
In [5]:
    
mainlink = 'http://detroit.craigslist.org'
link = mainlink + reslink
print(link)
html = urllib.request.urlopen(link).read()
soup = bs4.BeautifulSoup(html, 'lxml')
tags = soup('img')
for tag in tags:
    print(tag.get('src', None))
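
To save one of those images locally, urllib.request.urlretrieve can write a src URL to a file (a sketch that reuses the tags list from the cell above; the output filename and the assumption that at least one src is a full http URL are mine):

In [ ]:
    
# Download the first image whose src looks like a full URL
for tag in tags:
    src = tag.get('src', None)
    if src and src.startswith('http'):
        fname, headers = urllib.request.urlretrieve(src, 'listing_image.jpg')
        print('saved', fname)
        break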
    
    
In [16]:
    
import urllib.request
import bs4
url = 'http://python-data.dr-chuck.net/comments_219720.html'
html = urllib.request.urlopen(url).read()
soup = bs4.BeautifulSoup(html, "html.parser")
# Retrieve all of the span tags
tags = soup('span')
x = 0
for tag in tags:
    # Look at the parts of a tag
    #print ('TAG:',tag)
    #print ('URL:',tag.get('href', None))
    #print ('Contents:',tag.text)
    #print ('Attrs:',tag.attrs)
    x += int(tag.text)
    
print(x)
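
The same total can be written as a single generator expression, with a guard that skips any span whose text is not a plain number (whether such spans occur on this page is an assumption):

In [ ]:
    
# Equivalent sum over the span tags, reusing soup from the cell above
total = sum(int(tag.text) for tag in soup('span') if tag.text.strip().isdigit())
print(total)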
    
    
In [24]:
    
import urllib.request
import bs4
import ssl
url = 'https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Huda.html'
# Build an SSL context that skips certificate verification
# (ssl.PROTOCOL_TLSv1 is deprecated and many servers no longer accept it)
scontext = ssl.create_default_context()
scontext.check_hostname = False
scontext.verify_mode = ssl.CERT_NONE
for i in range(7):
    html = urllib.request.urlopen(url, context=scontext).read()
    soup = bs4.BeautifulSoup(html, "html.parser")
    # Retrieve all of the anchor tags
    tags = soup('a')
    # Follow the anchor at position 18 (index 17) on each page
    url = tags[17].get('href', None)
    print(url)
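
The loop above hard-codes seven hops and link position 18; here is a sketch of the same idea as a small helper with those as parameters (it reuses urllib.request, bs4, and the scontext defined above):

In [ ]:
    
def follow_links(url, count, position, context=None):
    # Repeatedly open the page, take the anchor at the given 1-based
    # position, and follow it, printing each URL visited
    for _ in range(count):
        html = urllib.request.urlopen(url, context=context).read()
        soup = bs4.BeautifulSoup(html, 'html.parser')
        url = soup('a')[position - 1].get('href', None)
        print(url)
    return url

follow_links('https://pr4e.dr-chuck.com/tsugi/mod/python-data/data/known_by_Huda.html',
             count=7, position=18, context=scontext)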
    
    
In [ ]: