In [1]:
import cgi
import urllib2

from scrapy.http import HtmlResponse
from scrapy.selector import Selector
In [2]:
# Pages on e-maxx.ru for the Euler function article.
# `url` is the article page; `url_src` is its "src_" counterpart.
url = 'http://e-maxx.ru/algo/euler_function'
url_src = 'http://e-maxx.ru/algo/src_euler_function'

# XPath to a <textarea> nested in a table cell.
# NOTE(review): the visible cells never reference this name; the extraction
# step uses the literal '//textarea/text()' instead.
xpath = '//table/tbody/tr[2]/td[2]/textarea'
In [3]:
r = urllib2.urlopen(url_src)
body = r.read()
print 'type',type(body)
#body = unicode(body, 'utf=8', errors='replace')
ext = Selector(text=body).xpath('//textarea/text()').extract()
In [4]:
#print ext[0]
ext[0].encode('utf8')
print ext[0].decode('utf8', 'ignore')
In [5]:
print ext[0].encode('utf8')
file = open('euler-ru.txt','w')
file.write(ext[0].encode('utf8'))
In [ ]:
# Open `url` (rebinding `r`); the body is not read in this cell.
r = urllib2.urlopen(url)
# Show the response's Content-Type header as the cell output.
# Subscript access raises KeyError if the server did not send that header.
r.headers['content-type']
In [36]:
_, params = cgi.parse_header(r.headers.get('Content-Type', ''))
encoding = params.get('charset', 'unicode')
text = r.read().decode(encoding)
print url
In [ ]: