In [5]:
import requests, simplejson, pprint
# ScribbleLive v1 API tokens keyed by account name.
# SECURITY(review): credentials are hardcoded in the notebook -- load them
# from environment variables or a local untracked config before sharing.
token={ 'sss@gmx.info' : "1fNpNSiw", 'spon' : "4BA1IOGz" }
def eventdata(eid) :
a=requests.get("http://apiv1.scribblelive.com/event/%s/?Token=%s&callback=g" % (eid,token["spon"]))
a.raise_for_status()
return simplejson.loads(a.content[2:-1])
print eventdata("184792")
In [7]:
from scrapelib import TreeScraper
import re
def eventdata(eid) :
try :
t=TreeScraper("http://www.scribblelive.com/Event/Thread.aspx?Id=%s" % eid)
except Exception, e:
return { "error" : e, "id" : eid }
e=t.extract(title="//h2/text()",
description="//h3/text()",
time="//span[contains(@class,'DisplayPostTime')][1]//text()[1]",
who="//dl[contains(@id,'WhosBloggingSidebar')]//li//text()",
meta="//head//script/text()",
canonical="//head//link[contains(@rel,'canonical')]/@href"
)
for m in re.finditer(r"var (?P<name>[^ ]+) *= *(?P<val>[^\r]+)",''.join(e["meta"])) :
d=m.groupdict()
if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time") :
e[d["name"]]=d["val"]
else :
# e["rest"]=e["rest"]+"\n%(name)s = %(val)s" % d
pass
del e["meta"]
e["id"]=eid
return e
print eventdata("184792")
In [3]:
from scrapelib import TreeScraper
import re
def eventdata(eid) :
try :
t=TreeScraper("http://embed.scribblelive.com/Embed/v5.aspx?Id=%s" % eid)
except Exception, e:
return { "error" : e, "id" : eid }
e=t.extract(title="//h2/text()",
description="//meta[contains(@property,'og:description')]/@content",
url="//meta[contains(@property,'og:url')]/@content",
time="//span[contains(@class,'DisplayPostTime')][1]//text()[1]",
who="//dl[contains(@id,'WhosBloggingSidebar')]//li//text()",
meta="//head//script/text()",
canonical="//head//link[contains(@rel,'canonical')]/@href"
)
e["rest"]=""
for m in re.finditer(r"var (?P<name>[^ ]+) *= *(?P<val>[^\r]+)",''.join(e["meta"])) :
d=m.groupdict()
if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time") :
e[d["name"]]=d["val"]
else :
# e["rest"]=e["rest"]+"\n%(name)s = %(val)s" % d
pass
del e["meta"]
e["id"]=eid
return e
print eventdata("120285") # error
print eventdata("120295") # ok
print eventdata("120120")
print eventdata("165878")
In [6]:
import requests
import re
import random
import simplejson
import pprint
def recentposts(eid) :
    """Fetch the recent-posts feed for event `eid` from the live-update CDN.

    Returns the decoded feed dict with an extra "id" key set to `eid`.
    """
    # The CDN shards paths by digit: id "75449" becomes "/7/5/4/4/9".
    path = re.sub(r"([0-9])", r"/\1", eid)
    raw = requests.get("http://liveupdate1.scribblelive.com%s/recentposts.js?rand=%s" % (path, random.randint(1000000,9999999))).content
    # Keep only the JSON object: drop everything up to the first comma and
    # the final two characters of the JSONP tail.
    feed = simplejson.loads(raw[raw.find(",") + 1:-2])
    feed["id"] = eid
    return feed
pprint.pprint(recentposts("120285")) # error
pprint.pprint(recentposts("75449")["Posts"][0]["CreatorName"])
In [106]:
import requests
import re
import random
import simplejson
import pprint
def eventdata(eid) :
    """Return the raw last-modified payload for event `eid` (not decoded)."""
    # Same digit-sharded path scheme as recentposts: "120285" -> "/1/2/0/2/8/5".
    path = re.sub(r"([0-9])", r"/\1", eid)
    return requests.get("http://liveupdate1.scribblelive.com%s/lastmodified.js?rand=%s" % (path, random.randint(1000000,9999999))).content
pprint.pprint(eventdata("120285")) # error
pprint.pprint(eventdata("120295")) # ok
pprint.pprint(eventdata("120120"))
pprint.pprint(eventdata("165878"))
In [ ]:
In [22]:
import urlparse,re
from scrapelib import TreeScraper
def eventdata(eid) :
if eid[0]>="0" and eid[0]<="9" :
url="http://www.scribblelive.com/Event/Thread.aspx?Id=%s" % eid
else :
url=urlparse.urljoin("http://www.scribblelive.com/Event/",urlparse.urlsplit(eid)[2])
try :
t=TreeScraper(url)
except Exception, e:
return { "error" : e, "id" : eid }
e=t.extract(title="//h2/text()",
description="//h3/text()",
time="//span[contains(@class,'DisplayPostTime')][1]//text()[1]",
who="//dl[contains(@id,'WhosBloggingSidebar')]//li//text()",
meta="//head//script/text()",
canonical="//head//link[contains(@rel,'canonical')]/@href"
)
for m in re.finditer(r"var (?P<name>[^ ]+) *= *\"?(?P<val>[^\r]+[^\"])\"?;",''.join(e["meta"])) :
d=m.groupdict()
if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time","ThreadId") :
e[d["name"]]=d["val"]
else :
# e["rest"]=e["rest"]+"\n%(name)s = %(val)s" % d
pass
del e["meta"]
e["id"]=eid
return e
print eventdata("http://scribblelive.mobi/Event/Test-Liveevent_Politik_2")
In [15]:
import requests
from scrapelib import TreeScraper
import scrapelib
# Smoke test: fetch one page directly and one through a local proxy
# (127.0.0.1:8118) with an IE9 user-agent string.
proxy = { 'http':'127.0.0.1:8118' }
header = { "user-agent" : "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; chromeframe/12.0.742.112)" }
p=TreeScraper("http://google.com")
p1=TreeScraper("http://versicherungsmonitor.de/test_dpa",proxies=proxy,headers=header)
# NOTE(review): due to operator precedence this evaluates as the tuple
# (type(p.tree), type(p1.tree) == scrapelib.ScrapedElement) -- if the intent
# was to compare both types against ScrapedElement, parenthesize accordingly.
type(p.tree),type(p1.tree)==scrapelib.ScrapedElement
Out[15]:
In [4]:
from scrapelib import TreeScraper
from transformlib import Transformer
import re
import urlparse
import requests
import random
import simplejson, pprint,string, datetime
from collections import OrderedDict
import types
import copy
# Shared request configuration: local HTTP proxy, IE9 user-agent, and the
# ScribbleLive API tokens keyed by account.
# SECURITY(review): API tokens are hardcoded -- move them to environment
# variables or an untracked local config before sharing this notebook.
proxy = { 'http':'127.0.0.1:8118' }
header = { "user-agent" : "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; chromeframe/12.0.742.112)" }
token = { 'sss@gmx.info' : "1fNpNSiw", 'spon' : "4BA1IOGz" }
def eventdata_web(eid) :
if eid[0]>="0" and eid[0]<="9" :
url="http://www.scribblelive.com/Event/Thread.aspx?Id=%s" % eid
else :
url=urlparse.urljoin("http://www.scribblelive.com/Event/",urlparse.urlsplit(eid)[2])
try :
t=TreeScraper(url,proxies=proxy, headers=header)
except Exception, e:
return { "error" : e, "id" : eid }
e=t.extract(title="//h2/text()",
description="//h3/text()",
time="//span[contains(@class,'DisplayPostTime')][1]//text()[1]",
who="//dl[contains(@id,'WhosBloggingSidebar')]//li//text()",
meta="//head//script/text()",
canonical="//head//link[contains(@rel,'canonical')]/@href"
)
for m in re.finditer(r"var (?P<name>[^ ]+) *= *\"?(?P<val>[^\r]+[^\"])\"?;",''.join(e["meta"])) :
d=m.groupdict()
if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time","ThreadId") :
e[d["name"]]=d["val"]
else :
# e["rest"]=e["rest"]+"\n%(name)s = %(val)s" % d
pass
del e["meta"]
e["id"]=eid
return e
def eventdata_api(eid) :
    """Fetch event `eid` from the ScribbleLive v1 JSONP API via the proxy.

    Returns the decoded JSON payload as a dict.
    Raises requests.HTTPError on a non-2xx response (previously an HTTP
    error only surfaced later as a confusing JSON decode failure).
    """
    a = requests.get("http://apiv1.scribblelive.com/event/%s/?Token=%s&callback=g" % (eid, token["sss@gmx.info"]), proxies=proxy, headers=header)
    a.raise_for_status()
    # Strip the JSONP wrapper g(...);  by locating the outer parentheses
    # instead of the brittle fixed slice [2:-2], which breaks if the
    # trailing ';' is absent.
    body = a.content
    return simplejson.loads(body[body.find("(") + 1:body.rfind(")")])
def js_to_timestamp(a) :
    """Convert a JavaScript millisecond timestamp string to a local datetime.

    `a` is e.g. "/Date(1377259673000)/"; the first run of digits is taken as
    milliseconds since the epoch.  The sub-second part is deliberately
    discarded; `//` makes that floor division explicit so the result is the
    same under Python 2 and Python 3 (plain `/` would yield a float with a
    fractional-second component on Python 3).
    Raises AttributeError if `a` contains no digits.
    """
    millis = int(re.search(r"(?P<time>\d+)", a).group("time"))
    return datetime.datetime.fromtimestamp(millis // 1000)
# Normalisation rules applied to the merged event dict.  Each rule maps a
# matched key to {old_key: None, new_key: new_value}, i.e. renames and/or
# retypes the field.  Order appears significant: the catch-all ".*" rule
# comes last.  NOTE(review): Transformer matching semantics are assumed
# from usage -- confirm against transformlib.
# NOTE(review): in '^End|Created|Start|LastModified$' the anchors bind only
# to the first/last alternative (alternation has low precedence), so e.g.
# "Created" can match anywhere in a key; probably '^(End|...)$' was meant.
translate = Transformer((( 'time' , lambda a,b,c: { a: None, 'stime' : datetime.datetime.strptime(b, "%m/%d/%Y %I:%M:%S %p") }),
                         ( 'Time' , lambda a,b,c: { a: None, 'mtime' : datetime.datetime.strptime(b, "%m/%d/%Y %I:%M:%S %p") }),
                         ( 'Title' , lambda a,b,c: { a: None, 'metatitle' : b }),
                         ( 'ThreadId' , lambda a,b,c: { a: None, 'id' : b }),
                         # Epoch-millisecond fields become datetimes, keys lowercased.
                         ( re.compile('^End|Created|Start|LastModified$')
                           , lambda a,b,c: { a: None, string.lower(a) : js_to_timestamp(b) }),
                         # Catch-all: lowercase the key, keep the value as-is.
                         ( re.compile(".*") , lambda a,b,c: { a: None, string.lower(a) : b}),
                         ))
def eventdata(eid, api=True, web=True) :
    """Collect event metadata for `eid` and normalise it via `translate`.

    With web=True the thread page is scraped first; otherwise the API alone
    is used.  With api=True the API result is merged on top of a successful
    scrape (using the scraped ThreadId/id to address the API).
    """
    if web :
        merged = eventdata_web(eid)
    else :
        merged = eventdata_api(eid)
    if api and "error" not in merged and ("ThreadId" in merged or "id" in merged) :
        merged.update(eventdata_api(merged.get("ThreadId", merged.get("id", ""))))
    return translate(merged)
def listevents() :
    """List events from the ScribbleLive mobile front page.

    Returns one record per "ul#Threads li" entry with url, title,
    description and stamp fields.
    """
    page = TreeScraper("http://scribblelive.mobi", headers=header)
    return page.extract("ul#Threads li",
                        url="./a/@href",
                        title="./a/text()",
                        description=".//span[contains(@class,'Description')]/text()",
                        stamp=".//span[contains(@class,'DateTime')]/text()" )
# for l in listevents() :
# l.update(eventdata(l["url"]))
# print l :
# Smoke tests: API-only, web+API, web-only, and API-only again.
print eventdata("52369",web=False),"\n\n",eventdata("120285"),"\n\n",eventdata("179163",api=False),"\n\n",eventdata("120285",web=False)
In [48]:
import re
# Demo: alternation binds looser than the anchors, so this pattern is
# (^end)|(created)|(start)|(lastmodified$) -- "created"/"start" match
# anywhere in the string, not only as whole words.
a=re.compile('^end|created|start|lastmodified$')
a.search("end")
Out[48]:
In [69]:
# Sanity check of the ScribbleLive 12-hour time format; relies on the
# `datetime` import from an earlier cell.
datetime.datetime.strptime("8/23/2013 2:07:53 PM", "%m/%d/%Y %I:%M:%S %p")
Out[69]:
In [14]:
# Exercise the packaged version of the scraper (slscraper module).
from slscraper import eventdata
eventdata("144083")
Out[14]:
In [1]:
# Same check through the module namespace instead of a direct import.
import slscraper
slscraper.eventdata("113596")
Out[1]:
In [ ]: