ScribbleLive event metadata

API with token


In [5]:
import requests, simplejson, pprint

# API tokens, keyed by account
token={ 'sss@gmx.info' : "1fNpNSiw", 'spon' : "4BA1IOGz" }


def eventdata(eid) :
    # JSONP request: callback=g wraps the JSON payload as g(...)
    a=requests.get("http://apiv1.scribblelive.com/event/%s/?Token=%s&callback=g" % (eid,token["spon"]))
    a.raise_for_status()
    # strip the leading "g(" and the trailing ")" before parsing
    return simplejson.loads(a.content[2:-1])

print eventdata("184792")


{'End': '/Date(1377884871290+0000)/', 'Title': u'Kampf ums Kanzleramt: Fernseh-Duell Merkel - Steinbr\xfcck', 'LastModified': '/Date(1377874071290+0000)/', 'Discussion': {'Moderated': 0, 'Enabled': 0}, 'Start': '/Date(1377874070000+0000)/', 'Location': {'Lat': 51.0, 'Long': 9.0}, 'IsSyndicated': 1, 'Description': u'dpa tickert live \xfcber das Fernseh-Duell von Bundeskanzlerin Angela Merkel und ihrem Herausforderer Peer Steinbr\xfcck', 'Pages': 1, 'IsDeleted': 0, 'Language': 'de', 'Created': '/Date(1377706607000+0000)/', 'IsSyndicatable': 0, 'Websites': [{'Url': 'http://live.dtf12.de/Event/Kampf_ums_Kanzleramt_TV-Duell_Merkel_-_Steinbruck', 'Id': 988, 'Name': 'dpa test Whitelabel V2'}], 'NumPosts': 5, 'IsLive': 1, 'IsCommenting': 0, 'IsModerated': 0, 'NumComments': 0, 'Meta': {'Secure': '0'}, 'SyndicatedComments': 0, 'Id': 184792}
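
The `/Date(1377884871290+0000)/` values are Microsoft-style JSON dates: milliseconds since the Unix epoch plus a zone offset. A minimal decoding sketch (the same idea is wrapped up as `js_to_timestamp` further down):

import re, datetime

def msdate_to_datetime(s) :
    # "/Date(1377884871290+0000)/" -> take the millisecond count, interpret as UTC
    ms=int(re.search(r"-?\d+",s).group(0))
    return datetime.datetime.utcfromtimestamp(ms/1000)

print msdate_to_datetime('/Date(1377884871290+0000)/')   # 2013-08-30 17:47:51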

Via the publicly hosted event URL


In [7]:
from scrapelib import TreeScraper
import re

def eventdata(eid) :
    try :
        t=TreeScraper("http://www.scribblelive.com/Event/Thread.aspx?Id=%s" % eid)
    except Exception, e:
        return { "error" : e, "id" : eid  }
    e=t.extract(title="//h2/text()",
              description="//h3/text()",
              time="//span[contains(@class,'DisplayPostTime')][1]//text()[1]",
              who="//dl[contains(@id,'WhosBloggingSidebar')]//li//text()",
              meta="//head//script/text()",
              canonical="//head//link[contains(@rel,'canonical')]/@href"
              )
    # pick selected JavaScript "var name = value" assignments out of the inline scripts
    for m in re.finditer(r"var (?P<name>[^ ]+) *= *(?P<val>[^\r]+)",''.join(e["meta"])) :
        d=m.groupdict()
        if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time") :
            e[d["name"]]=d["val"]
        else :
            # e["rest"]=e["rest"]+"\n%(name)s = %(val)s" % d
            pass
    del e["meta"]
    e["id"]=eid
    return e


print eventdata("184792")


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-7-fbd59056444b> in <module>()
     26 
     27 
---> 28 print eventdata("184792")

<ipython-input-7-fbd59056444b> in eventdata(eid)
     14               canonical="//head//link[contains(@rel,'canonical')]/@href"
     15               )
---> 16     for m in re.finditer(r"var (?P<name>[^ ]+) *= *(?P<val>[^\r]+)",''.join(e["meta"])) :
     17         d=m.groupdict()
     18         if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time") :

KeyError: 'meta'
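
The `KeyError: 'meta'` suggests that `extract()` simply omits keys whose XPath matched nothing, e.g. when the thread page does not render the expected inline scripts. A defensive variant of the parsing step, under that assumption:

import re

def extract_vars(e) :
    # hedged guard: fall back to an empty list instead of indexing e["meta"] directly
    for m in re.finditer(r"var (?P<name>[^ ]+) *= *(?P<val>[^\r]+)",''.join(e.get("meta",[]))) :
        d=m.groupdict()
        if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time") :
            e[d["name"]]=d["val"]
    e.pop("meta",None)   # tolerate a missing key on deletion, too
    return e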

Via the embed URL


In [3]:
from scrapelib import TreeScraper
import re

def eventdata(eid) :
    try :
        t=TreeScraper("http://embed.scribblelive.com/Embed/v5.aspx?Id=%s" % eid)
    except Exception, e:
        return { "error" : e, "id" : eid  }
    e=t.extract(title="//h2/text()",
             description="//meta[contains(@property,'og:description')]/@content",
             url="//meta[contains(@property,'og:url')]/@content",        
             time="//span[contains(@class,'DisplayPostTime')][1]//text()[1]",
             who="//dl[contains(@id,'WhosBloggingSidebar')]//li//text()",
             meta="//head//script/text()",
             canonical="//head//link[contains(@rel,'canonical')]/@href"
              )
    e["rest"]=""
    for m in re.finditer(r"var (?P<name>[^ ]+) *= *(?P<val>[^\r]+)",''.join(e["meta"])) :
        d=m.groupdict()
        if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time") :
            e[d["name"]]=d["val"]
        else :
            # e["rest"]=e["rest"]+"\n%(name)s = %(val)s" % d
            pass
    del e["meta"]
    e["id"]=eid
    return e


print eventdata("120285") # error

print eventdata("120295") # ok

print eventdata("120120")

print eventdata("165878")


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-3-1d5263b14830> in <module>()
     30 print eventdata("120285") # error
     31 
---> 32 print eventdata("120295") # ok
     33 
     34 print eventdata("120120")

<ipython-input-3-1d5263b14830> in eventdata(eid)
     16               )
     17     e["rest"]=""
---> 18     for m in re.finditer(r"var (?P<name>[^ ]+) *= *(?P<val>[^\r]+)",''.join(e["meta"])) :
     19         d=m.groupdict()
     20         if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time") :

KeyError: 'meta'
{'id': '120285', 'error': IOError(u"Error reading file 'http://embed.scribblelive.com/Embed/v5.aspx?Id=120285': failed to load HTTP resource",)}

Via the recent posts widget


In [6]:
import requests
import re
import random
import simplejson
import pprint

def recentposts(eid) :
    # liveupdate wants the event id with a slash before every digit: "120285" -> "/1/2/0/2/8/5"
    url=re.sub(r"([0-9])",r"/\1",eid)
    resp=requests.get("http://liveupdate1.scribblelive.com%s/recentposts.js?rand=%s" % (url,random.randint(1000000,9999999))).content
    # drop the JS callback wrapper: everything up to the first comma, and the trailing ");"
    obj=simplejson.loads(resp[resp.find(",")+1:-2])
    obj["id"]=eid
    return obj
    
    

pprint.pprint(recentposts("120285")) # error

pprint.pprint(recentposts("75449")["Posts"][0]["CreatorName"])


{'Posts': [{'Content': 'The liveblog has been closed by Thomas Joechler.  Thanks for joining in!',
            'CreatorId': 27815195,
            'CreatorName': 'Thomas Joechler',
            'CreatorThumbnail': '',
            'Date': '6/11/2013 3:09:42 PM',
            'EditorId': '',
            'EditorName': '',
            'Group': '0',
            'Id': 79002277,
            'IsApproved': 1,
            'IsComment': 0,
            'IsDeleted': 1,
            'Lat': '',
            'Long': '',
            'Rank': '1',
            'ReceivedDate': '6/11/2013 3:09:42 PM',
            'Source': 'ScribbleLive',
            'Type': 4}],
 'Time': '6/11/2013 3:09:42 PM',
 'id': '120285'}
'Rubrik Bayern BR.de '
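
The slice in `recentposts()` assumes the raw response is a JS call analogous to the `LiveBlog.Ping(...)` snippets in the next section, i.e. `Callback(<id>,<json>);`. A worked example of the slice (the callback name here is made up):

resp='SomeCallback(120285,{"Time":"6/11/2013 3:09:42 PM"});'
# everything after the first comma, minus the trailing ");"
print resp[resp.find(",")+1:-2]   # {"Time":"6/11/2013 3:09:42 PM"}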

How many inline? Via the RecentPostsWidget


In [106]:
import requests
import re
import random
import simplejson
import pprint


def eventdata(eid) :
    # same digit-splitting URL scheme as recentposts.js above
    url=re.sub(r"([0-9])",r"/\1",eid)
    # lastmodified.js answers with a JS snippet: LiveBlog.Ping(<id>,{Time:"...",CurrentVisitors:<n>});
    resp=requests.get("http://liveupdate1.scribblelive.com%s/lastmodified.js?rand=%s" % (url,random.randint(1000000,9999999))).content
    return resp
    
    

pprint.pprint(eventdata("120285")) # error

pprint.pprint(eventdata("120295")) # ok

pprint.pprint(eventdata("120120"))

pprint.pprint(eventdata("165878"))


'LiveBlog.Ping(120285,{Time:"6/11/2013 3:09:42 PM",CurrentVisitors:-1});'
'LiveBlog.Ping(120295,{Time:"6/10/2013 2:14:32 PM",CurrentVisitors:-1});'
'LiveBlog.Ping(120120,{Time:"6/10/2013 7:58:10 PM",CurrentVisitors:-1});'
'LiveBlog.Ping(165878,{Time:"8/13/2013 3:44:13 AM",CurrentVisitors:-1});'
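
The ping responses are easy to take apart; a small sketch extracting the timestamp and the visitor count:

import re, datetime

def parse_ping(resp) :
    # LiveBlog.Ping(<id>,{Time:"...",CurrentVisitors:<n>});
    d=re.search(r'Time:"(?P<time>[^"]+)",CurrentVisitors:(?P<visitors>-?\d+)',resp).groupdict()
    return { "mtime" : datetime.datetime.strptime(d["time"],"%m/%d/%Y %I:%M:%S %p"),
             "visitors" : int(d["visitors"]) }

print parse_ping('LiveBlog.Ping(120285,{Time:"6/11/2013 3:09:42 PM",CurrentVisitors:-1});')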

In [22]:
import urlparse,re
from scrapelib import TreeScraper


def eventdata(eid) :
    # accept either a numeric event id or a full event URL
    if eid[0]>="0" and eid[0]<="9" :
        url="http://www.scribblelive.com/Event/Thread.aspx?Id=%s" % eid
    else :
        url=urlparse.urljoin("http://www.scribblelive.com/Event/",urlparse.urlsplit(eid)[2])
    try :
        t=TreeScraper(url)
    except Exception, e:
        return { "error" : e, "id" : eid  }
    e=t.extract(title="//h2/text()",
              description="//h3/text()",
              time="//span[contains(@class,'DisplayPostTime')][1]//text()[1]",
              who="//dl[contains(@id,'WhosBloggingSidebar')]//li//text()",
              meta="//head//script/text()",
              canonical="//head//link[contains(@rel,'canonical')]/@href"
              )
    # as above, but the regex also strips optional surrounding double quotes from the value
    for m in re.finditer(r"var (?P<name>[^ ]+) *= *\"?(?P<val>[^\r]+[^\"])\"?;",''.join(e["meta"])) :
        d=m.groupdict()
        if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time","ThreadId") :
            e[d["name"]]=d["val"]
        else :
            # e["rest"]=e["rest"]+"\n%(name)s = %(val)s" % d
            pass
    del e["meta"]
    e["id"]=eid
    return e


print eventdata("http://scribblelive.mobi/Event/Test-Liveevent_Politik_2")


{'Time': '8/22/2013 2:52:45 PM', 'DiscussionsEnabled': 'true', 'description': 'ScribbleLive-Test PolitikScribbleLive-Test PolitikScribbleLive-Test PolitikScribbleLive-Test PolitikScribbleLive-Test PolitikScribbleLive-Test PolitikScribbleLive-Test PolitikScribbleLive-Test PolitikScribbleLive-Test PolitikScribbleLive-Test Politik', 'title': 'ScribbleLive-Test Politik ', 'who': ['Cai Philippsen', 'Kai N. Pritzsche', 'Tillmann Neuscheler', 'Oliver Georgi', 'Monika Ganster', 'FAZ Admin'], 'ThreadId': '177087', 'time': '8/22/2013 2:52:45 PM', 'IsLive': 'true', 'id': 'http://scribblelive.mobi/Event/Test-Liveevent_Politik_2', 'canonical': 'http://live.faz.net/Event/Test-Liveevent_Politik_2'}
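
A quick sanity check of the quote-stripping regex against a synthetic script fragment; `[^\r]+` bounds each value at the carriage return that apparently separates the statements on these pages:

import re

sample='var ThreadId = "177087";\rvar IsLive = true;'
for m in re.finditer(r"var (?P<name>[^ ]+) *= *\"?(?P<val>[^\r]+[^\"])\"?;",sample) :
    print m.group("name"), "=", m.group("val")
# ThreadId = 177087
# IsLive = true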

In [15]:
import requests 
from scrapelib import TreeScraper
import scrapelib

proxy = { 'http':'127.0.0.1:8118' }
header = { "user-agent" : "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; chromeframe/12.0.742.112)" }


# plain fetch vs. fetch through a local proxy with a spoofed user agent
p=TreeScraper("http://google.com")
p1=TreeScraper("http://versicherungsmonitor.de/test_dpa",proxies=proxy,headers=header)
type(p.tree),type(p1.tree)==scrapelib.ScrapedElement


Out[15]:
(lxml.etree._ElementTree, True)

In [4]:
from scrapelib import TreeScraper
from transformlib import Transformer
import re
import urlparse
import requests
import random   
import simplejson, pprint,string, datetime
from collections import OrderedDict
import types
import copy


proxy  = { 'http':'127.0.0.1:8118' }
header = { "user-agent" : "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; chromeframe/12.0.742.112)" }
token = { 'sss@gmx.info' : "1fNpNSiw", 'spon' : "4BA1IOGz" }



def eventdata_web(eid) :
    if eid[0]>="0" and eid[0]<="9" :
        url="http://www.scribblelive.com/Event/Thread.aspx?Id=%s" % eid
    else :
        url=urlparse.urljoin("http://www.scribblelive.com/Event/",urlparse.urlsplit(eid)[2])
    try :
        t=TreeScraper(url,proxies=proxy, headers=header)
    except Exception, e:
        return { "error" : e, "id" : eid  }
    e=t.extract(title="//h2/text()",
              description="//h3/text()",
              time="//span[contains(@class,'DisplayPostTime')][1]//text()[1]",
              who="//dl[contains(@id,'WhosBloggingSidebar')]//li//text()",
              meta="//head//script/text()",
              canonical="//head//link[contains(@rel,'canonical')]/@href"
              )
    for m in re.finditer(r"var (?P<name>[^ ]+) *= *\"?(?P<val>[^\r]+[^\"])\"?;",''.join(e["meta"])) :
        d=m.groupdict()
        if d["name"] in ("DiscussionsEnabled","PromotionalUrl","IsLive","Time","ThreadId") :
            e[d["name"]]=d["val"]
        else :
            # e["rest"]=e["rest"]+"\n%(name)s = %(val)s" % d
            pass
    del e["meta"]
    e["id"]=eid
    return e


def eventdata_api(eid) :
    # JSONP again; this endpoint's payload ends in ");", hence two trailing characters stripped
    a=requests.get("http://apiv1.scribblelive.com/event/%s/?Token=%s&callback=g" % (eid,token["sss@gmx.info"]),proxies=proxy,headers=header)
    return simplejson.loads(a.content[2:-2])

    

def js_to_timestamp(a) :
    # "/Date(1377884871290+0000)/" -> datetime; the digits are milliseconds since the epoch
    return datetime.datetime.fromtimestamp(int(re.search(r"(?P<time>\d+)",a).groupdict()["time"])/1000)

    

 
# rule table: each entry pairs a key (exact string or regex) with a function that
# drops the original key (maps it to None) and emits a renamed/converted value;
# the catch-all regex comes last, so rules appear to apply first-match-wins.
translate = Transformer((( 'time'     , lambda a,b,c: { a: None, 'stime' : datetime.datetime.strptime(b, "%m/%d/%Y %I:%M:%S %p") }),
                        (  'Time'     , lambda a,b,c: { a: None, 'mtime' : datetime.datetime.strptime(b, "%m/%d/%Y %I:%M:%S %p") }),
                        (  'Title'    , lambda a,b,c: { a: None, 'metatitle' : b }),
                        (  'ThreadId' , lambda a,b,c: { a: None, 'id' : b }),
                        # note the grouping: the bare alternation '^End|Created|...' would also match substrings
                        (  re.compile('^(End|Created|Start|LastModified)$')
                                      , lambda a,b,c: { a: None, string.lower(a) : js_to_timestamp(b) }),
                        (  re.compile(".*")
                                      , lambda a,b,c: { a: None, string.lower(a) : b }),
                       ))

                 
def eventdata(eid,api=True,web=True) :
    if not web :
        m=eventdata_api(eid)
    else :
        m=eventdata_web(eid)
        if api and (not "error" in m) and (("ThreadId" in m) or ("id" in m)):
            m.update(eventdata_api(m.get("ThreadId",m.get("id",""))))
    return translate(m)
                
               
               
def listevents() :
    t=TreeScraper("http://scribblelive.mobi",headers=header)
    return t.extract("ul#Threads li",url="./a/@href", title="./a/text()",description=".//span[contains(@class,'Description')]/text()",stamp=".//span[contains(@class,'DateTime')]/text()" )
    
# for l in listevents() :
#    l.update(eventdata(l["url"]))
#    print l
    
print eventdata("52369",web=False),"\n\n",eventdata("120285"),"\n\n",eventdata("179163",api=False),"\n\n",eventdata("120285",web=False)


{'isdeleted': 0, 'iscommenting': 1, 'issyndicatable': 0, 'syndicatedcomments': 0, 'issyndicated': 0, 'meta': {}, 'pages': 2, 'description': '', 'numcomments': 86, 'end': datetime.datetime(2012, 7, 11, 23, 51, 39), 'language': 'da', 'discussion': {'Moderated': 0, 'Enabled': 0}, 'numposts': 0, 'metatitle': u'FCK-Cercle Br\xfcgge', 'id': 52369, 'start': datetime.datetime(2012, 7, 11, 18, 55, 56), 'created': datetime.datetime(2012, 7, 11, 14, 44, 51), 'ismoderated': 1, 'lastmodified': datetime.datetime(2012, 7, 11, 20, 51, 39), 'islive': 0} 

{'issyndicated': 0, 'meta': {}, 'id': 120285, 'numcomments': 3, 'end': datetime.datetime(2013, 6, 11, 17, 9, 42), 'title': 'Sorry, that live event was not found', 'discussion': {'Moderated': 0, 'Enabled': 0}, 'iscommenting': 1, 'start': datetime.datetime(2013, 6, 11, 17, 0), 'ismoderated': 1, 'syndicatedcomments': 0, 'islive': 0, 'issyndicatable': 0, 'description': 'Deutschland gegen Schottland', 'lastmodified': datetime.datetime(2013, 6, 11, 17, 10, 3), 'pages': 1, 'language': 'de', 'numposts': 5, 'created': datetime.datetime(2013, 6, 10, 16, 10, 23), 'websites': [{'Url': 'http://www.scribblelive.com/Event/Fussball_Landerspiel_der_Frauen', 'Id': 1, 'Name': 'ScribbleLive'}], 'isdeleted': 1, 'metatitle': u'Fussball L\xe4nderspiel der Frauen'} 

{'title': 'Advanced Post ', 'who': 'SuperFooty', 'islive': 'true', 'mtime': datetime.datetime(2013, 8, 23, 2, 7, 53), 'discussionsenabled': 'false', 'id': '179163', 'canonical': 'http://www.scribblelive.com/Event/Thread.aspx?Id=179163'} 

{'meta': {}, 'id': 120285, 'issyndicated': 0, 'end': datetime.datetime(2013, 6, 11, 17, 9, 42), 'discussion': {'Moderated': 0, 'Enabled': 0}, 'pages': 1, 'start': datetime.datetime(2013, 6, 11, 17, 0), 'ismoderated': 1, 'syndicatedcomments': 0, 'islive': 0, 'description': 'Deutschland gegen Schottland', 'lastmodified': datetime.datetime(2013, 6, 11, 17, 10, 3), 'issyndicatable': 0, 'iscommenting': 1, 'language': 'de', 'numposts': 5, 'created': datetime.datetime(2013, 6, 10, 16, 10, 23), 'websites': [{'Url': 'http://www.scribblelive.com/Event/Fussball_Landerspiel_der_Frauen', 'Id': 1, 'Name': 'ScribbleLive'}], 'isdeleted': 1, 'numcomments': 3, 'metatitle': u'Fussball L\xe4nderspiel der Frauen'}
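
For readers without `transformlib`: judging by the rule table above and its catch-all last entry, `Transformer` seems to apply the first rule whose key matches (exact string or regex) and merge the returned dict, treating `None` values as deletions. A rough, hypothetical equivalent:

import re

def transform(rules, record) :
    # hypothetical re-implementation; first matching rule per key wins
    out={}
    for key,val in record.items() :
        for pat,fn in rules :
            hit=pat.search(key) if hasattr(pat,"search") else pat==key
            if hit :
                for k,v in fn(key,val,record).items() :
                    if v is not None :
                        out[k]=v
                break
    return out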

In [48]:
import re

a=re.compile('^end|created|start|lastmodified$')
a.search("end")


Out[48]:
<_sre.SRE_Match at 0x9469608>

In [69]:
datetime.datetime.strptime("8/23/2013 2:07:53 PM", "%m/%d/%Y %I:%M:%S %p")


Out[69]:
datetime.datetime(2013, 8, 23, 14, 7, 53)

In [14]:
from slscraper import eventdata 

eventdata("144083")


Out[14]:
{'created': datetime.datetime(2013, 7, 11, 10, 20, 40),
 'description': 'Live coverage of the Holmfirth Tesco planning appeal',
 'discussion': {'Enabled': 0, 'Moderated': 0},
 'end': datetime.datetime(2013, 7, 11, 15, 0, 46),
 'id': 144083,
 'iscommenting': 1,
 'isdeleted': 0,
 'islive': 0,
 'ismoderated': 1,
 'issyndicatable': 0,
 'issyndicated': 0,
 'language': 'en',
 'lastmodified': datetime.datetime(2013, 7, 11, 15, 0, 46),
 'meta': {},
 'metatitle': 'Holmfirth Tesco planning appeal',
 'numcomments': 2,
 'numposts': 23,
 'pages': 1,
 'start': datetime.datetime(2013, 7, 11, 10, 51, 49),
 'syndicatedcomments': 0,
 'title': '403 - Forbidden: Access is denied.'}

In [1]:
import slscraper

slscraper.eventdata("113596")


Out[1]:
{'canonical': 'http://live.dtf12.de/Event/Test_Event_dpa_Berlin_29_5_2013',
 'created': datetime.datetime(2013, 5, 29, 10, 28),
 'description': 'Dies ist der Beschreibungstext.',
 'discussion': {'Enabled': 1, 'Moderated': 1},
 'discussionsenabled': 'true',
 'end': datetime.datetime(2013, 8, 30, 22, 21, 1),
 'id': 113596,
 'iscommenting': 1,
 'isdeleted': 0,
 'islive': 1,
 'ismoderated': 1,
 'issyndicatable': 0,
 'issyndicated': 1,
 'language': 'de',
 'lastmodified': datetime.datetime(2013, 8, 30, 19, 21, 1),
 'location': {'Lat': 51.0, 'Long': 9.0},
 'meta': {'Secure': '1'},
 'metatitle': '[Sandkasten] Test Event dpa Berlin 29. 5. 2013',
 'mtime': datetime.datetime(2013, 8, 30, 17, 21, 30),
 'numcomments': 15,
 'numposts': 20066,
 'pages': 446,
 'start': datetime.datetime(2013, 8, 29, 20, 30),
 'stime': datetime.datetime(2013, 6, 10, 15, 22, 48),
 'syndicatedcomments': 0,
 'title': '[Sandkasten] Test Event dpa Berlin 29. 5. 2013 ',
 'websites': [{'Id': 988,
   'Name': 'dpa test Whitelabel V2',
   'Url': 'http://live.dtf12.de/Event/Test_Event_dpa_Berlin_29_5_2013'}],
 'who': ['Martin Virtel',
  'Sabrina Sosa Silva',
  'mollitor.daniel',
  'frank.buhr',
  'jan.buelck',
  u'S\xc3\xbcleyman Artiisik /...']}
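
One artefact in that last result: `u'S\xc3\xbcleyman'` is UTF-8 that was mis-decoded as Latin-1 somewhere upstream. Re-encoding round-trips it back:

name=u'S\xc3\xbcleyman Artiisik'
print name.encode("latin-1").decode("utf-8")   # Süleyman Artiisik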
