Vorspann


In [5]:
from dumptruck import DumpTruck
dt=DumpTruck(dbname="data/events.sqlite")
from cgi import escape
import types
import re

def hstr(s) :
    if type(s) not in types.StringTypes :
        s=unicode(repr(s))
    s=unicode(s)
    s=re.sub(r"(http://([^/]+[^ ]+))",r'<a href="\1" target="_blank">\2</a>',escape(s))
    return s


styles=['#fbe1ca','#ffffff']

class HtmlLol :
    
    def __init__(self,o) :
        self.o=o
        
    def _repr_html_(self) :
        n=0
        res=u"<table>"
        res+=u"<tr><th>%s</th></tr>" % "</th><th>".join([hstr(a) for a in self.o[0]])
        for r in self.o[1:] :
            res+=u"<tr style='background-color: %s'><td>%s</td></tr>" % (styles[n % 2] ,u"</td><td>".join([hstr(a) for a in r]))
            n+=1
        res+=u"</table>"
        return res
            
        

def table(s) :
    res=dt.execute(s)
    top=[]
    top.append(res[0].keys())
    top.extend([a.values() for a in dt.execute(s)])
    t=HtmlLol(top)
    return(t)

Neueste Live Blogs (deutsch)


In [6]:
table("select numposts,numcomments,id,title,lastmodified from events where (upper(title) like '%MERKEL%' or upper(title) like '%DUELL%' or upper(description) like '%DUELL%') order by id desc, numposts desc limit 20")


Out[6]:
numpostsnumcommentsidtitlelastmodified
10321200952TV-Arena Merkeldatetime.datetime(2013, 9, 9, 21, 51, 5)
11232200803TV-Duelle: Faymann vs. Glawischnig, Spindelegger vs. Strachedatetime.datetime(2013, 9, 9, 21, 57, 33)
4317194849Das bayerische TV-Duelldatetime.datetime(2013, 9, 5, 10, 36, 44)
14332193204TV-Duelle: Spindelegger vs. Stronach, Faymann vs. Bucherdatetime.datetime(2013, 9, 5, 11, 23, 14)
12120192006Merkel-Steinbrück-Debatte im Bundestag 3.9.13datetime.datetime(2013, 9, 3, 17, 9, 14)
9217191150Kleines TV Duell Trittin Brüderle Gysi 2.9.2013datetime.datetime(2013, 9, 3, 8, 16, 50)
60190607Duellen på Plassen datetime.datetime(2013, 9, 2, 13, 0, 52)
12618189804Das TV-Duell: Merkel gegen Steinbrückdatetime.datetime(2013, 9, 1, 23, 12, 11)
1396189714TV-Duell Merkel vs. Steinbrückdatetime.datetime(2013, 9, 2, 9, 35, 38)
00189350Das TV-Duelldatetime.datetime(2013, 9, 1, 15, 14, 27)
172456187306Merkel gegen Steinbrück - Kommentare zum TV-Duelldatetime.datetime(2013, 9, 2, 0, 2, 48)
15022186293Kampf ums Kanzleramt: Merkel und Steinbrück im TV-Duell datetime.datetime(2013, 9, 2, 12, 52, 9)
17718186272TV Duell Merkel Steinbrück 1.9.13datetime.datetime(2013, 9, 2, 13, 57, 3)
4353186151Merkel kontra Steinbrück – Wer gewinnt das TV-Duell?datetime.datetime(2013, 9, 1, 22, 21, 18)
00186048TV-Duelle: Glawischnig vs. Strache, Bucher vs. Stronachdatetime.datetime(2013, 8, 29, 19, 28, 47)
1073184792Kampf ums Kanzleramt: Fernseh-Duell Merkel - Steinbrückdatetime.datetime(2013, 9, 2, 12, 52, 14)
5933184755Liveticker-Fernsehduelldatetime.datetime(2013, 9, 1, 22, 37, 22)
00184380Duell zur Landtagswahldatetime.datetime(2013, 9, 1, 17, 14, 14)
24712184351Duell zur Bundestagswahldatetime.datetime(2013, 9, 2, 0, 20, 14)
1690182287Merkel gegen Steinbrück: Das TV-Duell zur Bundestagswahl 2013datetime.datetime(2013, 9, 2, 1, 51, 57)

Statistik nach Sprachen


In [15]:
table("select language,count(*) as num from events where numposts>0  group by language order by num desc")


Out[15]:
languagenum
en75674
es4770
fr4410
da1877
it1827
de1784
sv510
pt340
nn293
ru287
fi231
nl197
tr79
sl77
ar56
En53
ja16
hr13
no4
nb3
zh3
pl2
sr2
1
he1
lv1

Zeitverlauf: Deutsche Liveblogs


In [17]:
table("select language,count(*) as num,strftime('%Y-%m',start) as month from events where numposts>0 and language='de' group by language,month order by month desc")


Out[17]:
languagenummonth
de12014-07
de12014-05
de22014-03
de12014-01
de22013-12
de12013-11
de12013-10
de752013-09
de1872013-08
de1462013-07
de1942013-06
de1812013-05
de1112013-04
de892013-03
de842013-02
de692013-01
de242012-12
de452012-11
de602012-10
de712012-09
de182012-08
de112012-07
de272012-06
de252012-05
de152012-04
de202012-03
de132012-02
de142012-01
de32011-12
de32011-11
de32011-10
de42011-09
de12011-08
de22011-07
de12010-10
de12010-08
de72010-07
de212010-06
de252010-05
de192010-04
de162010-03
de202010-02
de212010-01
de302009-12
de162009-11
de272009-10
de272009-09
de262009-08
de142009-07
de212009-06
de132009-05
de72009-04
de192009-03
de202009-02

Die letzten Tage


In [8]:
table("select count(*),max(id) as id,strftime('%Y-%m-%d',start) as day from events where start>'2013-08-01' and language='de' and numposts>0  group by day order by day desc")


Out[8]:
count(*)idday
14929842014-03-18
14768192014-03-09
13084892013-12-04
12212712013-09-25
32028212013-09-11
182027952013-09-10
62010412013-09-09
21994902013-09-08
31986162013-09-07
21968452013-09-06
81956992013-09-05
81950602013-09-04
41948892013-09-03
131921332013-09-02
71898042013-09-01
11882332013-08-31
111874012013-08-30
101863272013-08-29
71847402013-08-28
21833932013-08-27
61824112013-08-26
41807072013-08-24
151797302013-08-23
161786042013-08-22
111777402013-08-21
91760572013-08-20
41752942013-08-19
51753132013-08-18
41743092013-08-17
61727252013-08-16
71716942013-08-15
61706802013-08-14
41695842013-08-13
61683492013-08-12
11646722013-08-11
51663682013-08-10
31655502013-08-09
91646012013-08-08
61642962013-08-07
31636692013-08-06
91633132013-08-05
11575732013-08-04
41605692013-08-03
101608352013-08-02
21594792013-08-01

Top 10: Deutsche Liveblogs mit den meisten Kommentaren


In [9]:
table("select numposts,numcomments,title,canonical,who from events where language='de' order by numcomments desc limit 10")


Out[9]:
numpostsnumcommentstitlecanonicalwho
353027222Tatort TestNoneNone
6911725DD_Rock am Ring LivestreamNoneNone
3957788UEFA Frauen EM 2013None["Meike Richter"]
31316879Jetzt mitdiskutieren im EM-Talklive.hr-online.de/Event/EM_der_Tag_live["Redaktion sportschau.de", "Tom Klein", "Sven Nees", "michael.friedrich", "Redaktion sportschau.de", "Facebook-Kommentare"]
185547Walulis sieht fern - Premiere Folge 4NoneNone
4734300Rock im Park 2012None["on3_de"]
2654079Leichtathletik WM 2013NoneNone
126399724h Berlin live Blogging DEwww.scribblelive.com/Event/24h_Berlin_live_Blogging_DEARTE
6933558LateLine TV 23.5.NoneNone
11823236Sportschau: Wintersport im ErstenNoneNone

Top 10: Längste Liveblogs (deutsch) nach Anzahl der Postings


In [10]:
table("select numposts,numcomments,title,canonical from events where language='de' order by numposts desc limit 10")


Out[10]:
numpostsnumcommentstitlecanonical
891377London Olympicswww.scribblelive.com/Event/London_Olympics
6632219iPhone 6 Keynotewww.scribblelive.com/Event/iPhone_6_Keynote
400100Tweets mit #RIPliveblog.br.de/Event/Tweets_mit_RIP
400030re:publica 2013 tweetsNone
300050Detox CHNone
241150#rar13None
2063015[Sandkasten] Test Event dpa Berlin 29. 5. 2013 live.dtf12.de/Event/Test_Event_dpa_Berlin_29_5_2013
164532847Testwww.scribblelive.com/Event/Thread.aspx?Id=44954
156370#rarNone
13359458Studentenprotestwww.scribblelive.com/Event/Studentenprotest

Top 10: Syndizierte Liveblogs nach Anzahl der Postings


In [11]:
table("select numposts,title,canonical,issyndicated from events where issyndicated>0  order by numposts desc limit 10")


Out[11]:
numpoststitlecanonicalissyndicated
132161Prueba DTSNone1
108640Resassionchangecamp.scribblelive.com/Event/Resassion1
106267Movember Charity Donationstoronto.exceptional-results.com/Event/Movember_Charity_Donations1
84366Cricket Eventwww.scribblelive.com/Event/Cricket_Event1
47751test p2liveblog.citytv.com/bachelor/Event/test_p21
42126SEC Twitter feeds espn.scribblelive.com/Event/SEC_Twitter_feeds1
41625JLo bbdev.scribblelive.com/LiveBlog/Event/JLo1
40694NCAA Tournament live blog and buzzNone1
40004AD Testdemos.scribblelive.com/Event/AD_Test_21
40003Real Estate Agent Toronto: SYNDICATIONNone1

Top 10: White-Label-Liveblogs nach Anzahl der Kommentare

Die allerersten Liveblogs 2008


In [155]:
table("select start,language,canonical,title,numposts from events where numposts>0  order by start asc limit 4")


Out[155]:
startlanguagecanonicaltitlenumposts
datetime.datetime(1990, 10, 1, 7, 0)enNonetitle1
datetime.datetime(2008, 1, 19, 23, 32, 31)enhttp://www.scribblelive.com/Event/Thread.aspx?Id=12Test11
datetime.datetime(2008, 1, 20, 0, 5, 5)enhttp://www.scribblelive.com/Event/Thread.aspx?Id=13Test25
datetime.datetime(2008, 1, 20, 0, 45, 19)enhttp://www.scribblelive.com/Event/Thread.aspx?Id=15Test32

In [12]:
table("select max(id),min(id) from events")


Out[12]:
max(id)min(id)
7765931

Top 10: Liveblogs von Reuters nach Anzahl der Postings


In [14]:
table("select numposts,strftime('%Y-%m-%d',start) as start, canonical from events where canonical like '%reuters%' order by start desc limit 10")




Liveblogs von T-Online nach Zahl der Postings


In [15]:
"""
Demo of the histogram (hist) function with a few features.

In addition to the basic histogram, this demo shows a few optional features:

    * Setting the number of data bins
    * The ``normed`` flag, which normalizes bin heights so that the integral of
      the histogram is 1. The resulting histogram is a probability density.
    * Setting the face color of the bars
    * Setting the opacity (alpha value).

"""
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

x=[a["c"] for a in dt.execute("select numposts as c from events where numposts>10 and numposts<1000")]



num_bins = 50
# the histogram of the data
n, bins, patches = plt.hist(x, num_bins, facecolor='green', alpha=0.5)
# add a 'best fit' line
plt.xlabel('x')
plt.ylabel('y')
plt.title(r'Histogram')

# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.show()


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-15-c2440af9a04c> in <module>()
     12 """
     13 import numpy as np
---> 14 import matplotlib.mlab as mlab
     15 import matplotlib.pyplot as plt
     16 

ImportError: No module named matplotlib.mlab

In [16]:
plt.plot([a["c"] for a in table("select count(*) as c ,strftime('%Y-%m-%d',start) as day from events where start>'2013-08-01' and numposts>0  group by day order by day desc").o])


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-16-5fde6c9d5f39> in <module>()
----> 1 plt.plot([a["c"] for a in table("select count(*) as c ,strftime('%Y-%m-%d',start) as day from events where start>'2013-08-01' and numposts>0  group by day order by day desc").o])

TypeError: list indices must be integers, not str

In [ ]: