In [5]:
from dumptruck import DumpTruck
# DumpTruck handle for the SQLite file data/events.sqlite (path is relative to
# the notebook's working directory); the query helpers below run their SQL
# through this object.
dt=DumpTruck(dbname="data/events.sqlite")
from cgi import escape
import types
import re
def hstr(s) :
    """Render a value as an HTML-safe text fragment with clickable links.

    Non-string values are shown via ``repr``; byte strings are decoded as
    UTF-8, with undecodable bytes replaced instead of raising. HTML special
    characters (including quotes) are escaped, and every ``http://`` or
    ``https://`` URL is wrapped in an ``<a>`` tag that opens in a new tab and
    is labelled with the URL minus its scheme.

    Returns a text (unicode) string.
    """
    # Local imports/lookups keep the helper usable on both Python 2
    # (cgi.escape, unicode) and Python 3 (html.escape, str).
    try :
        from html import escape as _escape
    except ImportError :
        from cgi import escape as _escape
    try :
        text_type=unicode
    except NameError :
        text_type=str
    if not isinstance(s,(bytes,text_type)) :
        s=repr(s)
    if isinstance(s,bytes) :
        s=s.decode("utf-8","replace")
    # Quotes must be escaped too: the URL is substituted into href="...".
    s=_escape(s,True)
    # \S+ stops the link at the first whitespace, so trailing prose is not
    # swallowed into the URL.
    return re.sub(r"(https?://(\S+))",r'<a href="\1" target="_blank">\2</a>',s)
# Background colours applied to data rows in turn (zebra striping).
styles=['#fbe1ca','#ffffff']
class HtmlLol :
    """Wrap a list of lists so that IPython displays it as an HTML table.

    ``o[0]`` is used as the header row and every following entry as a data
    row. Each cell is converted with ``hstr``.
    """
    def __init__(self,o) :
        self.o=o
    def _repr_html_(self) :
        """Return the wrapped rows as an HTML ``<table>`` string.

        An empty list renders as an empty table instead of raising
        ``IndexError`` on the missing header row.
        """
        if not self.o :
            return u"<table></table>"
        # Collect fragments and join once rather than growing a string.
        parts=[u"<table>"]
        parts.append(u"<tr><th>%s</th></tr>" % u"</th><th>".join([hstr(a) for a in self.o[0]]))
        for n,r in enumerate(self.o[1:]) :
            cells=u"</td><td>".join([hstr(a) for a in r])
            parts.append(u"<tr style='background-color: %s'><td>%s</td></tr>" % (styles[n % len(styles)],cells))
        parts.append(u"</table>")
        return u"".join(parts)
def table(s) :
    """Run the SQL statement ``s`` through ``dt`` and wrap the result in ``HtmlLol``.

    The wrapped list starts with the column names (taken from the first
    result row), followed by one list of values per result row. A query that
    returns no rows yields a table with an empty header and no data rows.
    """
    # Execute once so header and values come from the same result set.
    rows=dt.execute(s)
    if not rows :
        return HtmlLol([[]])
    header=list(rows[0].keys())
    top=[header]
    # Look values up by column name so every row lines up with the header
    # even if the row mappings do not share one iteration order.
    top.extend([[r[k] for k in header] for r in rows])
    return HtmlLol(top)
In [6]:
# Up to 20 events whose upper-cased title contains MERKEL or DUELL, or whose
# upper-cased description contains DUELL; highest id first.
table("select numposts,numcomments,id,title,lastmodified from events where (upper(title) like '%MERKEL%' or upper(title) like '%DUELL%' or upper(description) like '%DUELL%') order by id desc, numposts desc limit 20")
Out[6]:
In [15]:
# Number of events with numposts>0 per language, largest group first.
table("select language,count(*) as num from events where numposts>0 group by language order by num desc")
Out[15]:
In [17]:
# Events with language='de' and numposts>0, counted per year-month of `start`,
# most recent month first.
table("select language,count(*) as num,strftime('%Y-%m',start) as month from events where numposts>0 and language='de' group by language,month order by month desc")
Out[17]:
In [8]:
# Per day of `start` (for start>'2013-08-01'): number of 'de' events with
# numposts>0 and the highest event id of that day, newest day first.
table("select count(*),max(id) as id,strftime('%Y-%m-%d',start) as day from events where start>'2013-08-01' and language='de' and numposts>0 group by day order by day desc")
Out[8]:
In [9]:
# The 10 'de' events with the highest numcomments.
table("select numposts,numcomments,title,canonical,who from events where language='de' order by numcomments desc limit 10")
Out[9]:
In [10]:
# The 10 'de' events with the highest numposts.
table("select numposts,numcomments,title,canonical from events where language='de' order by numposts desc limit 10")
Out[10]:
In [11]:
# The 10 events with issyndicated>0 that have the highest numposts.
table("select numposts,title,canonical,issyndicated from events where issyndicated>0 order by numposts desc limit 10")
Out[11]:
In [13]:
# The 10 events with the highest numposts among those whose canonical value
# differs from url (and numposts>0).
table("select numposts,canonical from events where numposts>0 and canonical != url order by numposts desc limit 10")
Out[13]:
In [155]:
# The 4 events with the earliest `start` among those with numposts>0.
table("select start,language,canonical,title,numposts from events where numposts>0 order by start asc limit 4")
Out[155]:
In [12]:
# Highest and lowest id present in the events table.
table("select max(id),min(id) from events")
Out[12]:
In [14]:
# The 10 events with the latest `start` whose canonical value contains 'reuters'.
table("select numposts,strftime('%Y-%m-%d',start) as start, canonical from events where canonical like '%reuters%' order by start desc limit 10")
Out[14]:
In [205]:
Out[205]:
In [15]:
# The 10 events with the latest `start` whose canonical value contains 't-online'.
table("select numposts,strftime('%Y-%m-%d',start) as start, canonical from events where canonical like '%t-online%' order by start desc limit 10")
Out[15]:
In [15]:
"""
Histogram of the ``numposts`` column of the events table.
Only events with 10 < numposts < 1000 are included. The plot is adapted from
the matplotlib histogram (hist) demo and uses a few of its optional features:
* Setting the number of data bins
* Setting the face color of the bars
* Setting the opacity (alpha value).
Unlike the original demo, the histogram is not normalized (the ``normed`` flag
is not used) and no best-fit line is drawn.
"""
import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
# numposts value of every event with 10 < numposts < 1000
x=[a["c"] for a in dt.execute("select numposts as c from events where numposts>10 and numposts<1000")]
num_bins = 50
# the histogram of the data
n, bins, patches = plt.hist(x, num_bins, facecolor='green', alpha=0.5)
# Label the axes with what is actually plotted so the figure stands alone.
plt.xlabel('numposts')
plt.ylabel('number of events')
plt.title(r'Histogram of numposts per event (10 < numposts < 1000)')
# Tweak spacing to prevent clipping of ylabel
plt.subplots_adjust(left=0.15)
plt.show()
In [16]:
# Number of events with numposts>0 per day of `start`, for start>'2013-08-01'.
# The rows are fetched from dt directly: table() wraps its result in HtmlLol,
# whose .o holds plain lists (header row plus value rows), so indexing those
# rows with a column name raises TypeError.
# Ordered ascending so that time runs left to right on the plot.
daily_counts=dt.execute("select count(*) as c ,strftime('%Y-%m-%d',start) as day from events where start>'2013-08-01' and numposts>0 group by day order by day asc")
plt.plot([row["c"] for row in daily_counts])
plt.xlabel('day index (chronological, start > 2013-08-01)')
plt.ylabel('events with numposts > 0')
plt.title('Events per day')
plt.show()
In [ ]: