In [1]:
import sqlite3
from contextlib import closing
from pandas.io import sql

# SQLite file that holds the `events` table; named once so it is easy to repoint.
EVENTS_DB = "/home/martin/projekte/scribblelive/ipython/data/events.sqlite"

# Load every row of `events` into a DataFrame.
# read_sql_query replaces the deprecated sql.read_frame, and closing() makes
# sure the SQLite handle is released as soon as the frame has been built
# (a bare sqlite3.connect(...) passed inline is never closed).
with closing(sqlite3.connect(EVENTS_DB)) as conn:
    df = sql.read_sql_query("select * from events", conn)
/usr/local/lib/python2.7/dist-packages/pytz/__init__.py:35: UserWarning: Module readline was already imported from /usr/lib/python2.7/lib-dynload/readline.i386-linux-gnu.so, but /usr/local/lib/python2.7/dist-packages is being added to sys.path
from pkg_resources import resource_stream
In [5]:
# Show the column labels of the loaded events frame.
df.columns
Out[5]:
Index([u'iscommenting', u'issyndicatable', u'discussionsenabled', u'description', u'lastmodified', u'metatitle', u'who', u'numposts', u'meta', u'stime', u'mtime', u'islive', u'issyndicated', u'id', u'ismoderated', u'canonical', u'end', u'language', u'title', u'url', u'websites', u'discussion', u'created', u'start', u'pages', u'stamp', u'syndicatedcomments', u'numcomments', u'isdeleted', u'location', u'error'], dtype=object)
In [6]:
# Number of rows in the events frame (first component of its shape).
df.shape[0]
Out[6]:
184635
In [8]:
# Display every field of the row whose index label is 1.
df.loc[1]
Out[8]:
iscommenting 1
issyndicatable 0
discussionsenabled true
description Hubbard Test Event Desc
lastmodified 2013-08-24 01:14:52
metatitle Hubbard Test Event Title
who ["Graeme MacFarlane", "Jeremy Sinon"]
numposts 882
meta {}
stime 2013-08-23 23:14:52
mtime 2013-08-23 23:14:52
islive 1
issyndicated 0
id 173297
ismoderated 0
canonical http://live.1500espn.com/Event/Hubbard_Test_Ev...
end 2013-10-31 05:00:00
language en
title Hubbard Test Event Title
url http://scribblelive.mobi/Event/Hubbard_Test_Ev...
websites [{"Url": "http://live.1500espn.com/Event/Hubba...
discussion {"Moderated": 0, "Enabled": 1}
created 2013-08-16 20:17:11
start 2013-08-16 20:17:11
pages 20
stamp 11:14 PM EST
syndicatedcomments 0
numcomments 6
isdeleted 0
location None
error None
Name: 1, dtype: object
In [9]:
# Display every field of the row whose index label is 0.
df.loc[0]
Out[9]:
iscommenting 1
issyndicatable 0
discussionsenabled false
description Gillette Stadium goes country for this weekend...
lastmodified 2013-08-24 01:15:00
metatitle Country Fest at Gillette Stadium, Aug. 23-24 -...
who Glenn Yoder, A&E producer
numposts 15139
meta {}
stime 2013-08-23 23:14:56
mtime 2013-08-23 23:14:56
islive 1
issyndicated 0
id 179063
ismoderated 1
canonical http://live.boston.com/Event/Country_Fest_at_G...
end 2013-08-26 04:00:00
language en
title Country Fest at Gillette Stadium, Aug. 23-24 -...
url http://scribblelive.mobi/Event/Country_Fest_at...
websites [{"Url": "http://live.boston.com/Event/Country...
discussion {"Moderated": 0, "Enabled": 0}
created 2013-08-22 22:11:32
start 2013-08-22 22:11:32
pages 337
stamp 11:14 PM EST
syndicatedcomments 0
numcomments 0
isdeleted 0
location None
error None
Name: 0, dtype: object
In [10]:
# Peek at the first ten entries of the `who` column.
df["who"].head(10)
Out[10]:
0 Glenn Yoder, A&E producer
1 ["Graeme MacFarlane", "Jeremy Sinon"]
2 ["Dan Larson", "robhernandezwsj", "Dennis Semr...
3 ["abarreales", "ivangelibter", "suriphone41", ...
4 Emily Mertz
5 Katie Cassidy
6 ["SportsDayDFW", "SportsDay Live Chat", "Evan ...
7 ["David Moore", "SportsDay Live Chat"]
8 ["La r\u00c3\u00a9daction", "La r\u00c3\u00a9d...
9 ["Jennifer Weigel/Staff", "Matthew Thomas/Staff"]
Name: who, dtype: object
In [16]:
# Look at the last 400 entries of the `mtime` column.
df["mtime"].tail(400)
Out[16]:
184235 2013-08-27 20:55:36
184236 2013-08-28 05:12:53
184237 None
184238 2013-08-28 00:00:37
184239 2013-08-28 04:50:56
184240 2013-08-27 23:10:44
184241 2013-08-28 03:38:53
184242 2013-08-27 23:12:05
184243 2013-08-27 20:20:52
184244 2013-08-28 04:42:21
184245 2013-08-27 23:06:50
184246 None
184247 2013-08-28 05:22:30
184248 2013-08-27 21:23:12
184249 2013-08-28 03:24:44
...
184620 None
184621 2013-08-28 10:45:10
184622 None
184623 2013-08-28 12:27:10
184624 2013-08-28 12:19:34
184625 2013-08-28 11:54:49
184626 None
184627 2013-08-28 11:10:24
184628 2013-08-28 11:22:32
184629 None
184630 2013-08-28 11:53:31
184631 2013-08-28 10:52:29
184632 2013-08-28 11:57:09
184633 2013-08-28 10:43:23
184634 2013-08-28 10:48:03
Name: mtime, Length: 400, dtype: object
In [ ]:
Content source: mvtango/python-scraper-toolkit
Similar notebooks: