In [1]:
import redditDataset
import praw
# Reddit API client shared by the cells below.
userAgent = 'date_test'
r = praw.Reddit(user_agent=userAgent)
In [4]:
# Re-import the helper module so that edits to it are picked up by this kernel.
reload(redditDataset)
# The result of getSubreddits is consumed as an iterator; only its first item
# is kept (presumably the 'funny' subreddit object -- confirm against the module).
subreddits = redditDataset.getSubreddits(r, ['funny'])
# Builtin next() replaces the Python-2-only .next() method and keeps `sub`
# from being bound first to the iterator and then to one of its items.
sub = next(subreddits)
In [11]:
"""
Get submission 202wd3
http://www.reddit.com/r/funny/comments/202wd3/i_participated_in_one_of_the_biggest_magic_the/
"""
post = r.get_submission('http://www.reddit.com/r/funny/comments/202wd3/i_participated_in_one_of_the_biggest_magic_the/')
Out[11]:
In [49]:
"""
Get posts within timeframe
"""
import datetime, time
startDate = '140101'
endDate = '140102'
startDate = time.mktime(datetime.datetime.strptime(startDate, "%y%m%d").timetuple())
endDate = time.mktime(datetime.datetime.strptime(endDate, "%y%m%d").timetuple())
searchTerm = 'timestamp:' + str(startDate)[:-2] + '..' + str(endDate)[:-2]
print searchTerm
posts = sub.search(searchTerm, sort='top', syntax='cloudsearch', limit=100)
len(list(posts))
Out[49]:
In [52]:
# Exercise redditDataset.getPostsWithinRange on a one-day window and show the
# number of posts it yields.
reload(redditDataset)
rangeStart, rangeEnd = '140101', '140102'
posts = redditDataset.getPostsWithinRange(sub, rangeStart, rangeEnd)
len(list(posts))
Out[52]:
In [68]:
# Check that per-range post generators can be chained into a single stream.
reload(redditDataset)
import itertools

dateRanges = (('140101', '140102'), ('140103', '140104'))
# One generator per date range, created in the order the ranges are listed.
gen1, gen2 = (
    redditDataset.getPostsWithinRange(sub, rangeStart, rangeEnd, nPosts=50)
    for rangeStart, rangeEnd in dateRanges
)
fullgen = itertools.chain(gen1, gen2)
# Prepending an empty list contributes no items to the chained stream.
empty = []
emptygen = itertools.chain(empty, fullgen)
# Draining the chain displays the total number of posts across both ranges.
len(list(emptygen))
Out[68]:
In [96]:
# Exercise redditDataset.getAllPostsWithinRangeFineScale over a three-day span.
# NOTE(review): the unit of fineScale=12 is not visible here -- confirm in the module.
reload(redditDataset)
rangeStart, rangeEnd = '140101', '140104'
posts = redditDataset.getAllPostsWithinRangeFineScale(
    sub, rangeStart, rangeEnd, fineScale=12
)
In [97]:
# Drain `posts` into a list and display how many items it produced.
collectedPosts = list(posts)
len(collectedPosts)
Out[97]:
In [ ]: