In [ ]:
import ijson
In [ ]:
def tweet_map(json_file, tweet_func, save=False):
    """
    Apply a function to each tweet in a json file

    json_file - path to tweet json file (parsed with the ijson prefix "item",
                i.e. the elements of a top-level JSON array)
    tweet_func - function that takes in a 'tweet' object, and returns a 'tweet' object
    save (optional) - overwrite json_file with modified json

    returns list where each tweet has tweet_func applied to it
    """
    # Open in binary mode so ijson decodes the UTF-8 bytes itself. In text
    # mode the data is first decoded with the platform's locale encoding,
    # which can corrupt or reject non-ASCII tweet text.
    with open(json_file, 'rb') as f:
        mapped_tweets = [tweet_func(tweet) for tweet in ijson.items(f, "item")]

    # Write back only after the read handle above has been closed, since
    # the target is the same path. list_to_json is defined outside this cell.
    if save:
        list_to_json(mapped_tweets, json_file)
    return mapped_tweets
In [ ]:
def add_test(tweet):
    """
    Mark a tweet by setting its "test" entry to True

    tweet - mapping to modify; it is changed in place
    returns the same tweet object that was passed in
    """
    tweet["test"] = True
    return tweet
# Demo: run add_test over every tweet in the file. save is left at its
# default (False), so the json file itself is not rewritten.
processed_tweets = tweet_map("../json/sarcastic/unique.json", add_test)
# Bare trailing expression: the notebook shows the resulting list as the cell output.
processed_tweets
In [ ]:
def tweet_iterate(json_file, key=None):
    """
    Stream through objects in a json file

    json_file - path to tweet json file
    key (optional) - single key value of interest (ex: return only "text" field, or only "id" field of each tweet)

    yields each tweet in turn, or only each tweet's `key` value when key is given
    """
    # A single ijson prefix covers both cases: "item" selects every element
    # of the top-level array, "item.<key>" selects just that field of each one.
    prefix = "item.{}".format(key) if key else "item"

    # Open in binary mode so ijson decodes the UTF-8 bytes itself rather than
    # receiving text already decoded with the platform's locale encoding.
    # The file stays open for as long as the generator is being consumed.
    with open(json_file, 'rb') as f:
        for tweet in ijson.items(f, prefix):
            yield tweet
In [ ]:
# Demo: walk the whole file one tweet at a time, printing each tweet as it
# is parsed instead of loading the full list first.
for thing in tweet_iterate("../json/sarcastic/unique.json"):
    print(thing)
In [ ]:
# partial iteration on a generator using itertools
import itertools
# islice stops pulling from the generator after 25 tweets, so only the
# start of the file is read and printed.
for thing in itertools.islice(tweet_iterate("../json/sarcastic/unique.json"), 25):
    print(thing)