In [ ]:
import ijson

In [ ]:
def tweet_map(json_file, tweet_func, save=False):
    """
    Apply a function to each tweet in a json file.

    The file is streamed with ijson, so tweets are parsed one at a time; the
    mapped results are still collected into a single in-memory list.

    json_file - path to tweet json file (tweets are read from the "item"
                prefix, i.e. the elements of a top-level JSON array)
    tweet_func - function that takes in a 'tweet' object, and returns a 'tweet' object
    save (optional) - overwrite json_file with modified json

    returns list where each tweet has tweet_func applied to it
    """
    # Binary mode: ijson consumes bytes and handles the UTF-8 decoding itself.
    # A text-mode handle would decode with the platform's default encoding,
    # which can corrupt or reject non-ASCII tweet text on non-UTF-8 locales.
    with open(json_file, 'rb') as f:
        mapped_tweets = [tweet_func(tweet) for tweet in ijson.items(f, "item")]
    if save:
        # The input handle is already closed here, so the same path can be
        # rewritten. list_to_json is defined outside this block.
        list_to_json(mapped_tweets, json_file)
    return mapped_tweets

In [ ]:
def add_test(tweet):
    """Mark a tweet in place with a "test" entry set to True and return it."""
    tweet.update({"test": True})
    return tweet

# Demo: run add_test over every tweet in the sarcastic set. `save` is left at
# its default (False), so the file on disk is not rewritten.
processed_tweets = tweet_map("../json/sarcastic/unique.json", add_test)
# Bare expression so the notebook cell displays the resulting list.
processed_tweets

In [ ]:
def tweet_iterate(json_file, key=None):
    """
    Stream through objects in a json file

    This is a generator: nothing is opened or parsed until iteration starts,
    and the file stays open until the generator is exhausted or closed.

    json_file - path to tweet json file (objects are read from the "item"
                prefix, i.e. the elements of a top-level JSON array)
    key (optional) - single key value of interest (ex: return only "text" field, or only "id" field of each tweet)

    yields each whole object, or only the value under `key` when key is given
    """
    # Build the ijson prefix once instead of duplicating the loop per branch.
    # A falsy key (None or "") selects whole objects, as before.
    prefix = "item.{}".format(key) if key else "item"

    # Binary mode: ijson consumes bytes and handles the UTF-8 decoding itself.
    # A text-mode handle would decode with the platform's default encoding,
    # which can corrupt or reject non-ASCII tweet text on non-UTF-8 locales.
    with open(json_file, 'rb') as f:
        for tweet in ijson.items(f, prefix):
            yield tweet

In [ ]:
# Demo: stream every object in the file and print each one as it is yielded.
for thing in tweet_iterate("../json/sarcastic/unique.json"):
    print(thing)

In [ ]:
# Partial iteration on a generator using itertools: islice stops pulling from
# tweet_iterate after the first 25 objects, so only those are printed.
import itertools
for thing in itertools.islice(tweet_iterate("../json/sarcastic/unique.json"), 25):
    print(thing)