In [1]:
import json
from pymongo import MongoClient
import random 

# Connection settings passed as keyword arguments to MongoClient.
MONGO_CONFIG = {"host": "localhost", "port": 27017}

# File of recorded clicks; each line is parsed as its own JSON document.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
CLICKS_PATH = '/Users/robert/dev/bigdata/traffic_gen/clicks.json'

# Only clicks whose "client_id" equals this value are kept and stored.
TARGET_CLIENT_ID = 3
# Value written to "traffic_type_id" on every stored click.
TRAFFIC_TYPE_ID = 1

# Pool of user-agent strings; one is chosen at random for each stored click.
user_agents = [
    "Mozilla/5.0 (Windows NT 6.1, Trident/7.0, rv:11.0) like Gecko",
    "Mozilla/5.0 (iPhone, CPU iPhone OS 9_3_4 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G35 Safari/601.1,apple_iphone_ver9",
    "HasOffers Mobile AppTracking v1.0",
    "Mozilla/5.0 (Linux, Android 5.0.1, SAMSUNG SCH-I545 4G Build/LRX22C) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/2.1 Chrome/34.0.1847.76 Mobile Safari/537.36,samsung_sch_i545_ver1_suban50"
]

# Target collection: database "kinesis_traffic", collection "recorded_clicks".
recorded_clicks = MongoClient(**MONGO_CONFIG).kinesis_traffic.recorded_clicks
print("mongo connection", recorded_clicks)

clicks = []   # documents that were successfully inserted
skipped = 0   # lines that were not valid JSON or lacked a usable "client_id"
with open(CLICKS_PATH, 'r') as clickfile:
    # Iterate the file lazily instead of materialising every line with readlines().
    for line in clickfile:
        try:
            click = json.loads(line)
            if click["client_id"] != TARGET_CLIENT_ID:
                continue
        except (ValueError, KeyError, TypeError):
            # ValueError: malformed JSON (json.JSONDecodeError is a subclass);
            # KeyError: no "client_id" field; TypeError: the line decoded to a
            # non-dict value. Anything else -- notably database errors raised
            # below -- is intentionally NOT swallowed.
            skipped += 1
            continue

        # Decorate only the clicks we keep, so no work is wasted on discarded ones.
        click["user_agent"] = random.choice(user_agents)
        click["traffic_type_id"] = TRAFFIC_TYPE_ID
        # insert_one adds the generated "_id" to the dict in place; insert before
        # appending so that `clicks` only ever holds documents that were stored.
        recorded_clicks.insert_one(click)
        clicks.append(click)

print("done with [{}] lines".format(len(clicks)))
if skipped:
    print("skipped [{}] undecodable lines".format(skipped))
print("clicks [{}]".format(clicks[:1]))


mongo connection Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'kinesis_traffic'), 'recorded_clicks')
done with [18534] lines
clicks [[{'price_format_id': 2, 'campaign_type_id': 1, 'redirect_url': 'https://www.google.com/search?q=offer+link', 'click_pixel': False, 'language_raw': 'qu', 'traffic_type_id': 1, 'campaign_id': 22030, 'creative_id': 14121, 'ip_address': '52.42.0.110', 'subid_1': 'SUB_ID/', 'test': False, 'request_date_utc': '2016-10-05 21:56:26.756469', 'location_id': 107, 'offer_id': 8146, 'country': 'US', 'create_campaign_direct_channel': False, 'region': 'OR', 'domain': 'c.demo-newtrk.cakemarketing.com', 'client_id': 3, 'session_id': 'bb23b051-adf5-437b-8ca7-67f9e5fdc4bb', 'create_campaign_copy_parent': False, 'price_received': 2, 'city': 'portland', 'click_id': '08caba62-d61e-497c-b6a7-672f767117ae', 'first_touch_by_publisher': True, 'tracking_id': '81bc8891-9e8c-409e-a64a-c46deabfe7c8', 'visitor_id': 'd79af5d1-cc30-487c-9f47-e24c715209ee', 'price_owed': 1, 'offer_contract_id': 8686, 'user_agent': 'HasOffers Mobile AppTracking v1.0', 'first_touch': True, '_id': ObjectId('57f7d3f3c1518133c37786e0'), 'browser_version_minor_id': 2, 'request_date': '2016-10-05 14:56:26.756469', 'language_id': 77, 'publisher_id': 6683, 'create_campaign_copy_parent_pixel': False, 'isp_id': 35832, 'create_campaign_internal_campaign_redirect': False}]]