In [1]:
#imports
from sqlalchemy import *
import datetime
import mediacloud, datetime
import numpy as np

In [2]:
#create a table to store data
engine = create_engine('sqlite:///:memory:',echo=True)
metadata = MetaData()
colombia = Table('data', metadata,
    Column('id', Integer, primary_key=True),
    Column('sentences', Integer),
    Column('searches', String(400), nullable=False),
    Column('timestamp', DateTime, default=datetime.datetime.now),
)
metadata.create_all(engine)


2016-12-08 05:03:18,550 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2016-12-08 05:03:18,552 INFO sqlalchemy.engine.base.Engine ()
2016-12-08 05:03:18,553 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2016-12-08 05:03:18,554 INFO sqlalchemy.engine.base.Engine ()
2016-12-08 05:03:18,557 INFO sqlalchemy.engine.base.Engine PRAGMA table_info("data")
2016-12-08 05:03:18,558 INFO sqlalchemy.engine.base.Engine ()
2016-12-08 05:03:18,561 INFO sqlalchemy.engine.base.Engine 
CREATE TABLE data (
	id INTEGER NOT NULL, 
	sentences INTEGER, 
	searches VARCHAR(400) NOT NULL, 
	timestamp DATETIME, 
	PRIMARY KEY (id)
)


2016-12-08 05:03:18,562 INFO sqlalchemy.engine.base.Engine ()
2016-12-08 05:03:18,563 INFO sqlalchemy.engine.base.Engine COMMIT

In [3]:
#search with single terms
mc = mediacloud.api.MediaCloud('MY_API_KEY')
searches = ['agreement', 'signed', 'colombia', 'peace', 'plebiscite', 'conflict', 'guerrilla']
for item in searches:
    res = mc.sentenceCount(item, solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
    agreement_counts = res['count']
    print agreement_counts
    insert_stmt = colombia.insert().values(searches=item, sentences=agreement_counts)
    db_conn = engine.connect()
    result = db_conn.execute(insert_stmt)
    result.inserted_primary_key


63663
2016-12-08 05:00:44,219 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:00:44,220 INFO sqlalchemy.engine.base.Engine (63663, 'agreement', '2016-12-08 05:00:44.219550')
2016-12-08 05:00:44,221 INFO sqlalchemy.engine.base.Engine COMMIT
64064
2016-12-08 05:00:45,140 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:00:45,141 INFO sqlalchemy.engine.base.Engine (64064, 'signed', '2016-12-08 05:00:45.140157')
2016-12-08 05:00:45,142 INFO sqlalchemy.engine.base.Engine COMMIT
10274
2016-12-08 05:00:46,061 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:00:46,062 INFO sqlalchemy.engine.base.Engine (10274, 'colombia', '2016-12-08 05:00:46.061863')
2016-12-08 05:00:46,063 INFO sqlalchemy.engine.base.Engine COMMIT
43723
2016-12-08 05:00:47,034 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:00:47,034 INFO sqlalchemy.engine.base.Engine (43723, 'peace', '2016-12-08 05:00:47.033963')
2016-12-08 05:00:47,035 INFO sqlalchemy.engine.base.Engine COMMIT
723
2016-12-08 05:00:47,599 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:00:47,600 INFO sqlalchemy.engine.base.Engine (723, 'plebiscite', '2016-12-08 05:00:47.599575')
2016-12-08 05:00:47,602 INFO sqlalchemy.engine.base.Engine COMMIT
25130
2016-12-08 05:00:48,718 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:00:48,719 INFO sqlalchemy.engine.base.Engine (25130, 'conflict', '2016-12-08 05:00:48.718705')
2016-12-08 05:00:48,720 INFO sqlalchemy.engine.base.Engine COMMIT
1856
2016-12-08 05:00:49,441 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:00:49,442 INFO sqlalchemy.engine.base.Engine (1856, 'guerrilla', '2016-12-08 05:00:49.441063')
2016-12-08 05:00:49,443 INFO sqlalchemy.engine.base.Engine COMMIT

In [3]:
#attempting to do combined keyword searches
mc = mediacloud.api.MediaCloud('MY_API_KEY')
searches = ['(agreement AND signed AND colombia)', '( colombia AND peace)', '(plebiscite AND colombia AND peace)', '(conflict AND colombia AND guerrilla)']
for item in searches:
    res = mc.sentenceCount(item, solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
    agreement_counts = res['count']
    print agreement_counts
    insert_stmt = colombia.insert().values(searches=item, sentences=agreement_counts)
    db_conn = engine.connect()
    result = db_conn.execute(insert_stmt)
    result.inserted_primary_key


120
2016-12-08 05:04:14,650 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:04:14,651 INFO sqlalchemy.engine.base.Engine (120, '(agreement AND signed AND colombia)', '2016-12-08 05:04:14.649957')
2016-12-08 05:04:14,651 INFO sqlalchemy.engine.base.Engine COMMIT
4174
2016-12-08 05:04:16,803 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:04:16,803 INFO sqlalchemy.engine.base.Engine (4174, '( colombia AND peace)', '2016-12-08 05:04:16.803059')
2016-12-08 05:04:16,805 INFO sqlalchemy.engine.base.Engine COMMIT
47
2016-12-08 05:04:20,177 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:04:20,178 INFO sqlalchemy.engine.base.Engine (47, '(plebiscite AND colombia AND peace)', '2016-12-08 05:04:20.177335')
2016-12-08 05:04:20,179 INFO sqlalchemy.engine.base.Engine COMMIT
71
2016-12-08 05:04:23,620 INFO sqlalchemy.engine.base.Engine INSERT INTO data (sentences, searches, timestamp) VALUES (?, ?, ?)
2016-12-08 05:04:23,621 INFO sqlalchemy.engine.base.Engine (71, '(conflict AND colombia AND guerrilla)', '2016-12-08 05:04:23.620743')
2016-12-08 05:04:23,622 INFO sqlalchemy.engine.base.Engine COMMIT

In [18]:
#can I do this? No. I tried butthe insert has to happen right after the count. Ask Rahul why.
# for item in agreement_counts:
#     insert_stmt = queries.insert().values(keywords= item, count = r)
#     db_conn = engine.connect()
#     result = db_conn.execute(insert_stmt)
#     result.inserted_primary_key


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-18-07a220b8b420> in <module>()
----> 1 for item in agreement_counts:
      2     insert_stmt = colombia.insert().values(searches=item, sentences=agreement_counts)
      3     db_conn = engine.connect()
      4     result = db_conn.execute(insert_stmt)
      5     result.inserted_primary_key

TypeError: 'int' object is not iterable

In [4]:
#check if data was correctly stored
from sqlalchemy.sql import select
select_stmt = select([colombia])
results = db_conn.execute(select_stmt)
for row in results:
    print row


2016-12-08 05:00:55,772 INFO sqlalchemy.engine.base.Engine SELECT data.id, data.sentences, data.searches, data.timestamp 
FROM data
2016-12-08 05:00:55,774 INFO sqlalchemy.engine.base.Engine ()
(1, 63663, u'agreement', datetime.datetime(2016, 12, 8, 5, 0, 44, 219550))
(2, 64064, u'signed', datetime.datetime(2016, 12, 8, 5, 0, 45, 140157))
(3, 10274, u'colombia', datetime.datetime(2016, 12, 8, 5, 0, 46, 61863))
(4, 43723, u'peace', datetime.datetime(2016, 12, 8, 5, 0, 47, 33963))
(5, 723, u'plebiscite', datetime.datetime(2016, 12, 8, 5, 0, 47, 599575))
(6, 25130, u'conflict', datetime.datetime(2016, 12, 8, 5, 0, 48, 718705))
(7, 1856, u'guerrilla', datetime.datetime(2016, 12, 8, 5, 0, 49, 441063))

In [20]:
#calling MC API and inserting returns into db
# mc = mediacloud.api.MediaCloud('MY_API_KEY')
# signed_agr = mc.sentenceCount('(agreement AND signed AND colombia)', solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
# col_peace = mc.sentenceCount('( colombia AND peace)', solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
# plebiscite = mc.sentenceCount('(plebiscite AND colombia AND peace)', solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
# conflict = mc.sentenceCount('(conflict AND colombia AND guerrilla)', solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
# agreement_counts = (signed_agr['count'], col_peace['count'], plebiscite['count'], conflict['count'])
# print agreement_counts
# db_conn = engine.connect()
# for item in agreement_counts:
#     insert_stmt = colombia.insert().values(sentences= agreement_counts)
#     result = db_conn.execute(insert_stmt)
#     result.inserted_primary_key


(120, 4174, 47, 71)

In [8]:
#sum of counts added to the database
from sqlalchemy.sql import select
select_stmt = select([colombia])
results = db_conn.execute(select_stmt)
sum = 0
for row in results:
    sum  = sum + row[1]
print sum


2016-12-08 05:01:45,963 INFO sqlalchemy.engine.base.Engine SELECT data.id, data.sentences, data.searches, data.timestamp 
FROM data
2016-12-08 05:01:45,964 INFO sqlalchemy.engine.base.Engine ()
209433