In [1]:
#imports
from sqlalchemy import *
import datetime
import mediacloud, datetime
import numpy as np
In [2]:
#create a table to store data
engine = create_engine('sqlite:///:memory:',echo=True)
metadata = MetaData()
colombia = Table('data', metadata,
Column('id', Integer, primary_key=True),
Column('sentences', Integer),
Column('searches', String(400), nullable=False),
Column('timestamp', DateTime, default=datetime.datetime.now),
)
metadata.create_all(engine)
In [3]:
#search with single terms
mc = mediacloud.api.MediaCloud('MY_API_KEY')
searches = ['agreement', 'signed', 'colombia', 'peace', 'plebiscite', 'conflict', 'guerrilla']
for item in searches:
res = mc.sentenceCount(item, solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
agreement_counts = res['count']
print agreement_counts
insert_stmt = colombia.insert().values(searches=item, sentences=agreement_counts)
db_conn = engine.connect()
result = db_conn.execute(insert_stmt)
result.inserted_primary_key
In [3]:
#attempting to do combined keyword searches
mc = mediacloud.api.MediaCloud('MY_API_KEY')
searches = ['(agreement AND signed AND colombia)', '( colombia AND peace)', '(plebiscite AND colombia AND peace)', '(conflict AND colombia AND guerrilla)']
for item in searches:
res = mc.sentenceCount(item, solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
agreement_counts = res['count']
print agreement_counts
insert_stmt = colombia.insert().values(searches=item, sentences=agreement_counts)
db_conn = engine.connect()
result = db_conn.execute(insert_stmt)
result.inserted_primary_key
In [18]:
#can I do this? No. I tried butthe insert has to happen right after the count. Ask Rahul why.
# for item in agreement_counts:
# insert_stmt = queries.insert().values(keywords= item, count = r)
# db_conn = engine.connect()
# result = db_conn.execute(insert_stmt)
# result.inserted_primary_key
In [4]:
#check if data was correctly stored
from sqlalchemy.sql import select
select_stmt = select([colombia])
results = db_conn.execute(select_stmt)
for row in results:
print row
In [20]:
#calling MC API and inserting returns into db
# mc = mediacloud.api.MediaCloud('MY_API_KEY')
# signed_agr = mc.sentenceCount('(agreement AND signed AND colombia)', solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
# col_peace = mc.sentenceCount('( colombia AND peace)', solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
# plebiscite = mc.sentenceCount('(plebiscite AND colombia AND peace)', solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
# conflict = mc.sentenceCount('(conflict AND colombia AND guerrilla)', solr_filter=[mc.publish_date_query( datetime.date( 2015, 10, 1), datetime.date( 2016, 12, 2) ), 'tags_id_media:1' ])
# agreement_counts = (signed_agr['count'], col_peace['count'], plebiscite['count'], conflict['count'])
# print agreement_counts
# db_conn = engine.connect()
# for item in agreement_counts:
# insert_stmt = colombia.insert().values(sentences= agreement_counts)
# result = db_conn.execute(insert_stmt)
# result.inserted_primary_key
In [8]:
#sum of counts added to the database
from sqlalchemy.sql import select
select_stmt = select([colombia])
results = db_conn.execute(select_stmt)
sum = 0
for row in results:
sum = sum + row[1]
print sum