media_export_through_api



In [1]:
import sys
#print (sys.path)
# Add the parent directory (relative to the current working directory) to the
# module search path; presumably this is what lets `mc_database` below be
# found -- TODO confirm where mc_database lives.
sys.path.append('../')
# Bare expression: has no effect as a script statement; as the last expression
# of a notebook cell it would display the search path.
sys.path
import mc_database

In [2]:
import psycopg2
import psycopg2.extras

In [3]:
import mediacloud, json

In [4]:
import requests

def get_download_from_api( mc_api_url, api_key, downloads_id ):
    """Fetch a single download record from an API server.

    Sends GET <mc_api_url>/api/v2/downloads/single/<downloads_id> with the
    API key passed as the ``key`` query parameter.

    Args:
        mc_api_url: Base URL of the API server; trailing slashes are ignored.
        api_key: Value sent as the ``key`` query parameter.
        downloads_id: Identifier of the download; converted with ``str()``.

    Returns:
        Element 0 of the decoded JSON response body.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    # Example of the URL shape: https://api.mediacloud.org/api/v2/media/single/1?key=KRN4T5JGJ2A

    # Strip trailing slashes so a base URL such as 'http://localhost:8000/'
    # does not produce 'http://localhost:8000//api/v2/...'.
    url = mc_api_url.rstrip( '/' ) + '/api/v2/downloads/single/' + str( downloads_id )

    r = requests.get( url, params={ 'key': api_key } )

    # Report HTTP failures here instead of as an obscure error on r.json()[0].
    r.raise_for_status()

    return r.json()[0]

In [5]:
def add_feed_download_with_api( mc_api_url, api_key, download, raw_content ):
    """PUT a feed download and its raw content to an API server.

    Sends PUT <mc_api_url>/api/v2/crawler/add_feed_download with the API key
    as the ``key`` query parameter and a JSON body of the form
    ``{ "download": download, "raw_content": raw_content }``.

    Args:
        mc_api_url: Base URL of the API server; trailing slashes are ignored.
        api_key: Value sent as the ``key`` query parameter.
        download: JSON-serializable download record.
        raw_content: JSON-serializable raw content belonging to the download.

    Returns:
        The response object returned by ``requests.put``. The status is not
        checked here; callers that care about failures must inspect it.
    """
    # Strip trailing slashes so a base URL such as 'http://localhost:3000/'
    # does not produce 'http://localhost:3000//api/v2/...'.
    url = mc_api_url.rstrip( '/' ) + '/api/v2/crawler/add_feed_download'

    payload = json.dumps( { 'download': download, 'raw_content': raw_content } )

    r = requests.put( url,
             params={  'key': api_key },
             data=payload,
             headers={ 'Accept': 'application/json'} )

    return r

In [6]:
# NOTE(review): API keys are hard-coded below. Consider loading them from the
# environment or an untracked config file before sharing this notebook.

# Earlier local key, kept for reference only; it was immediately overwritten
# by the assignment that follows, so it never took effect:
# local_key = '2a4cebc31101a2d3d5e60456c23ae877c2d49944068f237e1134e2c75191a2af'
local_key = '1161251f5de4f381a198eea4dc20350fd992f5eef7cb2fdc284c245ff3d4f3ca'

# Server the downloads are read from, and the server they are written to.
source_media_cloud_api_url =  'http://localhost:8000/'
dest_media_cloud_api_url = 'http://localhost:3000/'

# The destination server is accessed with the local key.
source_api_key = 'e07cf98dd0d457351354ee520635c226acd238ecf15ec9e853346e185343bf7b'
dest_api_key = local_key

# Label handed to mc_database.connect_to_database() to pick the database.
db_label =  "AWS backup crawler"

In [7]:
# Connect to the database selected by db_label, then open a cursor built with
# psycopg2's DictCursor factory (rows are later indexed by column name).
conn = mc_database.connect_to_database(db_label)
cursor = conn.cursor(
    cursor_factory=psycopg2.extras.DictCursor,
)

In [8]:
# Select the first 10 feed downloads (ordered by downloads_id) whose state is
# either 'success' or 'feed_error', and load all of them into memory.
feed_downloads_sql = "SELECT * from downloads where type='feed' and state in ( 'success', 'feed_error') order by downloads_id limit 10"
cursor.execute(feed_downloads_sql)
feed_downloads = cursor.fetchall()

In [9]:
#cPickle.dump( feed_downloads, file( os.path.expanduser( '~/feed_downloads.pickle' ), 'wb' ) )
#feed_downloads = cPickle.load( file( os.path.expanduser( '~/feed_downloads.pickle' ), 'r' ) )
#len( feed_downloads )

In [10]:
#dict( feed_downloads[0] )

In [11]:
# Copy every selected feed download from the source API to the destination API.
for feed_download in feed_downloads:
    download = get_download_from_api( source_media_cloud_api_url, source_api_key, feed_download['downloads_id'] )

    # raw_content is sent as its own field of the PUT payload, so remove it
    # from the download record before forwarding the record.
    raw_content = download.pop( 'raw_content' )

    # Downloads recorded as 'feed_error' are submitted as 'success'.
    if download[ 'state' ] == 'feed_error':
        download[ 'state' ]  = 'success'

    response = add_feed_download_with_api( dest_media_cloud_api_url, dest_api_key, download, raw_content )

    # The response used to be discarded, so failed uploads went unnoticed.
    # Report them but keep going with the remaining downloads.
    if not response.ok:
        print( 'add_feed_download failed for download %s: HTTP %s' % ( feed_download['downloads_id'], response.status_code ) )

In [12]:
#download = get_download_from_api( 'https://api.mediacloud.org', api_key, 684280545 )
#download