In [ ]:
import happybase as hb
import elasticsearch as es
import json
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [ ]:
# JSON configuration file; it is loaded into `conf` and supplies the HBase
# and Elasticsearch settings read by the helper functions.
conf_fn = "../conf/global_var_remotehbase_release.json"
# Row key looked up in the HBase sha1-infos table to find the CDR ids.
sha1 = 'FDDADAD0CBB16245FB1BAB9CAFB8E87994CBA7A1'

In [ ]:
# Load the configuration; the context manager closes the file handle
# instead of leaving it open until garbage collection.
with open(conf_fn, 'rt') as conf_file:
    conf = json.load(conf_file)

In [ ]:
def get_all_cdr_ids(sha1, conf):
    """Fetch the CDR ids recorded for an image SHA1 in the HBase sha1-infos table.

    Args:
        sha1: Row key to look up in the table named by
            conf['HBI_table_sha1infos'].
        conf: Configuration dict providing 'HBI_host' and
            'HBI_table_sha1infos'.

    Returns:
        The raw value of the row's 'info:all_cdr_ids' column (the caller
        splits it on commas).

    Raises:
        KeyError: if the returned row has no 'info:all_cdr_ids' column
            (e.g. the sha1 is unknown to the table).
    """
    conn = hb.Connection(conf['HBI_host'])
    try:
        tab_sha1 = conn.table(conf['HBI_table_sha1infos'])
        row = tab_sha1.row(sha1)
        return row['info:all_cdr_ids']
    finally:
        # Release the connection even when the lookup raises, so repeated
        # calls from the notebook do not pile up open sockets.
        conn.close()

In [ ]:
def _build_ids_query(id_list):
    """Build the search body selecting `obj_stored_url` for the given document ids."""
    return {
        "fields": ["obj_stored_url"],
        # Elasticsearch returns only 10 hits by default; ask for one per id
        # so long id lists are not silently truncated.
        "size": len(id_list),
        "query": {"ids": {"values": id_list}},
    }


def get_s3urls_cdr_ids(cdr_ids, conf):
    """Resolve CDR document ids to their stored image URLs via Elasticsearch.

    Args:
        cdr_ids: Comma-separated string of document ids. Surrounding
            whitespace and empty entries are ignored.
        conf: Configuration dict providing 'ist_els_user', 'ist_els_pass',
            'ist_els_instance', 'ist_els_index' and 'ist_els_doc_type'.

    Returns:
        A list of (document id, obj_stored_url field value) tuples, one per
        hit. An empty list is returned, without contacting Elasticsearch,
        when `cdr_ids` contains no ids.
    """
    id_list = []
    if cdr_ids:
        id_list = [cdr_id.strip() for cdr_id in cdr_ids.split(',')]
        id_list = [cdr_id for cdr_id in id_list if cdr_id]
    if not id_list:
        return []

    from elasticsearch import Elasticsearch
    els_user = conf['ist_els_user']
    els_pass = conf['ist_els_pass']
    els_instance = conf['ist_els_instance']
    els_index = conf['ist_els_index']
    els_doc_type = conf['ist_els_doc_type']

    # The body is a dict rather than a hand-concatenated JSON string, so ids
    # containing quotes or backslashes are escaped by the client.
    query = _build_ids_query(id_list)
    es_client = Elasticsearch('https://'+els_user+':'+els_pass+'@'+els_instance)
    response = es_client.search(index=els_index, doc_type=els_doc_type, body=query)

    # Each hit carries its document id in '_id' and the requested field
    # under 'fields'.
    return [(hit['_id'], hit['fields']['obj_stored_url'])
            for hit in response['hits']['hits']]

In [ ]:
def _split_s3_url(s3_url):
    """Return (bucket, key) for a path-style 's3.amazonaws.com' URL, or None if it is not one."""
    try:
        parts = s3_url.split("/")
        bucket_pos = parts.index('s3.amazonaws.com') + 1
        bucket = parts[bucket_pos]
    except (AttributeError, TypeError, ValueError, IndexError):
        # Not a string, no 's3.amazonaws.com' segment, or nothing after it.
        return None
    return bucket, '/'.join(parts[bucket_pos + 1:])


def dl_image(s3_url):
    """Download an image from S3 and open it as a PIL image.

    The URL must contain an 's3.amazonaws.com' path segment; the segment
    after it is used as the bucket and the remainder as the object key.

    Args:
        s3_url: URL of the image.

    Returns:
        The opened PIL image, or None when the URL is not such an S3 URL or
        the download/decoding fails (a message is printed in both cases).
    """
    # Validate the URL before importing boto3 or building an AWS client, so
    # bad URLs are rejected cheaply.
    location = _split_s3_url(s3_url)
    if location is None:
        print("{} is not a s3 url.".format(s3_url))
        return None
    bucket, key = location

    import io
    import boto3
    from PIL import Image
    try:
        s3_clt = boto3.client('s3')
        img_buffer = io.BytesIO()
        s3_clt.download_fileobj(bucket, key, img_buffer)
        # The download leaves the cursor at the end of the buffer; rewind so
        # the image decoder reads from the first byte.
        img_buffer.seek(0)
        return Image.open(img_buffer)
    except Exception as err:
        # Best-effort: report the failure and let the caller carry on with
        # the remaining images.
        print("Could not download image from {}. Error was: {}".format(s3_url, err))
        return None

In [ ]:
def disp_image_list(image_list, sha1):
    """Download and display every image in `image_list`.

    Args:
        image_list: Iterable of (id, urls) pairs; the first entry of `urls`
            is downloaded with `dl_image`.
        sha1: Unused by this function; kept so existing calls keep working.

    Images that fail to download (`dl_image` returns None after printing
    the reason) are skipped instead of being displayed as "None".
    """
    from IPython.display import display
    imgs = []
    for _img_id, s3_url in image_list:
        img = dl_image(str(s3_url[0]))
        if img is not None:
            imgs.append(img)
    display(*imgs)

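Run the pipeline: look up the CDR ids for `sha1`, resolve them to S3 URLs, then download and display the images.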


In [ ]:
# Look up the comma-separated CDR ids stored in HBase for `sha1`.
cdr_ids = get_all_cdr_ids(sha1, conf)
#print cdr_ids

In [ ]:
# Resolve the CDR ids to (document id, stored URL field) pairs via Elasticsearch.
image_list = get_s3urls_cdr_ids(cdr_ids, conf)

In [ ]:
# Download each image from S3 and render it inline in the notebook.
disp_image_list(image_list, sha1)