In [1]:
%matplotlib notebook
from metadatastore.mds import MDS
from databroker import Broker
from databroker.core import register_builtin_handlers
from filestore.fs import FileStore as FS
from pymongo import MongoClient
import datetime
import numpy as np
import os
import pytz
import matplotlib.pyplot as plt

utc=pytz.UTC

_mds_config = {'host': 'xf03id-ca1',
               'port': 27017,
               'database': 'datastore',
               'timezone': 'US/Eastern'}
mds = MDS(_mds_config, auth=False)

_fs_config = {'host': 'xf03id-ca1',
              'port': 27017,
              'database': 'filestore'}

db = Broker(mds, FS(_fs_config))

mds = MDS(_mds_config)
fs = FS(_fs_config)
conn = MongoClient(host=_fs_config['host'],
                  port=_fs_config['port'])
fsdb = conn.filestore


/opt/conda_envs/analysis-17Q2.0/lib/python3.6/site-packages/databroker/databroker.py:20: UserWarning: No default DataBroker object will be created because the necessary configuration was not found: "The configuration field(s) ['timezone'] were not found in any file or environmental variable."
  "the necessary configuration was not found: %s" % exc)

In [49]:
# 1. Copy database and perform the following on copied database
# 2. Fetch uids of resources with datums that have no resource info
# 3. Looking at the resources, get the file timestamps
# 4. Comparing consecutive resource timestamps and verifying against file timestamps, update datums' "resource" field
# 5. Verify and perform the same opeation on production db

In [2]:
#Not possible via filestore, so resorting to pymongo for this limited purpose
incomplete_datums = list(fsdb.datum.find({'resource': None}))

In [83]:
patient_zero = next(mds.find_run_starts(scan_id=27768))
patient_last = next(mds.find_run_starts(scan_id=27917))
problem_starts = list(mds.find_run_starts(scan_id={'$gte': 27768, '$lte':27917+1}))

rstart_times = [r['time'] for r in problem_starts]

In [84]:
# find resources within defined time window 
end = np.max(rstart_times)
start = np.min(rstart_times)
end_utc = utc.localize(datetime.datetime.utcfromtimestamp(end))
start_utc = utc.localize(datetime.datetime.utcfromtimestamp(start))

query = fsdb.resource.find()
rlist = []
flist = []
tlist = []
for r in query:
    if start_utc<= r['_id'].generation_time <= end_utc:
        rlist.append(r)
        flist.append(os.path.getmtime(r['root'] + '/' +  r['resource_path']))
        tlist.append(r['_id'].generation_time)

In [5]:
flist[0]


Out[5]:
1495508467.9110901

In [38]:
rlist[184]


Out[38]:
{'_id': ObjectId('592597f7e97f4e10fd1fca4f'),
 'resource_kwargs': {},
 'resource_path': '2017/05/24/9e7bee57-e972-4f7f-875a_000000.h5',
 'root': '/data',
 'spec': 'XSP3',
 'uid': 'b0ac8913-a008-41d1-b05c-c3dc314599b2'}

In [51]:
hdr = db[27768]
t1 = hdr.start.time
t2 = hdr.stop.time
t1_utc = utc.localize(datetime.datetime.utcfromtimestamp(t1))
t2_utc = utc.localize(datetime.datetime.utcfromtimestamp(t2))

In [52]:
hdr.start


Out[52]:
axes ['x', 'y']
beamline_id HXN
config
dead_time 0.005
detectors ['zebra', 'xspress3', 'merlin1', 'sclr1']
exposure_time 0.15
fast_axis zpssx
final_pos [[11.9945369073248, 12.199598297863488], [11.9945369073248, 14.99959829786349]]
fly_type soft
group
max_points 16384
motor1 zpssx
motor2 zpssy
motors ['zpssx', 'zpssy']
num1 120
num2 150
per_points [[0.2, 0.2], [0.2, 0.20000000000000004]]
plan_args
dead_time 0.005
exposure_time 0.15
fly_type soft
max_points 16384
motor1 EpicsMotor(prefix='XF:03IDC-ES{Ppmac:1-zpssx}Mtr', name='zpssx', parent='zps', settle_time=0.0, timeout=None, read_attrs=['user_readback', 'user_setpoint'], configuration_attrs=['motor_egu', 'velocity', 'acceleration', 'user_offset', 'user_offset_dir'])
motor2 EpicsMotor(prefix='XF:03IDC-ES{Ppmac:1-zpssy}Mtr', name='zpssy', parent='zps', settle_time=0.0, timeout=None, read_attrs=['user_readback', 'user_setpoint'], configuration_attrs=['motor_egu', 'velocity', 'acceleration', 'user_offset', 'user_offset_dir'])
num1 120
num2 150
return_speed 40.0
scan_end1 12
scan_end2 15
scan_start1 -12
scan_start2 -15
plan_name FlyPlan2D
plan_type FlyPlan2D
points [[120, 136], [120, 14]]
return_speed 40.0
scan_command fly2d(motor1=EpicsMotor(prefix='XF:03IDC-ES{Ppmac:1-zpssx}Mtr', name='zpssx', parent='zps', settle_time=0.0, timeout=None, read_attrs=['user_readback', 'user_setpoint'], configuration_attrs=['motor_egu', 'velocity', 'acceleration', 'user_offset', 'user_offset_dir']), scan_start1=-12, scan_end1=12, num1=120, motor2=EpicsMotor(prefix='XF:03IDC-ES{Ppmac:1-zpssy}Mtr', name='zpssy', parent='zps', settle_time=0.0, timeout=None, read_attrs=['user_readback', 'user_setpoint'], configuration_attrs=['motor_egu', 'velocity', 'acceleration', 'user_offset', 'user_offset_dir']), scan_start2=-15, scan_end2=15, num2=150, exposure_time=0.15, dead_time=0.005, fly_type='soft', return_speed=40.0, max_points=16384)
scan_count 2
scan_end1 12
scan_end2 15
scan_ends [[11.9945369073248, 12.199598297863488], [11.9945369073248, 14.99959829786349]]
scan_id 27768
scan_range [[-12.0054630926752, 11.9945369073248], [-15.000401702136514, 14.999598297863486]]
scan_start1 -12
scan_start2 -15
scan_starts [[-12.0054630926752, -15.000401702136514], [-12.0054630926752, 12.199598297863488]]
script_path /opt/conda_envs/collection-17Q2.0/lib/python3.6/site-packages/hxnfly/scripts/soft_2d.txt
shape [120, 150]
subscan_dims [[120, 136], [120, 14]]
time a month ago (2017-05-22T22:16:10.241868)
uid ef0f5fc1-a759-4bbc-8460-74d23214142a

In [65]:
sub = hdr.start.subscan_dims
len(sub)


Out[65]:
2

In [70]:
fig, ax = plt.subplots()
ax.plot(tlist, '.-')
ax.axhline(t1_utc)
ax.axhline(t2_utc)


Out[70]:
<matplotlib.lines.Line2D at 0x7fbd67328630>

In [8]:
for des in hdr.descriptors:
    for k,v in des['data_keys'].items():
        if 'external' in v:
            print(k)


merlin1
xspress3_ch1
xspress3_ch2
xspress3_ch3

In [53]:
e = list(db.get_events(hdr, stream_name='primary', fill=False))

In [54]:
len(e)


Out[54]:
18000

In [55]:
e[120*136].data


Out[55]:
{'Det1_Ca': 1.0002179931087083,
 'Det1_Cl': 1.0002179931087083,
 'Det1_Cr': 0.0,
 'Det1_Cu': 0.0,
 'Det1_Fe': 1.0002179931087083,
 'Det1_Ga': 0.0,
 'Det1_K': 0.0,
 'Det1_Mn': 0.0,
 'Det1_Ni': 0.0,
 'Det1_P': 9.001961937978376,
 'Det1_S': 0.0,
 'Det1_Se': 0.0,
 'Det1_Si': 143.03117301454532,
 'Det1_Te': 1.0002179931087083,
 'Det1_Ti': 0.0,
 'Det1_Zn': 0.0,
 'Det2_Ca': 0.0,
 'Det2_Cl': 0.0,
 'Det2_Cr': 0.0,
 'Det2_Cu': 0.0,
 'Det2_Fe': 0.0,
 'Det2_Ga': 0.0,
 'Det2_K': 0.0,
 'Det2_Mn': 0.0,
 'Det2_Ni': 0.0,
 'Det2_P': 6.001389077592115,
 'Det2_S': 0.0,
 'Det2_Se': 0.0,
 'Det2_Si': 173.04005173723937,
 'Det2_Te': 0.0,
 'Det2_Ti': 0.0,
 'Det2_Zn': 0.0,
 'Det3_Ca': 0.0,
 'Det3_Cl': 0.0,
 'Det3_Cr': 0.0,
 'Det3_Cu': 0.0,
 'Det3_Fe': 0.0,
 'Det3_Ga': 0.0,
 'Det3_K': 0.0,
 'Det3_Mn': 0.0,
 'Det3_Ni': 0.0,
 'Det3_P': 5.001050010748242,
 'Det3_S': 0.0,
 'Det3_Se': 1.0002100021496483,
 'Det3_Si': 141.0296103031004,
 'Det3_Te': 0.0,
 'Det3_Ti': 0.0,
 'Det3_Zn': 0.0,
 'alive': 0.15141780162230134,
 'dead': 0.005312905181199312,
 'elapsed_time': 0.07615164292796531,
 'merlin1': '0a3dce66-52e8-4197-a794-6c1da77fe55f',
 'scaler_alive': array(151.79976),
 'sclr1_ch2': array(10899),
 'sclr1_ch3': array(56),
 'sclr1_ch4': array(16073),
 'sclr1_ch5': array(194),
 'sclr1_ch6': array(0),
 'sclr1_ch7': array(0),
 'sclr1_ch8': array(0),
 'xspress3_ch1': '64054a90-3405-4bc0-a483-437b73796fdc',
 'xspress3_ch2': '2f966001-ff5b-41eb-9a8b-c409e0fda0ad',
 'xspress3_ch3': '7019e7cf-7b58-4027-98a6-9b1fd4e576c4',
 'zpssx': -11.951305332270413,
 'zpssy': 12.201941499445072,
 'zpssz': 0.0006881263723177563}

In [66]:
dat = next(fsdb.datum.find({'datum_id': e[120*136-1].data['xspress3_ch2']}))
#dat = next(fsdb.datum.find({'datum_id': e[0].data['merlin1']}))

In [81]:
# before correction
dat

#120*136-1


Out[81]:
{'_id': ObjectId('5923a5fee97f4e0979d03152'),
 'datum_id': '8081a60f-1b0c-49a5-b655-10d3d775512d',
 'datum_kwargs': {'channel': 2, 'frame': 16319},
 'resource': None}

In [15]:
dat['resource'] = rlist[113]['uid']

In [80]:
def update_datum(scanid, resource_list):
    print(scanid)
    hdr = db[scanid]
    subscan = hdr.start.subscan_dims
    elist = list(db.get_events(hdr, fill=False, stream_name='primary'))
    t1 = hdr.start.time
    t2 = hdr.stop.time
    timelist = [os.path.getmtime(r['root'] + '/' +  r['resource_path']) for r in resource_list]
    #timelist =  [r['_id'].generation_time for r in source_list]
    
    id_found = []
    for i,t in enumerate(timelist):
        if t1 < t< t2:
            if resource_list[i]['spec'] == 'XSP3':
                id_found.append(i)
                print('time:', i, t)
    print('number of h5 file: ', len(id_found))
    if len(id_found) != len(subscan):
        raise('size is inconsistent!')
    #if len(id_found) != 2:
    #    raise('len of data should be 2')
    num = 0
    file_id = -1
    for e in elist:
        for detid in range(3):
            detname = 'xspress3_ch' + str(detid+1)
            dat = next(fsdb.datum.find({'datum_id': e.data[detname]}))
            if detid==0 and dat['datum_kwargs']['frame']==0:
                file_id += 1
                
            dat['resource'] = resource_list[id_found[file_id]]['uid']
            
            # turn this on later when updating is needed
            #fsdb.datum.update_many(filter={'datum_id': e.data[detname]},
            #                       update={'$set':dat},
            #                       upsert=False)

In [86]:
runlist = [27768, 27822, 27825, 27835, 27840, 27845, 27849, 27851, 27858, 27860, 27873, 27883, 27888, 27892, 27912, 27917]
#large_list = [27768, 27845, 27849, 27860]
for num in runlist[-1:]:
    update_datum(num, rlist)


27917
time: 319 1495700523.7346997
number of h5 file:  1

In [ ]:


In [47]:
plt.close('all')
fig, ax = plt.subplots()
ax.plot(flist, '.')
ax.axhline(t1)
ax.axhline(t2)


Out[47]:
<matplotlib.lines.Line2D at 0x7f3bbee0ea58>

In [5]:
file_start_time = os.path.getmtime(rlist[0]['root'] + '/' +  rlist[0]['resource_path'])
file_end_time = os.path.getmtime(rlist[-1]['root'] + '/' +  rlist[-1]['resource_path'])

In [ ]:
for i in range(len(problem_resources)):
    primary = missing[i]
    try:
        secondary = missing[i+1]
    except IndexError:
        break
    diff = s_time - p_time
    fsdb.datum.find({'time': {'$gte': p_time, '$lt': s_time }})

In [ ]:
a = next(fsdb.resource.find())['_id']

In [ ]:
a.generation_time

In [ ]: