notebook.community

Edit and run



In [1]:

    
pd.options.display.max_columns = 100



In [8]:

    
l1b = io.some_l1b()









    



/maven_iuvs/stage/products/level1b



In [9]:









    Out[9]:





array([[80],
       [80],
       [80],
       [80],
       [80],
       [80],
       [80],
       [80],
       [80],
       [80]], dtype=int16)



In [2]:

    
l1bdarkfnames = io.get_current_science_fnames('l1b', env='production')









    



/maven_iuvs/production/products/level1b



In [3]:

    
len(l1bdarkfnames)









    Out[3]:





58227



In [4]:

    
def process_fname(fname):
    l1b = io.L1BReader(fname)
    d = dict(fname=fname)
    if not hasattr(l1b, 'detector_dark'):
        d['error'] = 'no dark'
        return d
    if l1b.dark_dn_s.ndim == 3:
        dark = l1b.dark_dn_s[0]
    else:
        dark = l1b.dark_dn_s
    if l1b.raw_dn_s.ndim == 3:
        light = l1b.raw_dn_s[0]
    else:
        light = l1b.raw_dn_s
    sub = light - dark
    try:
        spa_slice, spe_slice = io.find_scaling_window(sub)
    except IndexError:
        d['error'] = 'IndexError'
        return d
    window = sub[spa_slice, spe_slice]
    d['mean'] = window.mean()
    d['std'] = window.std()
    d['nspa'] = window.shape[0]
    d['nspe'] = window.shape[1]
    return d



In [5]:

    
process_fname(l1bdarkfnames[100])









    Out[5]:





{'fname': PosixPath('/maven_iuvs/production/products/level1b/mvn_iuv_l1b_APP1-orbit00087-mode1231-fuv_20141014T132121_v02_r01.fits.gz'),
 'mean': -0.22160493827160496,
 'nspa': 9,
 'nspe': 3,
 'std': 0.33600211929859064}

Generate dataframe with filename stats



In [6]:

    
namestats = pd.DataFrame([io.ScienceFilename(str(i)).as_series() for i in l1bdarkfnames])



In [7]:

    
from IPython.parallel import Client
c = Client()



In [8]:

    
dview = c.direct_view()
lview = c.load_balanced_view()



In [9]:

    
doing = namestats.sample(2000).basename
results = lview.map_async(process_fname, doing)



In [10]:

    
from iuvs.multitools import nb_progress_display









    



WARNING:py.warnings::0: FutureWarning: IPython widgets are experimental and may change in the future.



In [11]:

    
nb_progress_display(results, doing)









    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-11-4ae4ead6bb39> in <module>()
----> 1 nb_progress_display(results, doing)

/home/klay6683/src/iuvs/iuvs/multitools.py in nb_progress_display(results, objectlist, sleep)
     19     while not results.ready():
     20         prog.value = results.progress
---> 21         time.sleep(sleep)
     22 
     23 

KeyboardInterrupt:



In [123]:

    
res = []
for result in results:
    res.append(result)









    



Traceback (most recent call last):
  File "/usr/local/python3/miniconda/lib/python3.4/site-packages/IPython/parallel/controller/scheduler.py", line 347, in handle_stranded_tasks
    raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
IPython.parallel.error.EngineError: Engine b'67724e8b-32c7-49a1-aa46-97285d4446aa' died while running task '4cd2895b-cd10-4c84-9ce4-f1173fad988c'



In [125]:

    
res = pd.DataFrame(res)



In [130]:

    
merged = res.merge(namestats, left_on='fname', right_on='basename')



In [133]:

    
import seaborn as sns



In [134]:

    
sns.set_context('notebook')



In [136]:

    
merged.columns









    Out[136]:





Index(['error', 'fname', 'mean', 'nspa', 'nspe', 'std', 'basename', 'channel',
       'cycle_orbit', 'instrument', 'level', 'mission', 'mode', 'obs_id', 'p',
       'phase', 'revision', 'root', 'time', 'timestr', 'tokens', 'version',
       'version_string'],
      dtype='object')



In [ ]:

    
len(res)



In [135]:

    
sns.stripplot(x='phase', y='mean'









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-135-150e35ab0db0> in <module>()
----> 1 sns.stripplot(x='phase', y='mean')

/usr/local/python3/miniconda/lib/python3.4/site-packages/seaborn/categorical.py in stripplot(x, y, hue, data, order, hue_order, jitter, split, orient, color, palette, size, edgecolor, linewidth, ax, **kwargs)
   2000 
   2001     plotter = _StripPlotter(x, y, hue, data, order, hue_order,
-> 2002                             jitter, split, orient, color, palette)
   2003     if ax is None:
   2004         ax = plt.gca()

/usr/local/python3/miniconda/lib/python3.4/site-packages/seaborn/categorical.py in __init__(self, x, y, hue, data, order, hue_order, jitter, split, orient, color, palette)
   1061                  jitter, split, orient, color, palette):
   1062         """Initialize the plotter."""
-> 1063         self.establish_variables(x, y, hue, data, orient, order, hue_order)
   1064         self.establish_colors(color, palette, 1)
   1065 

/usr/local/python3/miniconda/lib/python3.4/site-packages/seaborn/categorical.py in establish_variables(self, x, y, hue, data, orient, order, hue_order, units)
    143                 if isinstance(input, string_types):
    144                     err = "Could not interperet input '{}'".format(input)
--> 145                     raise ValueError(err)
    146 
    147             # Figure out the plotting orientation

ValueError: Could not interperet input 'phase'



In [88]:

    
l1b = io.L1BReader('/maven_iuvs/production/products/level1b/mvn_iuv_l1b_APP1-orbit00087-mode0002-fuv_20141014T141612_v02_r01.fits.gz')



In [89]:

    
l1b.detector_dark.shape









    Out[89]:





(128, 128)



In [91]:

    
l1b.detector_raw.ndim









    Out[91]:





2



In [ ]:



In [16]:

    
def check_for_issues(p):
    from iuvs import exceptions
    d = {}
    d['fname'] = p.name
    try:
        l1a = io.L1AReader(str(p))
    except exceptions.DimensionsError:
        d['dims'] = False
    d['kind'] = l1a.img_header['BIN_TBL'][:3]
    d['n_unique_spabins'] = l1a.n_unique_spabins
    d['n_unique_spebins'] = l1a.n_unique_spebins
    return d



In [17]:

    
check_for_issues(df.p[4])









    Out[17]:





{'fname': 'mvn_iuv_l1a_APP1-orbit00084-mode0111-muvdark_20141013T232042_v02_r01.fits.gz',
 'kind': 'LIN',
 'n_unique_spabins': 2,
 'n_unique_spebins': 1}



In [18]:

    
doing = df.p
results = lview.map_async(check_for_issues, doing)



In [19]:

    
from iuvs.multitools import nb_progress_display
results.ready()









    Out[19]:





False



In [20]:

    
nb_progress_display(results, doing)



In [21]:

    
resultdf = pd.DataFrame(results.result)



In [22]:

    
for col in resultdf.columns:
    if col == 'fname': continue
    print(col)
    print(resultdf[col].value_counts(dropna=False))









    



kind
LIN    13758
NON      374
Name: kind, dtype: int64
n_unique_spabins
1    13567
2      565
Name: n_unique_spabins, dtype: int64
n_unique_spebins
1    13171
2      775
9      186
Name: n_unique_spebins, dtype: int64



In [23]:

    
resultdf['phase'] = resultdf.fname.map(lambda x: io.ScienceFilename(x).phase)



In [24]:

    
subdf = resultdf[(resultdf.n_unique_spabins==2) | (resultdf.n_unique_spebins==2)]



In [40]:

    
subdf[subdf.kind=='LIN'].info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 645 entries, 2 to 4301
Data columns (total 5 columns):
fname               645 non-null object
kind                645 non-null object
n_unique_spabins    645 non-null int64
n_unique_spebins    645 non-null int64
phase               645 non-null object
dtypes: int64(2), object(3)
memory usage: 30.2+ KB

Scanning code



In [271]:

    
def process_fname(fname):
    from iuvs import exceptions
    import numpy as np
    l1a = io.L1AReader(fname)
    d = dict(fname=fname)
    try:
        l1a = io.L1AReader(fname)
    except exceptions.DimensionsError:
        d['error'] = 'dims'
        return d
    if l1a.img_header['BIN_TBL'].startswith('NON LINEAR'):
        d['error'] = 'nonlinear'
        return d
    if any([l1a.n_unique_spabins>1, l1a.n_unique_spebins>1]):
        d['error'] = 'badbins'
        return d
    main_header = io.get_header_df(l1a.hdulist[0])
    integration = l1a.Integration
    if main_header.loc[0, 'NAXIS'] == 2:
        main_header.loc[0, 'NAXIS3'] = np.nan
        avgtuple = None
    elif main_header.loc[0, 'NAXIS'] == 3:
        avgtuple = (1,2)
    else:
        d['error'] = 'axes'
        return d
    try:
        integration['mean'] = l1a.primary_img_dn_s.mean(axis=avgtuple)
        integration['median'] = np.median(l1a.primary_img_dn_s, axis=avgtuple)
        integration['std'] = l1a.primary_img_dn_s.std(axis=avgtuple)
    except KeyError:
        integration['mean'] = np.nan
        integration['median'] = np.nan
        integration['std'] = np.nan
    lenint = len(integration)
    if lenint > 1:
        main_header = main_header.append([main_header]*(lenint-1), ignore_index=True)
    joined = pd.concat([integration, main_header], axis=1)
    for col in l1a.Observation.names[:-3]:
        val = l1a.Observation[col][0]
        if col == 'COLLECTION_ID':
            val = val[0]
        joined[col] = val
    savepath = io.save_to_hdf(joined.sort_index(axis=1), fname, 'l1a_dark_scans')
    d['success'] = True
    return d
#     return joined, 0



In [272]:

    
process_fname(l1adarkfnames[2000])









    Out[272]:





{'fname': PosixPath('/maven_iuvs/production/products/level1a/mvn_iuv_l1a_IPH2-cycle00075-mode060-muvdark_20140513T205057_v02_r01.fits.gz'),
 'success': True}



In [273]:

    
doing = l1adarkfnames
results = lview.map_async(process_fname, doing)



In [274]:

    
nb_progress_display(results, doing)









    



---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-274-4ae4ead6bb39> in <module>()
----> 1 nb_progress_display(results, doing)

/home/klay6683/src/iuvs/iuvs/multitools.py in nb_progress_display(results, objectlist, sleep)
     19     while not results.ready():
     20         prog.value = results.progress
---> 21         time.sleep(sleep)
     22 
     23 

KeyboardInterrupt:



In [281]:

    
results_df = pd.DataFrame(results.result)



In [61]:

    
results_df.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 18826 entries, 0 to 18825
Data columns (total 53 columns):
ANC_SVN_REVISION         18826 non-null object
BIN_PATTERN_INDEX        18826 non-null object
BIN_TBL                  18826 non-null object
BITPIX                   18826 non-null int64
BLANK                    18826 non-null int64
BUNDLE_ID                18826 non-null object
CADENCE                  18826 non-null float64
CAPTURE                  18826 non-null object
CASE_TEMP                18826 non-null float64
CHANNEL                  18826 non-null object
CODE_SVN_REVISION        18826 non-null object
COLLECTION_ID            18826 non-null object
DET_TEMP                 18826 non-null float64
DUTY_CYCLE               18826 non-null float64
ET                       18826 non-null float64
EXTEND                   18826 non-null bool
FILENAME                 18826 non-null object
FOV_DEG                  18826 non-null float64
GRATING_SELECT           18826 non-null object
INT_TIME                 18826 non-null float64
KEYHOLE_SELECT           18826 non-null object
LYA_CENTROID             18826 non-null int64
MCP_VOLT                 18826 non-null float64
MIRROR_DEG               18826 non-null float64
MIRROR_DN                18826 non-null float64
MIR_DEG                  18826 non-null float64
MISSION_PHASE            18826 non-null object
NAXIS                    18826 non-null int64
NAXIS1                   18826 non-null int64
NAXIS2                   18826 non-null int64
NAXIS3                   16775 non-null float64
N_FILL                   18826 non-null int64
OBSERVATION_TYPE         18826 non-null object
OBS_ID                   18826 non-null int64
ORBIT_NUMBER             18826 non-null int64
ORBIT_SEGMENT            18826 non-null int64
PROCESS                  18826 non-null object
PRODUCT_CREATION_DATE    18826 non-null object
PRODUCT_ID               18826 non-null object
SIMPLE                   18826 non-null bool
SOLAR_LONGITUDE          18826 non-null float64
SPA_OFS                  17716 non-null float64
SPA_SIZE                 17716 non-null float64
SPE_OFS                  17716 non-null float64
SPE_SIZE                 17716 non-null float64
SW_VER                   17563 non-null object
TARGET_NAME              18826 non-null object
TIMESTAMP                18826 non-null float64
UTC                      18826 non-null object
XUV                      18826 non-null object
mean                     17810 non-null float64
median                   17814 non-null float64
std                      17810 non-null float64
dtypes: bool(2), float64(21), int64(10), object(20)
memory usage: 7.5+ MB



In [62]:

    
results_df.loc[results_df.ANC_SVN_REVISION == '', 'ANC_SVN_REVISION'] = 0



In [63]:

    
results_df = results_df.convert_objects(convert_numeric=True)



In [64]:

    
results_df.to_hdf('/home/klay6683/output/l1a_dark_scans/results_df.h5', 'df')

Merge temporary h5 files to database



In [277]:

    
import glob
h5fnames = glob.glob("/home/klay6683/output/l1a_dark_scans/*.h5")



In [278]:

    
len(h5fnames)









    Out[278]:





13107



In [279]:

    
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))



In [280]:

    
dfs = []
for i,chunk in enumerate(chunker(h5fnames, 200)):
    print("Chunk {}".format(i))
    frames = []
    for fname in chunk:
        frames.append(pd.read_hdf(fname, 'df'))
    dfs.append(pd.concat(frames, ignore_index=True))









    



Chunk 0
Chunk 1
Chunk 2
Chunk 3
Chunk 4
Chunk 5
Chunk 6
Chunk 7
Chunk 8
Chunk 9
Chunk 10
Chunk 11
Chunk 12
Chunk 13
Chunk 14
Chunk 15
Chunk 16
Chunk 17
Chunk 18
Chunk 19
Chunk 20
Chunk 21
Chunk 22
Chunk 23
Chunk 24
Chunk 25
Chunk 26
Chunk 27
Chunk 28
Chunk 29
Chunk 30
Chunk 31
Chunk 32
Chunk 33
Chunk 34
Chunk 35
Chunk 36
Chunk 37
Chunk 38
Chunk 39
Chunk 40
Chunk 41
Chunk 42
Chunk 43
Chunk 44
Chunk 45
Chunk 46
Chunk 47
Chunk 48
Chunk 49
Chunk 50
Chunk 51
Chunk 52
Chunk 53
Chunk 54
Chunk 55
Chunk 56
Chunk 57
Chunk 58
Chunk 59
Chunk 60
Chunk 61
Chunk 62
Chunk 63
Chunk 64
Chunk 65



In [282]:

    
superdf = pd.concat(dfs, ignore_index=True)



In [283]:

    
superdf.info()









    



<class 'pandas.core.frame.DataFrame'>
Int64Index: 51306 entries, 0 to 51305
Data columns (total 52 columns):
ANC_SVN_REVISION         51306 non-null object
BIN_PATTERN_INDEX        51306 non-null object
BIN_TBL                  51306 non-null object
BITPIX                   51306 non-null int64
BLANK                    51306 non-null int64
BUNDLE_ID                51306 non-null object
CADENCE                  51306 non-null float64
CAPTURE                  51306 non-null object
CASE_TEMP                51306 non-null float64
CHANNEL                  51306 non-null object
CODE_SVN_REVISION        51306 non-null object
COLLECTION_ID            51306 non-null object
DET_TEMP                 51306 non-null float64
DUTY_CYCLE               51306 non-null float64
ET                       51306 non-null float64
EXTEND                   51306 non-null bool
FILENAME                 51306 non-null object
FOV_DEG                  51306 non-null float32
GRATING_SELECT           51306 non-null object
INT_TIME                 51306 non-null float64
KEYHOLE_SELECT           51306 non-null object
LYA_CENTROID             51306 non-null int16
MCP_VOLT                 51306 non-null float64
MIRROR_DEG               51306 non-null float32
MIRROR_DN                51306 non-null float64
MIR_DEG                  51306 non-null float64
MISSION_PHASE            51306 non-null object
NAXIS                    51306 non-null int64
NAXIS1                   51306 non-null int64
NAXIS2                   51306 non-null int64
NAXIS3                   44027 non-null float64
N_FILL                   51306 non-null int64
OBSERVATION_TYPE         51306 non-null object
OBS_ID                   51306 non-null int64
ORBIT_NUMBER             51306 non-null int64
ORBIT_SEGMENT            51306 non-null int64
PROCESS                  51306 non-null object
PRODUCT_CREATION_DATE    51306 non-null object
PRODUCT_ID               51306 non-null object
SIMPLE                   51306 non-null bool
SOLAR_LONGITUDE          51306 non-null float64
SPA_OFS                  51306 non-null int64
SPA_SIZE                 51306 non-null int64
SPE_OFS                  51306 non-null int64
SPE_SIZE                 51306 non-null int64
TARGET_NAME              51306 non-null object
TIMESTAMP                51306 non-null float64
UTC                      51306 non-null object
XUV                      51306 non-null object
mean                     50262 non-null float64
median                   50290 non-null float64
std                      50262 non-null float64
dtypes: bool(2), float32(2), float64(15), int16(1), int64(13), object(19)
memory usage: 19.4+ MB



In [284]:

    
from iuvs import calib



In [285]:

    
superdf.DET_TEMP = superdf.DET_TEMP.map(calib.convert_det_temp_to_C)
superdf.CASE_TEMP = superdf.CASE_TEMP.map(calib.convert_case_temp_to_C)



In [286]:

    
superdf.to_hdf('/home/klay6683/to_keep/l1a_dark_scan.h5','df')



In [287]:

    
from iuvs import meta



In [ ]: