In [46]:
    
# Load the dark-scan results table from a local HDF5 store.
# NOTE(review): hardcoded absolute user path — consider a configurable DATA_DIR.
# Assumes `pd` (pandas) was imported in an earlier, non-visible cell.
df = pd.read_hdf('/Users/klay6683/data/iuvs/dark_stuff/results_df.h5', 'df')
    
In [47]:
    
# Project-specific cleanup of the dark-scan metadata table.
# NOTE(review): `meta` is not imported in any visible cell — presumably an iuvs module.
df = meta.clean_up_dark_scan(df)
    
In [57]:
    
# Quick look at the first rows of the cleaned table.
df.head()
    
    Out[57]:
In [58]:
    
# Derive the orbit segment from PRODUCT_ID: take the 4th underscore-separated
# token, then keep what precedes the first '-'.
# Vectorized .str accessor replaces the per-element map(lambda ...); it also
# yields NaN (instead of raising) for malformed IDs with too few tokens.
df['orbit_segment'] = df.PRODUCT_ID.str.split('_').str[3].str.split('-').str[0]
    
In [61]:
    
# How many full-frame (1024x1024) darks do we have per orbit segment?
full_frame = (df.NAXIS1 == 1024) & (df.NAXIS2 == 1024)
df[full_frame].groupby('orbit_segment').size()
    
    Out[61]:
In [5]:
    
# Distribution of bin tables among the 'LINEAR linear_0006' darks.
is_linear_0006 = df.BIN_PATTERN_INDEX == 'LINEAR linear_0006'
df.loc[is_linear_0006, 'BIN_TBL'].value_counts()
    
    Out[5]:
In [6]:
    
# Split into two 100-orbit windows for comparison.
# Series.between is inclusive on both ends: orbits 350..449 and 450..549,
# identical to the original strict double comparisons but easier to read.
three50_450 = df[df.ORBIT_NUMBER.between(350, 449)]
four50_550 = df[df.ORBIT_NUMBER.between(450, 549)]
    
In [7]:
    
# Bin-pattern frequencies in the orbit 350-449 window.
three50_450.BIN_PATTERN_INDEX.value_counts()
    
    Out[7]:
In [8]:
    
# Bin-pattern frequencies in the orbit 450-549 window.
four50_550.BIN_PATTERN_INDEX.value_counts()
    
    Out[8]:
I will focus on the LINEAR linear_0006 bin pattern for now, until I better understand how the different bin patterns compare.
In [9]:
    
# Restrict to the 'LINEAR linear_0006' bin pattern; the column is then
# constant, so drop it in the same chain.
keep = df.BIN_PATTERN_INDEX == 'LINEAR linear_0006'
df = df[keep].drop('BIN_PATTERN_INDEX', axis=1)
    
In [10]:
    
# Frequency of each binning table among the remaining darks.
bin_tables = df['BIN_TBL'].value_counts()
bin_tables
    
    Out[10]:
In [11]:
    
# For each binning table (most common first), show which orbit segments use it.
for bin_tbl in bin_tables.index:
    segment_counts = df.loc[df.BIN_TBL == bin_tbl, 'orbit_segment'].value_counts()
    print(bin_tbl)
    print(segment_counts)
    
    
In [12]:
    
# Restrict to the 'LINEAR 7,8 linear_0006' binning table, then drop the
# now-constant column in the same chain.
df = df[df.BIN_TBL == 'LINEAR 7,8 linear_0006'].drop('BIN_TBL', axis=1)
    
In [13]:
    
# Orbit-segment distribution after the bin-table cut.
df.orbit_segment.value_counts()
    
    Out[13]:
In [14]:
    
# Inspect the current index.
df.index
    
    Out[14]:
In [15]:
    
# Inspect the remaining columns.
df.columns
    
    Out[15]:
In [16]:
    
# Check whether CHANNEL varies (candidate for the constant-column drop below).
df.CHANNEL.value_counts()
    
    Out[16]:
In [17]:
    
# Check whether INT_TIME varies.
df.INT_TIME.value_counts()
    
    Out[17]:
In [17]:
    
# Check whether BINNING_SET varies.
df.BINNING_SET.value_counts()
    
    Out[17]:
In [18]:
    
# Check whether NAXIS1 varies.
df.NAXIS1.value_counts()
    
    Out[18]:
In [19]:
    
# Check whether NAXIS2 varies.
df.NAXIS2.value_counts()
    
    Out[19]:
In [20]:
    
# Drop columns that carry no information, i.e. have exactly one unique
# non-null value. nunique() is the idiomatic (and cheaper) equivalent of
# len(value_counts()); both ignore NaN, so all-NaN columns are kept,
# matching the original behavior.
constant_cols = [col for col in df.columns if df[col].nunique() == 1]
df = df.drop(constant_cols, axis=1)
    
In [21]:
    
# Columns that survived the constant-column drop.
df.columns
    
    Out[21]:
In [22]:
    
from iuvs import calib
    
In [23]:
    
# Convert raw DET_TEMP readings to degrees Celsius, then shift to Kelvin.
# NOTE(review): the column keeps its original name but is now in K — confirm
# downstream cells expect Kelvin, and consider renaming (e.g. DET_TEMP_K).
df.DET_TEMP = df.DET_TEMP.map(calib.convert_det_temp_to_C) +273.15
    
In [24]:
    
# Same conversion for CASE_TEMP: raw -> Celsius -> Kelvin (see note above cell).
df.CASE_TEMP = df.CASE_TEMP.map(calib.convert_case_temp_to_C) + 273.15
    
In [25]:
    
# Interactive matplotlib backend for the notebook, plus seaborn styling
# sized for presentations.
%matplotlib nbagg
import seaborn as sns
sns.set_context('talk')
    
In [26]:
    
from sklearn.preprocessing import normalize
    
In [27]:
    
# Inspect the index before restructuring it below.
df.index
    
    Out[27]:
In [29]:
    
# Move the current index back into a regular column.
df = df.reset_index()
    
In [34]:
    
# Index by integration time so time-series plots below get a time axis.
# Reassignment instead of inplace=True: no performance benefit to inplace,
# and explicit assignment keeps the data flow visible on re-runs.
df = df.set_index('TIME_OF_INT')
    
In [30]:
    
# L2-normalize the 'mean' column (whole column scaled to unit norm).
# Modern scikit-learn rejects 1-D input to normalize(), so pass the (n, 1)
# column frame and normalize along axis=0, then flatten for assignment.
# Result is identical to the legacy 1-D row-sample behavior plus .T.
df['normalized_mean'] = normalize(df[['mean']], axis=0).ravel()
    
In [38]:
    
# Dark mean vs. integration time (index), plotted as star markers.
df[['mean']].plot(style='*')
    
    
    
    Out[38]:
In [43]:
    
# Does the dark mean correlate with case temperature?
df.plot(kind='scatter', x='CASE_TEMP', y='mean')
    
    
    
    Out[43]:
In [44]:
    
# How tightly are the two temperature sensors coupled?
df.plot(kind='scatter',x='DET_TEMP', y='CASE_TEMP')
    
    
    
    Out[44]:
In [40]:
    
# Seasonal dependence: dark mean vs. solar longitude (Ls).
df.plot(kind='scatter', x='SOLAR_LONGITUDE',y='mean')
    
    
    
    Out[40]:
In [39]:
    
# Seasonal dependence of detector temperature.
df.plot(kind='scatter', x='SOLAR_LONGITUDE', y='DET_TEMP')
    
    
    
    Out[39]:
In [32]:
    
from sklearn import linear_model, decomposition, datasets
    
In [33]:
    
# RandomizedPCA was deprecated in scikit-learn 0.18 and removed in 0.20;
# PCA with the randomized SVD solver is the maintained equivalent
# (same algorithm, same fit/explained_variance_ interface).
pca = decomposition.PCA(svd_solver='randomized')
    
In [34]:
    
# Remind ourselves of the available columns before choosing PCA features.
df.columns
    
    Out[34]:
In [42]:
    
# Feature columns for the PCA, spelled out explicitly: instrument/geometry
# metadata (upper-case FITS-style names) plus our derived 'mean' statistic.
Xcols = [
    'CASE_TEMP',
    'DET_TEMP',
    'FOV_DEG',
    'LYA_CENTROID',
    'MIRROR_DEG',
    'MIRROR_DN',
    'MIR_DEG',
    'SOLAR_LONGITUDE',
    'mean',
]
Xcols
    
    Out[42]:
In [43]:
    
# Fit the PCA on the selected feature matrix.
# NOTE(review): features are not standardized first — PCA will be dominated
# by the columns with the largest numeric scale; consider StandardScaler.
pca.fit(df[Xcols].values)
    
    Out[43]:
In [49]:
    
# Close any figures left over from earlier cells.
# NOTE(review): `plt` is not imported in any visible cell — presumably
# `import matplotlib.pyplot as plt` ran earlier in the session.
plt.close('all')
    
In [50]:
    
# Scree plot: explained variance of each principal component on a log scale,
# using the explicit figure/axes interface instead of the pyplot state machine.
fig = plt.figure(1, figsize=(4, 3))
fig.clf()
ax = fig.add_axes([.2, .2, .7, .7])
ax.semilogy(pca.explained_variance_, linewidth=2)
ax.axis('tight')
ax.set_xlabel('n_components')
ax.set_ylabel('explained_variance_')
    
    
    
    Out[50]:
In [ ]: