In [46]:
df = pd.read_hdf('/Users/klay6683/data/iuvs/dark_stuff/results_df.h5', 'df')
In [47]:
df = meta.clean_up_dark_scan(df)
In [57]:
df.head()
Out[57]:
In [58]:
# extract the orbit segment from the PRODUCT_ID filename
df['orbit_segment'] = df.PRODUCT_ID.map(lambda x: x.split('_')[3].split('-')[0])
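For reference, this is what that split does on a hypothetical PRODUCT_ID; the exact filename layout is an assumption here, only the split logic is taken from the cell above.

pid = 'mvn_iuv_l1a_apoapse-orbit00400-muvdark_20150101T000000_v01_r01'  # hypothetical ID
pid.split('_')[3]                # 'apoapse-orbit00400-muvdark'
pid.split('_')[3].split('-')[0]  # 'apoapse'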
In [61]:
df[(df.NAXIS1==1024) & (df.NAXIS2==1024)].groupby('orbit_segment').size()
Out[61]:
In [5]:
df[df.BIN_PATTERN_INDEX=='LINEAR linear_0006'].BIN_TBL.value_counts()
Out[5]:
In [6]:
three50_450 = df[(df.ORBIT_NUMBER>349) & (df.ORBIT_NUMBER<450)]
four50_550 = df[(df.ORBIT_NUMBER>449) & (df.ORBIT_NUMBER<550)]
In [7]:
three50_450.BIN_PATTERN_INDEX.value_counts()
Out[7]:
In [8]:
four50_550.BIN_PATTERN_INDEX.value_counts()
Out[8]:
I will focus on LINEAR linear_0006 for now, until I better understand how the different bin patterns compare.
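To compare the two orbit ranges directly, the counts can be lined up side by side; a minimal sketch using the frames defined above:

pd.concat([three50_450.BIN_PATTERN_INDEX.value_counts(),
           four50_550.BIN_PATTERN_INDEX.value_counts()],
          axis=1, keys=['350-450', '450-550'])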
In [9]:
df = df[df.BIN_PATTERN_INDEX == 'LINEAR linear_0006']
# the column is now constant, so drop it
df = df.drop('BIN_PATTERN_INDEX', axis=1)
In [10]:
bin_tables = df.BIN_TBL.value_counts()
bin_tables
Out[10]:
In [11]:
for ind in bin_tables.index:
    print(ind)
    print(df[df.BIN_TBL == ind].orbit_segment.value_counts())
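The same breakdown fits in a single table with a crosstab; shown here only as a compact alternative to the loop above:

pd.crosstab(df.BIN_TBL, df.orbit_segment)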
In [12]:
df = df[df.BIN_TBL=='LINEAR 7,8 linear_0006']
df = df.drop('BIN_TBL', axis=1)
In [13]:
df.orbit_segment.value_counts()
Out[13]:
In [14]:
df.index
Out[14]:
In [15]:
df.columns
Out[15]:
In [16]:
df.CHANNEL.value_counts()
Out[16]:
In [17]:
df.INT_TIME.value_counts()
Out[17]:
In [17]:
df.BINNING_SET.value_counts()
Out[17]:
In [18]:
df.NAXIS1.value_counts()
Out[18]:
In [19]:
df.NAXIS2.value_counts()
Out[19]:
In [20]:
# drop columns that only ever take a single value
to_drop = []
for col in df.columns:
    if len(df[col].value_counts()) == 1:
        to_drop.append(col)
df = df.drop(to_drop, axis=1)
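An equivalent one-liner, assuming all columns hold hashable values, would be:

df = df.loc[:, df.nunique() > 1]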
In [21]:
df.columns
Out[21]:
In [22]:
from iuvs import calib
In [23]:
# convert the raw detector temperature to Celsius, then shift to Kelvin
df.DET_TEMP = df.DET_TEMP.map(calib.convert_det_temp_to_C) + 273.15
In [24]:
# same conversion for the case temperature
df.CASE_TEMP = df.CASE_TEMP.map(calib.convert_case_temp_to_C) + 273.15
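A quick sanity check that both temperatures now sit in a plausible Kelvin range:

df[['DET_TEMP', 'CASE_TEMP']].describe()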
In [25]:
%matplotlib nbagg
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_context('talk')
In [26]:
from sklearn.preprocessing import normalize
In [27]:
df.index
Out[27]:
In [29]:
df = df.reset_index()
In [34]:
df.set_index('TIME_OF_INT', inplace=True)
In [30]:
# sklearn's normalize expects a 2-D array; treat the whole column as a single sample
df['normalized_mean'] = normalize(df['mean'].values.reshape(1, -1)).T
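The same L2 normalization can be written directly with numpy, which makes the scaling explicit; an equivalent sketch of the sklearn call above:

import numpy as np
df['normalized_mean'] = df['mean'] / np.linalg.norm(df['mean'])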
In [38]:
df[['mean']].plot(style='*')
Out[38]:
In [43]:
df.plot(kind='scatter', x='CASE_TEMP', y='mean')
Out[43]:
In [44]:
df.plot(kind='scatter', x='DET_TEMP', y='CASE_TEMP')
Out[44]:
In [40]:
df.plot(kind='scatter', x='SOLAR_LONGITUDE', y='mean')
Out[40]:
In [39]:
df.plot(kind='scatter', x='SOLAR_LONGITUDE', y='DET_TEMP')
Out[39]:
In [32]:
from sklearn import linear_model, decomposition, datasets
In [33]:
# RandomizedPCA comes from the scikit-learn version in use here; newer releases spell this PCA(svd_solver='randomized')
pca = decomposition.RandomizedPCA()
In [34]:
df.columns
Out[34]:
In [42]:
Xcols = 'case_temp det_temp fov_deg lya_centroid mirror_deg mirror_dn mir_deg solar_longitude'.upper().split()
Xcols += ['mean']
Xcols
Out[42]:
In [43]:
pca.fit(df[Xcols].values)
Out[43]:
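Because these columns mix units (temperatures in Kelvin, angles in degrees, detector counts), the unscaled PCA is dominated by whichever column has the largest variance. A standardized variant is sketched below; this is an assumption about what a scale-free comparison would look like, not what was run above:

from sklearn.preprocessing import StandardScaler

X_std = StandardScaler().fit_transform(df[Xcols].values)  # zero mean, unit variance per column
pca_std = decomposition.RandomizedPCA()
pca_std.fit(X_std)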
In [49]:
plt.close('all')
In [50]:
plt.figure(1, figsize=(4, 3))
plt.clf()
plt.axes([.2, .2, .7, .7])
plt.semilogy(pca.explained_variance_, linewidth=2)
plt.axis('tight')
plt.xlabel('n_components')
plt.ylabel('explained_variance_')
Out[50]:
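A complementary check is the cumulative explained variance ratio, which shows how many components are needed to capture most of the variance; a small sketch against the pca fitted above:

import numpy as np
cumvar = np.cumsum(pca.explained_variance_ratio_)
plt.figure(figsize=(4, 3))
plt.plot(cumvar, linewidth=2)
plt.xlabel('n_components')
plt.ylabel('cumulative explained_variance_ratio_')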
In [ ]: