Table of Contents


In [ ]:
# pandas is not imported anywhere in the visible notebook; import it here so
# this first cell also works on a fresh kernel.
import pandas as pd

# The file is read without a header row, so the single column is named
# explicitly.
obsids = pd.read_csv("./image_names.csv", header=None, names=['obsid'])

In [ ]:
# Summarise dtypes and non-null counts of the loaded ID list.
obsids.info()

In [ ]:
# Load the metadata index table from a local HDF file.
# NOTE(review): absolute, user-specific path - this cell only runs on a
# machine with that directory; consider a configurable data directory.
meta = pd.read_hdf("/Users/klay6683/local_data/EDRCUMINDEX.hdf")

In [ ]:
# Inspect which metadata columns are available.
meta.columns

In [ ]:
# Keep only the first row for each OBSERVATION_ID so that the joins further
# down cannot multiply rows.
meta = meta.drop_duplicates(subset='OBSERVATION_ID')

In [ ]:
# Inner join: keep only the listed observations that have a metadata row.
df = pd.merge(obsids, meta, how='inner',
              left_on='obsid', right_on='OBSERVATION_ID')
df.head()

In [ ]:
# Use the interactive widget backend for the figures in this notebook.
%matplotlib widget

In [ ]:
# List the columns whose name matches 'ITUDE' (e.g. *LATITUDE / *LONGITUDE).
df.filter(regex='ITUDE').columns

In [ ]:
import seaborn as sns
# Apply seaborn's 'notebook' plotting context to subsequent figures.
sns.set_context('notebook')

In [ ]:
# Keep only the ID and the image-centre coordinates.  The explicit copy
# detaches the result from the merged frame, so the columns assigned to `df`
# in later cells ('point', 'geopy_names', 'minimal_distance') do not raise
# pandas' SettingWithCopyWarning.
df = df[['obsid', 'IMAGE_CENTER_LATITUDE','IMAGE_CENTER_LONGITUDE']].copy()

In [ ]:
# Shorten the column names.  This is a positional rename, so it relies on `df`
# holding exactly the three columns (ID, latitude, longitude) in that order.
df.columns = ['obsid','lat','lon']

In [ ]:
# Load the table of named regions and give its columns the same lat/lon names
# that `df` uses.
# NOTE(review): absolute, user-specific path - not portable to other machines.
region_names = pd.read_csv("/Users/klay6683/Dropbox/src/planet4/planet4/data/region_names.csv")
region_names.columns=['name','lat','lon']

In [ ]:
# (number of regions, number of columns)
region_names.shape

In [ ]:
from scipy.spatial.distance import cdist, pdist

In [ ]:
from geopy.distance import distance
from geopy import Point

In [ ]:
def create_point(row):
    """Build a geopy ``Point`` from a row exposing ``lat`` and ``lon``.

    Parameters
    ----------
    row
        Any object with ``lat`` and ``lon`` attributes, e.g. a row passed by
        ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    Point
        Point whose latitude/longitude are taken from ``row``.
    """
    lat, lon = row.lat, row.lon
    return Point(latitude=lat, longitude=lon)

In [ ]:
# One Point per region, built row by row from the lat/lon columns.
region_names['point'] = region_names[['lat','lon']].apply(create_point, axis=1)

In [ ]:
# One Point per image, built row by row from the lat/lon columns.
df['point'] = df[['lat','lon']].apply(create_point, axis=1)

In [ ]:
# Spot check: distance in kilometres between the first region and the first
# image.
# NOTE(review): geopy's `distance` defaults to an Earth ellipsoid; if these
# coordinates are not terrestrial the kilometre values are off - confirm.
distance(region_names.point.iloc[0], df.point.iloc[0]).kilometers

In [ ]:
# Pairwise distance matrix: one row per image in `df`, one column per region,
# values in kilometres as returned by geopy.  The metric is a Python callable
# evaluated for every (image, region) pair, so run time grows with
# len(df) * len(region_names).
# NOTE(review): geopy's `distance` defaults to an Earth ellipsoid - confirm
# that this is the intended body for these coordinates.
Y = cdist(df[['point']], region_names[['point']], 
          lambda x, y: distance(x,y).kilometers)

In [ ]:
# IPython introspection of geopy's `distance` (development aid; it can be
# removed from the finished notebook).
distance?

In [ ]:
# For every image take the column of Y with the smallest distance and store
# the name of the region at that position.
df['geopy_names'] = region_names['name'].values[Y.argmin(axis=1)]

In [ ]:
# Distance to that closest region (row-wise minimum of Y).
df['minimal_distance'] = Y.min(axis=1)

In [ ]:
# Attach the matched region's own coordinates to every image.  The
# overlapping lat/lon columns get the suffixes '_IND' (from `df`) and '_WORD'
# (from `region_names`); the redundant join key 'name' is dropped afterwards.
merged = (
    df.drop(columns='point')
    .merge(
        region_names.drop(columns='point'),
        left_on='geopy_names',
        right_on='name',
        suffixes=('_IND', '_WORD'),
    )
    .drop(columns='name')
)

In [ ]:
# Distribution of nearest-region distances, 100 bins with a log-scaled count
# axis.
merged.minimal_distance.hist(log=True, bins=100)

In [ ]:
# Images whose nearest region is more than 50 (same unit as
# `minimal_distance`) away are not attributed to any named region.
merged.loc[merged['minimal_distance'].gt(50), 'geopy_names'] = 'unknown'

In [ ]:
# Number of images attributed to each region (including 'unknown').
merged.groupby('geopy_names').size()

In [ ]:
# Bring each observation's start time over from the metadata index; the
# duplicate key column coming from `meta` is dropped after the join.
merged = pd.merge(merged, meta[['OBSERVATION_START_TIME', 'OBSERVATION_ID']],
                  left_on='obsid', right_on='OBSERVATION_ID')
merged = merged.drop(columns='OBSERVATION_ID')

In [ ]:
# Use the short column name 'time' for the observation start time.
merged = merged.rename(columns={'OBSERVATION_START_TIME': 'time'})

In [ ]:
from planet4 import stats

In [ ]:
# Called for its effect on `merged`: the return value is discarded and later
# cells read an 'MY' column.
# NOTE(review): presumably adds 'MY' derived from the 'time' column - confirm
# against planet4.stats.
stats.define_martian_year(merged, 'time')

In [ ]:
# Allow up to 100 rows to be shown in full before pandas truncates a display.
pd.set_option("display.max_rows", 100)

In [ ]:
# Wide table: one row per region, one column per MY value, cells hold the
# number of rows in `merged`.  Combinations that never occur become NaN.
to_plot2 = merged.groupby(['geopy_names', 'MY']).size().unstack('MY')

In [ ]:
# Long table with the same counts: one row per (region, MY) pair that occurs.
to_plot = merged.groupby(['geopy_names','MY']).size().reset_index()

In [ ]:
# How the unattributed ('unknown') images are distributed over MY values.
merged.loc[merged.geopy_names=='unknown'].MY.value_counts()

In [ ]:
# Image coordinates and MY of the unattributed images.
merged[merged.geopy_names=='unknown'][['lat_IND', 'lon_IND', 'MY']]

In [ ]:


In [ ]:
# Restrict the long table to rows with MY below 33.
to_plot = to_plot.loc[to_plot['MY'] < 33]

In [ ]:
# Export the unattributed images (MY, ID and image coordinates) to
# unknowns.csv in the working directory.
merged[merged.geopy_names=='unknown'][['MY','obsid', 'lat_IND','lon_IND']].to_csv("unknowns.csv", index=False)

In [ ]:
# Positional rename: region column -> 'name', unnamed size column -> 'count'.
# After this cell `to_plot` no longer has a 'geopy_names' column.
to_plot.columns = ['name', 'MY', 'count']

In [ ]:
# Preview the wide region-by-MY table.
to_plot2.head()

In [ ]:
# Turn the region index into a regular 'geopy_names' column.  This cell is not
# idempotent: running it a second time adds another column holding the old
# positional index.
to_plot2 = to_plot2.reset_index()

In [ ]:
# Show the 'unknown' row.  The region names live in the 'geopy_names' column
# after reset_index(); the index is positional, so comparing it with
# 'unknown' would always give an empty frame.
to_plot2[to_plot2.geopy_names == 'unknown']

In [ ]:
# Drop the 'unknown' pseudo-region from the wide table.
to_plot2 = to_plot2.loc[to_plot2['geopy_names'].ne('unknown')]

In [ ]:
# Count for region 'Caterpillar' in MY 29.  DataFrame.get() looks up column
# labels and silently returns None for ['Caterpillar', 29]; a (row, column)
# label lookup on the region index is what is needed here.
to_plot2.set_index('geopy_names').at['Caterpillar', 29]

In [ ]:
# Count for region 'Caterpillar' in the MY 28 column.
to_plot2.set_index('geopy_names').at['Caterpillar', 28]

In [ ]:
# Count for region 'Albany' in the MY 29 column.  The comparison operator was
# missing, which made the cell a syntax error.
to_plot2.loc[to_plot2.geopy_names == 'Albany', 29]

In [ ]:
# `indexed` is otherwise only created in the plotting cell further down;
# build it here so this cell also works when the notebook is run top to
# bottom.
indexed = to_plot2.set_index('geopy_names')

# List every region name present in the wide table.
for name in indexed.index:
    print(name)

In [ ]:
# Count for region 'Starburst' in the MY 28 column.  It is looked up on a
# freshly indexed view, so the cell does not depend on `indexed`, which is
# only defined in the plotting cell further down.
to_plot2.set_index('geopy_names').at['Starburst', 28]

In [ ]:


In [ ]:
# matplotlib.pyplot is not imported anywhere in the visible notebook; import
# it here so the cell also works on a fresh kernel.
import matplotlib.pyplot as plt

# One horizontal bar panel per MY column (28-32).  The panels share the
# region axis, and the per-panel y label is cleared.
f, axes = plt.subplots(1, 5, figsize=(6,6), sharey=True,
                       constrained_layout=True)

# Region-indexed view of the counts, kept for (row, column) label lookups.
indexed = to_plot2.set_index('geopy_names')

for yr, ax in zip(range(28,33), axes):
    sns.barplot(x=yr, y='geopy_names', ax=ax, data=to_plot2)
    ax.set_ylabel('')

# TODO: annotate each bar with its count, e.g. via
# ax.text(indexed.at[name, yr], name, ...) for every name in indexed.index.

In [ ]:
# Region-indexed copy of the wide count table, used for the exported table.
for_table = to_plot2.set_index('geopy_names')

In [ ]:
# Label the index column "ROI" in the exported file.
for_table.index.name="ROI"

In [ ]:
# Write the region-by-MY count table to dataset_table.csv in the working
# directory.
for_table.to_csv("dataset_table.csv")

In [ ]:
# (number of matched images, number of columns)
merged.shape

In [ ]:
# Replace missing counts with 0 and store the counts as integers.  Only the
# per-MY count columns are cast: 'geopy_names' holds strings once the index
# has been reset, so a frame-wide astype('int') raises ValueError.
count_cols = [col for col in to_plot2.columns if col != 'geopy_names']
to_plot2 = to_plot2.fillna(0).astype({col: 'int' for col in count_cols})

In [ ]:
# `to_plot` already holds aggregated counts and its columns are named
# 'name', 'MY' and 'count', so draw the counts directly: one horizontal bar
# per region, coloured by MY.  countplot() does not fit here - it counts rows
# itself, accepts only one of x / y, and the 'geopy_names' column no longer
# exists in `to_plot`.
sns.barplot(x='count', y='name', hue='MY', data=to_plot)

In [ ]:
# Region coordinates joined (on the region name index) with the per-MY
# counts.  The join keeps every row of `region_names`, so regions without any
# counted image get NaN counts.
# NOTE(review): this writes to the same file name as the earlier
# `for_table.to_csv(...)` cell and therefore replaces that file.
region_names.set_index('name').drop('point', axis=1).join(for_table).to_csv("dataset_table.csv")

In [ ]:
# The reverse join for inspection: the counts table with each region's
# columns from `region_names` appended.
for_table.join(region_names.set_index('name'))

In [ ]:
# Largest value in the 'time' column of the matched images.
merged.time.max()

In [ ]:
# Display the long (name, MY, count) table.
to_plot

In [ ]:
# List the CSV files in the working directory (shell command).
!ls *.csv

In [ ]:
# Load the status file; it is read without a header row.
status = pd.read_csv("current_status.csv", header=None)

In [ ]:
# Name the two columns; 'obsid' is the join key used to merge with `merged`.
# NOTE(review): the meaning and units of 'done' are not visible here.
status.columns = ['obsid', 'done']

In [ ]:
# Check which columns `merged` has before joining the status table.
merged.columns

In [ ]:
# Add the 'done' value to every matched image; the default inner join keeps
# only observations present in both tables.
newmerge = pd.merge(merged, status, on='obsid')

In [ ]:
# Per region, the number of observations whose 'done' value is below 25.
# NOTE(review): what the threshold 25 stands for is not visible here.
newmerge[newmerge.done < 25].geopy_names.value_counts()

In [ ]:
import qgrid

In [ ]:

P4 catalog stats


In [ ]:
# Load the fan catalog file.
# NOTE(review): absolute, user-specific path - not portable to other machines.
fans = pd.read_csv("/Users/klay6683/local_data/P4_catalog_v1.1/P4_catalog_v1.1_L1C_cut_0.5_fan.csv")

In [ ]:
# Preview the first rows of the fan catalog.
fans.head()

In [ ]:
# The five obsids with the most rows in the fan catalog.
fans.groupby('obsid').size().sort_values(ascending=False).head()

In [ ]:
# Mean number of fan catalog rows per obsid.
fans.groupby('obsid').size().mean()

In [ ]:
# Load the blotch catalog file.
# NOTE(review): absolute, user-specific path - not portable to other machines.
blotches = pd.read_csv("/Users/klay6683/local_data/P4_catalog_v1.1/P4_catalog_v1.1_L1C_cut_0.5_blotch.csv")

In [ ]:
# The five obsids with the most rows in the blotch catalog.
blotches.groupby('obsid').size().sort_values(ascending=False).head()

In [ ]:
# Mean number of blotch catalog rows per obsid.
blotches.groupby('obsid').size().mean()

In [ ]: