In [ ]:
# Read the list of observation IDs. The CSV has no header row, so the single
# column is named explicitly.
obsids = pd.read_csv("./image_names.csv", header=None, names=['obsid'])
In [ ]:
obsids.info()
In [ ]:
# Load the metadata table from a local HDF file.
# NOTE(review): presumably the HiRISE EDR cumulative index — confirm.
meta = pd.read_hdf("/Users/klay6683/local_data/EDRCUMINDEX.hdf")
In [ ]:
meta.columns
In [ ]:
# Keep a single metadata row per OBSERVATION_ID (modifies `meta` in place).
meta.drop_duplicates(subset='OBSERVATION_ID', inplace=True)
In [ ]:
# Inner join: only observation IDs present in both tables survive.
df = obsids.merge(meta, left_on='obsid', right_on='OBSERVATION_ID', how='inner')
df.head()
In [ ]:
%matplotlib widget
In [ ]:
# Show the merged columns whose labels match the regex "ITUDE"
# (i.e. the LATITUDE / LONGITUDE style columns).
df.filter(regex='ITUDE').columns
In [ ]:
import seaborn as sns
sns.set_context('notebook')
In [ ]:
df = df[['obsid', 'IMAGE_CENTER_LATITUDE','IMAGE_CENTER_LONGITUDE']]
In [ ]:
df.columns = ['obsid','lat','lon']
In [ ]:
# Table of named regions with their coordinates.
region_names = pd.read_csv("/Users/klay6683/Dropbox/src/planet4/planet4/data/region_names.csv")
# Relabel the columns so they match the lat/lon naming used for `df`.
# (assumes the file has exactly three columns in name, lat, lon order — TODO confirm)
region_names.columns=['name','lat','lon']
In [ ]:
region_names.shape
In [ ]:
from scipy.spatial.distance import cdist, pdist
In [ ]:
from geopy.distance import distance
from geopy import Point
In [ ]:
def create_point(row):
    """Convert one table row into a geopy ``Point``.

    Parameters
    ----------
    row : object exposing ``lat`` and ``lon`` attributes
        In this notebook, the Series handed over by
        ``DataFrame.apply(create_point, axis=1)``.

    Returns
    -------
    geopy.Point
        Point built from ``row.lat`` (latitude) and ``row.lon`` (longitude).
    """
    return Point(latitude=row.lat, longitude=row.lon)
In [ ]:
# Attach a geopy Point to every named region (create_point is called once per row).
region_names['point'] = region_names[['lat','lon']].apply(create_point, axis=1)
In [ ]:
# Same for the centre coordinates of every observation.
df['point'] = df[['lat','lon']].apply(create_point, axis=1)
In [ ]:
distance(region_names.point.iloc[0], df.point.iloc[0]).kilometers
In [ ]:
Y = cdist(df[['point']], region_names[['point']],
lambda x, y: distance(x,y).kilometers)
In [ ]:
distance?
In [ ]:
df['geopy_names'] = region_names.iloc[Y.argmin(axis=1)].name.values
In [ ]:
df['minimal_distance'] = Y.min(axis=1)
In [ ]:
# Join each observation with the coordinates of its assigned region.
# Observation coordinates get the '_IND' suffix, region coordinates '_WORD';
# the Point columns and the duplicate 'name' key are not carried over.
merged = (
    df.drop(columns='point')
    .merge(
        region_names.drop(columns='point'),
        left_on='geopy_names',
        right_on='name',
        suffixes=('_IND', '_WORD'),
    )
    .drop(columns='name')
)
In [ ]:
# Log-scaled histogram of each observation's distance to its nearest region.
merged.minimal_distance.hist(log=True, bins=100)
In [ ]:
# A nearest region farther away than 50 (in the units of `minimal_distance`)
# is not accepted as a match: relabel those observations as 'unknown'.
merged.loc[merged.minimal_distance > 50, 'geopy_names'] = 'unknown'
In [ ]:
# Number of observations per assigned region name (including 'unknown').
merged.groupby('geopy_names').size()
In [ ]:
merged = merged.merge(meta[['OBSERVATION_START_TIME', 'OBSERVATION_ID']],
left_on='obsid', right_on='OBSERVATION_ID').drop('OBSERVATION_ID', axis=1)
In [ ]:
merged.rename({'OBSERVATION_START_TIME': 'time'}, axis=1, inplace=True)
In [ ]:
from planet4 import stats
In [ ]:
# Project helper from planet4.stats, called with the frame and the name of
# its time column.
# NOTE(review): presumably adds the 'MY' column that the following cells
# group by — confirm in planet4.stats.
stats.define_martian_year(merged, 'time')
In [ ]:
# Let pandas print up to 100 rows before truncating the display.
pd.set_option("display.max_rows", 100)
In [ ]:
# Wide table: one row per region name, one column per MY value,
# cells = number of observations.
to_plot2 = merged.groupby(['geopy_names', 'MY']).size().unstack('MY')
In [ ]:
# Long table of the same counts: one row per (region name, MY) pair.
to_plot = merged.groupby(['geopy_names','MY']).size().reset_index()
In [ ]:
# How the unmatched observations are spread over MY values.
merged.loc[merged.geopy_names=='unknown'].MY.value_counts()
In [ ]:
# Coordinates and MY of the unmatched observations.
merged[merged.geopy_names=='unknown'][['lat_IND', 'lon_IND', 'MY']]
In [ ]:
In [ ]:
# Keep only the rows of the long table with MY below 33.
to_plot= to_plot[to_plot.MY < 33]
In [ ]:
# Export the unmatched observations for separate inspection.
merged[merged.geopy_names=='unknown'][['MY','obsid', 'lat_IND','lon_IND']].to_csv("unknowns.csv", index=False)
In [ ]:
# Relabel the long table's columns; from here on its region-name column is
# called 'name' and the counts column 'count'.
to_plot.columns = ['name', 'MY', 'count']
In [ ]:
to_plot2.head()
In [ ]:
# Move the region names out of the index into a regular 'geopy_names' column.
to_plot2 = to_plot2.reset_index()
In [ ]:
# After the preceding reset_index the region names live in the 'geopy_names'
# column and the index is just row numbers, so comparing the index with
# 'unknown' always selects nothing. Filter on the column instead.
to_plot2[to_plot2.geopy_names == 'unknown']
In [ ]:
# Drop the 'unknown' row so only named regions remain in the wide table.
to_plot2 = to_plot2[to_plot2.geopy_names!='unknown']
In [ ]:
# NOTE(review): DataFrame.get with a list looks up *columns* labelled
# 'Caterpillar' and 29, not a (row, column) cell — this probably does not
# return the intended count. The next cell does a row/column lookup.
to_plot2.get(['Caterpillar', 29])
In [ ]:
# Value for region 'Caterpillar' in column 28.
to_plot2.set_index('geopy_names').at['Caterpillar', 28]
In [ ]:
# Value for region 'Albany' in column 29. The comparison operator was
# missing, which made this line a syntax error.
to_plot2.loc[to_plot2.geopy_names == 'Albany', 29]
In [ ]:
for name in indexed.index:
print(name)
In [ ]:
indexed.at['Starburst', 28]
In [ ]:
In [ ]:
# One horizontal bar panel per count column 28..32, all sharing the
# region-name axis so the labels are drawn only once.
indexed = to_plot2.set_index('geopy_names')
f, axes = plt.subplots(nrows=1, ncols=5, sharey=True, figsize=(6,6),
                       constrained_layout=True)
for ax, yr in zip(axes, range(28,33)):
    g = sns.barplot(data=to_plot2, y='geopy_names', x=yr, ax=ax)
    # The shared axis already identifies the regions; suppress the per-panel label.
    ax.set_ylabel('')
    # Disabled experiment: annotate every bar with its count.
    # for name in indexed.index:
    #     s = indexed.at[name, yr]
    #     ax.text(s, name, s);
In [ ]:
# Region names become the index of the export table.
for_table = to_plot2.set_index('geopy_names')
In [ ]:
# Label the index 'ROI'; this is the header of the first column in the CSV.
for_table.index.name="ROI"
In [ ]:
for_table.to_csv("dataset_table.csv")
In [ ]:
merged.shape
In [ ]:
# Replace missing region/MY combinations with 0 and store the counts as
# integers. Only the count columns are cast: 'geopy_names' is a regular
# string column at this point (see the earlier reset_index), so casting the
# whole frame to int raises ValueError. The column layout is restored afterwards.
to_plot2 = to_plot2.set_index('geopy_names').fillna(0).astype('int').reset_index()
In [ ]:
# `to_plot` already holds one pre-computed count per (name, MY) row and its
# columns were relabelled to 'name', 'MY', 'count' earlier in the notebook.
# countplot re-counts rows, accepts only one of x / y, and the column
# 'geopy_names' no longer exists in this frame — so draw the stored counts
# with barplot, one bar per region and MY.
sns.barplot(x='count', y='name', hue='MY', data=to_plot)
In [ ]:
# Region coordinates (without the Point column) joined with the per-region
# counts on the region name, then written out.
# NOTE(review): this writes to the same "dataset_table.csv" as the earlier
# for_table.to_csv cell and therefore overwrites that file.
region_names.set_index('name').drop('point', axis=1).join(for_table).to_csv("dataset_table.csv")
In [ ]:
# The reverse join for display: counts first, then the region columns.
for_table.join(region_names.set_index('name'))
In [ ]:
# Latest observation start time in the merged table.
merged.time.max()
In [ ]:
to_plot
In [ ]:
!ls *.csv
In [ ]:
status = pd.read_csv("current_status.csv", header=None)
In [ ]:
status.columns = ['obsid', 'done']
In [ ]:
merged.columns
In [ ]:
newmerge = merged.merge(status, on='obsid')
In [ ]:
newmerge[newmerge.done < 25].geopy_names.value_counts()
In [ ]:
import qgrid
In [ ]:
In [ ]:
# Fan catalog file (one row per catalog entry, with an 'obsid' column).
fans = pd.read_csv("/Users/klay6683/local_data/P4_catalog_v1.1/P4_catalog_v1.1_L1C_cut_0.5_fan.csv")
In [ ]:
fans.head()
In [ ]:
# The five observations with the most fan rows.
fans.groupby('obsid').size().sort_values(ascending=False).head()
In [ ]:
# Average number of fan rows per observation.
fans.groupby('obsid').size().mean()
In [ ]:
# Same statistics for the blotch catalog file.
blotches = pd.read_csv("/Users/klay6683/local_data/P4_catalog_v1.1/P4_catalog_v1.1_L1C_cut_0.5_blotch.csv")
In [ ]:
# The five observations with the most blotch rows.
blotches.groupby('obsid').size().sort_values(ascending=False).head()
In [ ]:
# Average number of blotch rows per observation.
blotches.groupby('obsid').size().mean()
In [ ]: