In [ ]:
# Directory holding the released v1.0 catalog files (the input of this notebook).
v1_0_catalog_dir = '/Users/klay6683/Dropbox/data/planet4/p4_analysis/P4_catalog_v1.0/'
folder = Path(v1_0_catalog_dir)
In [ ]:
# Sibling directory of the v1.0 catalog folder that will hold the v1.1 release.
newfolder = folder.parent / "P4_catalog_v1.1"
In [ ]:
# Create the v1.1 output directory. exist_ok=True keeps this cell re-runnable:
# a bare mkdir() raises FileExistsError once the directory has been created.
newfolder.mkdir(exist_ok=True)
In [ ]:
# Path of the v1.0 fan catalog CSV that is read below.
fanfname = folder.joinpath("P4_catalog_v1.0_L1C_cut_0.5_fan_meta_merged.csv")
In [ ]:
# Destination of the de-duplicated v1.1 fan catalog. The path is built from
# `newfolder` so the file lands in the v1.1 directory created above;
# fanfname.with_name(...) would only swap the file name and keep the v1.0
# parent directory.
newfanfname = newfolder / "P4_catalog_v1.1_L1C_cut_0.5_fan_meta_merged.csv"
In [ ]:
# Load the v1.0 fan catalog into a DataFrame; this is the table that gets
# de-duplicated in the rest of the notebook.
fans = pd.read_csv(fanfname)
In [ ]:
# Number of non-null marking_id entries, duplicates included.
fans['marking_id'].count()
In [ ]:
# Number of distinct marking_id values; a smaller number than the count above
# means some markings occur more than once.
fans['marking_id'].nunique()
In [ ]:
# Rows per marking_id, most frequently repeated markings first.
entries_per_marking = fans.groupby('marking_id').size()
entries_per_marking.sort_values(ascending=False).head()
Because of several fnotching events, some markings appear more than once in the catalog, each time with a different vote_ratio. I think the best way to deal with these duplicates is to average their vote_ratio values, which gives a fair judgement of each marking's "fan-ness".
In [ ]:
# Summary statistics of the vote_ratio values recorded for one example marking.
example_rows = fans.query("marking_id=='F018f75'")
example_rows['vote_ratio'].describe()
In [ ]:
# The individual vote_ratio values recorded for a second example marking.
example_rows = fans.query("marking_id=='F00cfa9'")
example_rows['vote_ratio']
In [ ]:
# Collapse duplicate entries: one row per marking_id (which becomes the index),
# holding the mean of every numeric column, so repeated vote_ratio values are
# averaged. numeric_only=True is spelled out because non-numeric columns cannot
# be averaged: pandas >= 2.0 raises a TypeError for them, while older versions
# dropped them silently. (tile_id and obsid are re-attached in a later cell.)
mean_fans = fans.groupby('marking_id').mean(numeric_only=True)
In [ ]:
# Look up the averaged row of one example marking; at this point mean_fans is
# indexed by marking_id (result of the groupby above).
mean_fans.loc['F00cfa9']
In [ ]:
# (rows, columns) of the original catalog, for comparison with mean_fans.
fans.shape
In [ ]:
# Move marking_id from the index back into a regular column; the frame gets a
# default integer index instead.
mean_fans = mean_fans.reset_index()
# (rows, columns) after averaging: one row per unique marking_id.
mean_fans.shape
In [ ]:
# (rows, columns) of the original catalog, shown again for comparison.
fans.shape
In [ ]:
# (rows, columns) of the averaged catalog.
mean_fans.shape
In [ ]:
# Preview the first rows of the averaged catalog.
mean_fans.head()
In [ ]:
# Re-attach tile_id and obsid from the original table, using the first
# occurrence of each marking_id as the lookup row.
# mean_fans carries marking_id as a regular column here (its index was reset
# in an earlier cell), so the join has to match that column against the
# marking_id index of the lookup table. A plain index-on-index inner join would
# compare the integer row index with marking_id strings and match nothing.
id_lookup = (
    fans.drop_duplicates(subset='marking_id')
    .set_index('marking_id')[['tile_id', 'obsid']]
)
mean_fans = mean_fans.join(id_lookup, on='marking_id', how='inner')
mean_fans.shape
In [ ]:
# Turn the current index into a regular column and fall back to a default
# integer index (rebinding the name rather than mutating in place).
mean_fans = mean_fans.reset_index()
In [ ]:
# Relative change in row count between the averaged and the original catalog
# (negative when averaging removed rows).
n_original = fans.shape[0]
n_averaged = mean_fans.shape[0]
(n_averaged - n_original) / n_original
In [ ]:
# Write the averaged catalog with the same columns, in the same order, as the
# original table; the DataFrame index is not written.
export_table = mean_fans.loc[:, fans.columns]
export_table.to_csv(newfanfname, index=False)
In [ ]:
# Final (rows, columns) of the averaged catalog.
mean_fans.shape
In [ ]:
# (rows, columns) of the original catalog, for a last comparison.
fans.shape
In [ ]: