Table of Contents

    
    
    In [ ]:
    # Imports live in the first cell so the notebook survives Restart & Run All
    # without relying on names preloaded by an IPython startup profile.
    from pathlib import Path
    import pandas as pd
    # Directory holding the v1.0 catalog files that this notebook reads.
    # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
    folder = Path('/Users/klay6683/Dropbox/data/planet4/p4_analysis/P4_catalog_v1.0/')
    
    
    
    In [ ]:
    # Sibling directory of the v1.0 folder that will hold the v1.1 catalog.
    newfolder = folder.parent / "P4_catalog_v1.1"
    
    
    
    In [ ]:
    # exist_ok=True keeps this cell re-runnable once the directory already exists.
    newfolder.mkdir(exist_ok=True)
    
    
    
    In [ ]:
    # Path of the v1.0 fan catalog CSV that is read below.
    fanfname = folder.joinpath("P4_catalog_v1.0_L1C_cut_0.5_fan_meta_merged.csv")
    
    
    
    In [ ]:
    # Output path of the v1.1 fan catalog. It is built from `newfolder` so the
    # file lands in the v1.1 directory rather than next to the v1.0 input.
    newfanfname = newfolder / "P4_catalog_v1.1_L1C_cut_0.5_fan_meta_merged.csv"
    
    
    
    In [ ]:
    # Load the v1.0 fan catalog into a DataFrame.
    fans = pd.read_csv(fanfname)
    
    
    
    In [ ]:
    # Number of non-null marking_id entries in the catalog.
    fans["marking_id"].count()
    
    
    
    In [ ]:
    # Number of distinct marking_ids; fewer than the non-null count means duplicates exist.
    fans["marking_id"].nunique()
    
    
    
    In [ ]:
    # Rows per marking_id, largest first: shows the most-duplicated markings.
    fans.groupby('marking_id').size().sort_values(ascending=False).head()
    

    Several fnotching events left multiple entries for the same marking_id in the catalog, each with a different vote_ratio. I think the best way to deal with them is to average the vote_ratio, as a fair judgement of the marking's "fan-ness".

    
    
    In [ ]:
    # Summary statistics of vote_ratio over all rows sharing one example marking_id.
    fans.query("marking_id=='F018f75'")["vote_ratio"].describe()
    
    
    
    In [ ]:
    # Individual vote_ratio values of the rows sharing a second example marking_id.
    fans.query("marking_id=='F00cfa9'")["vote_ratio"]
    
    
    
    In [ ]:
    # Average every numeric column per marking_id, so rows sharing a marking_id
    # collapse into a single row indexed by marking_id.
    # numeric_only=True is needed on pandas >= 2.0, where averaging non-numeric
    # columns raises a TypeError instead of silently dropping them
    # (presumably tile_id and obsid, which are re-attached further down).
    mean_fans = fans.groupby('marking_id').mean(numeric_only=True)
    
    
    
    In [ ]:
    # Inspect the averaged row of one example marking_id.
    mean_fans.loc['F00cfa9']
    
    
    
    In [ ]:
    # (rows, columns) of the original catalog, for comparison with mean_fans.
    fans.shape
    
    
    
    In [ ]:
    # The groupby made marking_id the index; turn it back into a regular column.
    # NOTE(review): not idempotent; re-running this cell adds an extra 'index' column.
    mean_fans = mean_fans.reset_index()
    mean_fans.shape
    
    
    
    In [ ]:
    # (rows, columns) of the original catalog.
    fans.shape
    
    
    
    In [ ]:
    # (rows, columns) of the averaged catalog.
    mean_fans.shape
    
    
    
    In [ ]:
    # Preview the first rows of the averaged catalog.
    mean_fans.head()
    
    
    
    In [ ]:
    # Re-attach tile_id and obsid, taken from the first row of each marking_id.
    # marking_id is a regular column of mean_fans here (its index was reset
    # earlier), so the join has to match that column against the lookup's
    # marking_id index via `on=`. An index-to-index join would compare integer
    # row labels with marking_id values and return no rows.
    mean_fans = mean_fans.join(fans.drop_duplicates(subset='marking_id').set_index('marking_id')[['tile_id', 'obsid']],
                               on='marking_id', how='inner')
    mean_fans.shape
    
    
    
    In [ ]:
    # Renumber the rows 0..n-1. marking_id is already a regular column, so the
    # old row labels are dropped rather than inserted as a stray 'index' column.
    # Assigning the result (no inplace=True) keeps the cell safe to re-run.
    mean_fans = mean_fans.reset_index(drop=True)
    
    
    
    In [ ]:
    # Relative change in row count caused by the averaging (negative = rows removed).
    (len(mean_fans) - len(fans)) / len(fans)
    
    
    
    In [ ]:
    # Write the averaged catalog with the original column order and no index column.
    mean_fans.loc[:, fans.columns].to_csv(newfanfname, index=False)
    
    
    
    In [ ]:
    # Final (rows, columns) of the averaged catalog.
    mean_fans.shape
    
    
    
    In [ ]:
    # (rows, columns) of the original catalog, for a last comparison.
    fans.shape
    
    
    
    In [ ]: