In [15]:
import rpy2
from rpy2.robjects import r, pandas2ri
import pandas as pd

def fp_stats(file_path):
    if file_path[-4:] == '.csv':
        df = pd.DataFrame.from_csv(file_path)
    else:
        pandas2ri.activate()
        rf=r['load'](file_path)
        df = pd.DataFrame(data=r['in_sample_result'])

    df_bad = df.loc[df['SEVERE'] == 1]
    bad_mines = set(df_bad['MINE_ID'])
    all_mines = set(df['MINE_ID'])
    good_mines = all_mines - bad_mines
    
    bad, post, early = 0, 0, 0
    for mine in good_mines:
        df_mine = df.loc[df['MINE_ID'] == mine]
        bad += sum(df_mine['PREDICTION'])

    for mine in bad_mines:
        df_mine = df.loc[df['MINE_ID'] == mine]
        real = list(df_mine['SEVERE'])
        pred = list(df_mine['PREDICTION'])
        for i in range(len(pred)):
            if pred[i] == 1 and real[i] == 0:
                if sum(real[i:i+4]) > 0:
                    early += 1
                if sum(real[i-4:i]) > 0:
                    post += 1
                if sum(real[i:i+4]) == 0 and sum(real[i-4:i]) == 0:
                    bad += 1  
                    
    return {'bad':bad, 'post':post, 'early':early}

In [ ]:


In [9]:
print(fp_stats('./Result_clogit.RData'))


{'bad': 72951, 'post': 6453, 'early': 2833}

In [16]:
print(fp_stats('./outputResults.csv'))


{'bad': 72562, 'post': 3654, 'early': 2686}