In [15]:
import rpy2
from rpy2.robjects import r, pandas2ri
import pandas as pd
def _load_predictions(file_path):
    """Read the prediction table from a ``.csv`` file or, otherwise, an RData file."""
    if file_path.endswith('.csv'):
        # DataFrame.from_csv was removed in pandas 1.0. read_csv with
        # index_col=0 keeps its behaviour of using the first column as the
        # index; the index itself is never read below, so date parsing of it
        # is not requested.
        return pd.read_csv(file_path, index_col=0)
    pandas2ri.activate()
    # load() is called only for its side effect; the table is then looked up
    # in R by the name 'in_sample_result'.
    r['load'](file_path)
    return pd.DataFrame(data=r['in_sample_result'])


def fp_stats(file_path, window=4):
    """Break down false-positive predictions per mine into three counts.

    The table must provide the columns ``MINE_ID``, ``SEVERE`` and
    ``PREDICTION``. Rows are processed in file order within each mine.
    NOTE(review): the window logic only makes sense if rows of a mine are in
    chronological order -- confirm against the data producer.

    Counting rules:

    * Mines with no row where ``SEVERE == 1``: the sum of ``PREDICTION`` over
      the mine is added to ``bad``.
    * Mines with at least one ``SEVERE == 1`` row: every row with
      ``PREDICTION == 1`` and ``SEVERE == 0`` is inspected. It counts as
      ``early`` if ``SEVERE`` sums to more than 0 over rows ``i .. i+window-1``,
      as ``post`` if it does so over rows ``i-window .. i-1``, and as ``bad``
      if both sums are exactly 0. A row can count as both ``early`` and
      ``post``.

    Args:
        file_path: Path to a ``.csv`` file; any other suffix is loaded as an
            RData file containing an object named ``in_sample_result``.
        window: Number of rows used for the look-ahead and look-back windows.
            Defaults to 4, the value that used to be hard-coded.

    Returns:
        dict with the keys ``'bad'``, ``'post'`` and ``'early'``.

    Raises:
        ValueError: if ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError('window must be at least 1')

    df = _load_predictions(file_path)

    bad, post, early = 0, 0, 0
    # One pass over the groups instead of re-filtering the frame per mine.
    for _, df_mine in df.groupby('MINE_ID', sort=False):
        real = list(df_mine['SEVERE'])
        pred = list(df_mine['PREDICTION'])

        if not (df_mine['SEVERE'] == 1).any():
            # No severe event at this mine: every positive prediction is bad.
            bad += sum(pred)
            continue

        for i in range(len(pred)):
            if pred[i] == 1 and real[i] == 0:
                ahead = sum(real[i:i + window])
                # Clamp the start at 0: a negative start index would wrap
                # around to the end of the list and silently yield an empty
                # (or wrong) look-back window for the first `window` rows.
                behind = sum(real[max(0, i - window):i])
                if ahead > 0:
                    early += 1
                if behind > 0:
                    post += 1
                if ahead == 0 and behind == 0:
                    bad += 1

    return {'bad': bad, 'post': post, 'early': early}
In [ ]:
In [9]:
# Print the bad/post/early false-positive counts for the RData results file.
print(fp_stats('./Result_clogit.RData'))
In [16]:
# Print the bad/post/early false-positive counts for the CSV results file.
print(fp_stats('./outputResults.csv'))