In [ ]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
In [ ]:
# Load the real-word and fake-word score tables. Both get the same three
# column names; the first column is then overwritten with a class label
# (1 for the real-word file, 0 for the fake-word file), so whatever that
# column held in the CSV is discarded.
score_columns = ["real", "word", "error"]

df_r = pd.read_csv("../data/realword_scores.csv")
df_f = pd.read_csv("../data/fakeword_scores.csv")

for frame, is_real in ((df_r, 1), (df_f, 0)):
    frame.columns = score_columns
    frame["real"] = is_real

# Stack both tables row-wise; each keeps its own original row labels.
df = pd.concat([df_r, df_f], axis=0)
In [ ]:
# Overlay the "error" distributions of both tables on one shared set of
# bin edges (50 evenly spaced edges spanning 0 to 1), drawn
# semi-transparent so the overlap stays visible.
bins = np.linspace(start=0, stop=1, num=50)

for scores, label in ((df_r, 'Real'), (df_f, 'Fake')):
    plt.hist(scores["error"], bins=bins, alpha=0.5, label=label)

plt.legend(loc='upper right')
plt.show()
In [ ]:
# 27th-percentile value of the fake-word "error" column.
df_f["error"].quantile(q=0.27)
In [ ]:
# Fake-word rows whose "error" is strictly below 0.03.
df_f.loc[df_f["error"].lt(0.03)]
In [ ]:
# Real-word rows whose "error" is strictly below 0.01.
df_r.loc[df_r["error"].lt(0.01)]
In [ ]: