In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the public-leaderboard export; the path is relative to the notebook's
# working directory. The preview below shows one row per submission with the
# columns TeamId, TeamName, SubmissionDate and Score.
leaderboard = pd.read_csv('transfer-learning-on-stack-exchange-tags-publicleaderboard.csv')

# Peek at a few random rows. A fixed random_state keeps the preview identical
# across "Restart Kernel -> Run All" instead of changing on every execution.
leaderboard.sample(4, random_state=0)


Out[2]:
TeamId TeamName SubmissionDate Score
1264 508529 No Print Good Product 2017-03-13 02:17:12 0.08511
107 415012 connectwithghosh 2016-11-28 05:22:28 0.06519
771 421196 NTU_r05921004_FourPeople 2017-01-05 08:52:29 0.14867
1401 563016 ObserverL 2017-03-23 12:14:37 0.13107

In [12]:
# Distribution of the Score column over every row of the leaderboard
# (i.e. all submissions, several per team are possible).
# Explicit fig/ax instead of the pyplot state machine, so the histogram is
# guaranteed to land on the axes created here.
fig, ax = plt.subplots(figsize=(14, 6))
leaderboard['Score'].hist(bins=50, ax=ax)
ax.set_xlabel('F1 Score')
ax.set_ylabel('Number of submissions')
# The trailing ';' stops the returned Text object from being echoed as cell output.
ax.set_title('Public leaderboard scores - all submissions');


Out[12]:
<matplotlib.text.Text at 0x7f614288f048>

In [4]:
# Show the submissions whose Score is strictly greater than 0.9.
leaderboard.loc[leaderboard['Score'].gt(0.9)]


Out[4]:
TeamId TeamName SubmissionDate Score
1169 456730 Kilorad 2017-03-03 07:04:59 0.93974
1289 514346 Test 2017-03-15 01:37:11 0.97295
1340 456730 Kilorad 2017-03-19 12:53:43 0.94087
1341 456730 Kilorad 2017-03-19 13:02:53 0.94198
1342 456730 Kilorad 2017-03-19 14:04:52 0.94635
1361 442101 Mice Labs 2017-03-20 08:19:27 0.91404

In [5]:
# Best submission per team: exactly one complete row for each TeamId.
#
# GroupBy.first() is avoided on purpose: it returns the first *non-null value
# per column*, so with any missing value it can combine fields taken from
# different submissions of the same team. Sorting by Score and keeping the
# first row per TeamId always yields a single, real submission row.
topByTeam = (
    leaderboard
    # Stable sort: submissions with equal Score keep their original file order,
    # so the chosen row is deterministic. NaN scores are placed last.
    .sort_values('Score', ascending=False, kind='mergesort')
    .drop_duplicates(subset='TeamId', keep='first')
    # Same layout as before: ordered by TeamId with a fresh 0..n-1 index.
    .sort_values('TeamId')
    .reset_index(drop=True)
)

In [14]:
# Distribution of the Score column of topByTeam (one row per TeamId, holding
# that team's highest-scoring submission).
# Explicit fig/ax instead of the pyplot state machine, so the histogram is
# guaranteed to land on the axes created here.
fig, ax = plt.subplots(figsize=(14, 6))
topByTeam['Score'].hist(bins=30, ax=ax)
ax.set_xlabel("F1 Score")
ax.set_ylabel("Number of teams")
# The trailing ';' stops the returned Text object from being echoed as cell output.
ax.set_title("Public leaderboard scores - best submission per team");


Out[14]:
<matplotlib.text.Text at 0x7f6142579240>

In [ ]: