notebook.community

Edit and run



In [1]:

    
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from astropy.time import Time



In [2]:

    
def convert_to_ap_Time(df, key):
    print(key)
    df[key] = pd.to_datetime(df[key])
    df[key] = Time([t1.astype(str) for t1 in  df[key].values], format="isot")
    return df

def convert_times_to_datetime(df):
    columns = ["Gun Time", "Chip Time", "TOD", "Beat the Bridge", "Beat the Bridge.1"]
    
    for key in columns:
        df = convert_to_ap_Time(df, key)
        df = convert_Time_to_seconds(df, key)
    return df

def convert_Time_to_seconds(df, key):
    t0 = Time("2017-05-04T00:00:00.000", format="isot")
    df["sub" + key] = df[key] - t0
    df["sub" + key] = [t.sec for t in df["sub" + key].values]
    return df

def find_astronomers(df):
    astronomers = ("Robert FIRTH", "Stephen BROWETT", "Mathew SMITH", "Sadie JONES")
    astro_df = df[df["Name"].isin((astronomers))]
    return astro_df

def plot_hist_with_astronomers(df, astro_df, key):
    rob_time = astro_df[key][158]/60.
    mat_time = astro_df[key][737]/60.
    steve_time = astro_df[key][1302]/60.
    sadie_time = astro_df[key][576]/60.

    mean_time = df[key].mean()/60
    median_time = df[key].median()/60

    plt.hist(df[key]/60., bins = 100)

    plt.plot([rob_time, rob_time], [0, 70], lw = 2, label = "Rob")
    plt.plot([mat_time, mat_time], [0, 70], lw = 2, label = "Mat")
    plt.plot([steve_time, steve_time], [0, 70], lw = 2, label = "Steve")
    plt.plot([sadie_time, sadie_time], [0, 70], lw = 2, label = "Sadie")

    plt.plot([mean_time, mean_time], [0, 70], lw = 2, color = "Black", ls = ":", label = "Mean")
    plt.plot([median_time, median_time], [0, 70], lw = 2, color = "Black", ls = "--", label = "Median")
    plt.xlabel(key.replace("sub", "") + " Minutes")

    plt.legend()



In [3]:

    
results_path = "/Users/berto/Code/zoidberg/ABPSoton10k/data/Results10k.csv"

df = pd.read_csv(results_path)
# df = df.drop(df.index[len(df)-10:])
df = df.drop(df.loc[df["Gun Time"] == "DNF"].index)
df = df.drop(df.loc[df["Gun Time"] == "QRY"].index)
df = df.drop(df.loc[df["Beat the Bridge"] == "99:99:99"].index)



In [4]:

    
df.columns









    Out[4]:





Index(['Pos', 'Bib No', 'Fav', 'Share', 'Print', 'Name', 'Gun Time',
       'Chip Time', 'Category', 'Cat Pos', 'Gender', 'Gen Pos', 'Club', 'Pace',
       'TOD', 'Beat the Bridge', 'G/Pos', 'Beat the Bridge.1', 'G/Pos.1'],
      dtype='object')



In [5]:

    
df = convert_times_to_datetime(df)









    



Gun Time
Chip Time
TOD
Beat the Bridge
Beat the Bridge.1



In [6]:

    
astro_df = find_astronomers(df)



In [7]:

    
astro_df









    Out[7]:






  
    
      
      Pos
      Bib No
      Fav
      Share
      Print
      Name
      Gun Time
      Chip Time
      Category
      Cat Pos
      ...
      TOD
      Beat the Bridge
      G/Pos
      Beat the Bridge.1
      G/Pos.1
      subGun Time
      subChip Time
      subTOD
      subBeat the Bridge
      subBeat the Bridge.1
    
  
  
    
      158
      159
      1662
      NaN
      NaN
      NaN
      Robert FIRTH
      2017-05-04T00:48:59.600
      2017-05-04T00:48:07.600
      Sen
      89
      ...
      2017-05-04T11:19:05.600
      2017-05-04T00:08:38.900
      125
      2017-05-04T00:08:38.900
      125
      2939.6
      2887.6
      40745.6
      518.9
      518.9
    
    
      576
      577
      1560
      NaN
      NaN
      NaN
      Sadie JONES
      2017-05-04T00:59:35.600
      2017-05-04T00:55:00.400
      Sen
      123
      ...
      2017-05-04T11:29:41.600
      2017-05-04T00:09:27.200
      80
      2017-05-04T00:09:27.200
      80
      3575.6
      3300.4
      41381.6
      567.2
      567.2
    
    
      737
      738
      1165
      NaN
      NaN
      NaN
      Mathew SMITH
      2017-05-04T01:01:54.300
      2017-05-04T00:56:49.400
      Sen
      276
      ...
      2017-05-04T11:32:00.300
      2017-05-04T00:09:52.100
      316
      2017-05-04T00:09:52.100
      316
      3714.3
      3409.4
      41520.3
      592.1
      592.1
    
    
      1302
      1303
      1915
      NaN
      NaN
      NaN
      Stephen BROWETT
      2017-05-04T01:14:55.000
      2017-05-04T01:09:49.700
      Sen
      364
      ...
      2017-05-04T11:45:01.000
      2017-05-04T00:12:50.800
      614
      2017-05-04T00:12:50.800
      614
      4495.0
      4189.7
      42301.0
      770.8
      770.8
    
  

4 rows × 24 columns



In [ ]:



In [18]:

    
# key = "subGun Time"
key = "subChip Time"

rob_time = astro_df[key][158]/60.
mat_time = astro_df[key][737]/60.
steve_time = astro_df[key][1302]/60.
sadie_time = astro_df[key][576]/60.

mean_time = df[key].mean()/60
median_time = df[key].median()/60

plt.hist(df[key]/60., bins = 100)

plt.plot([rob_time, rob_time], [0, 70], lw = 2, label = "Rob")
plt.plot([mat_time, mat_time], [0, 70], lw = 2, label = "Mat")
plt.plot([steve_time, steve_time], [0, 70], lw = 2, label = "Steve")
plt.plot([sadie_time, sadie_time], [0, 70], lw = 2, label = "Sadie")


plt.plot([mean_time, mean_time], [0, 70], lw = 2, color = "Black", ls = ":", label = "Mean")
plt.plot([median_time, median_time], [0, 70], lw = 2, color = "Black", ls = "--", label = "Median")
plt.xlabel(key.replace("sub", "") + " Minutes")
plt.legend()









    














    











    Out[18]:





<matplotlib.legend.Legend at 0x11c9757b8>



In [20]:

    
plot_hist_with_astronomers(df=df, astro_df=astro_df, key="subBeat the Bridge")

Chip Time vs Bridge Time



In [21]:

    
keyx = "subChip Time"
keyy = "subBeat the Bridge"

corr_co = np.corrcoef(df[keyx]/60., df[keyy]/60.)

plt.scatter(df[keyx]/60., df[keyy]/60.)

plt.xlabel(keyx.replace("sub", "") + " Minutes")
plt.ylabel(keyy.replace("sub", "") + " Minutes")









    














    











    Out[21]:





<matplotlib.text.Text at 0x11d6a12e8>



In [22]:

    
print(corr_co[1,0])









    



0.981240820274

Time vs Bib Number



In [23]:

    
keyx = "subChip Time"
keyy = "Bib No"

corr_co = np.corrcoef(df[keyx]/60., df[keyy])

plt.scatter(df[keyx]/60., df[keyy])

plt.xlabel(keyx.replace("sub", "") + " Minutes")
plt.ylabel(keyy.replace("sub", ""))









    














    











    Out[23]:





<matplotlib.text.Text at 0x11d987048>



In [24]:

    
print(corr_co[1,0])









    



0.0808167937219



In [25]:

    
# plt.scatter(df["Pos"], df["subChip Time"])
# plt.scatter(df["subChip Time"], df["subBeat the Bridge"])
plt.scatter(df["Pos"], df["G/Pos"])









    














    











    Out[25]:





<matplotlib.collections.PathCollection at 0x11df1c080>



In [29]:

    
# print(df.groupby("Gender"))
plt.scatter((df["subGun Time"] - df["subChip Time"])/60., df["subGun Time"]/60.)









    














    











    Out[29]:





<matplotlib.collections.PathCollection at 0x11d1aa630>



In [28]:

    
# plt.scatter(df["subChip Time"]/60., df["Bib No"])



In [17]:

    
# df.









    



  File "<ipython-input-17-d230d845cd15>", line 1
    df.
       ^
SyntaxError: invalid syntax



In [ ]:

    
# df.columns



In [30]:

    
# fig = plt.figure(figsize=[8, 4])
# fig.subplots_adjust(left = 0.09, bottom = 0.13, top = 0.99,
#                     right = 0.99, hspace=0, wspace = 0)

# ax1 = fig.add_subplot(111)

# ax1.scatter(df[df["Club"] == "NaN"]["subChip Time"]/60., df[df["Club"] == "NaN"]["subBeat the Bridge"]/60., color = "Orange")
# ax1.scatter(df[df["Club"] != "NaN"]["subChip Time"]/60., df[df["Club"] != "NaN"]["subBeat the Bridge"]/60., color = "Blue")









    














    











    Out[30]:





<matplotlib.collections.PathCollection at 0x11d104c50>



In [ ]:



In [31]:

    
clubs = df["Club"].unique()



In [32]:

    
clubs = [clubs[i] for i in np.arange(len(clubs)) if i != 1]



In [38]:

    
keyx = "subChip Time"
keyy = "subBeat the Bridge"

corr_co = np.corrcoef(df[keyx][df["Club"].isin(clubs)]/60., df[keyy][df["Club"].isin(clubs)]/60.)

plt.scatter(df[keyx][df["Club"].isin(clubs)]/60., df[keyy][df["Club"].isin(clubs)]/60., label = "clubbed")
# plt.scatter(df[keyx][df["Club"].isin(np.invert(clubs))]/60., df[keyy][df["Club"].isin(np.invert(clubs))]/60.)
keyx = "subChip Time"
keyy = "subBeat the Bridge"

corr_co = np.corrcoef(df[keyx]/60., df[keyy]/60.)

plt.scatter(df[keyx]/60., df[keyy]/60., label = "unclubbed", zorder = -9)

plt.xlabel(keyx.replace("sub", "") + " Minutes")
plt.ylabel(keyy.replace("sub", "") + " Minutes")

plt.legend()









    














    











    Out[38]:





<matplotlib.legend.Legend at 0x11d00dac8>



In [43]:

    
plt.hist(df[keyx][df["Club"].isin(clubs)]/60,label = "clubbed", normed = True, alpha = 0.7)
plt.hist(df[keyx]/60,label = "unclubbed", zorder = -99, normed= True, alpha = 0.7)









    














    











    Out[43]:





(array([ 0.00566366,  0.01993316,  0.03802746,  0.02912741,  0.01662322,
         0.00669342,  0.00191241,  0.00014711,  0.00022066,  0.00029422]),
 array([  36.47166667,   44.90033333,   53.329     ,   61.75766667,
          70.18633333,   78.615     ,   87.04366667,   95.47233333,
         103.901     ,  112.32966667,  120.75833333]),
 <a list of 10 Patch objects>)



In [46]:

    
plt.scatter((df["subGun Time"][df["Club"].isin(clubs)] - df["subChip Time"][df["Club"].isin(clubs)])/60., df["subGun Time"][df["Club"].isin(clubs)]/60.)
plt.scatter((df["subGun Time"] - df["subChip Time"])/60., df["subGun Time"]/60., zorder = -99)









    














    











    Out[46]:





<matplotlib.collections.PathCollection at 0x120123128>



In [44]:

    
print(df[keyx].mean()/60.)
print(df[keyx][df["Club"].isin(clubs)].mean()/60.)









    



62.066337053110196
60.520030413625314



In [ ]:

    
df[["Club", "Name", "subChip Time"]][df["Club"].isin(clubs)]



In [ ]:

    
# convert_to_ap_Time(df)
t0 = Time("2017-04-26T00:00:00.000", format="isot")



In [ ]:



In [ ]:

    
t1 = df["Gun Time"].values[0]



In [ ]:

    
t1



In [ ]:

    
t1 - t0



In [ ]:

    
col = df["Gun Time"] - t0



In [ ]:

    
x = col[0]



In [ ]:

    
x.



In [ ]:

    
col.sec



In [ ]:

	Pos	Bib No	Fav	Share	Print	Name	Gun Time	Chip Time	Category	Cat Pos	...	TOD	Beat the Bridge	G/Pos	Beat the Bridge.1	G/Pos.1	subGun Time	subChip Time	subTOD	subBeat the Bridge	subBeat the Bridge.1
158	159	1662	NaN	NaN	NaN	Robert FIRTH	2017-05-04T00:48:59.600	2017-05-04T00:48:07.600	Sen	89	...	2017-05-04T11:19:05.600	2017-05-04T00:08:38.900	125	2017-05-04T00:08:38.900	125	2939.6	2887.6	40745.6	518.9	518.9
576	577	1560	NaN	NaN	NaN	Sadie JONES	2017-05-04T00:59:35.600	2017-05-04T00:55:00.400	Sen	123	...	2017-05-04T11:29:41.600	2017-05-04T00:09:27.200	80	2017-05-04T00:09:27.200	80	3575.6	3300.4	41381.6	567.2	567.2
737	738	1165	NaN	NaN	NaN	Mathew SMITH	2017-05-04T01:01:54.300	2017-05-04T00:56:49.400	Sen	276	...	2017-05-04T11:32:00.300	2017-05-04T00:09:52.100	316	2017-05-04T00:09:52.100	316	3714.3	3409.4	41520.3	592.1	592.1
1302	1303	1915	NaN	NaN	NaN	Stephen BROWETT	2017-05-04T01:14:55.000	2017-05-04T01:09:49.700	Sen	364	...	2017-05-04T11:45:01.000	2017-05-04T00:12:50.800	614	2017-05-04T00:12:50.800	614	4495.0	4189.7	42301.0	770.8	770.8