In [1]:
import json
import requests
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
# Live-results endpoint; the event and stage are selected by the eventId /
# stageId query parameters embedded in the URL.
data_source = 'https://livescore.123go.vn/livescoregeneralv2/result?eventId=610&stageId=30&typePage=60&flagLeader=0&refresh_cache=0'
# requests applies no timeout by default, so an unresponsive server would hang
# the notebook indefinitely; bound the wait explicitly.
resp = requests.get(url=data_source, timeout=30)
# Fail loudly on an HTTP error status instead of trying to parse an error page.
resp.raise_for_status()
json_data = resp.json()
In [ ]:
# Optional: load the results from a JSON file saved on disk. This assigns the
# same `json_data` name that the network request produces.
# Point json_path at the cached file before running this cell.
json_path = 'somewhere/on/your/disk'
# JSON text is UTF-8; pass the encoding explicitly so the read does not depend
# on the platform's default locale encoding.
with open(json_path, encoding='utf-8') as json_file:
    json_data = json.load(json_file)
In [3]:
# Record count as reported in the response's paging metadata
# (presumably the total number of results for this query - confirm).
page_info = json_data['page_info']
total_record = page_info['total_record']
In [4]:
import datetime

# Rows collected as (bib_number, chip_time, rank, age_groups, sex) tuples.
participants = []
# Manual sex overrides keyed by bib number, used when the sex cannot be read
# from the record's age-group data.
exceptions = {
    '21891': 'Female'
}
for record in json_data['data']:
    chip = datetime.datetime.strptime(record['chip_time'], '%H:%M:%S')
    # Keep only chip times of at least one hour.
    # NOTE(review): presumably this drops placeholder / invalid times - confirm.
    if chip.hour == 0:
        continue
    try:
        # NOTE(review): '68' looks like the id of the age group that carries
        # the Male/Female label - confirm against the API.
        sex = record['age_groups']['68']['age_group_name']
    except (KeyError, TypeError):
        # KeyError: the expected key is absent. TypeError: the value is not a
        # mapping (e.g. a list or None). Anything else is a real error and
        # should surface instead of being silently mapped to 'NA'.
        sex = exceptions.get(record['bib_number'], 'NA')
    participants.append((
        record['bib_number'],
        record['chip_time'],
        record['rank'],
        record['age_groups'],
        sex,
    ))
In [5]:
# One row per collected participant tuple; the column order mirrors the
# order of the fields inside each tuple.
df = pd.DataFrame(
    data=participants,
    columns=['bib', 'chip_time', 'rank', 'age_group', 'sex'],
)
def comp_minutes(t_str: str) -> int:
    """Convert a colon-separated time string into whole minutes.

    Only the first two fields (hours, then minutes) are used; any further
    field, such as seconds, is ignored rather than rounded.

    Args:
        t_str: Time string such as ``'06:05:00'``.

    Returns:
        ``hours * 60 + minutes`` as an integer.
    """
    fields = t_str.split(':')
    hours = int(fields[0])
    minutes = int(fields[1])
    return hours * 60 + minutes
# Add the finishing time in whole minutes, derived from the chip-time string.
df = df.assign(time=df['chip_time'].map(comp_minutes))
In [6]:
# Rows whose sex could not be resolved and was therefore recorded as 'NA'.
na = df.loc[df['sex'].eq('NA')]
In [7]:
# Display the participants whose sex is recorded as 'NA'.
na
Out[7]:
In [8]:
# Split the participants by sex for the per-sex histograms.
male = df[df['sex'] == 'Male']
# Everyone not labelled 'Male', excluding rows whose sex is unknown ('NA'):
# an unresolved sex must not be counted (and plotted) as female.
female = df[~df['sex'].isin(['Male', 'NA'])]
In [9]:
# Preview the first 10 rows of the male subset.
male.head(10)
Out[9]:
In [10]:
# Preview the first 10 rows of the female subset.
female.head(10)
Out[10]:
In [11]:
import numpy as np

# 15 evenly spaced values from the shortest to the longest time (minutes),
# shared by both histograms so their bars line up.
bins = np.linspace(start=min(df['time']), stop=max(df['time']), num=15)
In [12]:
# The 'seaborn-deep' style was renamed to 'seaborn-v0_8-deep' in matplotlib 3.6
# and the old name was later removed, so use whichever name this installation
# provides. If neither exists, the current default style is kept.
for style_name in ('seaborn-v0_8-deep', 'seaborn-deep'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break

# Overlay the two distributions on shared bins; alpha keeps both visible.
plt.hist(male.time, bins, alpha=0.5, label='male')
plt.hist(female.time, bins, alpha=0.5, label='female')
plt.legend(loc='upper right')
plt.xlabel('duration (minutes)')
plt.ylabel('#participants')
plt.title('Dalat Ultra Trail 2019 - 21K')
# Vertical reference line at 365 minutes.
# NOTE(review): the meaning of 365 is not stated anywhere in the notebook
# (cut-off time? a particular finisher?) - document it.
plt.axvline(x=365, color='r', ls='--')
# Save before show(): showing the figure first can leave an empty canvas to save.
plt.savefig('DUT2019.png', dpi=350)
plt.show()