In [5]:
import pandas as pd, numpy as np
import kendo_romania

Read data


In [6]:
matches={i:{} for i in range(1993,2019)}
Import data

2018

CR


In [7]:
filename='rawdata/2018/CR/CR25 - Public.xlsx'
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
             'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
matches[2018]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3)

SL


In [8]:
filename='rawdata/2018/SL/Prezenta SL_WKC17.xlsx'
sheetname=['F','M']
matches[2018]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,5)

2017

CN


In [9]:
categories=['Individual masculin','Echipe']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10,
             'shinpan':{'fukushin1':16,'shushin':17,'fukushin2':18}}
shift=0
matches[2017]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [10]:
categories=['Individual juniori mici','Individual juniori mari','Individual feminin']
filename=['rawdata/2017/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CN']=matches[2017]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

CR


In [11]:
categories=['Individual masculin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2017]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [12]:
categories=['Individual juniori','Individual veterani','Individual feminin']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=-1
matches[2017]['CR']=matches[2017]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [13]:
categories=['Echipe']
filename=['rawdata/2017/CR/'+i+'.xlsx' for i in categories]
shift=0
matches[2017]['CR']=matches[2017]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

SL


In [14]:
filename='rawdata/2017/SL/Prezenta.xlsx'
sheetname=['F','M','J']
matches[2017]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

2016

SL


In [15]:
filename='rawdata/2016/SL/Event management - stagiul 4.xlsx'
sheetname=['F','M']
matches[2016]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

In [16]:
sheetname=['J']
matches[2016]['SL']=matches[2016]['SL']+\
        kendo_romania.get_matches_from_table(filename,sheetname,5)

CN


In [17]:
categories=['Individual masculin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
sheetname='List of matches'
column_keys={'match_type':2,'aka':{'name':5,'hansoku':6,'point1':7,'point2':8,'point3':9},
             'shiro':{'name':15,'hansoku':14,'point1':11,'point2':12,'point3':13},'outcome':10}
shift=2
matches[2016]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [18]:
categories=['Individual feminin']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [19]:
categories=['Echipe','Male team']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=0
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

In [20]:
categories=['Junior 1 individual','Junior 2 individual']
filename=['rawdata/2016/CN/'+i+'.xlsx' for i in categories]
shift=-1
matches[2016]['CN']=matches[2016]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,3,shift=shift)

CR


In [21]:
filename='rawdata/2016/CR/Event management_CR23.2016.xlsx'
sheetname=['IF_m','IJ_m','IM_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2016]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [22]:
sheetname=['EJ_m','ES_m']
matches[2016]['CR']=matches[2016]['CR']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

2015

SL


In [23]:
filename='rawdata/2015/SL/Event management - stagiul 5.xlsx'
sheetname=['SF_s','SM_s']
matches[2015]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

CN


In [24]:
filename='rawdata/2015/CN/Event management_CN22.2015.xlsx'
sheetname=['IF_m','IJ2_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [25]:
sheetname='E_m'
matches[2015]['CN']=matches[2015]['CN']+\
        kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

CR


In [26]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IF_m','IS_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2015]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [27]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IJ1_s']
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_table(filename,
                    sheetname,skiprows=7,shift=1,nrows=9)

In [28]:
filename='rawdata/2015/CR/Event management_CR22.2015.xlsx'
sheetname=['IJ2_s']
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_table(filename,
                    sheetname,skiprows=8,shift=12,nrows=8)
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_table(filename,
                    sheetname,skiprows=16,shift=12,nrows=8)

In [29]:
sheetname=['IM_s']
column_keys={'match_type':19,'aka':{'name':20,'point1':21},
             'shiro':{'name':24,'point1':23},'outcome':22}
shift=0
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=10
matches[2015]['CR']=matches[2015]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)

2014

SL


In [30]:
filename='rawdata/2014/SL/Lista de participanti 6.xlsx'
sheetname=['SF_s','SM_s','J_s']
matches[2014]['SL']=kendo_romania.get_matches_from_table(filename,sheetname,6)

CR


In [31]:
filename='rawdata/2014/CR/Event management_CR21.2014.xlsx'
sheetname=['IC-10_m','IC_m','IJ_m','IS_m','IF_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2014]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [32]:
sheetname=['IM_s']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=8
matches[2014]['CR']=matches[2014]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)

CN


In [33]:
filename='rawdata/2014/CN/Event management_CN21.2014 - v2.xlsx'
sheetname=['IF_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2014]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [34]:
sheetname=['IM_s']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=19
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=29
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)

In [35]:
sheetname=['IJ1_s']
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,7,shift=1,nrows=10)

In [36]:
sheetname=['IJ2_s']
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,8,shift=12,nrows=6)
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,14,shift=12,nrows=6)
matches[2014]['CN']=matches[2014]['CN']+\
                kendo_romania.get_matches_from_table(filename,sheetname,20,shift=12,nrows=6)

2013

CN


In [37]:
filename='rawdata/2013/CN/Event management_CN2013.xlsx'
sheetname=['IS_m','IF_m','IC_m','IJ_m','E_m','IM_m']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

CR


In [38]:
filename='rawdata/2013/CR/Event management_CR2013.xlsx'
sheetname=['IF_meciuri','IJ_meciuri','IM_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

SL


In [39]:
filename='rawdata/2013/SL/Event management.xlsx'
sheetname=['E_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2013]['SL']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [40]:
sheetname=['Schema feminin']
matches[2013]['SL']=matches[2013]['SL']+\
                kendo_romania.get_matches_from_table(filename,sheetname,2,nrows=14)
sheetname=['Schema juniori']
matches[2013]['SL']=matches[2013]['SL']+\
                kendo_romania.get_matches_from_table(filename,sheetname,2,nrows=12)

2012

CN


In [41]:
filename='rawdata/2012/CN/Event management CN2012.xlsx'
sheetname=['E_meciuri','IJ_meciuri','IF_meciuri','IM_meciuri']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':6,'point1':5},'outcome':3,
             'shinpan':{'fukushin1':7,'shushin':8,'fukushin2':9}}
shift=0
matches[2012]['CN']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

CR


In [42]:
filename='rawdata/2012/CR/2012.05.05-06 - CR - Cluj.xlsx'
sheetname=['IC']
matches[2012]['CR']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=4)

In [43]:
sheetname=['IJ']
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,14,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,19,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,24,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,30,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,35,shift=1,nrows=3)

In [44]:
sheetname=['IF']
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,22,shift=shift)

In [45]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=6
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [46]:
sheetname=['ES']
column_keys={'match_type':20,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
shift=4
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
shift=9
matches[2012]['CR']=matches[2012]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

2011

CN


In [47]:
filename='rawdata/2011/CN/2011.11.26-27 - CN - Bucuresti_print.xlsx'
sheetname=['IJ']
matches[2011]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,23,shift=1)

In [48]:
sheetname=['IF']
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,23,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,28,shift=shift)

In [49]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [50]:
sheetname=['E']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=17
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)
shift=23
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)
shift=29
matches[2011]['CN']=matches[2011]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)

CR


In [51]:
filename='rawdata/2011/CR/2011.04.16-17 - CR - Miercurea Ciuc.xlsx'
sheetname=['ES']
column_keys={'match_type':6,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2011]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=5
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=11
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)

In [52]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [53]:
sheetname=['IF']
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,15,shift=1,nrows=4)
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,26,shift=shift)

In [54]:
sheetname=['IJ']
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,16,shift=1,nrows=3)
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,27,shift=1,nrows=3)

In [55]:
sheetname=['IC']
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

In [56]:
sheetname=['EJ']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=0
matches[2011]['CR']=matches[2011]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,15,shift=shift)

2010

CR


In [57]:
filename='rawdata/2010/CR/2010.03.27-28 - CR - Budeasa.xlsx'
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2010]['CR']=kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [58]:
sheetname=['IF']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,15,shift=1,nrows=4)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
column_keys={'match_type':0,'aka':{'name':1,'point1':3},
             'shiro':{'name':6,'point1':5},'outcome':4}
shift=0
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,26,shift=shift)

In [59]:
sheetname=['EJ']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=0
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,15,shift=shift)

In [60]:
sheetname=['IJ']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,16,shift=1,nrows=3)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,21,shift=1,nrows=4)
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,27,shift=1,nrows=3)

In [61]:
sheetname=['IC']
matches[2010]['CR']=matches[2010]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

CN


In [62]:
filename='rawdata/2010/CN/2010.11.27-28 - CN - Bucuresti.xlsx'
sheetname=['IJ']
matches[2010]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,point_shift=0,nrows=5)

In [63]:
sheetname=['IC']
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)

In [64]:
sheetname=['IF']
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,nrows=3)
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,18,shift=1,nrows=3)

In [65]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=6
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)
shift=12
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,4,shift=shift)

In [66]:
sheetname=['E']
column_keys={'match_type':15,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)
shift=5
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)
shift=11
matches[2010]['CN']=matches[2010]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,5,shift=shift)

2009

CN


In [67]:
filename='rawdata/2009/CN/2009.11.28-29 - CN - Bucuresti.xlsx'
sheetname=['IJ']
matches[2009]['CN']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,4,shift=0,nrows=4)

In [68]:
sheetname=['IF']
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,point_shift=0,nrows=5)

In [69]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [70]:
sheetname=['ES']
column_keys={'match_type':1,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=5
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)
shift=11
matches[2009]['CN']=matches[2009]['CN']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,7,shift=shift)

CR


In [71]:
filename='rawdata/2009/CR/2009.04.04 - CR - Budeasa - print.xlsx'
sheetname=['IJ']
matches[2009]['CR']=kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,12,shift=1,point_shift=0,nrows=5)

In [72]:
sheetname=['IF']
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_table_oneliner(filename,
                                sheetname,13,shift=1,point_shift=0,nrows=6)

In [73]:
sheetname=['IM']
column_keys={'match_type':0,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=5
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)
shift=11
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,6,shift=shift)

In [74]:
sheetname=['ES']
column_keys={'match_type':1,'aka':{'name':1,'point1':2},
             'shiro':{'name':5,'point1':4},'outcome':3}
shift=-1
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)
shift=5
matches[2009]['CR']=matches[2009]['CR']+\
                kendo_romania.get_matches_from_list(filename,sheetname,column_keys,8,shift=shift)

Clean up points, matches, player names


In [75]:
def match_cleaner(year,match):
    kind,phase='Unknown','Unknown'
    if '#' in match:
        stage0=match.split('#')[0].lower()
        stage1=match.split('#')[1]
        if 'pool' in stage1: 
            phase='Pool'
        if 'Pool' in stage1: 
            phase='Pool'
        elif 'prel' in stage1: 
            phase='Prelim.'
        elif 'Prel' in stage1: 
            phase='Prelim.'
        elif 'layoff' in stage1: 
            phase='Prelim.'
        elif '- F' in stage1: 
            phase='Finals'
        elif 'F -' in stage1: 
            phase='Finals'
        elif 'Final' in stage1: 
            phase='Finals'
        elif 'SF' in stage1: 
            phase='Finals'
        elif 'QF' in stage1: 
            phase='Finals'
        elif 'A'==stage1: phase='Pool'
        elif 'B'==stage1: phase='Pool'
        elif 'C'==stage1: phase='Pool'
        elif 'D'==stage1: phase='Pool'
        elif 'E'==stage1: phase='Pool'
        elif 'F'==stage1: phase='Pool'
        elif 'G'==stage1: phase='Pool'
        elif 'H'==stage1: phase='Pool'
        elif 'I'==stage1: phase='Pool'
        elif 'J'==stage1: phase='Pool'
        elif 'K'==stage1: phase='Pool'
        elif 'L'==stage1: phase='Pool'
        elif 'M'==stage1: phase='Pool'
        elif 'N'==stage1: phase='Pool'
        elif 'O'==stage1: phase='Pool'
        elif 'P'==stage1: phase='Pool'
        elif 'Q'==stage1: phase='Pool'
        elif 'R'==stage1: phase='Pool'
        elif 'S'==stage1: phase='Pool'
        elif 'T'==stage1: phase='Pool'
        
        if 'IS' in stage1:
            kind="Senior's Individual"
        elif 'IF' in stage1:
            kind="Women's Individual"
        elif 'IM' in stage1:
            kind="Men's Individual"
        elif 'IC' in stage1:
            kind="Children's Individual"
        elif 'IJ' in stage1:
            kind="Junior's Individual"
        elif 'EJ' in stage1:
            kind="Junior's Team"
        elif 'EF' in stage1:
            kind="Men's Team"
        elif 'ES' in stage1:
            kind="Senior's Team"
            
        if 'individual masculin.' in stage0:
            kind="Men's Individual"
        if 'echipe.' in stage0:
            kind="Mixed Team"
        if 'individual juniori' in stage0:
            kind="Junior's Team"
        if 'individual feminin' in stage0:
            kind="Junior's Team"
        if 'individual veterani' in stage0:
            kind="Senior's Team"
        if 'male team' in stage0:
            kind="Men's Team"
        if 'junior 1 individual' in stage0:
            kind="Junior's Individual"
        if 'junior 2 individual' in stage0:
            kind="Junior's Individual"
        
    elif match=='F':
        kind="Women's Individual"
    elif match=='M':
        kind="Men's Individual"
    elif match=='J':
        kind="Junior's Individual"
    elif match=='SF_s':
        kind="Women's Individual"
    elif match=='SM_s':
        kind="Men's Individual"
    elif match=='J_s':
        kind="Junior's Individual"
    
    if kind=='Unknown':
        category='Unknown'
        teams='Unknown'
    else:
        category=kind.split(' ')[0][:-2]
        teams=kind.split(' ')[1]
    if year<2014: 
        category=category.replace('Senior','Men')
    if year in [2018]: 
        category=category.replace('Senior','Men')
    return category,teams,phase

Load names


In [559]:
members=pd.read_csv('data/members.csv')

In [560]:
members.head()


Out[560]:
Unnamed: 0 active age birth club dan ekf gen name pretty_club year
0 0 Activ 23.0 1991-12-27 TAI 1 RO.00205 M Abrudan Dorin-Ștefan Taiken 2015
1 1 Activ 34.0 1980-05-11 TAI 2 RO.00133 M Alexa Ionel-Aliodor Taiken 2015
2 2 Activ 29.0 1985-01-13 TAI 0 RO.00212 M Caila Răducu-Ciprian Taiken 2015
3 3 Activ 24.0 1990-02-05 TAI 2 RO.00152 F Caliniuc Ionela Taiken 2015
4 4 Inactiv 16.0 1998-06-03 TAI 0 RO.00191 M Cristea Andrei Taiken 2015

In [561]:
name_exceptions={'Atanasovski':'Atanasovski A. (MAC)',
                 'Dobrovicescu (SON)':'Dobrovicescu T. (SON)',
                 'Ianăș':'Ianăș F.',
                 'Crăciun (Tamang) Sujata':'Crăciun S.',
                 'Crăciun (Tamang) Sujata':'Crăciun S.',
                 'Dinu (Ioniță) Claudia-Andreea':'Dinu A.',
                 'Arabadjiyski': 'Arabadjiyski A.',
                 'Mandia':'Mandia F.',
                 'Stanev':'Stanev A.',
                 'Mochalov':'Mochalov O.',
                 'Sozzi':'Sozzi A.',
                 'Crăciunel':'Crăciunel I.',
                 'Craciunel':'Crăciunel I.',
                 'Sagaev':'Sagaev L.',
                 'Buzás':'Búzás C.',
                 'Csala':'Csala D.',
                 'Dimitrov':'Dimitrov M.',
                 'Józsa':'Józsa L.',
                 'Creangă':'Creangă A.',
                 'Duțescu':'Duțescu M.',                 
                 'Furtună':'Furtună G.',
                 'Gârbea':'Gârbea I.',
                 'Stupu':'Stupu I.',
                 'Mahika-Voiconi':'Mahika-Voiconi S.',
                 'Mahika':'Mahika-Voiconi S.',
                 'Stanciu':'Stanciu F.',
                 'Vrânceanu':'Vrânceanu R.',
                 'Wolfs':'Wolfs J.',
                 'Ducarme':'Ducarme A.',
                 'Sbârcea':'Sbârcea B.',
                 'Mocian':'Mocian A.',
                 'Hatvani':'Hatvani L.',
                 'Dusan':'Dusan N.',
                 'Borota':'Borota V.',
                 'Tsushima':'Tsushima K.',
                 'Tráser':'Tráser T.',
                 'Colțea':'Colțea A.',
                 'Brîcov':'Brîcov A.',
                 'Yamamoto':'Yamamoto M.',
                 'Crăciun':'Crăciun D.'}
redflags_names=['-','—','—',np.nan,'. ()','— ','- -.','- -. (-)',
                'Kashi','Sankon','București','Victorii:','Sakura','Taiken','Ikada','Sonkei','CRK','Museido',
                'Ichimon','Bushi Tokukai 1','Competitori – Shiai-sha','Echipa - roşu','Numele şi prenumele',
                'Victorii:','Victorii: 0','Victorii: 1','Victorii: 2','Victorii: 3','Victorii: 4',
                'Victorii: 5','?','Kyobukan','2/5','2/6','3/8','Finala','Kyobukan (0/0/0)','―',
                '(clasament final după meci de baraj)','CRK (Bucuresti)','Kaybukan','Isshin (Cluj)',
                'Ikada (Bucureşti)','Kyobukan (Braşov)','Puncte:','KASHI','Budoshin','Isshin',
                '— (—)','4. B.','4. Baraj: Stupu M - Hostina','4. Baraj: Moise KM - Korenschi M',
               'Bushi Tokukai (2/8/17)','CRK 2 (1/6/14)', 'CRK 2','CRK 1','Loc I.:',
               'Bushi Tokukai 2 (M Ciuc)','Echipa suport']
redflags_names2=['Bushi Tokukai','Eliminatoriu','finala','Finala','Fianala','Ikada','Ichimon','Pool',
                'Locul ','Lotul ','Loc ','Grupa ','Isshin','Meciul ','Victorii:']
name_equals={'Chirea M.':'Chirea A.',
            'Ghinet C.':'Ghineț C.',
            'Domnița M.':'Domniță M.',
            'Garbea I.':'Gârbea I.',
            'Ah-hu W.':'Ah-hu S.',
            'Horvát M.':'Horváth M.',
            'Ionita A.':'Ioniță A.',
            'Medvedschi I.':'Medvețchi I.',
            'Mahika S.':'Mahika-Voiconi S.',
            'Mate L.':'Máté L.',
            'Stupu I.':'Stupu A.',
            'Ah-Hu S.':'Ah-hu S.',
            'Alexa I.':'Alexa A.',
            'Angelescu M.':'Angelescu M.',
            'Apostu D.':'Apostu T.',
            'Brâcov A.':'Brîcov A.',
            'Catoriu D.':'Cantoriu D.',
            'Călina A.':'Călina C.',
            'Buzás C.':'Búzás C.',
            'Korenshi E.':'Korenschi E.',
            'Pleșa R.':'Pleșea R.',
            'Galos A.':'Galoș A.',
            'Győrfi G.':'Györfi G.',
            'Győrfi S.':'Györfi S.',
            'Ghineț G.':'Ghineț C.',
            'Hostina E.':'Hoștină E.', 
            'Hostină E.':'Hoștină E.', 
            'Ianăs F.':'Ianăș F.',
            'Lacatus M.':'Lăcătuș M.',
            'Máthé L.':'Máté L.',
            'Nastase M.':'Năstase E.',
            'Oprisan A.':'Oprișan A.',
            'Pârlea A.':'Pîrlea A.',
            'Sabau D.':'Sabău D.',
            'Spriu C.':'Spiru C.',
            'Stănculascu C.':'Stănculescu C.',
            'Vrânceanu M.': 'Vrânceanu L.',
            'Wasicek V.':'Wasicheck W.',
            'Wasicsec W.':'Wasicheck W.',
            'Wasicsek W.':'Wasicheck W.',
            'Zolfoghari A.':'Zolfaghari A.'}
name_doubles={
        'Cristea Cristina':'Cristea Cr.', 
        'Cristea Călin-Ștefan':'Cristea Că.',
        'Sandu Marius-Cristian':'Sandu Mar.', 
        'Sandu Matei-Serban':'Sandu Mat.',
        'Georgescu Andrei':'Georgescu An.', 
        'Georgescu Alexandra':'Georgescu Al.',
        'Péter Csongor':'Péter Cso.', 
        'Péter Csanád':'Péter Csa.',
        'Luca Mihnea':'Luca Mihn.', 
        'Luca Mihai-Cătălin':'Luca Miha.',
        'Luca':'Luca Miha.',
        'Luca M':'Luca Miha.',
        'Luca M.':'Luca Miha.',
        'Luca Mihai':'Luca Miha.',
        'Luca Traian-Dan':'Luca Tr.', 
        'Luca Tudor':'Luca Tu.',
        'Canceu Anamaria':'Canceu An.', 
        'Canceu Adriana-Maria':'Canceu Ad.',
        'Cioată Daniel-Mihai':'Cioată M.', 
        'Cioată Dragoș':'Cioată D.',
        'Burinaru Alexandra':'Burinaru Al.', 
        'Burinaru Andreea':'Burinaru An.',
        'Dudaș Francisc Andrei':'Dudaș F.', 
        'Dudaș Francisc':'Dudaș F.'}

letter_norm={'ţ':'ț','ş':'ș','Ş':'Ș'}
def name_cleaner(name):
    if name in name_doubles:
        return name_doubles[name]
    else:
        for letter in letter_norm:
            name=name.replace(letter,letter_norm[letter])
        if name not in name_exceptions:
            nc=name.replace('  ',' ').split('(')    
        else:
            nc=name_exceptions[name].split('(')
        rname=nc[0].strip()
        rnames=rname.split(' ')
        sname=rnames[0]+' '+rnames[1][0]+'.'
        if sname in name_equals:
            sname=name_equals[sname]

        if sname in name_doubles:
            print(name,sname)
            return sname
        else:    
            return sname

In [588]:
def name_ok(name):
    if name==np.nan: return False
    if str(name)=='nan': return False
    if name not in redflags_names:
        if np.array([i not in name for i in redflags_names2]).all():
            return True
    return False

Standardize names


In [589]:
all_players={}
all_players_r={}
all_players_unsorted=set()
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            for color in ['aka','shiro']:
                name=match[color]['name']
                all_players_unsorted.add(name)
                if name_ok(name):
                    name=name_cleaner(name)
                    rname=match[color]['name']
                    if rname not in all_players_r:all_players_r[rname]=name
                    if name not in all_players: all_players[name]={}
                    if year not in all_players[name]:all_players[name][year]={'names':set()}
                    all_players[name][year]['names'].add(rname)

In [590]:
name_linker={}
for i in members.index:
    name=members.loc[i]['name']
    try:
        cname=name_cleaner(name)
    except:
        print(name)
    if cname not in name_linker:name_linker[cname]=set()
    name_linker[cname].add(name)

In [591]:
names_abbr={}
for name in name_linker:
    if len(name_linker[name])>1:
        #only for dev to create exceptions for duplicate person names.
        print(name,name_linker[name])
    for i in name_linker[name]:
        names_abbr[i]=name


Dudaș F. {'Dudaș Francisc Andrei', 'Dudaș Francisc'}

In [592]:
names_abbr_list=[]
name_abbr2long={}
name_abbr2club={}
for i in members.index:
    name=members.loc[i]['name']
    club=members.loc[i]['club']
    year=members.loc[i]['year']
    names_abbr_list.append(names_abbr[name])
    name_abbr2long[names_abbr[name]]=name
    if names_abbr[name] not in name_abbr2club:name_abbr2club[names_abbr[name]]={}
    name_abbr2club[names_abbr[name]][year]=club

In [593]:
members['name_abbr']=names_abbr_list

In [594]:
all_shinpan={}
all_shinpan_r={}
all_shinpan_unsorted=set()
for year in matches:
    for competition in matches[year]:
        for match in matches[year][competition]:
            if 'shinpan' in match:
                for color in ['fukushin1','shushin','fukushin2']:
                    aka=match['aka']['name']
                    shiro=match['shiro']['name']
                    if (name_ok(aka)) and\
                       (name_ok(shiro)) and\
                       (name_cleaner(aka) in all_players) and\
                       (name_cleaner(shiro) in all_players):
                        rname=match['shinpan'][color]
                        all_shinpan_unsorted.add(rname)
                        if name_ok(rname):
                            name=name_cleaner(rname)
                            if name not in all_shinpan: all_shinpan[name]=[] 
                            all_shinpan[name].append(match)
                            if rname not in all_shinpan_r:all_shinpan_r[rname]=name

In [595]:
name_abbr2long_extends={
    'Ishikubo S.':'Ishikubo Shinichi',
    'Yamamoto M.':'Yamamoto M.',
    'Wolfs J.':'Wolfs Jan Claude',
    'Tsushima K.':'Tsushima Kanji',
    'Ducarme A.':'Ducarme Alain',
    'Tráser T.':'Tráser Tamás',
    'Borota B.':'Borota B.',
    'Arabadjiyski A.':'Arabadjiyski Alexandar',
    'Csala T.':'Csala Tibor',
    'Sagaev L.':'Sagaev Lubomir',
    'Hatvani L.':'Hatvani Lóránt',
    'Dusan N.':'Dusan N',
    'Borota V.':'Borota Vladimir',
    'Mandia F.':'Mandia Fabrizio',
    'Stanev A.':'Stanev A.',
    'Mochalov O.':'Mochalov O.',
    'Sozzi A.':'Sozzi A.',
    'Dimitrov M.':'Dimitrov M.'
}    
for i in all_shinpan.keys():
    if i not in name_abbr2long:
        name_abbr2long[i]=name_abbr2long_extends[i]

Infer clubs


In [596]:
#naive infer
redflags_clubs=['','N/A','RO1','RO2']
club_equals={'MLD':'MOL/Md',
             'MOL':'MOL/Md',
             'IKD':'IKA',
             'HUN':'HUN/Hu',
             'BUL':'BUL/Bg',
             'TUR':'TUR/Tr',
             'MAC':'MAC/Mc',
             'MNE':'MNE/Mn',
             'SRB':'SRB/Sr',
             'ITA':'ITA/It',
             'ISS':'ISH',
             'Musso, Bg':'MUS/Bg',
             'Makoto, Sr':'MAK/Sr',
             'Szeged, Hu':'SZE/Hu'}
for name in all_players:
    for year in all_players[name]:
        for name_form in all_players[name][year]['names']:
            if '(' in name_form:
                club=name_form.split('(')[1].strip()[:-1]
                if club in club_equals: club=club_equals[club]
                if club not in redflags_clubs:
                    all_players[name][year]['club']=club

In [597]:
for name in all_players:
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #more than 1 year?
            years=np.sort(list(all_players[name].keys()))
            if len(years)>1:
                #get club from previous year
                for y in range(years[0],year):
                    if y in all_players[name]:
                        if 'club' in all_players[name][y]:
                            all_players[name][year]['club']=all_players[name][y]['club']
                #if still not found, get club from next year
                if 'club' not in all_players[name][year]:
                    #get club from next year
                    for y in np.arange(years[-1],year,-1):
                        if y in all_players[name]:
                            if 'club' in all_players[name][y]:
                                all_players[name][year]['club']=all_players[name][y]['club']

In [598]:
for name in all_players:
    if name not in name_abbr2long:
        #infer using longest available name
        names={len(j):j  for i in all_players[name] for j in all_players[name][i]['names']}
        if len(names)>0:
            inferred_name=names[max(names.keys())]
            if '(' in inferred_name:
                inferred_name=inferred_name[:inferred_name.find('(')-1]
            print(name,inferred_name)
            name_abbr2long[name]=inferred_name
        else:
            print(name,all_players[name])


Benedek L. Benedek Laszlo
Illyés A. Illyés Attila
Leat M. Leat Mihaela
Preda A. Preda Alexandru
Salló Z. Salló Zoltán
Alexandrescu N. Alexandrescu Nic.
Macavei A. Macavei Aurel
Grossu D. Grossu Dragos
Bíró S. Bíró Sándor
Rotaru V. Rotaru V.
Macavei I. Macavei I.
Nagy V. Nagy Vlad
Neagu F. Neagu Florenţa
Mehelean L. Mehelean Ligia
Dumbravă L. Dumbravă Lucian
Bódi Z. Bódi Zsolt
Bumbu D. Bumbu Dorin
Botean A. Botean Adrian-Ioan
Xantopol C. Xantopol Claudiu
Jeszenszki T. Jeszenszki Tamás
Moldoveanu M. Moldoveanu  Mihai
Balázs S. Balázs Sándor
Perdi L. Perdi Levente
Pienaru S. Pienaru Sorin
Bojan V. Bojan Vladimir
Iordan R. Iordan Relu
Constantinescu A. Constantinescu Andrei
Nechifor C. Nechifor Cristian
Chiric D. Chiric Dragoș
Zinculescu T. Zinculescu Tiberiu
Cerneavschi A. Cerneavschi Adrian
Oprișan A. Oprişan Alexandru
Cerchez I. Cerchez Iuliana
Ștefan C. Ștefan Cristina
Blaj V. Blaj Valentin
Sandache I. Sandache Ionuţ
Bojan I. Bojan Ioachim
Leca F. Leca Florin
Pall D. Pall David
Zolfaghari A. Zolfoghari Anahita
Ionescu I. Ionescu Ilinca
Crișan E. Crişan Eugen
Zainea I. Zainea Ionuţ
Dițu A. Dițu A.
Andrei O. Andrei Ovidiu
Cristea C. Cristea Călin
Bărbulescu E. Bărbulescu Eduard
Turdean S. Turdean Sergiu
Georgescu B. Georgescu Bogdan
Georgescu R. Georgescu Radu
Wasicheck W. Wasicheck Werner
Martin M. Martin Mihai
Georgescu A. Georgescu A.
Constantina A. Constantina Alexandru
Keresztes M. Keresztes Mátyás
Galoș A. Galoş Alin
Albert V. Albert V.
Lăcătuș M. Lăcătuş Mihai
Leca L. Leca Lorin
Pavel A. Pavel Alexandra
Parlea A. Parlea Andrei
Crețiu M. Crețiu Matei
Crețiu T. Crețiu Tudor
Sânpetru R. Sânpetru Raul
Zaporojan R. Zaporojan Radu Ovidiu
Moldoveanu A. Moldoveanu Alexandru
Szikszai M. Szikszai Mihály
Moise T. Moise Tudor
Chiper I. Chiper Ioan
Ștefu L. Ștefu Laurențiu
Mitelea C. Mitelea Călin
Cosma F. Cosma Florin
Tiron L. Tiron Laurenţiu
Pîrlea A. Pîrlea Andrei Daniel
Ungureanu A. Ungureanu Alexandru Cătălin
Canceu A. Canceu Adriana
Duicu T. Duicu Tudor
Jianu A. Jianu Alexandru
Tamang S. Tamang Sujata
Bonta T. Bonta Tudor
Sandu M. Sandu Marius
Luca T. Luca Traian
Angelescu M. Angelescu Marcel
Ioniță A. Ioniță Andreea
Măgirdicean R. Măgirdicean Răzvan Ionuț
Tudor-Duicu C. Tudor-Duicu Cătălin
Hassan D. Hassan Dariush
Vasile C. Vasile Codrin
Burinaru A. Burinaru Alexandra
Constantin D. Constantin Doru
Medvețchi I. Medvedschi I.
Oncea V. Oncea Vitalie
Ulinici A. Ulinici Andrian
Soloviov M. Soloviov M.
Guțu E. Guţu Eugeniu
Pantaz A. Pantaz Alexandru
Sidelnicov I. Sidelnicov Iulii
Kim A. Kim Alexei
Zugravu G. Zugravu Gheorghe
Dudca I. Dudca Ion
Nastasiu A. Nastasiu Anatol
Grigoraș L. Grigoraș L.
Hentea L. Hentea L.
Péter C. Péter C.
Jovanović D. Jovanović D.
Dolan S. Dolan S.
Novakovic A. Novakovic A.
Demircioglu B. Demircioglu B.
Jeličić M. Jeličić M.
Aydogdu C. Aydogdu C.
Savić S. Savić S.
Tekelioglu B. Tekelioglu B.
Nikolić D. Nikolić D.
Engin C. Engin C.
Ivanov A. Ivanov A.
Nikolaev N. Nikolaev Nikolay
Jović M. Jović M.
Jovanović A. Jovanović A.
Kurtulus E. Kurtulus E.
Sljivancanin M. Sljivancanin M.
Keskovski V. Keskovski V.
Petricevic D. Petricevic D.
Ivanovski M. Ivanovski M.
Andrijasevic M. Andrijasevic M.
Petreski A. Petreski A.
Vucinic A. Vucinic A.
Atanasovski A. Atanasovski A.
Djuraskovic D. Djuraskovic D.
Keskovski Z. Keskovski Z.
Petrovski G. Petrovski G.
Ivancevic M. Ivancevic M.
Kabaktchiev G. Kabaktchiev Georgi
Vodenicharov K. Vodenicharov Kostadin
Molinger B. Molinger Bogdan
Movatz E. Movatz E.
Taralunga D. Taralunga D.
Țarălungă D. Țarălungă D.
Dinu C. Dinu C.
Murat I. Murat I.
Madzharova B. Madzharova B.
Kasnakova D. Kasnakova D.
Hadjieva R. Hadjieva R.
Miglena H. Miglena H.
Ivanov K. Ivanov K.
Krastev I. Krastev I.
Gutu E. Gutu E.
Markov Y. Markov Y.

In [599]:
for name in all_players:
    years=np.sort(list(all_players[name].keys()))
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #get from list
            if name in name_abbr2club:
                minyear=min(name_abbr2club[name].keys())
                if year in name_abbr2club[name]:
                    all_players[name][year]['club']=name_abbr2club[name][year]
                elif year<minyear:
                    all_players[name][year]['club']=name_abbr2club[name][minyear]
            elif len(years)>1:
                #get club from previous year
                for y in range(years[0],year):
                    if y in all_players[name]:
                        if 'club' in all_players[name][y]:
                            all_players[name][year]['club']=all_players[name][y]['club']
                #if still not found, get club from next year
                if 'club' not in all_players[name][year]:
                    #get club from next year
                    for y in np.arange(years[-1],year,-1):
                        if y in all_players[name]:
                            if 'club' in all_players[name][y]:
                                all_players[name][year]['club']=all_players[name][y]['club']

In [600]:
clubs_manual={
            'Balázs-Kercsó Z.':'BTK',
            'Nagy V.':'ISH',
            'Goró L.':'BTK',
            'Ghineț G.':'YUK',
            'Cioată E.':'KAS',
            'Leat M.':'IKA',
            'Perianu S.':'KNS',
            'Ah-hu S.':'ICH',
            'Preda A.':'CRK',
            'Salló Z.':'BTK',
            'András Z.':'BTK',
            'Bíró S.':'BTK',
            'Neagu F.':'IKA',
            'Bódi Z.':'KYO',
            'Bumbu D.':'ISH',
            'Botean A.':'ISH',
            'Moldoveanu M.':'ISH',
            'Jeszenszki T.':'BTK',
            'Suru N.':'SAM',
            'Balázs S.':'BTK',
            'Perdi L.':'ISH',
            'Oprișan A.':'IKA',
            'Horváth D.':'BTK',
            'Sandache I.':'BTK',
            'Moise T.':'KAY',
            'Angelescu M.':'SAM',
            'Bărbulescu E.':'MUS',
            'Canceu A.':'KAS',
            'Crișan E.':'ISH',
            'Duicu T.':'KAS',
            'Dumbravă L.':'ISH',
            'Iordan R.':'IKA',
            'Jianu A.':'MUS',
            'Keresztes M.':'BTK',
            'Macavei I.':'KYO',
            'Mitelea C.':'ICH',
            'Pavel A.':'IKA',
            'Pienaru S.':'ISH',
            'Szikszai M.':'BTK',
            'Tamang S.':'SAM',
            'Tiron L.':'KNS',
            'Turdean S.':'KAS',
            'Wasicheck W.':'ISH',
            'Ștefan C.':'IKA'
}

In [601]:
club_errors=[]
for name in all_players:
    for year in all_players[name]:
        if 'club' not in all_players[name][year]:
            #if still not found, print error, infer other way
            if name in clubs_manual:
                all_players[name][year]['club']=clubs_manual[name]
            else:
                all_players[name][year]['club']='XXX'
                club_errors.append(name)

In [602]:
set(club_errors)


Out[602]:
{'Albert V.',
 'Alexandrescu N.',
 'Andrei O.',
 'Blaj V.',
 'Bonta T.',
 'Cerchez I.',
 'Cerneavschi A.',
 'Chiper I.',
 'Chiric D.',
 'Constantin D.',
 'Constantina A.',
 'Constantinescu A.',
 'Cosma F.',
 'Crețiu M.',
 'Crețiu T.',
 'Galoș A.',
 'Georgescu B.',
 'Georgescu R.',
 'Grossu D.',
 'Hassan D.',
 'Ionescu I.',
 'Lăcătuș M.',
 'Macavei A.',
 'Martin M.',
 'Mehelean L.',
 'Moldoveanu A.',
 'Nechifor C.',
 'Parlea A.',
 'Pîrlea A.',
 'Rotaru V.',
 'Sânpetru R.',
 'Ungureanu A.',
 'Xantopol C.',
 'Zainea I.',
 'Zinculescu T.',
 'Zolfaghari A.'}

In [603]:
clubs={}
for name in all_players:
    for year in all_players[name]:
        club=all_players[name][year]['club'] 
        if club not in clubs:clubs[club]={}
        if year not in clubs[club]:clubs[club][year]=set()
        clubs[club][year].add(name)

In [604]:
def outcome_cleaner(outcome):
    if outcome=='E': return True
    else: return False

In [605]:
def outcome_from_points(aka,shiro):
    if aka==shiro: return 'X',0
    elif aka>shiro: return 'A',str(aka-shiro)
    else: return 'S',str(shiro-aka)

In [606]:
redflags_points=['Puncte']
def point_clean1(point):
    return point.replace('○','O').replace('I','H').replace('×','')\
            .replace('–','').replace('1','O').replace('—','').replace('?','')
def points_cleaner(points):
    hansoku=0
    if '∆' in points:
        hansoku=1
        points=points.replace('∆','')
    if '▲' in points:
        hansoku=1
        points=points.replace('▲','')
    if '(Ht)' in points:
        hansoku=1
        points=points.replace('(Ht)','')
    if '(victorie)' in points:
        points=points.replace('(victorie)','OO')
    points=points.strip()
    if len(points)>2:
        print(points,'error')
    elif len(points)>1:
        point1=points[0]
        point2=points[1]
    elif len(points)>0:
        point1=points[0]
        point2=''
    else:
        point1=''
        point2=''
    point1=point_clean1(point1)
    point2=point_clean1(point2)
    return point1,point2,len(points),hansoku

In [607]:
def club_cleaner(club):
    if '/' in club:
        return club.split('/')[0],club.split('/')[1].upper()
    else:
        return club,'RO'

In [608]:
pretty_clubs={'ARA':'Arashi', 'BSD':'Bushido', 'BTK':'Bushi Tokukai', 'BG':'Bulgaria',
              'CDO':'Coroan de Oțel', 'CRK':'Clubul Român de Kendo', 'HAR':'Hargita', 
              'ICH':'Ichimon', 'IKA':'Ikada','ISH':'Ishhin', 'IT':'Italy','HU':'Hungary',
              'KAS':'Kashi', 'KNS':'Kenshin', 'KYO':'Kyobukan', 'MC':'Macedonia',
              'SR':'Serbia', 'MN':'Montenegro', 'MD':'Moldova', 'MUS':'Museido', 
               'RON':'Ronin-do', 'SAK':'Sakura', 'SAM':'Sam-sho','SAN':'Sankon', 'SBK':'Sobukan',
               'SON':'Sonkei', 'SR':'Serbia', 'TAI':'Taiken', 'TR':'Turkey', 'XXX':'Unknown',
               'YUK':'Yu-kai','KAY':'Kaybukan'}
def pretty_club(club, country):
    if country!='RO':
        return pretty_clubs[country]
    else: return pretty_clubs[club]

In [609]:
master_matches=[]
for year in matches:
    for competition in matches[year]:
        print(year,competition)
        for k in matches[year][competition]:
            good=True
            match={'year':year,'competition':competition}
            match['match_category'],match['match_teams'],match['match_phase']=match_cleaner(year,k['match_type'])
            if 'shinpan' in k:
                for color in ['fukushin1','shushin','fukushin2']:
                    if color in k['shinpan']:
                        if k['shinpan'][color] in all_shinpan_r:
                            match[color]=all_shinpan_r[k['shinpan'][color]]
            aka=k['aka']['name']
            shiro=k['shiro']['name']
            if (name_ok(aka)) and\
               (name_ok(shiro)) and\
               (name_cleaner(aka) in all_players) and\
               (name_cleaner(shiro) in all_players):
                for a in ['aka','shiro']:
                    points=''
                    for h in k[a]:
                        if h=='name':
                            name=k[a][h]
                            match[a+' name']=all_players_r[name]
                            club, country=club_cleaner(all_players[match[a+' name']][year]['club'])
                            match[a+' club'], match[a+' country']=club, country
                            match[a+' pretty_name']=name_abbr2long[all_players_r[name]]
                            match[a+' pretty_club']=pretty_club(club, country)
                        else:
                            point=k[a][h]
                            if str(point)=='nan': point=''
                            points=points+point
                    for redflag in redflags_points:
                        if redflag in points:
                            good=False
                    if good:
                        match[a+' point1'],match[a+' point2'],match[a+' points'],match[a+' hansoku']=points_cleaner(points)
            else:
                good=False                
            if good:
                if 'outcome' in k:
                    match['encho']=outcome_cleaner(k['outcome'])
                else: 
                    match['encho']=False
                match['winner'],match['difference']=outcome_from_points(match['aka points'],match['shiro points'])

                master_matches.append(match)


2009 CN
2009 CR
2010 CR
2010 CN
2011 CN
2011 CR
2012 CN
2012 CR
2013 CN
2013 CR
2013 SL
2014 SL
2014 CR
2014 CN
2015 SL
2015 CN
2015 CR
2016 SL
2016 CN
2016 CR
2017 CN
2017 CR
2017 SL
2018 CR
2018 SL

In [610]:
data=pd.DataFrame(master_matches)

Cleanup


In [611]:
data['aka hansoku']=data['aka hansoku'].replace(0,'').replace(1,'Δ')
data['shiro hansoku']=data['shiro hansoku'].replace(0,'').replace(1,'Δ')

In [612]:
data.to_csv('data/matches.csv')

Group by player


In [613]:
aka=data[[i for i in data.columns if 'shiro ' not in i]]
aka.columns=[i.replace('aka ','') for i in aka.columns]
aka['color']='aka'
aka['opponent']=data['shiro name']


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.

In [614]:
shiro=data[[i for i in data.columns if 'aka ' not in i]]
shiro.columns=[i.replace('shiro ','') for i in shiro.columns]
shiro['color']='shiro'
shiro['opponent']=data['aka name']


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.

In [615]:
extended_matches=pd.concat([aka,shiro],axis=0).reset_index(drop=True)


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  """Entry point for launching an IPython kernel.

In [616]:
extended_matches.head()


Out[616]:
club color competition country difference encho fukushin1 fukushin2 hansoku match_category ... name opponent point1 point2 points pretty_club pretty_name shushin winner year
0 CRK aka CN RO 1 False NaN NaN Unknown ... Chirea V. Benedek L. M 1 Clubul Român de Kendo Chirea Valentin-Gabriel NaN A 2009
1 CRK aka CN RO 2 False NaN NaN Unknown ... Chirea V. Chirea A. 0 Clubul Român de Kendo Chirea Valentin-Gabriel NaN S 2009
2 CRK aka CN RO 2 False NaN NaN Unknown ... Chirea V. Illyés A. 0 Clubul Român de Kendo Chirea Valentin-Gabriel NaN S 2009
3 BTK aka CN RO 1 False NaN NaN Unknown ... Benedek L. Chirea A. K K 2 Bushi Tokukai Benedek Laszlo NaN A 2009
4 BTK aka CN RO 1 False NaN NaN Unknown ... Benedek L. Illyés A. 0 Bushi Tokukai Benedek Laszlo NaN S 2009

5 rows × 22 columns


In [617]:
extended_matches.to_csv('data/extended_matches.csv')

In [618]:
p1=extended_matches[[i for i in extended_matches.columns if i!='point2']]
p2=extended_matches[[i for i in extended_matches.columns if i!='point1']]
p1.rename(columns={'point1':'point'}, inplace=True)
p2.rename(columns={'point2':'point'}, inplace=True)


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\pandas\core\frame.py:3781: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)

In [619]:
extended_points=pd.concat([p1,p2],axis=0).reset_index(drop=True)

In [620]:
extended_points.to_csv('data/extended_points.csv')

In [621]:
extended_points.columns


Out[621]:
Index(['club', 'color', 'competition', 'country', 'difference', 'encho',
       'fukushin1', 'fukushin2', 'hansoku', 'match_category', 'match_phase',
       'match_teams', 'name', 'opponent', 'point', 'points', 'pretty_club',
       'pretty_name', 'shushin', 'winner', 'year'],
      dtype='object')

In [622]:
shu=extended_points[[i for i in extended_points.columns if 'fukushin' not in i]]
shu.columns=[i.replace('shushin','shinpan') for i in shu.columns]
fk1=extended_points[[i for i in extended_points.columns if 'shushin' not in i and 'fukushin2' not in i]]
fk1.columns=[i.replace('fukushin1','shinpan') for i in fk1.columns]
fk2=extended_points[[i for i in extended_points.columns if 'shushin' not in i and 'fukushin1' not in i]]
fk2.columns=[i.replace('fukushin2','shinpan') for i in fk2.columns]
extended_shinpan=pd.concat([shu,fk1,fk2],axis=0).reset_index(drop=True)


C:\Users\csala\AppData\Local\Continuum\anaconda2\envs\python3\lib\site-packages\ipykernel_launcher.py:7: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  import sys

In [623]:
extended_shinpan.to_csv('data/extended_shinpan.csv')

In [624]:
extended_shinpan.columns


Out[624]:
Index(['club', 'color', 'competition', 'country', 'difference', 'encho',
       'hansoku', 'match_category', 'match_phase', 'match_teams', 'name',
       'opponent', 'point', 'points', 'pretty_club', 'pretty_name', 'shinpan',
       'winner', 'year'],
      dtype='object')

Competitor statistics


In [625]:
competitors={}
for i in data.T.iteritems():
    for a in ['aka ','shiro ']:
        name=i[1][a+'name']
        club=i[1][a+'club']
        if name not in competitors:
            competitors[name]={'U':0,'club':club}
        for j in ['point1','point2']:
            point=i[1][a+j] 
            if point!='':
                if point not in competitors[name]:competitors[name][point]=0
                competitors[name][point]+=1
        competitors[name]['U']+=1

In [626]:
data2=pd.DataFrame(competitors)

In [627]:
data2.T.to_csv('data/competitors.csv')

In [628]:
len(extended_matches),len(extended_points),len(extended_shinpan)


Out[628]:
(6398, 12796, 38388)

In [631]:
members[members['name']=='Búzás Csaba']


Out[631]:
Unnamed: 0 active age birth club dan ekf gen name pretty_club year name_abbr
786 2 Activ 47.0 1963-04-13 BTK 4 RO.00025 M Búzás Csaba Bushi Tokukai 2011 Búzás C.
793 2 Activ 48.0 1963-04-13 BTK 4 RO.00025 M Búzás Csaba Bushi Tokukai 2012 Búzás C.
801 2 Activ 49.0 1963-04-13 BTK 5 RO.00025 M Búzás Csaba Bushi Tokukai 2013 Búzás C.
810 3 Activ 50.0 1963-04-13 BTK 5 RO.00025 M Búzás Csaba Bushi Tokukai 2014 Búzás C.
820 3 Activ 51.0 1963-04-13 BTK 5 RO.00025 M Búzás Csaba Bushi Tokukai 2015 Búzás C.
835 5 Activ 52.0 1963-04-13 BTK 5 RO.00025 M Búzás Csaba Bushi Tokukai 2016 Búzás C.
860 5 Activ 53.0 1963-04-13 BTK 5 RO.00025 M Búzás Csaba Bushi Tokukai 2017 Búzás C.
885 5 Activ 54.0 1963-04-13 BTK 5 RO.00025 M Búzás Csaba Bushi Tokukai 2018 Búzás C.
905 1 Activ 41.0 1963-04-13 BTK 3 RO.00025 M Búzás Csaba Bushi Tokukai 2005 Búzás C.
911 1 Activ 42.0 1963-04-13 BTK 3 RO.00025 M Búzás Csaba Bushi Tokukai 2006 Búzás C.
917 1 Activ 43.0 1963-04-13 BTK 3 RO.00025 M Búzás Csaba Bushi Tokukai 2007 Búzás C.
923 1 Activ 44.0 1963-04-13 BTK 4 RO.00025 M Búzás Csaba Bushi Tokukai 2008 Búzás C.
929 1 Activ 45.0 1963-04-13 BTK 4 RO.00025 M Búzás Csaba Bushi Tokukai 2009 Búzás C.
935 1 Activ 46.0 1963-04-13 BTK 4 RO.00025 M Búzás Csaba Bushi Tokukai 2010 Búzás C.
942 0 Activ 33.0 1963-04-13 BTK 0 RO.00025 M Búzás Csaba Bushi Tokukai 1997 Búzás C.
943 0 Activ 34.0 1963-04-13 BTK 1 RO.00025 M Búzás Csaba Bushi Tokukai 1998 Búzás C.
946 0 Activ 35.0 1963-04-13 BTK 2 RO.00025 M Búzás Csaba Bushi Tokukai 1999 Búzás C.
949 0 Activ 36.0 1963-04-13 BTK 2 RO.00025 M Búzás Csaba Bushi Tokukai 2000 Búzás C.
952 0 Activ 37.0 1963-04-13 BTK 2 RO.00025 M Búzás Csaba Bushi Tokukai 2001 Búzás C.
955 0 Activ 38.0 1963-04-13 BTK 3 RO.00025 M Búzás Csaba Bushi Tokukai 2002 Búzás C.
958 0 Activ 39.0 1963-04-13 BTK 3 RO.00025 M Búzás Csaba Bushi Tokukai 2003 Búzás C.
961 0 Activ 40.0 1963-04-13 BTK 3 RO.00025 M Búzás Csaba Bushi Tokukai 2004 Búzás C.