In [375]:
import pandas as pd, numpy as np, json
import kendo_romania

Read data


In [376]:
members=pd.read_excel('rawdata/Evidenta membrilor.xlsm',header=[1])

In [377]:
members.head()


Out[377]:
231 Nr. EKF Club Unnamed: 3 Numele Prenumele Gen Data naşterii Locul naşterii, oraş/judeţ/tara CNP ... Data.2 Kendo Iaido Jodo Observații.2 Data.3 Kendo.1 Iaido.1 Jodo.1 Observații.3
0 Activ RO.00205 TAI NaN Abrudan Dorin-Ștefan M 1991-12-27 Cluj-Napoca, Cj, Ro 1.911227e+12 ... 2018-02-23 120.0 NaN NaN NaN NaN NaN NaN NaN NaN
1 Inactiv RO.00083 ICH Transfer: CRK => ICH - 2009 Ah-hu Weizhi Stéphen M 1980-06-12 Saint-Denis, Reunion NaN ... NaT NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 Activ NaN SAM NaN Ailincăi Cătălin M 1970-06-05 Suceava, Sv, Ro 1.700605e+12 ... 2018-05-03 NaN 120.0 NaN Legitimare NaN NaN NaN NaN NaN
3 Activ RO.00133 TAI Transfer: BDS => TAI - 2013\n Alexa Ionel-Aliodor M 1980-05-11 Valea Moldovei, SV, RO 1.800511e+12 ... 2018-02-23 120.0 NaN NaN NaN NaN NaN NaN NaN NaN
4 Activ NaN YUK NaN Ancuța Dan M 2000-12-22 Galaţi, Gl, Ro 5.001222e+12 ... 2018-02-21 90.0 NaN NaN Legitimare NaN NaN NaN NaN NaN

5 rows × 54 columns


In [378]:
members.columns


Out[378]:
Index([                              231,                         'Nr. EKF',
                                  'Club',                      'Unnamed: 3',
                                'Numele',                       'Prenumele',
                                   'Gen',                   'Data naşterii',
       'Locul naşterii, oraş/judeţ/tara',                             'CNP',
                              'practică',                           '1 kyu',
                                 '1 dan',                           '2 dan',
                                 '3 dan',                           '4 dan',
                                 '5 dan',                           '6 dan',
                                 '7 dan',                           '8 dan',
                                'Renshi',                          'Kyoshi',
                                'Hanshi',                         '1 kyu.1',
                               '1 dan.1',                         '2 dan.1',
                               '3 dan.1',                         '4 dan.1',
                               '1 kyu.2',                         '1 dan.2',
                               '2 dan.2',                         '3 dan.2',
                               '4 dan.2',                           'grade',
                               'Telefon',                          'E-mail',
                                'Altele',    'Domiciliu stabil, oraş/judeţ',
                                  'Data',                            'Suma',
                            'Observații',                          'Data.1',
                                'Suma.1',                    'Observații.1',
                                'Data.2',                           'Kendo',
                                 'Iaido',                            'Jodo',
                          'Observații.2',                          'Data.3',
                               'Kendo.1',                         'Iaido.1',
                                'Jodo.1',                    'Observații.3'],
      dtype='object')

In [379]:
members=members[[231,                         'Nr. EKF',
                                  'Club',                      'Unnamed: 3',
                                'Numele',                       'Prenumele',
                                   'Gen',                   'Data naşterii',
                                  '1 kyu','practică',
                                 '1 dan',                           '2 dan',
                                 '3 dan',                           '4 dan',
                                 '5 dan',                           '6 dan',
                                 '7 dan',                           '8 dan']]

In [380]:
members.head()


Out[380]:
231 Nr. EKF Club Unnamed: 3 Numele Prenumele Gen Data naşterii 1 kyu practică 1 dan 2 dan 3 dan 4 dan 5 dan 6 dan 7 dan 8 dan
0 Activ RO.00205 TAI NaN Abrudan Dorin-Ștefan M 1991-12-27 2015-08-08 00:00:00 NaN 2015-12-19 NaT NaT NaT NaT NaT NaN NaN
1 Inactiv RO.00083 ICH Transfer: CRK => ICH - 2009 Ah-hu Weizhi Stéphen M 1980-06-12 NaN NaN 2009-08-01 2010-09-26 NaT NaT NaT NaT NaN NaN
2 Activ NaN SAM NaN Ailincăi Cătălin M 1970-06-05 NaN NaN NaT NaT NaT NaT NaT NaT NaN NaN
3 Activ RO.00133 TAI Transfer: BDS => TAI - 2013\n Alexa Ionel-Aliodor M 1980-05-11 2012-11-18 00:00:00 NaN 2012-12-15 2013-12-21 2017-11-26 NaT NaT NaT NaN NaN
4 Activ NaN YUK NaN Ancuța Dan M 2000-12-22 NaN NaN NaT NaT NaT NaT NaT NaT NaN NaN

In [381]:
def get_transfer(name,tf,verbose=False):
    if tf==[]:
        return tf
    else:
        to_blank=[' ','(',')','Transfer:','?','FDS','/']
        to_replace={'Hungary':'HUN'}
        to_year={'Gușu Rebeca':'2010'}

        def get_tf_clubs(z):
            for t in range(len(to_blank)):
                z=z.replace(to_blank[t],'')
            for t in to_replace:
                z=z.replace(t,to_replace[t])
            if ('=>') in z:
                from_to=z.find('=>')
                to_return={'from':z[from_to-3:from_to],'to':z[from_to+2:from_to+5],'time':z[-4:]}
                if verbose:
                    to_return['orig']=z
            else:
                print('error with transfer',z)
                to_return=z

            ##check years
            #infer year from wrong note order
            if '20' not in to_return['time']:
                if '20' in z:
                    to_return['time']=z[z.find('20'):z.find('20')+4]
            #if still not inferred, then manual fix
            if '20' not in to_return['time']:
                to_return['time']=to_year[name]
            to_return['time']=int(to_return['time'])
            return to_return

        transfers=str(tf).split('\n')
        tfr=[]
        for i in transfers:
            if not i in ('','nan'):
                tfr.append(get_tf_clubs(i))
        return sorted(tfr, key=lambda k: k['time'])

In [382]:
def get_club_by_year(d,club,year):
    if d==[]:
        return [club]
    else:
        years={}
        transfer_years=[mingrade]+[i['time'] for i in d]+[maxyear]
        transfer_clubs=[d[0]['from']]+[i['to'] for i in d]
        for i in range(1,len(transfer_years)):
            for y in range(transfer_years[i-1],transfer_years[i]+1):
                if y not in years:years[y]=[]
                years[y].append(transfer_clubs[i-1])
        return years[year]

In [350]:
def add_to_club(data,club,year,d):
    if club not in data: data[club]={}
    if year not in data[club]:data[club][year]=[]
    data[club][year].append(d)
    return data

In [370]:
pretty_clubs={'ARA':'Arashi', 'BDS':'Budoshin', 'BSD':'Bushido', 'BTK':'Bushi Tokukai', 'BG':'Bulgaria',
              'CDO':'Coroan de Oțel', 'CRK':'Clubul Român de Kendo', 'HAR':'Hargita', 
              'ICH':'Ichimon', 'IKA':'Ikada','ISH':'Ishhin', 'IT':'Italy','HU':'Hungary','HUN':'Hungary',
              'KAS':'Kashi', 'KNS':'Kenshin', 'KYO':'Kyobukan', 'MC':'Macedonia',
              'SR':'Serbia', 'MN':'Montenegro', 'MOL':'Moldova', 'MUS':'Museido', 
              'RON':'Ronin-do', 'SAK':'Sakura', 'SAM':'Sam-sho','SAN':'Sankon', 'SBK':'Sobukan',
              'SON':'Sonkei', 'SR':'Serbia', 'TAI':'Taiken', 'TR':'Turkey','ACKIJ':'ACKIJ',
               'YUK':'Yu-kai','KAY':'Kaybukan'}
club_replacer={'':'KYO'}

In [371]:
data={}
for i in members.T.iteritems():
    grades=i[1][['1 kyu','1 dan','2 dan','3 dan','4 dan','5 dan','6 dan','7 dan','8 dan']].dropna()
    grades0=i[1][['1 dan','2 dan','3 dan','4 dan','5 dan','6 dan','7 dan','8 dan']].dropna()
    df=pd.DataFrame(grades0)
    df.columns=['dan']
    df=df.reset_index().set_index('dan').sort_index()
    #dummy={j:str(grades[j])[:10] for j in grades.index}
    dummy={}
    grades=pd.to_datetime(grades.astype(str))
    if len(grades)>0:
        mingrade=grades.min().year
        maxgrade=grades.max().year
    else:
        mingrade=np.nan
        maxgrade=np.nan
    dummy['name']=i[1]['Numele']+' '+i[1]['Prenumele']
    dummy['birth']=str(i[1]['Data naşterii'])[:10]
    dummy['gen']=i[1]['Gen']
    dummy['ekf']=i[1]['Nr. EKF']
    dummy['active']=i[1][231]
    club=i[1]['Club']
    dummy['transfer']=get_transfer(dummy['name'],i[1]['Unnamed: 3'])
    
    if np.isnan(mingrade):
        mingrade=2016 #default starting year
    maxyear=2019 #default max year
    
    for year in range(mingrade,maxyear):
                
        #get year from exams
        dummy['dan']=len(df[:str
                            (year)])
        #get club from transfers
        clubs=get_club_by_year(dummy['transfer'],club,year)
        clubs=clubs[:1] #remove this step to double count. this limits to first club in transfer years
        for j in range(len(clubs)):
            iclub=clubs[j]
            if iclub in club_replacer: iclub=club_replacer[iclub]
            dummy['club']=iclub
            dummy['pretty_club']=pretty_clubs[iclub]
            dummy['age']=year-1-pd.to_datetime(dummy['birth']).year
            data=add_to_club(data,iclub,year,dummy.copy())

In [372]:
all_data=[]
for club in data:
    for year in data[club]:
        df=pd.DataFrame(data[club][year])
        df['year']=year
        df['club']=club
        df=df.drop('transfer',axis=1)
        all_data.append(df)
all_data=pd.concat(all_data)

In [374]:
all_data.to_csv('data/members_base.csv')

In [ ]: