In [1]:
import pandas as pd, numpy as np, json
from matches_loader import *
from members_loader import *
from clubs_loader import *
from point_utils import *

In [100]:
members=pd.read_excel('../data/manual/Evidenta membrilor.xlsm',header=[1])

In [101]:
members=members[[231,                         'Nr. EKF',
                                  'Club',                      'Unnamed: 3',
                                'Numele',                       'Prenumele',
                                   'Gen',                   'Data naşterii',
                                  '1 kyu','practică',
                                 '1 dan',                           '2 dan',
                                 '3 dan',                           '4 dan',
                                 '5 dan',                           '6 dan',
                                 '7 dan',                           '8 dan',
                    151,
                                     152,                           '152.1',
                                     175,                               179,
                                     197,214,'231.1']]

In [102]:
nyears=8
members.columns=list(members.columns[:-nyears])+list(range(2019-nyears,2019))

In [103]:
data={}
replace_active={'Activ':'Active','Inactiv':'Inactive','Free':'Inactive','AS':'Abroad',
                'Transferat':'Abroad','Decedat':'Inactive'}
active_redflags=['Deleted']
for i in members.T.iteritems():
    active=i[1][231]
    if active not in active_redflags:
        grades=i[1][['1 kyu','1 dan','2 dan','3 dan','4 dan','5 dan',
                     '6 dan','7 dan','8 dan']].replace('x',pd.NaT).dropna()
        grades0=i[1][['1 dan','2 dan','3 dan','4 dan','5 dan',
                      '6 dan','7 dan','8 dan']].replace('x',pd.NaT).dropna()
        df=pd.DataFrame(grades0)
        df.columns=['dan']
        df=df.reset_index().set_index('dan').sort_index()
        dummy={}
        grades=pd.to_datetime(grades.astype(str))
        active=replace_active[active]
        if len(grades)>0:
            mingrade=grades.min().year
            maxgrade=grades.max().year
        else:
            mingrade=2016 #default starting year
            maxgrade=2016
        if active=='Active':
            maxyear=2019 #default active max year
        else:
            maxyear=min(maxgrade+4,2019) #default active years grace period, if unknown


        dummy['name']=i[1]['Numele']+' '+i[1]['Prenumele']
        dummy['birth']=str(i[1]['Data naşterii'])[:10]
        dummy['gen']=i[1]['Gen']
        dummy['ekf']=i[1]['Nr. EKF']
        club=i[1]['Club']
        dummy['transfer']=get_transfer(dummy['name'],i[1]['Unnamed: 3'])

        for year in range(mingrade,maxyear):
            if year==maxyear-1:
                dummy['active']=active
            else:
                dummy['active']='Active'
            #get year from exams
            dummy['dan']=len(df[:str
                                (year)])
            #get active from member list
            for j in i[1][-nyears:].index:
                if year==j:
                    if i[1][-nyears:][j]=='Da':
                        dummy['active']=active
                    else:
                        dummy['active']='Inactive'
            #get club from transfers
            clubs=clubs_loader.get_club_by_year(dummy['transfer'],club,year,mingrade,maxyear)
            clubs=clubs[:1] #remove this step to double count. this limits to first club in transfer years
            for j in range(len(clubs)):
                iclub=clubs_loader.replacer(clubs[j])
                dummy['club']=iclub
                dummy['age']=year-1-pd.to_datetime(dummy['birth']).year
                data=clubs_loader.add_to_club(data,iclub,year,dummy.copy())

all_data=[]

for club in data:
    for year in data[club]:
        df=pd.DataFrame(data[club][year])
        df['year']=year
        df['club']=club
        df=df.drop('transfer',axis=1)
        all_data.append(df)

members=pd.concat(all_data).reset_index(drop=True)

In [104]:
maxyear


Out[104]:
2014

In [105]:
data['BSD'][2019]


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-105-59beb3ce97f1> in <module>()
----> 1 data['BSD'][2019]

KeyError: 2019

In [84]:
members[members['name']=='Pindelea Grigore']


Out[84]:
active age birth club dan ekf gen name year
945 Active 31.0 1968-06-18 MUS 0 RO.00049 M Pindelea Grigore 2000
948 Active 32.0 1968-06-18 MUS 0 RO.00049 M Pindelea Grigore 2001
951 Active 33.0 1968-06-18 MUS 1 RO.00049 M Pindelea Grigore 2002
955 Active 34.0 1968-06-18 MUS 1 RO.00049 M Pindelea Grigore 2003
959 Active 35.0 1968-06-18 MUS 1 RO.00049 M Pindelea Grigore 2004
963 Active 36.0 1968-06-18 MUS 1 RO.00049 M Pindelea Grigore 2005
966 Active 37.0 1968-06-18 MUS 1 RO.00049 M Pindelea Grigore 2006
968 Active 38.0 1968-06-18 MUS 1 RO.00049 M Pindelea Grigore 2007
970 Active 39.0 1968-06-18 MUS 1 RO.00049 M Pindelea Grigore 2008
972 Active 40.0 1968-06-18 MUS 1 RO.00049 M Pindelea Grigore 2009
975 Active 41.0 1968-06-18 MUS 2 RO.00049 M Pindelea Grigore 2010
978 Abroad 42.0 1968-06-18 MUS 2 RO.00049 M Pindelea Grigore 2011
981 Abroad 43.0 1968-06-18 MUS 2 RO.00049 M Pindelea Grigore 2012
984 Abroad 44.0 1968-06-18 MUS 2 RO.00049 M Pindelea Grigore 2013

In [44]:
members


Out[44]:
active age birth club dan ekf gen name year
0 Active 27.0 1979-06-06 KYO 0 RO.00076 F Abe (Carțiș) Emilia 2007
1 Active 36.0 1970-10-21 KYO 2 RO.00028 F Grapă Daniela 2007
2 Active 34.0 1972-10-26 KYO 2 RO.00039 F Hulea Violeta 2007
3 Active 25.0 1981-07-27 KYO 3 RO.00065 M Józsa Levente 2007
4 Active 25.0 1981-10-18 KYO 0 RO.00079 M Kovács Alexandru 2007
5 Active 14.0 1992-06-05 KYO 0 RO.00077 F Mureșan (Egri) Melinda 2007
6 Active 30.0 1976-12-03 KYO 0 RO.00090 M Ratz Bogdan 2007
7 Active 23.0 1983-09-29 KYO 0 RO.00071 F Stoica Loredana 2007
8 Active 28.0 1979-06-06 KYO 0 RO.00076 F Abe (Carțiș) Emilia 2008
9 Active 37.0 1970-10-21 KYO 3 RO.00028 F Grapă Daniela 2008
10 Inactive 35.0 1972-10-26 KYO 2 RO.00039 F Hulea Violeta 2008
11 Active 26.0 1981-07-27 KYO 3 RO.00065 M Józsa Levente 2008
12 Active 26.0 1981-10-18 KYO 1 RO.00079 M Kovács Alexandru 2008
13 Active 15.0 1992-06-05 KYO 0 RO.00077 F Mureșan (Egri) Melinda 2008
14 Active 31.0 1976-12-03 KYO 0 RO.00090 M Ratz Bogdan 2008
15 Active 24.0 1983-09-29 KYO 0 RO.00071 F Stoica Loredana 2008
16 Active 29.0 1979-06-06 KYO 0 RO.00076 F Abe (Carțiș) Emilia 2009
17 Active NaN NaT KYO 0 NaN M Cristea Adrian 2009
18 Active 33.0 1975-05-07 KYO 1 RO.00041 M Gâscă Claudiu 2009
19 Active 38.0 1970-10-21 KYO 3 RO.00028 F Grapă Daniela 2009
20 Active 27.0 1981-07-27 KYO 3 RO.00065 M Józsa Levente 2009
21 Active 27.0 1981-10-18 KYO 1 RO.00079 M Kovács Alexandru 2009
22 Active 16.0 1992-06-05 KYO 0 RO.00077 F Mureșan (Egri) Melinda 2009
23 Active 32.0 1976-12-03 KYO 0 RO.00090 M Ratz Bogdan 2009
24 Active 25.0 1983-09-29 KYO 0 RO.00071 F Stoica Loredana 2009
25 Active 27.0 1981-04-30 KYO 1 RO.00082 M Vrânceanu Radu 2009
26 Inactive 30.0 1979-06-06 KYO 0 RO.00076 F Abe (Carțiș) Emilia 2010
27 Active NaN NaT KYO 0 NaN M Cristea Adrian 2010
28 Active 34.0 1975-05-07 KYO 1 RO.00041 M Gâscă Claudiu 2010
29 Active 22.0 1987-01-18 KYO 0 NaN M Georgescu Radu-Constantin 2010
... ... ... ... ... ... ... ... ... ...
2956 Inactive 36.0 1982-08-15 BSD 0 NaN F Tütsek Kinga 2019
2957 Active 21.0 1981-07-27 FDS 1 RO.00065 M Józsa Levente 2003
2958 Active 28.0 1974-01-20 FDS 1 RO.00047 M Székely József-Pál 2003
2959 Active 22.0 1981-07-27 FDS 1 RO.00065 M Józsa Levente 2004
2960 Active 29.0 1974-01-20 FDS 1 RO.00047 M Székely József-Pál 2004
2961 Active 23.0 1981-07-27 FDS 2 RO.00065 M Józsa Levente 2005
2962 Active 30.0 1974-01-20 FDS 2 RO.00047 M Székely József-Pál 2005
2963 Active 26.0 1974-01-20 FDS 1 RO.00047 M Székely József-Pál 2001
2964 Active 27.0 1974-01-20 FDS 1 RO.00047 M Székely József-Pál 2002
2965 Active 51.0 1964-08-17 ACKIJ 0 RO.00239 M Sakai Toshinobu 2016
2966 Active 52.0 1964-08-17 ACKIJ 0 RO.00239 M Sakai Toshinobu 2017
2967 Active 53.0 1964-08-17 ACKIJ 0 RO.00239 M Sakai Toshinobu 2018
2968 Inactive 54.0 1964-08-17 ACKIJ 0 RO.00239 M Sakai Toshinobu 2019
2969 Active 21.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2000
2970 Active 22.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2001
2971 Active 23.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2002
2972 Active 24.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2003
2973 Active 25.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2004
2974 Active 26.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2005
2975 Active 27.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2006
2976 Active 28.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2007
2977 Active 29.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2008
2978 Active 30.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2009
2979 Active 31.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2010
2980 Active 32.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2011
2981 Active 33.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2012
2982 Active 34.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2013
2983 Active 35.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2014
2984 Active 39.0 1978-06-22 HUN 2 RO.00178 M Szőke Sándor 2018
2985 Inactive 40.0 1978-06-22 HUN 2 RO.00178 M Szőke Sándor 2019

2986 rows × 9 columns


In [ ]:


In [2]:
members=get_members('../data/manual/Evidenta membrilor.xlsm')

In [26]:
data={}
replace_active={'Activ':'Active','Inactiv':'Inactive','Free':'Inactive','AS':'Abroad',
                'Transferat':'Abroad','Decedat':'Inactive'}
active_redflags=['Deleted']
for i in members.T.iteritems():
    active=i[1][231]
    if active not in active_redflags:
        grades=i[1][['1 kyu','1 dan','2 dan','3 dan','4 dan','5 dan',
                     '6 dan','7 dan','8 dan']].replace('x',pd.NaT).dropna()
        grades0=i[1][['1 dan','2 dan','3 dan','4 dan','5 dan',
                      '6 dan','7 dan','8 dan']].replace('x',pd.NaT).dropna()
        df=pd.DataFrame(grades0)
        df.columns=['dan']
        df=df.reset_index().set_index('dan').sort_index()
        dummy={}
        grades=pd.to_datetime(grades.astype(str))
        active=replace_active[active]
        if len(grades)>0:
            mingrade=grades.min().year
            maxgrade=grades.max().year
        else:
            mingrade=2016 #default starting year
            maxgrade=2016
        if active=='Active':
            maxyear=2019 #default active max year
        else:
            maxyear=maxgrade+3 #default active years if unknown


        dummy['name']=i[1]['Numele']+' '+i[1]['Prenumele']
        dummy['birth']=str(i[1]['Data naşterii'])[:10]
        dummy['gen']=i[1]['Gen']
        dummy['ekf']=i[1]['Nr. EKF']
        dummy['active']=active
        club=i[1]['Club']
        dummy['transfer']=get_transfer(dummy['name'],i[1]['Unnamed: 3'])
         
        for year in range(mingrade,maxyear):

            #get year from exams
            dummy['dan']=len(df[:str
                                (year)])
            #get club from transfers
            clubs=clubs_loader.get_club_by_year(dummy['transfer'],club,year,mingrade,maxyear)
            clubs=clubs[:1] #remove this step to double count. this limits to first club in transfer years
            for j in range(len(clubs)):
                iclub=clubs_loader.replacer(clubs[j])
                dummy['club']=iclub
                dummy['age']=year-1-pd.to_datetime(dummy['birth']).year
                data=clubs_loader.add_to_club(data,iclub,year,dummy.copy())

all_data=[]

for club in data:
    for year in data[club]:
        df=pd.DataFrame(data[club][year])
        df['year']=year
        df['club']=club
        df=df.drop('transfer',axis=1)
        all_data.append(df)

members_clean=pd.concat(all_data).reset_index(drop=True)

In [30]:
members_clean.set_index('name').loc['Mureșan (Egri) Melinda']


Out[30]:
active age birth club dan ekf gen year
name
Mureșan (Egri) Melinda Inactive 14.0 1992-06-05 KYO 0 RO.00077 F 2007
Mureșan (Egri) Melinda Inactive 15.0 1992-06-05 KYO 0 RO.00077 F 2008
Mureșan (Egri) Melinda Inactive 16.0 1992-06-05 KYO 0 RO.00077 F 2009

In [28]:
members_clean


Out[28]:
active age birth club dan ekf gen name year
0 Inactive 27.0 1979-06-06 KYO 0 RO.00076 F Abe (Carțiș) Emilia 2007
1 Active 36.0 1970-10-21 KYO 2 RO.00028 F Grapă Daniela 2007
2 Inactive 34.0 1972-10-26 KYO 2 RO.00039 F Hulea Violeta 2007
3 Active 25.0 1981-07-27 KYO 3 RO.00065 M Józsa Levente 2007
4 Inactive 25.0 1981-10-18 KYO 0 RO.00079 M Kovács Alexandru 2007
5 Inactive 14.0 1992-06-05 KYO 0 RO.00077 F Mureșan (Egri) Melinda 2007
6 Abroad 30.0 1976-12-03 KYO 0 RO.00090 M Ratz Bogdan 2007
7 Abroad 23.0 1983-09-29 KYO 0 RO.00071 F Stoica Loredana 2007
8 Inactive 28.0 1979-06-06 KYO 0 RO.00076 F Abe (Carțiș) Emilia 2008
9 Active 37.0 1970-10-21 KYO 3 RO.00028 F Grapă Daniela 2008
10 Active 26.0 1981-07-27 KYO 3 RO.00065 M Józsa Levente 2008
11 Inactive 26.0 1981-10-18 KYO 1 RO.00079 M Kovács Alexandru 2008
12 Inactive 15.0 1992-06-05 KYO 0 RO.00077 F Mureșan (Egri) Melinda 2008
13 Abroad 31.0 1976-12-03 KYO 0 RO.00090 M Ratz Bogdan 2008
14 Abroad 24.0 1983-09-29 KYO 0 RO.00071 F Stoica Loredana 2008
15 Inactive 29.0 1979-06-06 KYO 0 RO.00076 F Abe (Carțiș) Emilia 2009
16 Inactive NaN NaT KYO 0 NaN M Cristea Adrian 2009
17 Inactive 33.0 1975-05-07 KYO 1 RO.00041 M Gâscă Claudiu 2009
18 Active 38.0 1970-10-21 KYO 3 RO.00028 F Grapă Daniela 2009
19 Active 27.0 1981-07-27 KYO 3 RO.00065 M Józsa Levente 2009
20 Inactive 27.0 1981-10-18 KYO 1 RO.00079 M Kovács Alexandru 2009
21 Inactive 16.0 1992-06-05 KYO 0 RO.00077 F Mureșan (Egri) Melinda 2009
22 Abroad 32.0 1976-12-03 KYO 0 RO.00090 M Ratz Bogdan 2009
23 Abroad 25.0 1983-09-29 KYO 0 RO.00071 F Stoica Loredana 2009
24 Abroad 27.0 1981-04-30 KYO 1 RO.00082 M Vrânceanu Radu 2009
25 Abroad NaN NaT KYO 0 RO.00027 M Amurațiu Ștefăniță 1995
26 Inactive NaN NaT KYO 0 RO.00026 M Zahan Sorin 1995
27 Abroad NaN NaT KYO 0 RO.00027 M Amurațiu Ștefăniță 1996
28 Inactive NaN NaT KYO 0 RO.00026 M Zahan Sorin 1996
29 Abroad NaN NaT KYO 0 RO.00027 M Amurațiu Ștefăniță 1997
... ... ... ... ... ... ... ... ... ...
2614 Inactive 49.0 1968-09-27 BSD 0 NaN F Tarcza Zarug Carla Mariana 2018
2615 Inactive 35.0 1982-08-15 BSD 0 NaN F Tütsek Kinga 2018
2616 Active 8.0 2009-11-18 BSD 0 NaN M Veres Cioanta Iustin 2018
2617 Active 21.0 1981-07-27 FDS 1 RO.00065 M Józsa Levente 2003
2618 Inactive 28.0 1974-01-20 FDS 1 RO.00047 M Székely József-Pál 2003
2619 Active 22.0 1981-07-27 FDS 1 RO.00065 M Józsa Levente 2004
2620 Inactive 29.0 1974-01-20 FDS 1 RO.00047 M Székely József-Pál 2004
2621 Active 23.0 1981-07-27 FDS 2 RO.00065 M Józsa Levente 2005
2622 Inactive 30.0 1974-01-20 FDS 2 RO.00047 M Székely József-Pál 2005
2623 Inactive 26.0 1974-01-20 FDS 1 RO.00047 M Székely József-Pál 2001
2624 Inactive 27.0 1974-01-20 FDS 1 RO.00047 M Székely József-Pál 2002
2625 Inactive 51.0 1964-08-17 ACKIJ 0 RO.00239 M Sakai Toshinobu 2016
2626 Inactive 52.0 1964-08-17 ACKIJ 0 RO.00239 M Sakai Toshinobu 2017
2627 Inactive 53.0 1964-08-17 ACKIJ 0 RO.00239 M Sakai Toshinobu 2018
2628 Inactive 21.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2000
2629 Inactive 22.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2001
2630 Inactive 23.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2002
2631 Inactive 24.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2003
2632 Inactive 25.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2004
2633 Inactive 26.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2005
2634 Inactive 27.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2006
2635 Inactive 28.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2007
2636 Inactive 29.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2008
2637 Inactive 30.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2009
2638 Inactive 31.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2010
2639 Inactive 32.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2011
2640 Inactive 33.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2012
2641 Inactive 34.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2013
2642 Inactive 35.0 1978-06-22 HUN 0 RO.00178 M Szőke Sándor 2014
2643 Inactive 39.0 1978-06-22 HUN 2 RO.00178 M Szőke Sándor 2018

2644 rows × 9 columns