In [1]:
import pandas as pd
# Allow up to 999 rows before pandas truncates a displayed frame.
pd.set_option('display.max_rows', 999)

# Column labels applied to the two headerless input CSVs; both share the
# 'leave' key so the tables can be aligned on it.
columns_1 = ['leave', 'value_1']
columns_2 = ['leave', 'value_2']
In [2]:
# First value table: headerless CSV, columns labelled from columns_1 and
# indexed by the 'leave' string.
df1 = (
    pd.read_csv(
        'leave_values_20200508_filled_nulls.csv',
        header=None,
        names=columns_1,
    )
    .set_index('leave')
)
In [3]:
# Second value table, read the same way as df1 but labelled from columns_2.
# Earlier comparisons loaded 'leave_values_20191208.csv' or
# 'leave_values_20200423_filled_nulls.csv' here instead of the file below.
df2 = (
    pd.read_csv(
        'quackle_leaves.csv',
        header=None,
        names=columns_2,
    )
    .set_index('leave')
)
In [4]:
# Place the two tables side by side, aligned on their shared 'leave' index,
# then turn that index back into an ordinary column.
df = pd.concat([df1, df2], axis='columns').reset_index()
In [5]:
# Difference between the two value sources for each leave.
df['delta'] = df['value_1'] - df['value_2']

# Per-leave character counts, computed with pandas' vectorised string methods
# instead of per-row Python lambdas. For string leaves the results are the
# same as before; a missing leave now yields NaN rather than raising.
df['length'] = df['leave'].str.len()
df['vowels'] = df['leave'].str.count('[AEIOU]')  # uppercase A/E/I/O/U only
df['consonants'] = df['length'] - df['vowels']  # every non-vowel character, '?' included

# '?' is the character these flags treat as a blank.
df['has_a_blank'] = df['leave'].str.contains('?', regex=False)  # at least one '?'
df['has_two_blanks'] = df['leave'].str.count(r'\?').eq(2)  # exactly two '?'
In [6]:
# Show the combined frame, including the derived columns added above.
df
Out[6]:
In [17]:
# One sub-table per leave length (1 through 6 characters), keeping only the
# leave, both values and their delta, ordered by ascending delta.
x_tile_df = {
    n_tiles: (
        df.loc[df['length'] == n_tiles, ['leave', 'value_1', 'value_2', 'delta']]
        .sort_values('delta')
    )
    for n_tiles in range(1, 7)
}
In [18]:
# Length-1 leaves, with the two value columns relabelled by their source.
one_tile_df = x_tile_df[1].rename(
    {'value_1': 'macondo_0508', 'value_2': 'quackle'},
    axis='columns',
)
In [19]:
# Length-2 leaves, with the two value columns relabelled by their source.
two_tile_df = x_tile_df[2].rename(
    {'value_1': 'macondo_0508', 'value_2': 'quackle'},
    axis='columns',
)
In [14]:
# Write the length-1 comparison table to disk without the row index.
one_tile_df.to_csv(
    'comp_1tileleaves_0508_quackle.csv',
    index=False,
)
In [15]:
# Write the length-2 comparison table to disk without the row index.
two_tile_df.to_csv(
    'comp_2tileleaves_0508_quackle.csv',
    index=False,
)
In [25]:
# Show the length-6 sub-table (sorted by ascending delta when it was built).
x_tile_df[6]
Out[25]:
In [ ]: