In [6]:
import pandas as pd
# Load the training split and summarise it: descriptive statistics, shape,
# and the number of missing values per column.
train_data = pd.read_csv('Data/Train_UWu5bXk.csv')
# describe() is not the last expression in the cell, so it must be printed
# explicitly or its result is silently discarded.
print(train_data.describe())
print(train_data.shape)
print(train_data.isnull().sum())
In [7]:
# Load the test split and summarise it the same way as the training split:
# descriptive statistics, shape, and missing values per column.
test_data = pd.read_csv('Data/Test_u94Q5KV.csv')
# describe() is not the last expression in the cell, so it must be printed
# explicitly or its result is silently discarded.
print(test_data.describe())
print(test_data.shape)
print(test_data.isnull().sum())
In [49]:
# Frequency of each Outlet_Size and Outlet_Type value in the training data.
print(train_data['Outlet_Size'].value_counts())
print(train_data['Outlet_Type'].value_counts())
# Alternative explorations, left disabled:
# print(train_data.groupby('Outlet_Type').apply(lambda x: x['Outlet_Size'].mode()))
# print(train_data[train_data['Outlet_Size'].apply(lambda x: x == 'High')]['Outlet_Type'])
# Outlet_Size counts broken down by (Outlet_Type, Outlet_Identifier).
# value_counts() skips NaN by default, so rows with a missing size are not
# counted here.
print(
    train_data
    .groupby(['Outlet_Type', 'Outlet_Identifier'])['Outlet_Size']
    .value_counts()
)
In [46]:
# For rows where Outlet_Size is missing, count Outlet_Type values per
# Outlet_Identifier.
print(
    train_data[train_data['Outlet_Size'].isnull()]
    .groupby('Outlet_Identifier')['Outlet_Type']
    .value_counts()
)
In [23]:
# Small toy frame for experimenting with groupby/apply.
# The pasted IPython continuation prompts ("....:") and the trailing ";" were
# removed so the cell is valid Python outside an IPython prompt as well.
df = pd.DataFrame({
    'animal': 'cat dog cat fish dog cat cat'.split(),
    'size': list('SSMMMLL'),
    'weight': [8, 10, 11, 1, 20, 12, 12],
    'adult': [False] * 5 + [True] * 2,
})
print(df)
# Select the 'animal' column on the GroupBy object rather than inside the
# applied function: recent pandas versions deprecate letting apply() operate
# on the grouping column itself.
# NOTE(review): expected to yield the same per-group Series of animal names
# as the original expression -- confirm against the pandas version in use.
df.groupby('animal')['animal'].apply(lambda subf: subf)
Out[23]: