In [3]:
import numpy as np
import pandas as pd
# Fixed seed so the random values below are identical on every run.
rng = np.random.RandomState(seed=42)
ser = pd.Series(data=rng.rand(5))
# Column A is drawn before column B; the draw order determines the values.
df = pd.DataFrame(
    {
        'A': rng.rand(5),
        'B': rng.rand(5),
    }
)
df
Out[3]:
In [4]:
# Sum down the rows: one total per column.
df.sum(axis="index")
Out[4]:
In [5]:
# Sum across the columns: one total per row.
df.sum(axis="columns")
Out[5]:
In [6]:
# Average down the rows: one mean per column.
df.mean(axis="index")
Out[6]:
In [7]:
# Average across the columns: one mean per row.
df.mean(axis="columns")
Out[7]:
In [9]:
# Largest value found in each column.
df.max(axis="index")
Out[9]:
In [12]:
# Compute both row statistics from the original data columns only.
# Previously 'mean' was calculated after 'max' had been added to the frame,
# so the row mean averaged A, B *and* the max column; re-running the cell
# would additionally fold the old 'mean' into the new 'max'.
value_cols = ['A', 'B']
df['max'] = df[value_cols].max(axis=1)
df['mean'] = df[value_cols].mean(axis=1)
df
Out[12]:
In [18]:
# Small name/salary table; note that two people share the 70000 salary.
df3 = pd.DataFrame(
    dict(
        name=['Bob', 'Jake', 'Lisa', 'Sue'],
        salary=[70000, 80000, 70000, 90000],
    )
)
df3
Out[18]:
In [19]:
# Group the rows by salary, then count the entries in each remaining
# column for every distinct salary value.
dg3 = df3.groupby(by='salary')
dg3.count()
Out[19]:
In [22]:
# Six rows cycling through keys A, B, C twice, with data values 0..5.
df = pd.DataFrame(
    {
        'key': list('ABCABC'),
        'data': range(6),
    },
    columns=['key', 'data'],
)
df
Out[22]:
In [24]:
# Per-key totals of the remaining column(s).
(
    df
    .groupby("key")
    .sum()
)
Out[24]:
In [25]:
# Per-key averages of the remaining column(s).
(
    df
    .groupby("key")
    .mean()
)
Out[25]:
In [28]:
# Per-key averages, then unstack() to reshape the grouped result.
(
    df
    .groupby("key")
    .mean()
    .unstack()
)
Out[28]:
In [47]:
# Re-seed so data2 is reproducible regardless of earlier random draws.
rng = np.random.RandomState(seed=0)
# Keys cycle A, B, C twice; data1 counts 0..5; data2 holds random ints in [0, 10).
df = pd.DataFrame(
    {
        'key': list('ABCABC'),
        'data1': range(6),
        'data2': rng.randint(0, 10, 6),
    },
    columns=['key', 'data1', 'data2'],
)
df
Out[47]:
In [31]:
# Per-key averages of data1 and data2.
(
    df
    .groupby("key")
    .mean()
)
Out[31]:
In [32]:
# Several reductions at once, applied to every non-key column.
# String aliases are used instead of np.median / the builtin max: recent
# pandas releases emit a FutureWarning when those callables are passed,
# and the aliases produce the same 'min' / 'median' / 'max' columns.
df.groupby('key').aggregate(['min', 'median', 'max'])
Out[32]:
In [33]:
# Same three reductions, restricted to the data1 column.
# String aliases are used instead of np.median / the builtin max: recent
# pandas releases emit a FutureWarning when those callables are passed,
# and the aliases produce the same 'min' / 'median' / 'max' columns.
df.groupby('key')['data1'].aggregate(['min', 'median', 'max'])
Out[33]:
In [91]:
# A different reduction per column: smallest data1, average data2.
dfx = df.groupby('key').aggregate(
    {
        'data1': 'min',
        'data2': 'mean',
    }
)
dfx
Out[91]:
In [38]:
# Per-key standard deviation of each non-key column.
(
    df
    .groupby("key")
    .std()
)
Out[38]:
In [39]:
# Aggregate per key (min of data1, mean of data2) ...
df1 = df.groupby('key').aggregate(
    {'data1': 'min', 'data2': 'mean'}
)
# ... then keep only the keys whose mean data2 is above 3.6.
df1.loc[df1['data2'] > 3.6]
Out[39]:
In [81]:
# Show the raw frame next to its per-key means for reference.
print(df)
print(df.groupby('key').aggregate({'data1': 'mean','data2':'mean'}))
# Subtract each group's mean from its members, column by column.
df1 = df.groupby('key').transform(lambda x: x-x.mean())
# pd.DataFrame(df) does not copy a DataFrame input by default, so ask for an
# explicit copy; this keeps the new column below from touching `df`.
df2 = pd.DataFrame(df, copy=True)
print(df2)
print(df1)
df2['x']=df1['data1']
# A bare `df2` in the middle of a cell is silently discarded, so print it.
print(df2)
# Print both ids to compare the identity of df2 and df.
print(id(df2))
print(id(df))
In [66]:
# Two columns of five uniform random floats each; A is drawn before B.
df5 = pd.DataFrame(
    {
        'A': rng.rand(5),
        'B': rng.rand(5),
    }
)
df5
Out[66]:
In [68]:
# Add a column C holding the integers 1 through 5.
df5['C'] = list(range(1, 6))
df5
Out[68]:
In [71]:
# Five random ints per column: D from [10, 20), E from [20, 30); D is drawn first.
df6 = pd.DataFrame(
    {
        'D': rng.randint(low=10, high=20, size=5),
        'E': rng.randint(low=20, high=30, size=5),
    }
)
df6
Out[71]:
In [75]:
# Copy df6's D column into df5 as a new column named f.
# Assigning a Series aligns on the row index, not on position.
source_column = df6["D"]
df5["f"] = source_column
df5
Out[75]:
In [90]:
# 20 evenly spaced points from 1 to 10 (both endpoints included).
xfit = np.linspace(1, 10, 20)
# tolist() is the documented way to get plain Python floats; iterating the
# raw `.data` buffer gives the same values but is obscure and only works
# for simple contiguous arrays.
xfit.tolist()
Out[90]: