In [3]:
import numpy as np
import pandas as pd

rng = np.random.RandomState(42)
ser = pd.Series(rng.rand(5))
df = pd.DataFrame({'A': rng.rand(5),'B': rng.rand(5)})
df


Out[3]:
A B
0 0.155995 0.020584
1 0.058084 0.969910
2 0.866176 0.832443
3 0.601115 0.212339
4 0.708073 0.181825

In [4]:
df.sum()


Out[4]:
A    2.389442
B    2.217101
dtype: float64

In [5]:
df.sum(axis=1)


Out[5]:
0    0.176579
1    1.027993
2    1.698619
3    0.813454
4    0.889898
dtype: float64

In [6]:
df.mean()


Out[6]:
A    0.477888
B    0.443420
dtype: float64

In [7]:
df.mean(axis=1)


Out[7]:
0    0.088290
1    0.513997
2    0.849309
3    0.406727
4    0.444949
dtype: float64

In [9]:
df.max()


Out[9]:
A    0.866176
B    0.969910
dtype: float64

In [12]:
df['max']=df.max(axis=1)
df['mean'] = df.mean(axis=1)
df


Out[12]:
A B max mean
0 0.155995 0.020584 0.155995 0.110858
1 0.058084 0.969910 0.969910 0.665968
2 0.866176 0.832443 0.866176 0.854932
3 0.601115 0.212339 0.601115 0.471523
4 0.708073 0.181825 0.708073 0.532657

In [18]:
df3 = pd.DataFrame({'name': ['Bob', 'Jake', 'Lisa', 'Sue'],
                    'salary': [70000, 80000, 70000, 90000]})
df3


Out[18]:
name salary
0 Bob 70000
1 Jake 80000
2 Lisa 70000
3 Sue 90000

In [19]:
dg3 = df3.groupby('salary')
dg3.count()


Out[19]:
name
salary
70000 2
80000 1
90000 1

In [22]:
df = pd.DataFrame({'key': ['A', 'B', 'C', 'A', 'B', 'C'],'data': range(6)}, columns=['key', 'data'])
df


Out[22]:
key data
0 A 0
1 B 1
2 C 2
3 A 3
4 B 4
5 C 5

In [24]:
df.groupby('key').sum()


Out[24]:
data
key
A 3
B 5
C 7

In [25]:
df.groupby('key').mean()


Out[25]:
data
key
A 1.5
B 2.5
C 3.5

In [28]:
df.groupby('key').mean().unstack()


Out[28]:
      key
data  A      1.5
      B      2.5
      C      3.5
dtype: float64

In [47]:
rng = np.random.RandomState(0)
df = pd.DataFrame({'key': ['A', 'B', 'C', 'A', 'B', 'C'],
                   'data1': range(6),
                   'data2': rng.randint(0, 10, 6)},
                  columns = ['key', 'data1', 'data2'])
df


Out[47]:
key data1 data2
0 A 0 5
1 B 1 0
2 C 2 3
3 A 3 3
4 B 4 7
5 C 5 9

In [31]:
df.groupby('key').mean()


Out[31]:
data1 data2
key
A 1.5 4.0
B 2.5 3.5
C 3.5 6.0

In [32]:
df.groupby('key').aggregate(['min',np.median,max])


Out[32]:
data1 data2
min median max min median max
key
A 0 1.5 3 3 4.0 5
B 1 2.5 4 0 3.5 7
C 2 3.5 5 3 6.0 9

In [33]:
df.groupby('key')['data1'].aggregate(['min',np.median,max])


Out[33]:
min median max
key
A 0 1.5 3
B 1 2.5 4
C 2 3.5 5

In [91]:
dfx = df.groupby('key').aggregate({'data1': 'min','data2':'mean'})
dfx


Out[91]:
data1 data2
key
A 0 4.0
B 1 3.5
C 2 6.0

In [38]:
df.groupby('key').std()


Out[38]:
data1 data2
key
A 2.12132 1.414214
B 2.12132 4.949747
C 2.12132 4.242641

In [39]:
df1 = df.groupby('key').aggregate({'data1': 'min','data2':'mean'})


df1[df1['data2']>3.6]


Out[39]:
data1 data2
key
A 0 4.0
C 2 6.0

In [81]:
print(df)
print(df.groupby('key').aggregate({'data1': 'mean','data2':'mean'}))
df1 = df.groupby('key').transform(lambda x: x-x.mean())



df2 = pd.DataFrame(df)
print(df2)
print(df1)
df2['x']=df1['data1']
df2

print(id(df2))
print(id(df))


  key  data1  data2    x
0   A      0      5 -1.5
1   B      1      0 -1.5
2   C      2      3 -1.5
3   A      3      3  1.5
4   B      4      7  1.5
5   C      5      9  1.5
     data1  data2
key              
A      1.5    4.0
B      2.5    3.5
C      3.5    6.0
  key  data1  data2    x
0   A      0      5 -1.5
1   B      1      0 -1.5
2   C      2      3 -1.5
3   A      3      3  1.5
4   B      4      7  1.5
5   C      5      9  1.5
   data1  data2    x
0   -1.5    1.0 -1.5
1   -1.5   -3.5 -1.5
2   -1.5   -3.0 -1.5
3    1.5   -1.0  1.5
4    1.5    3.5  1.5
5    1.5    3.0  1.5
4556284536
4570060784

In [66]:
df5 = pd.DataFrame({'A': rng.rand(5),'B': rng.rand(5)})
df5


Out[66]:
A B
0 0.623564 0.477665
1 0.384382 0.812169
2 0.297535 0.479977
3 0.056713 0.392785
4 0.272656 0.836079

In [68]:
df5['C']=[1,2,3,4,5]
df5


Out[68]:
A B C
0 0.623564 0.477665 1
1 0.384382 0.812169 2
2 0.297535 0.479977 3
3 0.056713 0.392785 4
4 0.272656 0.836079 5

In [71]:
df6 = pd.DataFrame({'D': rng.randint(10,20,5),'E': rng.randint(20,30,5)})
df6


Out[71]:
D E
0 12 24
1 17 25
2 12 25
3 10 26
4 10 28

In [75]:
df5['f'] = df6['D']
df5


Out[75]:
A B C D f
0 0.623564 0.477665 1 12 12
1 0.384382 0.812169 2 17 17
2 0.297535 0.479977 3 12 12
3 0.056713 0.392785 4 10 10
4 0.272656 0.836079 5 10 10

In [90]:
xfit = np.linspace(1, 10,20)
list(xfit.data)


Out[90]:
[1.0,
 1.4736842105263157,
 1.9473684210526314,
 2.4210526315789473,
 2.894736842105263,
 3.3684210526315788,
 3.8421052631578947,
 4.315789473684211,
 4.789473684210526,
 5.263157894736842,
 5.7368421052631575,
 6.2105263157894735,
 6.684210526315789,
 7.157894736842105,
 7.63157894736842,
 8.105263157894736,
 8.578947368421051,
 9.052631578947368,
 9.526315789473683,
 10.0]