In [1]:
import pandas as pd
import numpy as np
from IPython.display import display
# Two Series whose indexes only partly overlap: 'one' has no entry for 'd'.
# Building a DataFrame from them uses the union of the row labels, and the
# missing ('d', 'one') cell is filled with NaN.
d = dict(
    one=pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
    two=pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd']),
)
df = pd.DataFrame(data=d)
df
Out[1]:
In [2]:
# An explicit index= keeps only the requested row labels, in the order given.
pd.DataFrame(
    data=d,
    index=['d', 'b', 'a'],
)
Out[2]:
In [3]:
# index= picks and orders the rows; columns= picks the columns. 'three' is not
# a key of d, so that column contains only missing values.
pd.DataFrame(
    data=d,
    index=['d', 'b', 'a'],
    columns=['two', 'three'],
)
Out[3]:
In [4]:
# Row labels of the frame.
df.index
Out[4]:
In [5]:
# Column labels of the frame.
df.columns
Out[5]:
In [6]:
# Nested dict keyed by tuples: the outer keys become the column MultiIndex and
# the inner keys become the row MultiIndex.
#
# The earlier version of this literal listed ('a', 'c'), ('b', 'a') and
# ('b', 'b') twice. Python keeps only the LAST value for a repeated key in a
# dict literal, so the first occurrences were silently discarded. Only the
# values that actually took effect are kept here; the resulting frame is
# unchanged.
tuple_keyed = {
    ('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
    ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
    ('a', 'c'): {('B', 'B'): 5, ('A', 'C'): 6},
    ('b', 'a'): {('B', 'C'): 7, ('A', 'B'): 8},
    ('b', 'b'): {('C', 'D'): 9, ('A', 'B'): 10},
}
pd.DataFrame(tuple_keyed)
Out[6]:
In [7]:
# Fix the seed so the random frames, and every result derived from them in the
# cells below, are identical on each Restart & Run All.
np.random.seed(0)
df1 = pd.DataFrame(np.random.randn(10, 4), columns=['A', 'B', 'C', 'D'])
df2 = pd.DataFrame(np.random.randn(7, 3), columns=['A', 'B', 'C'])
display(df1)
display(df2)
# Addition aligns on both row and column labels. Positions present in only one
# frame (rows 7-9 and column 'D', which df2 lacks) come out as NaN.
df1 + df2
Out[7]:
In [18]:
# Subtract the first row from every row: the row Series is aligned on the
# column labels and broadcast down the rows.
df1 - df1.iloc[0]
Out[18]:
In [19]:
# Gotcha: with the plain operator, the Series index (row labels 0-9) is matched
# against the frame's COLUMN labels ('A'-'D'). No labels match, so every value
# in the result is NaN. Use .sub(..., axis=0) to subtract a column row-wise.
df1 - df1['A']
Out[19]:
In [10]:
# axis=0 aligns the Series on the row index, so column 'A' is subtracted from
# every column.
df1.sub(df1['A'], axis=0)
Out[10]:
In [20]:
# axis='index' is the named spelling of axis=0: align the Series on the row
# index and subtract column 'A' from every column.
df1.sub(df1['A'], axis='index')
Out[20]:
In [23]:
# axis='columns' (the default) aligns the row Series on the column labels;
# this is the method form of `df1 - df1.iloc[0]`.
df1.sub(df1.iloc[0], axis='columns')
Out[23]:
In [12]:
# Replace the existing 'D' column with the element-wise sum of 'A' and 'C'.
# NOTE(review): this mutates df1 in place, so any cell that reads df1 gives
# different results depending on whether it runs before or after this one.
df1['D'] = df1['A'] + df1['C']
df1
Out[12]:
In [13]:
# Convert the frame's data to a NumPy array (row and column labels are dropped).
np.asarray(df1)
Out[13]:
In [17]:
# Underlying data as a NumPy array. to_numpy() is the accessor the pandas
# documentation recommends over the older .values attribute.
df1.to_numpy()
Out[17]:
In [24]:
# Passing a list of function names returns a DataFrame with one row per
# function; here a single 'sum' row holding each column's total.
df1.agg(['sum'])
Out[24]:
In [25]:
# Several aggregations at once: the result has one row per function
# ('sum', 'mean'), each computed column by column.
df1.agg(['sum', 'mean'])
Out[25]:
In [ ]: