In [1]:
import pandas as pd
# Small demo frame: two integer columns (col2 repeats the value 444) and one string column.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, 4],
        "col2": [444, 555, 666, 444],
        "col3": ["abc", "def", "ghi", "xyz"],
    }
)
df.head()
Out[1]:
In [2]:
# Distinct values found in 'col2', returned as an array in order of first appearance.
df.loc[:, "col2"].unique()
Out[2]:
In [3]:
# How many distinct values 'col2' holds (missing values are not counted).
df["col2"].nunique(dropna=True)
Out[3]:
In [4]:
# Frequency of each distinct value in 'col2', most frequent first.
df.loc[:, "col2"].value_counts()
Out[4]:
In [5]:
# Select rows of a DataFrame using conditions on several columns.
# Each comparison needs its own parentheses because `&` binds tighter than `>` and `==`.
newdf = df.loc[
    (df["col1"] > 2) & (df["col2"] == 444)
]
In [6]:
# Display the rows that satisfied both conditions (col1 > 2 and col2 == 444).
newdf
Out[6]:
In [7]:
def times2(x):
    """Return ``x`` multiplied by 2.

    Args:
        x: Any value that supports ``* 2`` (numbers are doubled; sequences
            such as strings or lists are repeated twice).

    Returns:
        The result of ``x * 2``.
    """
    return x * 2
In [8]:
# Call times2 on every element of 'col1'; the result is a new Series and df is not modified.
df.loc[:, "col1"].apply(times2)
Out[8]:
In [9]:
# apply also accepts built-ins: here, the length of each string in 'col3'.
df.loc[:, "col3"].apply(len)
Out[9]:
In [10]:
# Total of all values in 'col1'.
df.loc[:, "col1"].sum()
Out[10]:
Borrar permanentemente una columna
In [11]:
# Remove 'col1' from df in place; no copy is made, so the column is gone for every later cell.
# Re-running this cell raises KeyError because 'col1' no longer exists.
del df['col1']
In [12]:
# Display the frame to confirm that 'col1' is no longer present.
df
Out[12]:
Obtener las columnas y los índices
In [13]:
# Column labels of the frame, returned as a pandas Index.
df.columns
Out[13]:
In [14]:
# Row labels of the frame; a default RangeIndex here, since no index was given at construction.
df.index
Out[14]:
Ordenar un DataFrame:
In [15]:
# Show the frame in its current row order, for comparison with the sorted result below.
df
Out[15]:
In [16]:
# Rows ordered by 'col2', ascending. A sorted copy is returned and df itself
# is left untouched (inplace defaults to False).
df.sort_values(by="col2", ascending=True)
Out[16]:
Comprobar si hay valores nulos y eliminar las filas que los contienen
In [17]:
# Boolean frame of the same shape as df: True wherever a value is missing.
# isna is the same method as isnull under its primary name.
df.isna()
Out[17]:
In [18]:
# Discard every row that contains at least one missing value.
# Returns a new frame; df is not modified.
df.dropna(axis=0, how="any")
Out[18]:
Rellenar valores nulos con algo más
In [19]:
import numpy as np
In [20]:
# Rebuild the demo frame, this time with one missing value in each numeric column.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, np.nan],
        "col2": [np.nan, 555, 666, 444],
        "col3": ["abc", "def", "ghi", "xyz"],
    }
)
df.head()
Out[20]:
In [21]:
# Replace every missing value with the placeholder string 'FILL'.
# Returns a new frame; df keeps its NaN entries.
df.fillna(value="FILL")
Out[21]:
In [22]:
# Toy records for the pivot-table demo: three categorical keys (A, B, C) and one numeric value (D).
data = {
    "A": ["foo"] * 3 + ["bar"] * 3,
    "B": ["one", "one", "two", "two", "one", "one"],
    "C": ["x", "y"] * 3,
    "D": [1, 3, 2, 5, 4, 1],
}
df = pd.DataFrame(data=data)
In [23]:
# Display the frame built from `data` before it is reshaped.
df
Out[23]:
In [24]:
# Reshape: one row per (A, B) pair, one column per distinct C, cells hold the mean of D.
# Combinations that never occur in the data come out as NaN.
df.pivot_table(
    values="D",
    index=["A", "B"],
    columns=["C"],
    aggfunc="mean",
)
Out[24]: