In [1]:
# Libraries
import pandas as pd
import numpy as np
In [2]:
from numpy.random import randn
# Seed NumPy's global random generator so the random values drawn below are reproducible.
np.random.seed(101)
In [3]:
# Build the 5x4 example frame of standard-normal draws:
# rows are labeled A-E and columns W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index='A B C D E'.split(),
    columns='W X Y Z'.split(),
)
In [4]:
# Display the full frame: rows A-E, columns W-Z.
df
Out[4]:
In [187]:
# Select a single column by its label.
df['W']
Out[187]:
In [5]:
# Pass a list of column names to select several columns at once
df[['W','Z']]
Out[5]:
In [6]:
# SQL-like attribute syntax (NOT RECOMMENDED!!!); df['W'] selects the same column
df.W
Out[6]:
Las columnas de los DataFrames son Series
In [7]:
# Check the type of a single selected column.
type(df['W'])
Out[7]:
Crear una nueva columna
In [8]:
# Add a column 'new' holding the row-wise sum of columns W and Y.
df['new'] = df['W'] + df['Y']
In [9]:
# Display df to confirm the 'new' column was added.
df
Out[9]:
Remover una columna
In [12]:
# Drop column 'new' (axis=1 targets columns); the result is only displayed, not assigned.
df.drop('new',axis=1)
Out[12]:
In [13]:
# Not inplace unless specified! A drop whose result is not kept leaves df untouched,
# so 'new' is still present here.
df
Out[13]:
In [20]:
# inplace=True modifies df itself, so 'new' is removed for good.
# Not idempotent: running this cell a second time raises KeyError because 'new' is gone.
df.drop('new',axis=1,inplace=True)
In [21]:
# Display df to confirm 'new' has been removed.
df
Out[21]:
Se pueden borrar filas
In [22]:
# Drop row 'E' (axis=0 targets rows); the result is only displayed, df is not reassigned.
df.drop('E',axis=0)
Out[22]:
Seleccionar filas
In [23]:
# Select the row labeled 'A'.
df.loc['A']
Out[23]:
O seleccionar filas según su posición en lugar de su etiqueta
In [24]:
# Select the row at integer position 2 (the third row), regardless of its label.
df.iloc[2]
Out[24]:
Seleccionar un grupo de filas y columnas
In [25]:
# Select a single value: row 'B', column 'Y'.
df.loc['B','Y']
Out[25]:
In [26]:
# Select a sub-frame: rows A and B, columns W and Y.
df.loc[['A','B'],['W','Y']]
Out[26]:
In [27]:
# Display df again as the reference for the conditional selections that follow.
df
Out[27]:
In [28]:
# Element-wise comparison: a frame of booleans, True where the value is positive.
df>0
Out[28]:
In [29]:
# Use the boolean frame as a mask: values that fail the condition are shown as NaN.
df[df>0]
Out[29]:
In [30]:
# Keep only the rows where column W is positive.
df[df['W']>0]
Out[30]:
In [31]:
# Column Y for the rows where W is positive, selected in a single .loc call
# (row mask, column label) instead of chaining two [] lookups.
df.loc[df['W']>0, 'Y']
Out[31]:
In [32]:
# Columns Y and X (in that order) for the rows where W is positive, selected in a
# single .loc call (row mask, column list) instead of chaining two [] lookups.
df.loc[df['W']>0, ['Y','X']]
Out[32]:
Para combinar dos condiciones se utilizan | (o) y & (y), con cada condición entre paréntesis
In [33]:
# Rows where both conditions hold; each comparison is wrapped in its own parentheses
# before being combined with &.
df[(df['W']>0) & (df['Y'] > 1)]
Out[33]:
In [34]:
# Display df before working with its index.
df
Out[34]:
In [35]:
# Reset to the default 0, 1, ..., n integer index
# (the result is only displayed; df is not reassigned).
df.reset_index()
Out[35]:
In [36]:
# Five labels, one for each row of df.
newind = 'CA NY WY OR CO'.split()
In [37]:
# Store the labels as a regular column named 'States'.
df['States'] = newind
In [38]:
# Display df to confirm the 'States' column was added.
df
Out[38]:
In [39]:
# Use the 'States' column as the index; the result is only displayed, df is not reassigned.
df.set_index('States')
Out[39]:
In [40]:
# Display df: a set_index call without inplace or assignment leaves df itself unchanged.
df
Out[40]:
In [41]:
# inplace=True makes 'States' the index of df itself.
# Not idempotent: a second run raises KeyError because 'States' is no longer a column.
df.set_index('States',inplace=True)
In [42]:
# Display df to confirm 'States' is now the index.
df
Out[42]:
In [43]:
# Index levels: the outer level repeats each group label three times,
# the inner level counts 1..3 within each group.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
# Pair the two levels position by position and build the two-level index in one step.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))
In [44]:
# Display the MultiIndex built from the (outside, inside) pairs.
hier_index
Out[44]:
In [45]:
# Rebind df to a fresh 6x2 frame of standard-normal draws whose rows are keyed by
# the two-level hier_index; the columns are A and B.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)
df
Out[45]:
Para un índice jerárquico utilizamos df.loc[]; si el nivel está en las columnas se puede utilizar la sintaxis de corchetes df[]. Seleccionar un nivel del índice nos genera un sub-DataFrame.
In [46]:
# Select every row under the outer-level label 'G1'.
df.loc['G1']
Out[46]:
In [47]:
# Two-step lookup: first the 'G1' group, then the row labeled 1 inside it.
df.loc['G1'].loc[1]
Out[47]:
In [48]:
# Inspect the names of the index levels.
df.index.names
Out[48]:
In [49]:
# Name the outer level 'Group' and the inner level 'Num'.
df.index.names = ['Group','Num']
In [50]:
# Display df to see the level names on the index.
df
Out[50]:
In [51]:
# Cross-section: all rows whose outer-level label is 'G1'.
df.xs('G1')
Out[51]:
In [52]:
# Cross-section for outer label 'G1' and inner label 1.
# The multi-level key is passed as a tuple: pandas 2.0+ rejects a list key in xs
# (earlier releases only warned), while a tuple works on every version.
df.xs(('G1',1))
Out[52]:
In [53]:
# Cross-section on the inner level: rows whose 'Num' label is 1, taken from every group.
df.xs(1,level='Num')
Out[53]: