In [2]:
# Load the core libraries

import pandas as pd
import numpy as np
# Short alias so later cells can call randn(n) directly.
randn = np.random.randn

Series

Son como vectores, donde cada valor tiene asociada una etiqueta.

Creación

Hay varias formas de crearlos:


In [9]:
# Build a Series from five random values. No index is given, so the labels
# default to the positions 0..4 (as the output below shows).
s = pd.Series(randn(5))
# Parenthesised print behaves the same on Python 2 and runs on Python 3.
print(s)


0    1.728805
1   -0.257590
2    0.394946
3    2.503143
4   -1.793637
dtype: float64

In [10]:
# Same kind of random data, but with an explicit index counting down 5..1.
# list(range(...)) replaces the Python 2-only xrange generator and produces
# the same labels on both Python 2 and Python 3.
s = pd.Series(randn(5), index=list(range(5, 0, -1)))
# Parenthesised print behaves the same on Python 2 and runs on Python 3.
print(s)


5   -1.493781
4    0.748525
3    0.708729
2   -1.480178
1    0.334116
dtype: float64

In [15]:
# Build a Series from a dict: the keys become the labels.
# The explicit `index` selects and orders the labels; 'd' has no entry in
# the dict, so its value is NaN (see the output below).
d = {'a': 0., 'b': 1., 'c': 2.}
s = pd.Series(d, index=['b', 'c', 'd', 'a'])
# Parenthesised print behaves the same on Python 2 and runs on Python 3.
print(s)


b     1
c     2
d   NaN
a     0
dtype: float64

Usos


In [11]:
# Show the labels and the underlying values of the Series.
# NOTE(review): the recorded output below lists the labels [5, 4, 3, 2, 1],
# so it was captured while `s` was the Series with the descending integer
# index. Read top to bottom, `s` is the dict-based Series at this point, so a
# fresh Restart & Run All will print the labels b, c, d, a instead.
# Parenthesised print behaves the same on Python 2 and runs on Python 3.
print(s.index)
print(s.values)


Int64Index([5, 4, 3, 2, 1], dtype='int64')
[-1.49378071  0.74852458  0.7087289  -1.48017832  0.33411554]

In [16]:
# Slice by position: everything from the third element onwards.
s[2:]


Out[16]:
d   NaN
a     0
dtype: float64

In [17]:
# Adding two slices aligns them by label, not by position. A label present
# in only one operand ('a', 'b') gives NaN, and 'd' is NaN because its value
# already was; only 'c' has a number on both sides (2 + 2).
s[1:]+s[:-1]


Out[17]:
a   NaN
b   NaN
c     4
d   NaN
dtype: float64

In [21]:
# Look up a value by label, then test membership with `in`.
# Membership checks the labels: 'd' is reported as present even though its
# value is NaN (see the output below).
# Parenthesised print behaves the same on Python 2 and runs on Python 3.
print(s['b'])
print('f' in s)
print('b' in s)
print('d' in s)


1.0
False
True
True

In [24]:
# Indexing with a label that is not in the Series raises KeyError ...
try:
    s['f']
except KeyError:
    # Message text kept exactly as before so it matches the recorded output.
    print("f doesn't exists")
# ... whereas .get() returns the supplied default instead of raising.
print(s.get('f', np.nan))


f doesn't exists
nan

In [27]:
# A Series carries a `name` attribute that can be assigned and read back.
s.name = 'ey'
# Parenthesised print behaves the same on Python 2 and runs on Python 3.
print(s.name)


ey

Dataframes

Un dataframe es una matriz bidimensional donde cada columna puede ser de un tipo de dato distinto.

Creación

A partir de un diccionario de series


In [37]:
# Three integer Series that share the row labels 'a'..'e'.
# range() replaces the Python 2-only xrange generators; the values are
# unchanged: 0..4, 5..9 and 15..19.
row_labels = ['a', 'b', 'c', 'd', 'e']
d = {'one':   pd.Series(list(range(5)), index=row_labels),
     'two':   pd.Series(list(range(5, 10)), index=row_labels),
     'three': pd.Series(list(range(15, 20)), index=row_labels)}

In [38]:
# Build a DataFrame from the dict of Series: each key becomes a column and
# the shared labels become the rows.
pd.DataFrame(d)


Out[38]:
one three two
a 0 15 5
b 1 16 6
c 2 17 7
d 3 18 8
e 4 19 9

In [39]:
# Passing `index` keeps only the listed row labels.
pd.DataFrame(d, index=['a','c','e'])


Out[39]:
one three two
a 0 15 5
c 2 17 7
e 4 19 9

In [42]:
# Passing `columns` keeps only the listed columns, in that order.
pd.DataFrame(d, columns=['one','two'])


Out[42]:
one two
a 0 5
b 1 6
c 2 7
d 3 8
e 4 9

A partir de un diccionario de listas


In [45]:
# Column name -> list of values; both lists have the same length.
d = {
    'one': [1, 2, 3],
    'two': [3, 4, 5],
}

In [46]:
# Plain lists carry no labels, so the rows are numbered 0..2.
pd.DataFrame(d)


Out[46]:
one two
0 1 3
1 2 4
2 3 5

In [48]:
# With list data, `index` supplies the row labels.
pd.DataFrame(d, index=['a','b','c'])


Out[48]:
one two
a 1 3
b 2 4
c 3 5

Insertando columnas, filas, elementos


In [56]:
# Keep the frame in a variable so later cells can add columns to it;
# the bare name on the last line displays it.
df = pd.DataFrame(d)
df


Out[56]:
one two
0 1 3
1 2 4
2 3 5

In [66]:
# Insert boolean flag columns computed from the existing columns.
# Assigning to a new column name appends that column to the frame.
# 'two even' is defined here because the recorded output below shows that
# column; without this line a fresh top-to-bottom run would not reproduce it.
df['one even'] = (df['one'] % 2) == 0
df['two even'] = (df['two'] % 2) == 0
df['two odd'] = (df['two'] % 2) == 1

In [67]:
# Display the frame again to see the columns added in the previous cell.
df


Out[67]:
one two one even two even two odd
0 1 3 False False True
1 2 4 True True False
2 3 5 False False True