In [24]:
# Execute the shared setup script in this namespace.
# NOTE(review): `np` and `pd` are used below but never imported in this
# notebook, so they presumably come from setup_env.py — confirm.
%run setup_env.py
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Short alias for NumPy's standard-normal sampler.
randn = np.random.randn
from string import ascii_letters
# ASCII letters as a list (lowercase first, then uppercase), used as sample labels.
letters = list(ascii_letters)

Intro to Data Structures

Series

create from ndarray

# Five standard-normal draws, labelled with the first five letters.
# Slicing a list already yields a new list, so it is passed straight through.
pd.Series(data=randn(5), index=letters[:5])

create from dict

# Map each of the first five letters to position ** position
# (Python evaluates 0 ** 0 as 1).
d = {letter: pos ** pos for pos, letter in enumerate(letters[:5])}
# Six labels are requested but `d` only has five keys, so the sixth
# label has no matching value and shows up as NaN.
pd.Series(d, index=letters[:6])

DataFrame

create from dict of Series or dicts

# Two float Series with partially overlapping labels: 'one' has no 'd'.
d = dict(
    one=pd.Series([1., 2., 3.], index=list('abc')),
    two=pd.Series([1., 2., 3., 4.], index=list('abcd')),
)
# The explicit index/columns select and reorder the result; labels that
# do not exist in `d` (row 'e', column 'three') are filled with NaN.
pd.DataFrame(d, index=list('dbace'), columns=['one', 'two', 'three'])

from structured or record array

# Structured array with an int32, a float32 and a 10-byte string field.
# 'S10' is the supported spelling of the bytes dtype; the 'a10' alias used
# previously is deprecated in NumPy 2.0.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
data[:] = [(1, 2, 'Hello'), (2, 3, 'World')]
# Field names become column labels. 'D' is not a field of `data`, so that
# column is requested without any backing values.
pd.DataFrame(data, index=['00', '01'], columns=['A', 'B', 'C', 'D'])

from a list of dicts

# Each dict is one row and its keys become column labels. The first row
# has no 'c' key, so that cell is missing in the resulting frame.
data = [
    dict(a=1, b=2),
    dict(a=5, b=10, c=20),
]
pd.DataFrame(data)

from a list of tuples

from a Series

alternate constructors

# from_records requires its `data` argument; the bare call raised
# TypeError and stopped a top-to-bottom run. Feed it a list of row tuples
# and name the columns explicitly.
records = [(1, 2., 'Hello'), (2, 3., 'World')]
pd.DataFrame.from_records(records, columns=['A', 'B', 'C'])
# DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0;
# from_dict is the supported replacement. Key order is kept because dicts
# preserve insertion order (Python 3.7+).
pd.DataFrame.from_dict({'A': [1, 2, 3], 'B': [4, 5, 6]})
# orient='index' turns each key into a row label; `columns` then names
# the positions within each value list.
pd.DataFrame.from_dict({'A': [1, 2, 3], 'B': [4, 5, 6]}, orient='index', columns=letters[:3])

Column selection, addition, deletion

Indexing/Selection

| Operation | Syntax | Result |
|---|---|---|
| Select column | `df[col]` | Series |
| Select row by label | `df.loc[label]` | Series |
| Select row by integer location | `df.iloc[loc]` | Series |
| Slice rows | `df[5:10]` | DataFrame |
| Select rows by boolean vector | `df[bool_vec]` | DataFrame |

Data alignment and arithmetic

Transposing

Panel


In [29]:
# DataFrame.from_items was deprecated in pandas 0.23 and removed in 1.0;
# from_dict with orient='index' builds the same frame: keys become row
# labels and `columns` names the positions within each value list.
pd.DataFrame.from_dict({'A': [1, 2, 3], 'B': [4, 5, 6]}, orient='index', columns=letters[:3])


Out[29]:
a b c
A 1 2 3
B 4 5 6