In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# 2x3 float array with one missing entry (NaN) per row; it is the raw data for
# the NaN-aware summary statistics demonstrated in the following cells.
arr = np.array([
    [1, 2, np.nan],
    [np.nan, 3, 4],
])

In [3]:
# Wrap `arr` in a labelled frame: rows are 'A'/'B', columns are
# 'One'/'Two'/'Three'. The bare name on the last line displays the frame.
dframe1 = DataFrame(
    data=arr,
    index=['A', 'B'],
    columns=['One', 'Two', 'Three'],
)
dframe1


Out[3]:
One Two Three
A 1.0 2.0 NaN
B NaN 3.0 4.0

In [4]:
# Column totals (reduce down the rows). In the recorded output the NaN cells
# are skipped rather than propagated: One=1.0, Two=5.0, Three=4.0.
dframe1.sum(axis=0)


Out[4]:
One      1.0
Two      5.0
Three    4.0
dtype: float64

In [5]:
# Row totals (reduce across the columns), giving one value per index label;
# the recorded output is A=3.0, B=7.0, again ignoring the NaN cells.
dframe1.sum(axis='columns')


Out[5]:
A    3.0
B    7.0
dtype: float64

In [6]:
# For every column, report the row label that holds its smallest value
# (recorded output: One -> A, Two -> A, Three -> B).
dframe1.idxmin(axis=0)


Out[6]:
One      A
Two      A
Three    B
dtype: object

In [7]:
# Running total down each column. In the recorded output the positions that
# were NaN in the input are still NaN in the result.
dframe1.cumsum(axis=0)


Out[7]:
One Two Three
A 1.0 2.0 NaN
B NaN 5.0 4.0

In [8]:
# Summary statistics per column, with the default quartiles spelled out.
# Columns One and Three each have a single non-NaN value (count 1.0), and in
# the recorded output their std and quartile rows are NaN; the run also emitted
# a NumPy RuntimeWarning from the percentile computation.
dframe1.describe(percentiles=[0.25, 0.5, 0.75])


/Users/andymeyers/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)
Out[8]:
One Two Three
count 1.0 2.000000 1.0
mean 1.0 2.500000 4.0
std NaN 0.707107 NaN
min 1.0 2.000000 4.0
25% NaN 2.250000 NaN
50% NaN 2.500000 NaN
75% NaN 2.750000 NaN
max 1.0 3.000000 4.0

In [9]:
from IPython.display import YouTubeVideo

In [10]:
# Embed the YouTube video with this id as the cell's rich output.
YouTubeVideo(id='xGbpuFNR1ME')


Out[10]:

In [13]:
# Embed the YouTube video with this id as the cell's rich output.
YouTubeVideo(id='4EXNedimDMs')


Out[13]:

In [15]:
# `pdweb` provides the `get_data_yahoo` downloader used by the cells below.
# NOTE(review): `pandas.io.data` is believed to have been removed from pandas
# (its functionality moved to the separate pandas-datareader package), so this
# import is expected to fail on a current pandas install — confirm against the
# environment this notebook is meant to run in.
import pandas.io.data as pdweb

# Used to build the start/end dates passed to `get_data_yahoo`.
import datetime

In [16]:
# Request Yahoo quotes for the CVX, XOM and BP tickers with a start of
# 2010-01-01 and an end of 2013-01-01, and keep only the 'Adj Close' entry
# (in the recorded output: one column per ticker, indexed by Date).
prices = pdweb.get_data_yahoo(
    ['CVX', 'XOM', 'BP'],
    start=datetime.datetime(2010, 1, 1),
    end=datetime.datetime(2013, 1, 1),
)['Adj Close']

# Preview the first rows.
prices.head()


Out[16]:
BP CVX XOM
Date
2010-01-04 42.257896 61.192517 56.700952
2010-01-05 42.557951 61.625962 56.922342
2010-01-06 42.779422 61.633698 57.414323
2010-01-07 42.765133 61.401501 57.233934
2010-01-08 42.865151 61.509861 57.004337

In [17]:
# Same tickers and date range as the `prices` request, but keeping the 'Volume'
# entry instead. Note that this issues the download a second time.
volume = pdweb.get_data_yahoo(
    ['CVX', 'XOM', 'BP'],
    start=datetime.datetime(2010, 1, 1),
    end=datetime.datetime(2013, 1, 1),
)['Volume']

In [18]:
# Preview the first five rows of the volume table.
volume.head(n=5)


Out[18]:
BP CVX XOM
Date
2010-01-04 3956100.0 10173800.0 27809100.0
2010-01-05 4109600.0 10593700.0 30174700.0
2010-01-06 6227900.0 11014600.0 35044700.0
2010-01-07 4431300.0 9626900.0 27192100.0
2010-01-08 3786100.0 5624300.0 24891800.0

In [19]:
# Fractional change of every price column relative to the previous row.
rets = prices.pct_change(periods=1)

In [20]:
# `rets.corr` is referenced here without call parentheses, so `corr` holds the
# bound method, not a correlation matrix; it has to be invoked as `corr()` to
# produce one.
# NOTE(review): `corr = rets.corr()` was probably intended — changing it would
# require updating every `corr()` call site at the same time.
corr = rets.corr

In [22]:
# Have matplotlib figures rendered inline in the notebook output.
%matplotlib inline
# Draw `prices` with pandas' default plot; the Axes object it returns is what
# appears as this cell's recorded output.
prices.plot()


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x118664550>

In [27]:
import seaborn as sns
import matplotlib.pyplot as plt

In [33]:
# Heatmap of the pairwise correlations between the return columns in `rets`.
# The matrix is computed here from `rets` directly rather than through a
# separately bound `corr` callable, so the cell depends only on `rets` and
# always reflects its current contents. annot=True asks seaborn to print the
# value inside each cell.
sns.heatmap(rets.corr(), annot=True)


Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x11eab98d0>

In [38]:
# Ten single-letter labels with repeats, used to demonstrate `unique` and
# `value_counts` in the following cells.
ser1 = Series(
    data=['w', 'w', 'x', 'y', 'z', 'w', 'x', 'y', 'x', 'a'],
)

# Display the series.
ser1


Out[38]:
0    w
1    w
2    x
3    y
4    z
5    w
6    x
7    y
8    x
9    a
dtype: object

In [40]:
# Distinct values of `ser1` as an array; the recorded output lists them in
# order of first appearance (w, x, y, z, a).
pd.unique(ser1)


Out[40]:
array(['w', 'x', 'y', 'z', 'a'], dtype=object)

In [41]:
# Number of occurrences of each distinct value, most frequent first
# (the defaults, written out explicitly).
ser1.value_counts(sort=True, ascending=False)


Out[41]:
w    3
x    3
y    2
a    1
z    1
dtype: int64

In [ ]: