IPython is a fancy Python console. Try running `ipython` or `ipython --pylab` on your command line. Some IPython tips:
# Special commands, 'magic functions', begin with %
%quickref, %who, %run, %reset
# Shell Commands
ls, cd, pwd, mkdir
# Need Help?
help(), help(obj), obj?, function?
# Tab completion of variables, attributes and methods
In [1]:
import numpy as np

# np.zeros builds an array of the requested shape filled with 0.0
# (np.ones is the matching constructor for an array of ones).
data0 = np.zeros(shape=(2, 4))
data0
Out[1]:
In [2]:
# One-dimensional array holding the integers 0 through 19 (20 entries).
data1 = np.arange(0, 20)
# Slice off the first 8 entries; an omitted start means "from the beginning".
data1[:8]
Out[2]:
In [3]:
# Same 20 values as before, laid out as 4 rows by 5 columns.
data = np.reshape(np.arange(20), (4, 5))
data
Out[3]:
In [4]:
# Report the element type of `data` before doing arithmetic on it.
print("dtype: ", data.dtype)
# Scalar multiplication is applied element-wise; the float factor yields a
# floating-point result array.
result = 20.5 * data
print(result)
In [5]:
import pandas as pd
import numpy as np
In [6]:
# A Series built from a plain list gets a default integer index 0..n-1.
s1 = pd.Series(data=[1, 2, 3, 4, 5])
s1
Out[6]:
In [7]:
# Integer multiplication: every element of s1 is scaled by the int 5.
print(s1.mul(5))
In [8]:
# Float multiplication: same scaling, but by the float 5.0.
print(s1.mul(5.0))
In [9]:
# Same values as before, but with explicit string labels as the index.
s2 = pd.Series(
    data=[1, 2, 3, 4, 5],
    index=list('abcde'),
)
s2
Out[9]:
In [10]:
# Five consecutive timestamps starting at 2013-06-26.
dates = pd.date_range(start='20130626', periods=5)
# Print the whole index, then a blank separator line, then its first entry.
print(dates, end='\n\n')
print(dates[0])
In [11]:
# Label the five values with the timestamps in `dates` to get a time series.
s3 = pd.Series(data=[1, 2, 3, 4, 5], index=dates)
print(s3)
In [12]:
# Positional access on a date-indexed Series.
# Use .iloc rather than a bare integer key: s3 is indexed by timestamps, and
# treating s3[0] as "the first element" is deprecated in recent pandas
# (integer keys are meant to be looked up as labels), so .iloc states the
# intent unambiguously.
print(s3.iloc[0])            # a single element ...
print(type(s3.iloc[0]))      # ... comes back as a scalar
print()
print(s3.iloc[1:3])          # a positional slice (end exclusive) ...
print(type(s3.iloc[1:3]))    # ... comes back as a Series
In [13]:
# Boolean-mask selection: keep only the entries whose value is below 3.
s3.loc[s3.lt(3)]
Out[13]:
In [14]:
# Label-based slice by date strings; unlike positional slices, both
# endpoints are included.
s3.loc['20130626':'20130628']
Out[14]:
In [15]:
# 4x4 DataFrame of uniform random floats in [0, 1); rows and columns get
# default integer labels. Note this rebinds the name `data1`.
data1 = pd.DataFrame(data=np.random.rand(4, 4))
data1
Out[15]:
In [16]:
# Rebind `dates` to four timestamps starting 2013-06-26, then use them as
# the row index of a 4x4 random frame with columns named A..D.
dates = pd.date_range(start='20130626', periods=4)
data2 = pd.DataFrame(
    data=np.random.rand(4, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
data2
Out[16]:
In [17]:
# Derive a new column from existing ones; the arithmetic is applied row by
# row: E = B + 5 * C. Assigning to a new key adds the column to data2.
data2['E'] = data2['C'] * 5 + data2['B']
data2
Out[17]:
See? You never need Excel again!
In [18]:
# Deleting a Column
# drop() returns a frame without column E. errors='ignore' keeps this cell
# re-runnable: a bare `del data2['E']` raises KeyError when the cell is
# executed a second time because the column is already gone.
data2 = data2.drop(columns=['E'], errors='ignore')
data2
Out[18]:
In [19]:
# Display the current contents of data2.
data2
Out[19]:
In [20]:
# Select column B by name with bracket indexing.
data2['B']
Out[20]:
In [21]:
# Attribute-style access to column B.
data2.B
Out[21]:
In [22]:
# Label-based row lookup: the date string is matched against data2's date index.
data2.loc['20130627']
Out[22]:
In [23]:
# Position-based row lookup: position 1 is the second row (counting from 0).
data2.iloc[1]
Out[23]:
In [24]:
# Three ways to read the first row's value in column B.
# The column is indexed by dates, so a bare integer key such as data2.B[0]
# relies on deprecated positional fallback; .iloc makes the positional
# lookup explicit.
print(data2.B.iloc[0])
print(data2['B'].iloc[0])
print(data2.iloc[0, 1])  # [row, column] positions in a single lookup
In [25]:
# A larger frame (100 rows x 4 columns of random floats) to demonstrate
# previewing; head() shows the first 5 rows by default.
data3 = pd.DataFrame(data=np.random.rand(100, 4))
data3.head(5)
Out[25]:
In [26]:
# Preview the last 5 rows (the default count for tail()).
data3.tail(5)
Out[26]:
pandas provides robust IO tools for reading data from a variety of sources.
In [27]:
# Simplest possible read_csv call: every option is left at its default.
phxtemps1 = pd.read_csv(filepath_or_buffer='phx-temps.csv')
phxtemps1.head(5)
Out[27]:
In [28]:
# Read the same file again, this time naming the data columns, using the
# first column as the row index, and asking pandas to parse that index as dates.
phxtemps2 = pd.read_csv(
    'phx-temps.csv',
    names=['highs', 'lows'],
    index_col=0,
    parse_dates=True,
)
phxtemps2.head(5)
Out[28]:
In [29]:
# Import pyplot and enable inline rendering so figures show up in the
# notebook output below the cell.
import matplotlib.pyplot as plt
%matplotlib inline
# Called with no arguments, DataFrame.plot() draws the frame's columns
# against its index.
phxtemps2.plot() # pandas convenience method
Out[29]:
Boo, Pandas and Friends would cry if they saw such a plot. Let's zoom in on a single year instead.
In [30]:
# Restrict the rows to calendar year 2012 with a label-based date slice
# (both endpoints included), then plot only that window.
phxtemps2.loc['20120101':'20121231'].plot()
Out[30]:
In [31]:
# Add the daily spread (high minus low) as a new column. It has to be
# created and read with brackets: `phxtemps2.diff` is already a DataFrame
# method, so attribute access would not reach a column of that name.
phxtemps2['diff'] = phxtemps2['highs'].sub(phxtemps2['lows'])
# Plot calendar year 2012 again, now including the new column.
phxtemps2.loc['20120101':'20121231'].plot()
Out[31]: