In [ ]:
# MAGIC
%lsmagic
#OR
#lsmagic
In [ ]:
import sys
sys.path
In [ ]:
import sys
import pprint
pprint.pprint(sys.path)
In [ ]:
help(pprint)
In [ ]:
ls -laph
In [ ]:
cd data
In [ ]:
pwd
In [ ]:
ls -laph
In [ ]:
pwd
In [ ]:
cd ../
In [ ]:
pwd
In [ ]:
ls -laph
In [ ]:
pwd
In [ ]:
# an assignment
the_world_is_flat = True
In [ ]:
if the_world_is_flat:
print("1. Be careful not to fall off!")
print("2. Be careful not to fall off ARGGGHHHHHHHHH!")
In [ ]:
# NUMBERS
In [ ]:
height = 201
width = 5
area = height * width
In [ ]:
area
In [ ]:
area * 7
In [ ]:
area = area + area
In [ ]:
area
In [ ]:
_
In [ ]:
# understanding errors
# a padawan asks, "how?"
# replies yoda, "read, you must..."
m
In [ ]:
m = "the force is now with you!"
In [ ]:
m
In [ ]:
tax = 12.5 / 100
price = 100.50
price * tax
In [ ]:
# What is the purpose of _ , the underscore?
# It holds the last printed expression.
# however, don't use it!
price + _
In [ ]:
# see here!
_
In [ ]:
round(_, 2)
In [ ]:
'spam eggs' # single quotes
In [ ]:
'doesn\'t' # use \' to escape the single quote...
In [ ]:
"doesn't" # ...or use double quotes instead
In [ ]:
'"Yes," he said.'
In [ ]:
"\"Yes,\" he said."
In [ ]:
'"Isn\'t," she said.'
In [ ]:
print('"Isn\'t," she said.')
In [ ]:
help(print)
In [ ]:
s = 'First line.\nSecond line\nHAHAHAHAHAHAHA\tJOKER!.' # \n means newline
s
In [ ]:
print(s)
In [ ]:
# For Windows the filepath is defined as seen below...
# using raw strings by adding r before the quote
print('C:\some\naMe\tnamE') # here \n means newline!
In [ ]:
print(r'C:\some\naMe\namE') # note the r before the quote
In [ ]:
print(r'C:/some/name')
In [ ]:
# use triple quotes, either """...""" or '''...''', to span multiple lines
print("""\
Usage: thingy [OPTIONS]
-h Display this usage message
-H hostname Hostname to connect to
-B banana
""")
In [ ]:
# 3 times 'gumMMM', followed by 'dropPPPP' and 'StarWars'
3 * 'gumMMM' + 'dropPPPP' + 'StarWars'
In [ ]:
# parentheses to encapsulate two or more string literals
# side by side will be concatenated
text = ('Put several strings within parentheses '
'to have them joined together. '
'WOO!!!!!!')
text
In [ ]:
# strings are sequences (like lists/arrays) and thus have indices
word = 'Python'
word[0]
In [ ]:
word[5]
In [ ]:
word[7]
In [ ]:
# indices can also be negative numbers, counting from the right
word[-1]
In [ ]:
word[-6]
In [ ]:
# slicing
# this is the interval [0, 2)
word[0:2]
In [ ]:
#this is the interval [2, 5)
word[2:5]
In [ ]:
word[:2] + word[2:]
In [ ]:
word[2:] + word[:2]
In [ ]:
s = 'supercalifra gilisticexpialidoc ious'
len(s)
In [ ]:
help(len)
In [ ]:
squares = [1, 4, 9, 16, 25, 36]
squares
In [ ]:
squares[0] = 345
In [ ]:
squares
In [ ]:
squares[-1]
In [ ]:
squares[-3:]
In [ ]:
squares[:-2]
In [ ]:
squares[:]
In [ ]:
# concatenation
squares + [36, 49, 64, 81, 100]
In [ ]:
cubes = [1, 8, 27, 65, 125] # something's wrong here
In [ ]:
cubes
In [ ]:
4**3
In [ ]:
cubes[3] = 64
cubes
In [ ]:
# cubes.<TAB>
In [ ]:
cubes[:] = []
cubes
In [ ]:
# CONTROL FLOW
# if... else statement
x = int(input("Please enter an integer: "))
if x == 5:
print ('x is equal to 5.')
else:
print ('x is not equal to 5.')
In [ ]:
# if... elif... else statement
# Read an integer from the user and report how it compares with 5.
x = int(input("Please enter an integer: "))
if x == 5:
    print('x is equal to 5.')
elif x > 5:
    print('x is greater than 5')
else:
    # only reached when x < 5
    print('x is less than 5.')
In [ ]:
# while loop statement
# Fibonacci series:
# the sum of two elements defines the next
a, b, c = 0, 1, int(input("Please enter an integer: ")) # this is a multiple assignment
print(a)
while (b < c):
print(b)
a, b = b, a + b
In [ ]:
help(print)
In [ ]:
a, b, c = 0, 1, int(input("Please enter an integer: ")) # this is a multiple assignment
while (b < c):
print(b, end=', ')
a, b = b, a + b
In [ ]:
# Measure some strings:
words = ['cat', 'window', 'defenestrate', 'aaafkwnfkn']
for word in words:
print(word, len(word))
In [ ]:
# [1:len(words)]
words[2:4]
In [ ]:
for word in words[:]: # Loop over a slice copy of the entire list.
if len(word) > 5:
words.insert(0, word)
words
In [ ]:
help(range)
In [ ]:
for i in range(5):
print(i)
In [ ]:
# To iterate over the indices of a sequence, you can combine range() and len() as follows:
a = ['Mary', 'had', 'a', 'little', 'lamb']
for i in range(len(a)):
print(i, a[i])
# Or
list(enumerate(a))
In [ ]:
help(enumerate)
In [ ]:
type(2)
In [ ]:
type(enumerate(a))
In [ ]:
enumerate(a)
In [ ]:
type(list(enumerate(a)))
In [ ]:
# BREAK and CONTINUE STATEMENT
for x in range(2, 10):
for y in range(2, x):
# % is the modulus or remainder operator
if x % y == 0:
# // is the floor division operator
print(x, 'equals', y, '*', x//y)
break
else:
# loop fell through without finding a factor
print(x, 'is a prime number')
In [ ]:
# Let's talk about the importance of indentation
In [ ]:
for x in range(2, 10):
for y in range(2, x):
if x % y == 0:
print(x, 'equals', y, '*', x//y)
break
else:
# loop fell through without finding a factor
print(x, 'is a prime number')
In [ ]:
for num in range(2, 10):
if num % 2 == 0:
print("Found an even number", num)
#continue
break
print("Found a number", num)
In [ ]:
# Remember:
a, b, c = 0, 1, int(input("enter a number: ")) # this is a multiple assignment
while (b < c):
print(b, end=', ')
a, b = b, a + b
In [ ]:
# used as a placeholder while developing code
def fib(n):
    """Placeholder for the Fibonacci routine; it does nothing yet."""
    # Not implemented: the argument is ignored and the call evaluates to None.
    pass
In [ ]:
# def is a keyword
# def meaning definition
# fib looks like a procedure because it has no return statement, but it is still a function: it implicitly returns None
n = int(input("enter a number: "))
def fib(n):
    """Empty stub: accept n, do nothing, and implicitly return None."""
    pass
In [ ]:
# def is a keyword
# def meaning definition
# fib looks like a procedure because it has no return statement, but it is still a function: it implicitly returns None
n = int(input("enter a number: "))
def fib(n):  # write Fibonacci series up to n
    """Print the Fibonacci numbers smaller than n on a single line.

    The series is printed starting from 1; every value is followed by
    ", " and the line is closed with a newline.  Nothing is returned,
    so the call evaluates to None.
    """
    # The docstring must be the first statement of the body; placed after
    # an assignment it is just a discarded string and help(fib) shows nothing.
    a, b = 0, 1  # this is a multiple assignment
    while b < n:
        print(b, end=', ')
        a, b = b, a + b
    print()  # terminate the output line
fib(n)
In [ ]:
fib
In [ ]:
# Aliasing a function
f = fib
f(3000)
In [ ]:
fib(0)
In [ ]:
print(fib(0))
In [ ]:
# It is simple to write a function that returns a list of the
# numbers of the Fibonacci series, instead of printing it:
n = int(input("enter a number: "))
def fib2(n):  # return Fibonacci series up to n
    """Collect the Fibonacci numbers smaller than n into a list.

    The series starts at 0; an empty list is returned when n <= 0.
    """
    series = []
    current, following = 0, 1
    while current < n:
        series.append(current)
        # advance the pair one step along the series
        current, following = following, current + following
    return series
fib2(n)
In [ ]:
# LAMBDA EXPRESSIONS
# lambda keyword
# borrowed from functional programming, which is a manifestation of lambda calculus
# a lambda is an anonymous function that can be used wherever a function object
# is required. it is a normal function with some syntactic sugar.
# lambda arguments: expression, giving a function object
# One-, two- and three-argument lambdas bound to names for the examples below.
funcX = lambda x: x + 1  # f(x) = x + 1
funcXY = lambda x, y: x + y  # f(x, y) = x + y
funcXYZ = lambda x, y, z: (x * y) + z ** 3  # f(x, y, z) = x * y + z^3
In [ ]:
print(funcX(5))
print(funcXY(2,3))
print(funcXYZ(2,3,4))
In [ ]:
n = int(input("enter a number: "))
def make_incrementor(n):
    """Build and return a one-argument function that adds n to its input."""
    # The lambda closes over n, so the returned function remembers it.
    return lambda x: x + n
f = make_incrementor(n)
In [ ]:
f(12)
In [ ]:
f(1)
In [ ]:
2**3
In [ ]:
pow(2,3)
In [ ]:
# numpy, Numerical Python, is a package for scientific computing
import numpy as np
In [ ]:
# pandas, Python Data Analysis library
import pandas as pd
In [ ]:
# 2D plotting library
import matplotlib as mpl
In [ ]:
import matplotlib.pyplot as plt
In [ ]:
help
In [ ]:
help(pd)
In [ ]:
help(np)
In [ ]:
help(plt)
In [ ]:
# magic functions in jupyter
# note: this is not python
# these are special jupyter notebook functions
# that act as settings for the notebook
%who
In [ ]:
# %pylab inline
%matplotlib inline
x = np.arange(0, 10, 0.1);
y = np.sin(x)
plt.plot(x, y)
In [ ]:
# creating a DataFrame by passing a numpy array
dates = pd.date_range('20160101', periods=12)
#dates = pd.date_range('20160101', periods=12, freq='M')
#dates = pd.date_range('20160101', periods=12, freq='3M')
#dates = pd.date_range('20040101', periods=144, freq='M')
dates
In [ ]:
help(pd.date_range)
In [ ]:
df = pd.DataFrame(np.random.randn(12,4), index=dates, columns=list('ABCD'))
#df = pd.DataFrame(np.random.randn(144,4), index=dates, columns=list('ABCD'))
df
In [ ]:
df.dtypes
In [ ]:
#df.<TAB>
In [ ]:
df.info()
In [ ]:
# Exploring the data
df.head()
In [ ]:
df.head(1)
In [ ]:
df.tail(3)
In [ ]:
# decomposing the underlying numpy data
In [ ]:
df.index
In [ ]:
df.columns
In [ ]:
df.values
In [ ]:
# getting some summary statistics
df.describe()
In [ ]:
#transposing data
df.T
In [ ]:
# sorting by an axis
df.sort_index(axis=1, ascending=False)
In [ ]:
help(df.sort_index)
In [ ]:
df.sort_values(by='B')
In [ ]:
# selecting (chopping up) data
In [ ]:
# by column
df['A']
In [ ]:
# by row
df[0:3]
In [ ]:
# by row also
df['2016-01-05':'2016-01-07']
In [ ]:
# by label
df.loc[dates[0]]
In [ ]:
# on a multi-axis by label
df.loc[:, ['A', 'B']]
In [ ]:
df.loc['20160102':'20160104',['A','B']]
In [ ]:
# Reduction in the dimensions of the returned object
df.loc['20160102',['A','B']]
In [ ]:
# For getting a scalar value
df.loc[dates[0],'A']
In [ ]:
# For getting fast access to a scalar (equivalent to the prior method)
df.at[dates[0],'A']
In [ ]:
help(df.iloc)
In [ ]:
# by position
df.iloc[3]
In [ ]:
df.iloc[3:5,0:2]
In [ ]:
df.iloc[[1,2,4],[0,2]]
In [ ]:
df.iloc[1:3,:]
In [ ]:
df.iloc[:,1:3]
In [ ]:
df.iloc[1,1]
In [ ]:
df.iat[1,1]
In [ ]:
# Boolean Indexing
In [ ]:
df[df.A > 0]
In [ ]:
df[df > 0]
In [ ]:
# Descriptive Statistics
In [ ]:
help(df.mean)
In [ ]:
df.mean()
In [ ]:
df.mean(1)
In [ ]:
# Applying functions to data
In [ ]:
help(np.cumsum)
In [ ]:
df.apply(np.cumsum)
In [ ]:
df.apply(lambda x: x.max() - x.min())
In [ ]:
# Reading and Writing Data
In [ ]:
# this is not python
# these are command-line (bash) commands
ls -laph data/
In [ ]:
# export the dataframe we created to a csv file
df.to_csv('data/data01.csv')
In [ ]:
# import the csv to a pandas dataframe
pd.read_csv('data/data01.csv')
In [ ]:
# %pylab inline
%matplotlib inline
plt.figure()
# plt.figure(figsize=(20, 10))
df.plot()
plt.legend(loc='best');
In [ ]:
help(plt.figure)
In [ ]:
# figsize is a keyword argument of plt.figure (width, height in inches);
# calling figsize(...) as a function raises NameError.
fig = plt.figure(figsize=(15, 10))
# Draw on the axes of the figure created above so the requested size applies
# to the plot; without ax, DataFrame.plot() opens a separate figure.
df.plot(ax=fig.gca())
plt.legend(loc='best');
In [ ]:
aapl = pd.read_csv('data/AAPL.csv')
aapl.info()
In [109]:
%%latex
$$c = \sqrt{a^2 + b^2}$$
\begin{equation}
c = \sqrt{a^2 + b^2}
\end{equation}
In [110]:
from IPython.display import Image
i = Image(filename='./ml_map.png')
i
Out[110]:
In [ ]:
In [ ]: