In [ ]:
from numpy import *
from pandas import *
#using import * brings ALL the packages classes and functions into the name space,
#for large packages you can bring in only some parts by doing from [name] import [class/object]
#to avoid name space conflicts you can also rename what you import
#import pandas and rename it
import pandas as pd
#import the Series and dataframe classes
from pandas import Series, DataFrame
#common naming conventions. numpy is np, pandas pd, and matplotlib is plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [ ]:
#no data types need to be associated with variables
#all variables are "Objects" in python.
num = 5
alpha = 'hello world'
mixed = str(num) + ' ' + alpha
print num
print alpha
print mixed
In [ ]:
#this is a list
a=[0,1,2,3,4,5,6,7,8,9]
a
In [ ]:
#use [x] to access the item at location x in the list
#all lists start at 0
print 'first item', a[0]
#you can also index from back by using -1 for last, -2 for "second from last" etc
print 'last item', a[-1]
#you can "slice" a list using : and ::
print 'first three items', a[:3]
print 'last three items', a[-3:]
print 'start at the 4th item', a[3:]
print 'the odd items', a[::2]
In [ ]:
#all lists have a length, use len(list_name) to get the number of items in it
#mathematical functions can also, in general, be applied to lists if they contain numbers
print 'length of list', len(a)
print 'largets number in list', max(a)
print 'smallest number', min(a)
print 'average', mean(a)
#we can find the index of the max and min using argmax() and argmin()
print 'the largest number in the list is', max(a), 'and is found at index:', argmax(a)
In [ ]:
#items can be added to a list by using list_name.append(item)
#add 3 to the list
a.append(3)
#add 4
a.append(4)
a
In [ ]:
#finally, we can de dupe a list by using the unique function
#we can also sort a list using sort(list_name)
print 'sorted list', sort(a)
print 'select distinct values', unique(a)
In [ ]:
#items can be removed from the list using list_name.remove(item)
#remove 3 from the list
a.remove(3)
#remove 4
a.remove(4)
a
In [ ]:
#we can itterate over n items using a for loop
#a shortcut for making the list [0,...,n-1] is the function range(n)
#print the numbers 0 -4
for i in range(5):
print i
In [ ]:
#print the numbers 0 -4
for i in range(0,5):
print i
In [ ]:
#print the a list
for i in a:
print i
In [ ]:
#itterating over something and appending is a common way of building lists
#create array
output=[]
#build the list holding the first 4 squares by using a for loop
for i in range(5):
output.append(i**2) #**2 operator means squared
output
In [ ]:
#this works but is slow, a faster way to do this is to use list comprehension
output2=[i**2 for i in range(5)]
output2
In [ ]:
#we can also put conditions in the list comprehension
#build the first 10 squares for all the even numbers
output3=[i**2 for i in range(10) if i%2==0] # % is means modulus (remainder)
output3
In [ ]:
#the zip command lines up two lists together
L1=[1,2,3]
L2=['x','y','z']
#the output is a list of tuples
print zip(L1,L2)
In [ ]:
#if they are of different size, it gets chopped off
L1=[1,2,3,4]
L2=['x','y','z']
#the output is a list of tuples
print zip(L1,L2)
In [ ]:
#it is very common to itterate over lists using zip
for list1,list2 in zip(L1,L2):
print list1,list2
In [ ]:
#this can also be done with list comprehension
print [(x,y) for x,y in zip(L1,L2)]
#we can also make more complex lists
output=[(x,y,str(x)+y) for x,y in zip(L1,L2)]
#itterate over our output for a nicer looking print statement
for z in output:
print z
#we can also do this differently
for a1,a2,a3 in output:
print a1,a2,a3
In [ ]:
if 1==1:
print 'one equals one'
In [ ]:
if 1<0:
print 'one is less than zero'
else:
print '1 does not equal to 0'
In [ ]:
if 1<>1: #you can also use the != operator
print 'one does not equal to one '
elif 1==0:
print '1 is equal to zero'
else:
print '1 does not equal to 0'
In [ ]:
if (1==1 and 0<1):
print 'and operator'
if (1==1 or 0==1):
print 'or operator'
In [ ]:
#counter
i = 0
#loop while i < 5
while i < 5:
print i
#increment counter
i = i + 1
In [ ]:
#define functions
def SimpleAdd(Number):
return Number
def SimpleAdd2(N1, N2):
return N1 + N2
def SimpleAdd3(N1=2, N2=2):
return N1 + N2
#return a 10
print SimpleAdd(10)
#return 2 + 5
print SimpleAdd2(2,5)
#return 1 + 3
print SimpleAdd3(1,3)
#use default parameters
print SimpleAdd3()
In [ ]:
from xlwt import *
In [ ]:
# Create a workbook
wb = Workbook()
# Add a sheet/tab
ws0 = wb.add_sheet('Picture_Test')
# Add picture at location (2,1)
# Note: Only accepts bmp files
# i.e. ws0.insert_bitmap('C:\Users\username\Desktop/test.bmp', 2, 1)
ws0.insert_bitmap('DataFiles\\testpic.bmp', 2, 1)
# Write excel file
# Note: This will overwrite any other files with the same name
wb.save('AddImage.xls')
In [ ]:
#convert an image file to a .bmp image file using PIL
from PIL import Image
# Convert to bmp
def ConvertImg(file_in,file_out):
img = Image.open(file_in)
#print len(img.split()) # test
if len(img.split()) == 4:
# prevent IOError: cannot write mode RGBA as BMP
r, g, b, a = img.split()
img = Image.merge("RGB", (r, g, b))
img.save(file_out)
else:
img.save(file_out)
ConvertImg('DataFiles/pngInput.png', 'DataFiles/bmpOutput.bmp')
In [ ]:
import os
# Delete file
os.remove('DataFiles/bmpOutput.bmp')
In [ ]:
from datetime import datetime, date, time
# Get today's date
CurrentTime = datetime.now()
CurrentTime = CurrentTime.strftime("%Y-%m-%d_%H-%M-%S")
CurrentTime
In [ ]:
from sqlalchemy import *
In [ ]:
# DB Parameters
ServerName = "devdb4\sql4"
Database = "BizIntel"
# To create a temp table just add a "#" to the table name
# To create a global table just add a "##" to the table name
TableName = "#TableCheckTest"
# pyobdc must be installed
engine = create_engine('mssql+pyodbc://' + ServerName + '/' + Database)
conn = engine.connect()
In [ ]:
# Required for querying tables
metadata = MetaData(conn)
## Create table
tbl = Table(TableName, metadata,
Column('DateAdded', DateTime),
Column('Revenue', Integer)
)
# This actually creates a table in the sql database
# checkfirst=True >> create if table does not exist
tbl.create(checkfirst=True)
In [ ]:
# Create data to insert into table
# Create a dataframe with dates as your index
data = [1,2,3,4,5,6,7,8,9,10]
idx = date_range('1/1/2012', periods=10, freq='MS')
df = DataFrame(data, index=idx, columns=['Revenue'])
# Remove the index if you want to include it in the insert
df = df.reset_index()
#print df
In [ ]:
# Iterate through each of the columns and insert into table
for x in df.iterrows():
#print list(x[1])
sql = tbl.insert(list(x[1]))
conn.execute(sql)
# select all form table
sql = tbl.select()
result = conn.execute(sql)
for row in result:
print 'Write to SQL', row
In [ ]:
# Select all
sql = tbl.select()
# run sql code
result = conn.execute(sql)
In [ ]:
# Insert to a dataframe
df2 = DataFrame(data=list(result), columns=result.keys())
# Convert data types
df2.DateAdded = df2.DateAdded.astype('datetime64')
df2.Revenue = df2.Revenue.astype('int')
print ' '
print 'Data Types'
print df2.dtypes
# Set index to dataframe
df2 = df2.set_index('DateAdded')
print ' '
print 'Read from SQL', df2
#print df2.head().to_string()
# Close connection
conn.close()
In [ ]:
# Copy paste data into a dataframe
# Make sure you first copy data before running the code
# This example assumes you are copying data with the first column composed of dates
# index_col=0, means the date column is in the first column
# parse_dates=True, makes sure dates are converted to datetime datatypes
df = read_clipboard(index_col=0,parse_dates=True)
df
# This has worked for me in SQL, Excel, and LibreOffice
In [ ]:
# Export dataframe to Excel
# you can export to both .xls and .xlsx
df.to_excel('dfToExcel.xls','Sheet Name')
In [ ]:
# Import packages
from xlwt import *
from pandas import *
import numpy as np
In [ ]:
# Create a dataframe with dates as your index
data = [1,2,3,4,5,6,7,8,9,10]
idx = date_range('1/1/2012', periods=10, freq='MS')
df = DataFrame(data, index=idx, columns=['Revenue'])
df
In [ ]:
# Create a workbook
wb = Workbook()
# Add a sheet/tab
ws0 = wb.add_sheet('DataFrame_Test')
In [ ]:
# Write dataframe
# formatting values in excel
date_xf = easyxf(num_format_str='DD/MM/YYYY') # sets date format in Excel
num_xf = easyxf(num_format_str='#0.000000') # sets date format in Excel
# Iterate through each of the columns and insert into sheet
for i, (date, row) in enumerate(df.T.iteritems()):
#print i, date, row[0],type(row[0]).__name__
# Skip first line
if i > 0:
if type(date).__name__ == 'Timestamp':
ws0.write(i,0,date,date_xf)
elif type(date).__name__ == 'str':
ws0.write(i,0,date)
else:
ws0.write(i,0,date.astype(np.float),num_xf)
if type(row[0]).__name__ == 'Timestamp':
ws0.write(i,1,row[0].astype(np.float),date_xf)
elif type(row[0]).__name__ == 'str':
ws0.write(i,1,row[0].astype(np.float))
else:
ws0.write(i,1,row[0].astype(np.float),num_xf)
In [ ]:
# Remove the index if you want to include it in the insert
df = df.reset_index()
# Rename columns
df.columns = ['DateAdded', 'Revenue']
# Add column Headers
for i, cols in enumerate(df.columns):
#print i, cols
ws0.write(0,i,cols)
In [ ]:
# Write excel file
# Note: This will overwrite any other files with the same name
wb.save('DFtoExcel.xls')
In [ ]:
#import statement (automatic in pylab)
import matplotlib.pyplot as plt
In [ ]:
#make some data
x=linspace(1,50,100) # linspace(start,end,num_points)
y=np.sin(x)
#a line graph:
plot(x,y)
In [ ]:
#a scatter plot
scatter(x,y)
In [ ]:
#another way to make a scatter plot
plot(x,y,'x')
In [ ]:
#by default all the plots go into the same figure. we can make a new figure by calling figure()
#we can also get a refrence to the figure
#WITHOUT calling figure()
#a line graph:
plot(x,y)
#a scatter plot
scatter(x,y)
In [ ]:
#calling figure()
#a line graph:
plot(x,y)
fig=figure()
#a scatter plot
scatter(x,y)
In [ ]:
#plot the data, show data points as x's, connect them with a line, make it red and kinda see through
#name the data
plt.plot(x,y, 'x-', color='red', alpha=.5, label='the data')
#add a title
plt.title('The Title')
#name the axis
xlabel('x axis label')
ylabel('y axis label')
#the legend
plt.legend(loc='best') #loc = 'best' tries to make the legend not overlap the data
#turn a grid on
plt.grid()
#save the figure as an image
#this will create a ".png" file in the file location you run the code in
plt.savefig('test_fig.png')
In [ ]:
#subplots and multiple lines
t=linspace(1,100,100)
x1=np.sin(20./(2*pi) * t)
x2=np.sin(40./(2*pi)*t)
x3=x1+x2
figure()
subplot(2,1,1)
plot(t,x1,label='x1')
plot(t,x2,label='x2')
plot(t,x3,label='x3')
legend(loc='best')
subplot(2,1,2)
#plot a histogram
#we save the histogram data but it is not needed to do so for plotting purposes
x1hist=hist(x1,normed='true', bins=25, alpha=.25,label='x1')
legend(loc='best')
In [ ]:
#more histograms
x1=np.random.randn(1000)
x2=2.0*np.random.randn(1000)+2
x1h=hist(x1,bins=50,alpha=.25)
x2h=hist(x2,bins=50,alpha=.25)
In [ ]:
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
In [ ]:
#setup the projection
m = Basemap(projection='merc',llcrnrlat=20,urcrnrlat=50,\
llcrnrlon=-130,urcrnrlon=-60,lat_ts=20,resolution='i')
In [ ]:
#create some points to plot
#provided is a csv file with the lat/long of US states
import pandas as pd
from pandas import DataFrame
data=pd.read_csv('DataFiles\\usa_lat_long.csv') #it has some duplicates
data.head()
In [ ]:
#I just want the lat/long in a list
points=data[['longitude','latitude']].values
In [ ]:
#transform the points into map coordinates
transform_points=[m(lng, lat) for lng, lat in points]
In [ ]:
#do all the drawing:
fig=figure(figsize=(10,10)) #make a larger than default image
fig.add_subplot(1,1,1) #not strictly required
m.drawmapboundary(fill_color='white')
m.fillcontinents(color='white',lake_color='white')
m.drawcoastlines(color='black', linewidth=.3)
m.drawcountries(color='black', linewidth=.3)
m.drawstates(color='black', linewidth=.3)
#plot the points on the map. These are just regular calls to matplotlib with x,y data
#you could also do this in one shot by using plot(xlist, ylist...)
#or using scatter().
for x,y in transform_points:
plot(x,y,'o',color='red',ms=10*rand()) #plot them at random sizes
#we can plot some labels
text(transform_points[7][0],transform_points[7][1],'California',fontsize=15)
text(transform_points[12][0],transform_points[12][1],'Florida',fontsize=15)
#draw some great circles
lng1,lat1 =points[12]
lng2,lat2 =points[7]
m.drawgreatcircle(lng1,lat1,lng2,lat2,linewidth=3,color='blue',alpha=.5)
In [ ]:
# Objective Function
#Max C = 2x + 3y
# Constraints
#2x+y<=15
#x+3y<=20
#x>=0
#y>=0
# Solution
#(5,5)
In [ ]:
from scipy.optimize import minimize
In [ ]:
# Since the objective is to maximize C, we
# multiple the objective function by -1
# x[0] equals x
# x[1] equals y
## Objective Function
fun = lambda x: -1*(2*x[0] + 3*x[1])
In [ ]:
# Equality constraint are defined as follows:
# This is not in the correct format: x = -2
# Equation has to always be equal to zero: x + 2 = 0
# i.e. {'type': 'eq', 'fun': lambda x: x[0] + 2}
#-------------------------------------------------------------
# Inequality contraints are defined as follows:
# This is not in the correct format: 2x+y<=15
# Equation has to always be greater than or equal to zero: 0 <= 15 + -2x + -y
## Contraints
cons = ({'type': 'ineq', 'fun': lambda x: 15 + -2*x[0] + -1*x[1]}, #15-2x-y>=0
{'type': 'ineq', 'fun': lambda x: 20 + -1*x[0] + -3*x[1]}, #20-x-3y
{'type': 'ineq', 'fun': lambda x: x[0]}, #x>=0
{'type': 'ineq', 'fun': lambda x: x[1]}) #y>=0
In [ ]:
# STILL NOT SURE WHAT THIS MEANS
## Bounds
bnds = ((None, None), (None, None))
In [ ]:
## Solver
# = minimize(Objective Function, Initial Guess, Solver to use, Bounds, Constraints)
res = minimize(fun, (0, 0), method='SLSQP', bounds=bnds, constraints=cons)
res
# solution is in res.x
In [ ]:
from sqlalchemy import *
In [ ]:
# Parameters
ServerName = "Type Server Name Here"
Database = "Type database name here"
TableName = "Type table name here"
# Note: pyobdc must be installed
engine = create_engine('mssql+pyodbc://' + ServerName + '/' + Database)
conn = engine.connect()
In [ ]:
# Required for querying tables
metadata = MetaData(conn)
# Table to query
tbl = Table(TableName, metadata, autoload=True, schema="dbo")
# select all where
sql = select(tbl.c.Column1 == 'Type value to match')
# select specific columns
sql = select([tbl.c.Column1, tbl.c.Column2, tbl.c.Column3])
# select top N
sql = select([tbl.c.Column1, tbl.c.Column2, tbl.c.Column3],limit=10)
# select specific column and a where clause
sql = select([tbl.c.Column1], tbl.c.Column1 == 'Type value to match')
# and, or, not, in
sql = select([tbl], and_(tbl.c.Column1 < 4, tbl.c.Column1 != 1))
sql = select([tbl], or_(tbl.c.Column1 < 4, tbl.c.Column1 != 1))
sql = select([tbl], not_(tbl.c.Column1 > 4))
sql = select([tbl], tbl.c.Column1.in_( (1,4) ))
# like, between
sql = select([tbl], tbl.c.Column1.startswith('M'))
sql = select([tbl], tbl.c.Column1.like('%a%'))
sql = select([tbl], tbl.c.Column1.endswith('n'))
sql = select([tbl], tbl.c.Column1.between(30,39),limit=10)
result = conn.execute(sql)
for row in result:
print row
# Close connection
conn.close()
In [ ]:
# Import related packages
from IPython.display import YouTubeVideo,HTML,Image
In [ ]:
# Embed Videos
YouTubeVideo('BHEr1dBc0yM');
In [ ]:
# Embed WebPages
HTML('<iframe src=http://m.bing.com/ width=700 height=350></iframe>');
In [ ]:
# Embed Images
Image(filename='DataFiles\infiniteenergy.jpg');
In [ ]: