iPython 101 - using numpy, scipy, matplotlib, sqlite3 and Bokeh.

It takes a long time to load - be patient!


In [ ]:
# Smoke test: echo a greeting to confirm the kernel is running.
a = "hello world - BRUCE"
print(a)
import time

# Stamp the output with the current local date and time of this run.
#print("date and time when this Notebook was run on http://localhost:8888/notebooks/ipython_101_notebook-Copy1.ipynb")
print("Date & Time : ",time.strftime("%Y-%m-%d  %H:%M:%S"))

In [63]:
import pandas as pd

In [64]:
# Load the UFO reports CSV over HTTP (via a bit.ly short link) into a DataFrame.
ufo=pd.read_csv('http://bit.ly/uforeports')

In [65]:
# Preview the first rows of the UFO reports.
ufo.head()


Out[65]:
City Colors Reported Shape Reported State Time
0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00
1 Willingboro NaN OTHER NJ 6/30/1930 20:00
2 Holyoke NaN OVAL CO 2/15/1931 14:00
3 Abilene NaN DISK KS 6/1/1931 13:00
4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00

In [66]:
# (rows, columns) of the UFO reports frame.
ufo.shape


Out[66]:
(18241, 5)

In [67]:
# Row labels of the UFO reports frame.
ufo.index


Out[67]:
RangeIndex(start=0, stop=18241, step=1)

In [68]:
# Column labels of the UFO reports frame.
ufo.columns


Out[68]:
Index(['City', 'Colors Reported', 'Shape Reported', 'State', 'Time'], dtype='object')

In [71]:
# Add a "City, State" label column by element-wise string concatenation.
ufo['Location'] = ufo['City'] + ", " + ufo['State']

In [ ]:


In [72]:
# Preview again to confirm the new Location column is present.
ufo.head()


Out[72]:
City Colors Reported Shape Reported State Time Location
0 Ithaca NaN TRIANGLE NY 6/1/1930 22:00 Ithaca, NY
1 Willingboro NaN OTHER NJ 6/30/1930 20:00 Willingboro, NJ
2 Holyoke NaN OVAL CO 2/15/1931 14:00 Holyoke, CO
3 Abilene NaN DISK KS 6/1/1931 13:00 Abilene, KS
4 New York Worlds Fair NaN LIGHT NY 4/18/1933 19:00 New York Worlds Fair, NY

In [73]:
# A single column accessed as an attribute is a pandas Series.
type(ufo.Location)


Out[73]:
pandas.core.series.Series

In [88]:
# Print only the Location column of the first ten rows as plain text,
# with the row index suppressed.
#ufo.Location
print( (ufo.head(10)).to_string(columns=['Location'],index=False)) #bugger, this is tricky


Location
              Ithaca, NY
         Willingboro, NJ
             Holyoke, CO
             Abilene, KS
New York Worlds Fair, NY
         Valley City, ND
         Crater Lake, CA
                Alma, MI
             Eklutna, AK
             Hubbard, OR

In [ ]:


In [109]:
def A100_initialise_dataframe():
    """Download the OpenFlights airports table into the global ``AAPL`` and print a summary.

    Side effects:
        Rebinds the module-level name ``AAPL`` to the freshly loaded DataFrame
        and prints its type, dtypes, ``describe()``, ``info()``, ``head()`` and
        the current ``display.max_columns`` option.

    Returns:
        None.
    """
    global AAPL
    print("BEGIN A100_initialise_dataframe"+"\n")

    # The source file has no header row. Sample records:
    #1,"Goroka","Goroka","Papua New Guinea","GKA","AYGA",-6.081689,145.391881,5282,10,"U","Pacific/Port_Moresby"
    #507,"Heathrow","London","United Kingdom","LHR","EGLL",51.4775,-0.461389,83,0,"E","Europe/London"
    #503,"City","London","United Kingdom","LCY","EGLC",51.505278,0.055278,19,0,"E","Europe/London"
    column_names = ['Airport_ID', 'Name', 'City', 'Country', 'IATA_FAA', 'ICAO',
                    'Latitude', 'Longitude', 'Altitude', 'Timezone', 'DST',
                    'Tz database_time_zone']
    AAPL = pd.read_csv(
        "https://raw.githubusercontent.com/jpatokal/openflights/master/data/airports.dat",
        header=None,
        sep=',',
        names=column_names,
        # Read exactly as many leading fields as there are names.
        # NOTE(review): if the upstream file carries extra trailing fields,
        # pandas would otherwise absorb the leading columns into the index and
        # shift every name; confirm against the current file layout.
        usecols=list(range(len(column_names))),
    )

    print("type(AAPL) : \n", type(AAPL))
    print("\n")
    print("AAPL.dtypes : \n", AAPL.dtypes)
    print("\n")
    print("AAPL.describe() : \n", AAPL.describe())
    print("\n")
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so print the label first and then call it (wrapping it in print() emitted
    # the report followed by a stray "None").
    print("AAPL.info() : ")
    AAPL.info()
    print("\n")
    print("AAPL.head() : \n", AAPL.head())

    print("\n")
    print("display.max_columns = ", pd.get_option("display.max_columns"))

    print("END A100_initialise_dataframe"+"\n")
    return

def A200_Method_I():
    """Method 1: print every row of the global ``AAPL`` whose Altitude equals the column maximum.

    Uses a boolean mask, so ties for the maximum are all printed. Returns None.
    """
    print("BEGIN A200_Method_I"+"\n")
    altitudes = AAPL['Altitude']
    at_peak = altitudes == altitudes.max()
    print(AAPL[at_peak])
    print("\n")
    print("END A200_Method_I"+"\n")
    return None



def A300_Method_II():
    """Method 2: print the highest airport by sorting the global ``AAPL`` on Altitude.

    Prints the first row of the descending sort, then the maximum altitude
    value on its own. Returns None.
    """
    print("BEGIN A300_Method_II"+"\n")
    print("\n")
    by_altitude_desc = AAPL.sort_values(['Altitude'], ascending=False)
    top_row = by_altitude_desc.head(1)
    print(top_row)
    print("\n")
    peak = AAPL['Altitude'].max()
    print(peak)
    print("\n")
    print("END A300_Method_II"+"\n")
    return None



def A400_make_dataframes():
    """Derive the London and London/UK airport subsets from the global ``AAPL``.

    Side effects:
        Binds the module-level names ``BAPL`` (rows of ``AAPL`` whose City is
        "London") and ``CAPL`` (the rows of ``BAPL`` whose Country is
        "United Kingdom", indexed by Country), and prints ``CAPL`` plus a few
        type demonstrations.

    Returns:
        None.
    """
    # Without this declaration the assignments below create locals only, and
    # code that reads the global CAPL afterwards works solely when a stale
    # CAPL happens to be left over in the kernel.
    global BAPL, CAPL

    print("BEGIN A400_make_dataframes"+"\n")
    print("\n")
    BAPL = AAPL[AAPL['City'] == "London"]

    # set_index returns a new frame; this avoids mutating a filtered slice in
    # place (the usual source of SettingWithCopyWarning).
    CAPL = BAPL[BAPL['Country'] == "United Kingdom"].set_index('Country')
    print(CAPL.head(18))

    # Demonstration: splitting the CSV text of a column produces a list (of strings).
    print("========================= \n")
    print(
          type(
               (CAPL.to_csv(columns=['Latitude'], sep=',', index=False)).split()
              )
         )
    print("========================= \n")

    print("========================= \n")
    print(
          type(
               (CAPL.to_csv(columns=['Longitude'], sep=',', index=False)).split()
              )
         )
    # For comparison: a hand-written list literal is also a list.
    BW_lat = [51.477500, 51.148056]
    print(type(BW_lat))

    print("========================= \n")
    print("\n")

    print("END A400_make_dataframes"+"\n")
    return


def A000_make_stuff():
    """Template stage: prints only its BEGIN/END banners with blank lines between. Returns None."""
    banner_lines = (
        "BEGIN A000_make_stuff"+"\n",
        "\n",
        "\n",
        "END A000_make_stuff"+"\n",
    )
    for text in banner_lines:
        print(text)
    return None



def A500_Render_Gmap():
    """Plot the airports in the global ``CAPL`` as purple circles on a Google map.

    Reads the ``Latitude`` and ``Longitude`` columns of ``CAPL``, writes the
    plot to ``gmap_plot.html`` and opens it via ``show``.

    Returns:
        None.
    """
    print("BEGIN A500_Render_Gmap"+"\n")

    # Map centre is London Gatwick (lat 51.148056, lon -0.190278);
    # London Heathrow is at lat 51.477500, lon -0.461389.
    map_options = GMapOptions(lat=51.148056, lng=-0.190278, map_type="roadmap", zoom=9)

    plot = GMapPlot(
        x_range=DataRange1d(), y_range=DataRange1d(), map_options=map_options, title="London"
    )

    # Take the coordinates straight from the columns as numbers. The previous
    # to_csv(...).split() round trip produced strings and also included the
    # header token ("Latitude"/"Longitude") as the first data point.
    source = ColumnDataSource(
        data=dict(
            lat=CAPL['Latitude'].tolist(),
            lon=CAPL['Longitude'].tolist(),
        )
    )

    circle = Circle(x="lon", y="lat", size=10, fill_color="purple", fill_alpha=0.8, line_color=None)
    plot.add_glyph(source, circle)

    plot.add_tools(PanTool(), WheelZoomTool(), BoxSelectTool())
    output_file("gmap_plot.html")
    show(plot)
    print("\n")
    print("END A500_Render_Gmap"+"\n")
    return


import pandas as pd 
 
from bokeh.plotting import figure 
from bokeh.io import output_file, show
from bokeh.models import (
  GMapPlot, GMapOptions, ColumnDataSource, Circle, DataRange1d, PanTool, WheelZoomTool, BoxSelectTool
)

# Run the airport pipeline: load the table, derive the London/UK subset, map it.
A100_initialise_dataframe() 

# Two alternative "highest airport" demonstrations, currently disabled.
#A200_Method_I()

#A300_Method_II()

A400_make_dataframes()

A500_Render_Gmap()


BEGIN A100_initialise_dataframe

type(AAPL) : 
 <class 'pandas.core.frame.DataFrame'>


AAPL.dtypes : 
 Airport_ID                 int64
Name                      object
City                      object
Country                   object
IATA_FAA                  object
ICAO                      object
Latitude                 float64
Longitude                float64
Altitude                   int64
Timezone                 float64
DST                       object
Tz database_time_zone     object
dtype: object


AAPL.describe() : 
         Airport_ID     Latitude    Longitude      Altitude     Timezone
count  8107.000000  8107.000000  8107.000000   8107.000000  8107.000000
mean   4766.361046    26.817720    -3.921969    933.449365     0.169236
std    2943.205193    27.866953    85.900873   1624.740899     5.737326
min       1.000000   -89.999997  -179.877000  -1266.000000   -12.000000
25%    2091.500000     8.824928   -79.022498     38.000000    -5.000000
50%    4257.000000    34.987800     5.292028    272.000000     1.000000
75%    7508.500000    47.957599    49.785821   1020.000000     4.000000
max    9541.000000    82.517778   179.951000  14472.000000    13.000000


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8107 entries, 0 to 8106
Data columns (total 12 columns):
Airport_ID               8107 non-null int64
Name                     8107 non-null object
City                     8107 non-null object
Country                  8107 non-null object
IATA_FAA                 5880 non-null object
ICAO                     8043 non-null object
Latitude                 8107 non-null float64
Longitude                8107 non-null float64
Altitude                 8107 non-null int64
Timezone                 8107 non-null float64
DST                      8107 non-null object
Tz database_time_zone    8107 non-null object
dtypes: float64(3), int64(2), object(7)
memory usage: 760.1+ KB
AAPL.info() : 
 None


AAPL.head() : 
    Airport_ID                        Name          City           Country  \
0           1                      Goroka        Goroka  Papua New Guinea   
1           2                      Madang        Madang  Papua New Guinea   
2           3                 Mount Hagen   Mount Hagen  Papua New Guinea   
3           4                      Nadzab        Nadzab  Papua New Guinea   
4           5  Port Moresby Jacksons Intl  Port Moresby  Papua New Guinea   

  IATA_FAA  ICAO  Latitude   Longitude  Altitude  Timezone DST  \
0      GKA  AYGA -6.081689  145.391881      5282      10.0   U   
1      MAG  AYMD -5.207083  145.788700        20      10.0   U   
2      HGU  AYMH -5.826789  144.295861      5388      10.0   U   
3      LAE  AYNZ -6.569828  146.726242       239      10.0   U   
4      POM  AYPY -9.443383  147.220050       146      10.0   U   

  Tz database_time_zone  
0  Pacific/Port_Moresby  
1  Pacific/Port_Moresby  
2  Pacific/Port_Moresby  
3  Pacific/Port_Moresby  
4  Pacific/Port_Moresby  


display.max_columns =  20
END A100_initialise_dataframe

BEGIN A400_make_dataframes



                Airport_ID                           Name    City IATA_FAA  \
Country                                                                      
United Kingdom         492                          Luton  London      LTN   
United Kingdom         502                        Gatwick  London      LGW   
United Kingdom         503                           City  London      LCY   
United Kingdom         507                       Heathrow  London      LHR   
United Kingdom         548                       Stansted  London      STN   
United Kingdom        7393  London Euston Railway Station  London      NaN   
United Kingdom        7639             Paddington Station  London      QQP   
United Kingdom        7667    London Victoria Bus Station  London      NaN   
United Kingdom        7696              London St Pancras  London      STP   
United Kingdom        7722                London Heliport  London      NaN   
United Kingdom        8024     St Pancras Railway Station  London      QQS   
United Kingdom        8590                   All Airports  London      LON   
United Kingdom        8732           London - Kings Cross  London      QQK   
United Kingdom        8912                 Euston Station  London      NaN   
United Kingdom        8978               Victoria Station  London      NaN   
United Kingdom        9341         Waterloo International  London      NaN   
United Kingdom        9356                 Euston Station  London      QQU   
United Kingdom        9357         Waterloo International  London      QQW   

                ICAO   Latitude  Longitude  Altitude  Timezone DST  \
Country                                                              
United Kingdom  EGGW  51.874722  -0.368333       526       0.0   E   
United Kingdom  EGKK  51.148056  -0.190278       202       0.0   E   
United Kingdom  EGLC  51.505278   0.055278        19       0.0   E   
United Kingdom  EGLL  51.477500  -0.461389        83       0.0   E   
United Kingdom  EGSS  51.885000   0.235000       348       0.0   E   
United Kingdom    \N  51.528400  -0.133100         0       0.0   U   
United Kingdom    \N  51.515833  -0.176111         0       0.0   E   
United Kingdom    \N  51.494999  -0.144643         0       0.0   E   
United Kingdom    \N  51.530000  -0.125000         0       0.0   E   
United Kingdom  EGLW  51.470000  -0.177833        18       0.0   E   
United Kingdom    \N  51.532519  -0.126300        80       0.0   E   
United Kingdom    \N  51.508056  -0.127778        66       0.0   E   
United Kingdom    \N  51.532600   0.123300        72       0.0   E   
United Kingdom    \N  51.528400  -0.133100        89       0.0   E   
United Kingdom    \N  51.496400  -0.143910        48       0.0   E   
United Kingdom    \N  51.503100  -0.114700        10       0.0   E   
United Kingdom    \N  51.528400  -0.133100        89       0.0   E   
United Kingdom    \N  51.503100  -0.114700        10       0.0   E   

               Tz database_time_zone  
Country                               
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
United Kingdom         Europe/London  
========================= 

<class 'list'>
========================= 

========================= 

<class 'list'>
<class 'list'>
========================= 



END A400_make_dataframes



END A500_Render_Gmap


In [ ]:
# Row labels of the London/UK airport subset.
CAPL.index

In [ ]:


In [ ]:
# (rows, columns) of the London/UK airport subset.
CAPL.shape

In [ ]:
# Preview the first rows of the London/UK airport subset.
CAPL.head()

In [ ]:
# Re-key the subset by airport name. Rebinding (rather than inplace=True) and
# skipping when 'Name' is no longer a column keeps this cell safe to re-run;
# the in-place form raises KeyError on a second execution.
if 'Name' in CAPL.columns:
    CAPL = CAPL.set_index('Name')

In [ ]:
# Confirm the index now holds the airport names.
CAPL.index

In [ ]:
# Remaining column labels after re-indexing.
CAPL.columns

In [ ]:
# Show up to the first 100 rows of the subset.
CAPL.head(100)

In [ ]:
# Label-based lookup: Latitude of the row labelled 'All Airports'
# (relies on 'Name' having been made the index in the cell above).
CAPL.loc['All Airports', 'Latitude']

In [ ]:
__author__ = 'Bruce.Woodley'
# version 3
#http://sqlite.org/
#https://addons.mozilla.org/en-us/firefox/addon/sqlite-manager/
#For Ankur Pandey's  "sql learnings"
def main(db_path='music.sqlite3'):
    """Rebuild a small ``Tracks`` table, print its rows, then delete the rarely played ones.

    Args:
        db_path: SQLite database file to use. Defaults to ``'music.sqlite3'``
            in the current working directory (the original hard-coded path).

    Side effects:
        Drops and recreates the ``Tracks`` table in ``db_path``, inserts three
        rows, prints them, deletes rows with ``plays < 100`` and commits.
        Prints a timestamp when finished.

    Returns:
        None.
    """
    import sqlite3
    import time
    from contextlib import closing

    sample_tracks = [
        ('Thunderstruck', 20),
        ('My Way', 15),
        ('Lean On (by Major Lazer & DJ Snake)', 100),
    ]

    # One connection for the whole run, closed even if a statement fails.
    # (Previously the connection was closed and reopened after CREATE TABLE,
    # and the second connection was never closed.)
    with closing(sqlite3.connect(db_path)) as conn:
        cur = conn.cursor()
        cur.execute('DROP TABLE IF EXISTS Tracks')
        cur.execute('CREATE TABLE Tracks (title TEXT , plays INTEGER)')
        cur.executemany('INSERT INTO Tracks (title, plays) VALUES (?, ?)', sample_tracks)
        conn.commit()

        print("Tracks:")
        cur.execute('SELECT title, plays FROM Tracks')
        for row in cur :
            print(row)
        cur.execute('DELETE FROM Tracks WHERE plays < 100')
        conn.commit()
        cur.close()

    print("\n")
    print("Ankur's SQL Learnings executed on date and time: ",time.strftime("%Y-%m-%d  %H:%M:%S"))
    return

main()

In [ ]:
import pandas as pd 
from bokeh.plotting import figure 

# Download daily price history for three tickers, parsing the Date column as datetimes.
# NOTE(review): this rebinds the global AAPL, which the airport cell earlier in the
# notebook uses for the OpenFlights table; re-running those functions after this
# cell would operate on stock prices instead.
# NOTE(review): the ichart.yahoo.com CSV endpoint may no longer be served - confirm
# these URLs still resolve before relying on this cell.
AAPL = pd.read_csv( 
    "http://ichart.yahoo.com/table.csv?s=AAPL&a=0&b=1&c=2000&d=0&e=1&f=2015", 
    parse_dates=['Date']) 
#print(AAPL)

MSFT = pd.read_csv( 
    "http://ichart.yahoo.com/table.csv?s=MSFT&a=0&b=1&c=2000&d=0&e=1&f=2015", 
    parse_dates=['Date']) 

IBM = pd.read_csv( 
    "http://ichart.yahoo.com/table.csv?s=IBM&a=0&b=1&c=2000&d=0&e=1&f=2015", 
    parse_dates=['Date']) 

def make_figure():
    """Build a datetime-axis line chart of 'Adj Close' for the AAPL, IBM and MSFT frames.

    Reads the module-level ``AAPL``, ``IBM`` and ``MSFT`` DataFrames (columns
    ``Date`` and ``Adj Close``) and returns the configured Bokeh figure.

    NOTE(review): ``legend=``, assigning a plain string to ``.title`` and
    ``legend.orientation = "top_left"`` look like an older Bokeh API - confirm
    against the installed Bokeh version.
    """
    chart = figure(x_axis_type="datetime", width=700, height=300)

    # (frame, line colour, legend label), drawn in this order.
    series = (
        (AAPL, '#A6CEE3', 'AAPL'),
        (IBM, '#33A02C', 'IBM'),
        (MSFT, '#FB9A99', 'MSFT'),
    )
    for prices, colour, ticker in series:
        chart.line(prices['Date'], prices['Adj Close'], color=colour, legend=ticker)

    chart.title = "Stock Closing Prices"
    chart.grid.grid_line_alpha = 0.3
    chart.xaxis.axis_label = 'Date'
    chart.yaxis.axis_label = 'Price'
    chart.legend.orientation = "top_left"
    return chart

from bokeh.io import output_notebook, show 

# Send Bokeh output inline to the notebook, then build and display the chart.
output_notebook() 

p = make_figure() 

show(p)

In [ ]:
import numpy as np 
from bokeh.plotting import  show, figure 
from bokeh.io import output_notebook

# 400 evenly spaced sample points from -pi to 2*pi.
x = np.linspace(-1.0*np.pi, 2*np.pi, 400) 
#print(x)
y1 = np.sin(x) 
#y2 =np.exp(x)

# Render Bokeh output inline in the notebook.
output_notebook() 

# Scatter the samples as circle markers with an orange outline.
p = figure() 
p.circle(x, y1, legend="sin(x)",line_color="orange") 
#p.circle(x, y2, legend="exp(x)",line_color="orange") 




show(p)

In [ ]:


In [ ]:


In [ ]:
from bokeh.sampledata.autompg import autompg as df 
#from bokeh.charts import Scatter, output_file, show 
from bokeh.charts import Scatter  
from bokeh.io import output_notebook, show 



# Scatter of 'hp' against 'mpg' from the autompg sample data, coloured by the
# 'cyl' column and with the marker shape taken from the 'origin' column.
# NOTE(review): Scatter comes from bokeh.charts, which may not exist in newer
# Bokeh releases - confirm against the installed version.
scatter = Scatter(df, x='mpg', y='hp', color='cyl', marker='origin', 
                  title="mpg", xlabel="Miles Per Gallon", ylabel="Horsepower") 

#output_file('scatter.html') 
# Render inline in the notebook rather than to an HTML file.
output_notebook() 


show(scatter)

In [ ]:
#from bokeh.sampledata.autompg import autompg as df
from bokeh.sampledata.autompg import autompg
from bokeh.sampledata.iris import flowers
#from bokeh.sampledata.stocks  import AAPL

#print(type(df))
# Print the text form of the iris sample data object.
print(str(flowers))
#print(df.index)
#print(df.info)

In [ ]:
# Integer literals and base conversion. 0xfe is 254 in decimal.
a=0xfe
# -> 0xfe 254 0o376 0b11111110
print(hex(a), int(a), oct(a), bin(a))

# int(text, base) parses the same digits '010' under different bases:
print(int('010', 2)  )   # -> 2
print(int('010', 8)  )   # -> 8
print(int('010', 10)  )  # -> 10
print(int('010', 16)  )  # -> 16

print(int('ff', 16)  )   # -> 255

In [ ]:
import matplotlib.pyplot as plt
# Three sample points to draw as a single line.
x = [1, 2, 3]
y = [1, 2, 6]
# Create the figure and an axes on it; plt.plot below draws on the current axes.
fig = plt.figure()
ax = plt.axes()

plt.plot(x, y)
plt.show()

In [ ]:
'''
Example of selecting a transformation from two equally probable
transformations
'''
# NOTE: the statements in this cell were indented below a column-0 docstring,
# which Python rejects with "IndentationError: unexpected indent". Everything
# now starts at column 0.
import random


def transformation_1(p):
    """Return the point one unit to the right of and one unit below ``p``."""
    x = p[0]
    y = p[1]
    return x + 1, y - 1


def transformation_2(p):
    """Return the point one unit to the right of and one unit above ``p``."""
    x = p[0]
    y = p[1]
    return x + 1, y + 1


def transform(p):
    """Apply one of the two transformations, chosen uniformly at random, to ``p``."""
    # List of transformation functions
    transformations = [transformation_1, transformation_2]
    # Pick a random transformation function and call it
    t = random.choice(transformations)
    x, y = t(p)
    return x, y


def build_trajectory(p, n):
    """Return ``(xs, ys)`` for the start point ``p`` followed by ``n`` random steps.

    Each list has ``n + 1`` entries; entry 0 is the starting point itself.
    """
    x = [p[0]]
    y = [p[1]]
    for i in range(n):
        p = transform(p)
        x.append(p[0])
        y.append(p[1])
    return x, y


if __name__ == '__main__':
    # Plotting is only needed when the cell is run interactively, so the
    # import lives here and the helpers above stay usable without matplotlib.
    import matplotlib.pyplot as plt

    # Initial point
    p = (1, 1)
    n = int(input('Enter the number of iterations: '))
    x, y = build_trajectory(p, n)
    # Plot
    plt.plot(x, y)
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.show()

In [ ]:
'''
Draw a Barnsley Fern
'''
import random
import matplotlib.pyplot as plt

def transformation_1(p):
    """Affine map: (x, y) -> (0.85x + 0.04y, -0.04x + 0.85y + 1.6)."""
    px, py = p[0], p[1]
    return 0.85*px + 0.04*py, -0.04*px + 0.85*py + 1.6

def transformation_2(p):
    """Affine map: (x, y) -> (0.2x - 0.26y, 0.23x + 0.22y + 1.6)."""
    px, py = p[0], p[1]
    return 0.2*px - 0.26*py, 0.23*px + 0.22*py + 1.6

def transformation_3(p):
    """Affine map: (x, y) -> (-0.15x + 0.28y, 0.26x + 0.24y + 0.44)."""
    px, py = p[0], p[1]
    return -0.15*px + 0.28*py, 0.26*px + 0.24*py + 0.44

def transformation_4(p):
    """Collapse onto the y-axis: (x, y) -> (0, 0.16y)."""
    # Both coordinates are still read, as before, although x does not affect the result.
    _, py = p[0], p[1]
    return 0, 0.16*py

def get_index(probability):
    """Pick an index at random, weighted by the entries of ``probability``.

    Draws one uniform number and returns the first position whose running
    total of weights reaches it; falls back to the last index if none does.
    """
    draw = random.random()
    running_total = 0
    for position, weight in enumerate(probability):
        running_total += weight
        if draw <= running_total:
            return position
    return len(probability)-1

def transform(p):
    """Apply one of the four transformations to ``p``, chosen with weights 0.85/0.07/0.07/0.01."""
    # Candidate maps and their selection weights, position for position.
    candidates = [transformation_1, transformation_2,
                  transformation_3, transformation_4]
    probability = [0.85, 0.07, 0.07, 0.01]
    chosen = candidates[get_index(probability)]
    new_x, new_y = chosen(p)
    return new_x, new_y

def draw_fern(n):
    """Return ``(xs, ys)``: the origin followed by ``n`` successive ``transform`` results."""
    # We start with (0, 0)
    xs, ys = [0], [0]
    current = (0, 0)
    for _ in range(n):
        next_x, next_y = transform(current)
        current = (next_x, next_y)
        xs.append(next_x)
        ys.append(next_y)
    return xs, ys

# Interactive entry point: ask for a point count, generate the points, and
# draw them as individual markers.
if __name__ == '__main__':
      n = int(input('Enter the number of points in the Fern: '))
      x, y = draw_fern(n)
      # Plot the points
      # 'o' draws each point as a circle marker rather than joining them with lines.
      plt.plot(x, y, 'o')
      plt.title('Fern with {0} points'.format(n))
      plt.show()

In [ ]:
import math
# Floating-point sine of pi/2.
math.sin(math.pi/2)

In [ ]:
import inspect
import math  # used below; previously only in scope because an earlier cell imported it

import sympy
from sympy import Limit, S, Symbol, sin, sine_transform, solve

# sympy.sin applied to a Python float evaluates numerically.
print(sympy.sin(math.pi/2))
print(sin(math.pi/2))

# What kind of object is sympy.sin?
print("\n")
print ("inspect.ismodule(sympy.sin):", inspect.ismodule(sympy.sin))
print ("inspect.ismethod(sympy.sin):", inspect.ismethod(sympy.sin))
print ("inspect.isclass(sympy.sin):", inspect.isclass(sympy.sin))
print ("inspect.isfunction(sympy.sin)", inspect.isfunction(sympy.sin))

# Symbolic arithmetic on an expression in theta.
theta = Symbol('theta')
print( sympy.sin(theta) + sympy.sin(theta) )

# Solve u*sin(theta1) - g*t = 0 for t.
u = Symbol('u')
t = Symbol('t')
g = Symbol('g')
theta1 = Symbol('theta1')
# Only a cell's final expression is displayed automatically, so the solution
# list has to be printed explicitly here (it was silently discarded before).
print(solve(u*sin(theta1)-g*t, t))

# With the assumption positive=True the comparison below can be decided.
x = Symbol('x', positive=True)
if (x+5) > 0:
    print('Do Something where positive=True')
else:
    print('Do Something else where not(positive=True)')

# Limit(...) builds an unevaluated limit object; its type and form are printed.
x = Symbol('x')
y = Limit(1/x, x, S.Infinity)
print(type(y), y)

In [ ]:
import inspect 
import re




# List every name the re module exposes.
print(dir(re))
print("\n")

# Classify re.findall with each inspect predicate in turn.
checks = (
    ("inspect.ismodule(re.findall):", inspect.ismodule),
    ("inspect.ismethod(re.findall):", inspect.ismethod),
    ("inspect.isclass(re.findall):", inspect.isclass),
    ("inspect.isfunction(re.findall)", inspect.isfunction),
)
for label, predicate in checks:
    print (label, predicate(re.findall))


# Full (name, value) listing of the module's members.
print(inspect.getmembers(re))




#print ("inspect.ismethod(findall):", inspect.ismethod(finall))
#print(dir(solve))

In [ ]:
#from numpy import pi,linspace,sin
#from matplotlib.pyplot import plot
#x = linspace(0,3*pi,500)
#print(x)
#plot(x,sin(x**2))
#title('A simple chirp');
# OneNote Python 101 - Worked Exampes - bokeh multi line, multi axis 


import numpy as np 
from bokeh.plotting import  show, figure 
from bokeh.io import output_notebook

# Sample 0 < x <= 2*pi. The first linspace point (x = 0) is dropped because
# 1/x there is a division by zero: NumPy emits a RuntimeWarning and
# sin(inf) turns into NaN in the plotted data.
x = np.linspace(0, 2*np.pi, 100)[1:]
#print(x)
y = np.sin(1/x)

# Render Bokeh output inline in the notebook.
output_notebook()

p = figure()

p.line(x, y, legend="sin(1/x)")



show(p)