In [28]:
import sqlite3
#from numpy import loadtxt
import pandas as pd
import pandas.io.sql as pd_sql

# Setup SQL
connection = sqlite3.connect(":memory:") # put it into RAM, but behave like it's an sqlite file living on disk
connection.text_factory = str
cursor = connection.cursor()

###################### Problem 1 ######################
# Load data and create SQL tables
df1 = pd.read_csv("hw_6_data/top_airports.csv")
pd_sql.write_frame(df1, 'top_airports', connection, if_exists='replace')

df2 = pd.read_csv("hw_6_data/ICAO_airports.csv")
pd_sql.write_frame(df2, 'many_airports', connection, if_exists='replace')

sql_cmd = """SELECT airport, icao, city, latitude_deg, longitude_deg FROM top_airports, many_airports
        ON top_airports.icao = many_airports.ident
        ORDER BY airport ASC"""

# Printout to show it worked
cursor.execute(sql_cmd)
db_info = array(cursor.fetchall())
print len(db_info), 'airports matched:'
print ''
for result in db_info:
    for item in result:
        print item, ' | ',
    print ''
    
    
###################### Problem 2 ######################
sql_cmd = """CREATE TABLE weather (aid INTEGER PRIMARY KEY AUTOINCREMENT,
    city TEXT, min_temp FLOAT, max_temp FLOAT, humidity FLOAT, precipitation FLOAT, cloud_cover FLOAT)"""
cursor.execute(sql_cmd)


50 airports matched:

Austin-Bergstrom International Airport  |  KAUS  |  Austin  |  30.1944999695  |  -97.6698989868  |  
Baltimore/Washington International Thurgood Marshall Airport  |  KBWI  |  Baltimore / Glen Burnie  |  39.17539978  |  -76.66829681  |  
Charlotte/Douglas International Airport  |  KCLT  |  Charlotte  |  35.2140007019  |  -80.9430999756  |  
Chicago Midway International Airport  |  KMDW  |  Chicago  |  41.7859992981  |  -87.7524032593  |  
Chicago O'Hare International Airport  |  KORD  |  Chicago  |  41.97859955  |  -87.90480042  |  
Cincinnati/Northern Kentucky International Airport  |  KCVG  |  Covington  |  39.0488014221  |  -84.6678009033  |  
Cleveland-Hopkins International Airport  |  KCLE  |  Cleveland  |  41.4117012024  |  -81.8498001099  |  
Dallas Love Field  |  KDAL  |  Dallas  |  32.8470993042  |  -96.8517990112  |  
Dallas/Fort Worth International Airport  |  KDFW  |  Dallas-Fort Worth  |  32.8968009949  |  -97.0380020142  |  
Denver International Airport  |  KDEN  |  Denver  |  39.8616981506  |  -104.672996521  |  
Detroit Metropolitan Wayne County Airport  |  KDTW  |  Detroit / Romulus  |  42.2123985291  |  -83.3534011841  |  
Fort Lauderdale Hollywood International Airport  |  KFLL  |  Fort Lauderdale  |  26.072599411  |  -80.1527023315  |  
Gen. Edward Lawrence Logan International Airport  |  KBOS  |  Boston  |  42.36429977  |  -71.00520325  |  
General Mitchell International Airport  |  KMKE  |  Milwaukee  |  42.9472007751  |  -87.8965988159  |  
George Bush Intercontinental Airport  |  KIAH  |  Houston  |  29.9843997955  |  -95.3414001465  |  
Hartsfield-Jackson Atlanta International Airport  |  KATL  |  Atlanta  |  33.6366996765  |  -84.4281005859  |  
Honolulu International Airport  |  PHNL  |  Honolulu  |  21.3187007904  |  -157.92199707  |  
Indianapolis International Airport  |  KIND  |  Indianapolis  |  39.717300415  |  -86.2944030762  |  
John F. Kennedy International Airport  |  KJFK  |  New York  |  40.63980103  |  -73.77890015  |  
John Wayne Airport-Orange County  |  KSNA  |  Santa Ana  |  33.67570114  |  -117.8679962  |  
Kansas City International Airport  |  KMCI  |  Kansas City  |  39.2975997925  |  -94.7138977051  |  
LaGuardia Airport (and Marine Air Terminal)  |  KLGA  |  New York  |  40.77719879  |  -73.87259674  |  
Lambert-St. Louis International Airport  |  KSTL  |  St. Louis  |  38.7486991882  |  -90.3700027466  |  
Los Angeles International Airport  |  KLAX  |  Los Angeles  |  33.94250107  |  -118.4079971  |  
Louis Armstrong New Orleans International Airport  |  KMSY  |  New Orleans  |  29.9934005737  |  -90.2580032349  |  
Luis Munoz Marin International Airport  |  TJSJ  |  San Juan / Carolina  |  18.4393997192  |  -66.0018005371  |  
McCarran International Airport  |  KLAS  |  Las Vegas  |  36.08010101  |  -115.1520004  |  
Memphis International Airport  |  KMEM  |  Memphis  |  35.0424003601  |  -89.9766998291  |  
Miami International Airport  |  KMIA  |  Miami  |  25.7931995392  |  -80.2906036377  |  
Minneapolis-St Paul International/Wold-Chamberlain Airport  |  KMSP  |  Minneapolis  |  44.8819999695  |  -93.2218017578  |  
Nashville International Airport  |  KBNA  |  Nashville  |  36.1245002747  |  -86.6781997681  |  
Newark Liberty International Airport  |  KEWR  |  Newark  |  40.6925010681  |  -74.1687011719  |  
Norman Y. Mineta San Jose International Airport  |  KSJC  |  San Jose  |  37.3625984192  |  -121.929000854  |  
Oakland International Airport  |  KOAK  |  Oakland  |  37.7212982178  |  -122.221000671  |  
Orlando International Airport  |  KMCO  |  Orlando  |  28.4293994904  |  -81.3089981079  |  
Philadelphia International Airport  |  KPHL  |  Philadelphia  |  39.8718986511  |  -75.2410964966  |  
Phoenix Sky Harbor International Airport  |  KPHX  |  Phoenix  |  33.434299469  |  -112.012001038  |  
Pittsburgh International Airport  |  KPIT  |  Pittsburgh  |  40.49150085  |  -80.23290253  |  
Portland International Airport  |  KPDX  |  Portland  |  45.58869934  |  -122.5979996  |  
Raleigh-Durham International Airport  |  KRDU  |  Raleigh  |  35.8776016235  |  -78.7874984741  |  
Ronald Reagan Washington National Airport  |  KDCA  |  Washington D.C. / Arlington County  |  38.8521003723  |  -77.0376968384  |  
Sacramento International Airport  |  KSMF  |  Sacramento  |  38.695400238  |  -121.591003418  |  
Salt Lake City International Airport  |  KSLC  |  Salt Lake City  |  40.7883987427  |  -111.977996826  |  
San Antonio International Airport  |  KSAT  |  San Antonio  |  29.533700943  |  -98.4698028564  |  
San Diego International Airport  |  KSAN  |  San Diego  |  32.7336006165  |  -117.190002441  |  
San Francisco International Airport  |  KSFO  |  San Francisco  |  37.6189994812  |  -122.375  |  
Seattle-Tacoma International Airport  |  KSEA  |  Seattle / Tacoma (SeaTac)  |  47.4490013123  |  -122.308998108  |  
Tampa International Airport  |  KTPA  |  Tampa  |  27.9755001068  |  -82.533203125  |  
Washington Dulles International Airport  |  KIAD  |  Washington D.C. / Chantilly / Dulles  |  38.94449997  |  -77.45580292  |  
William P. Hobby Airport  |  KHOU  |  Houston  |  29.64539909  |  -95.27890015  |  
Out[28]:
<sqlite3.Cursor at 0x1074918f0>

All cells below this point are saved scratch work and/or original failed attempts to import the data :(


In [9]:
db_info[0]


Out[9]:
array(['Hartsfield-Jackson Atlanta International Airport', 'Atlanta',
       '33.6366996765', '-84.4281005859'], 
      dtype='|S60')

In [ ]:
sql_cmd = """CREATE TABLE top_airports (aid INTEGER PRIMARY KEY AUTOINCREMENT,
    city TEXT, faa TEXT, iata TEXT, icao TEXT, airport TEXT, role TEXT, enplanements INT)"""
cursor.execute(sql_cmd)
#top_airports_raw = df1.values
#for airport in top_airports_raw:
#    airport[4] = airport[4].replace('/',' ').replace('-',' ')
        

# Load data into table
for airport in top_airports_raw:
    sql_cmd = ("INSERT INTO top_airports (city, faa, iata, icao, airport, role, enplanements) VALUES " + 
        str(tuple(airport)))
    cursor.execute(sql_cmd)
    
# Create table for all airports
sql_cmd = """CREATE TABLE many_airports (aid INTEGER PRIMARY KEY AUTOINCREMENT,
    id INT, ident TEXT, type TEXT, name TEXT, latitude_deg FLOAT, longitude_deg FLOAT, elevation_ft INT,
    continent TEXT, iso_country TEXT, iso_region TEXT, municipality TEXT, scheduled_service TEXT, gps_code TEXT, 
    iata_code TEXT, local_code TEXT, home_link TEXT, wikipedia_link TEXT, keywords TEXT)"""
cursor.execute(sql_cmd)

# Load data into python
#many_airports_raw = loadtxt("hw_6_data/ICAO_airports.csv", skiprows=1, delimiter=",", comments="%", dtype=str)
df2 = pd.read_csv("hw_6_data/ICAO_airports.csv")
many_airports_raw = df2.values
#for airport in many_airports_raw:
#    airport[3] = airport[3].replace('/',' ').replace('-',' ')

# load data into table
for airport in many_airports_raw:
    temp = []
    for i in airport:
        if isinstance(i,str):
            i = i.strip('"') # avoid SQL syntax errors
            temp.append(i)
    sql_cmd = ("INSERT INTO many_airports (id, ident, type, name, latitude_deg, longitude_deg," +
               "elevation_ft, continent, iso_country, iso_region, municipality, scheduled_service, " +
               "gps_code, iata_code, local_code, home_link, wikipedia_link, keywords) VALUES " + 
               str(tuple(temp)))
    cursor.execute(sql_cmd)

# Use LEFT JOIN to grab the desired data from just the top 50 airports
sql_cmd = """SELECT airport, city, latitude_deg, longitude_deg FROM top_airports, many_airports
        ON top_airports.icao = many_airports.ident"""
sql_cmd = """SELECT airport, city, latitude_deg, longitude_deg FROM top_airports LEFT JOIN many_airports
        ON top_airports.airport = many_airports.name"""
cursor.execute(sql_cmd)
db_info = array(cursor.fetchall())
print len(db_info)
for result in db_info:
    for item in result:
        print item, '|',
    print ''

In [202]:
many_airports_raw[0]


Out[202]:
array([6523, '00A', 'heliport', 'Total Rf Heliport', 40.07080078125,
       -74.9336013793945, 11.0, nan, 'US', 'US-PA', 'Bensalem', 'no',
       '00A', nan, '00A', nan, nan, nan], dtype=object)

In [203]:
many_airports_raw[10][3]


Out[203]:
'Lt World Airport'

In [13]:
#sql_cmd = "SELECT id, name FROM many_airports WHERE id <= 6533 ORDER BY id DESC"
sql_cmd = "SELECT airport FROM top_airports ORDER BY airport ASC"
cursor.execute(sql_cmd)
db_info = array(cursor.fetchall())
for i in db_info:
    print i[0]
print len(db_info)


Austin-Bergstrom International Airport
Baltimore/Washington International Thurgood Marshall Airport
Charlotte/Douglas International Airport
Chicago Midway International Airport
Chicago O'Hare International Airport
Cincinnati/Northern Kentucky International Airport
Cleveland-Hopkins International Airport
Dallas Love Field
Dallas/Fort Worth International Airport
Denver International Airport
Detroit Metropolitan Wayne County Airport
Fort Lauderdale Hollywood International Airport
Gen. Edward Lawrence Logan International Airport
General Mitchell International Airport
George Bush Intercontinental Airport
Hartsfield-Jackson Atlanta International Airport
Honolulu International Airport
Indianapolis International Airport
John F. Kennedy International Airport
John Wayne Airport-Orange County
Kansas City International Airport
LaGuardia Airport (and Marine Air Terminal)
Lambert-St. Louis International Airport
Los Angeles International Airport
Louis Armstrong New Orleans International Airport
Luis Munoz Marin International Airport
McCarran International Airport
Memphis International Airport
Miami International Airport
Minneapolis-St Paul International/Wold-Chamberlain Airport
Nashville International Airport
Newark Liberty International Airport
Norman Y. Mineta San Jose International Airport
Oakland International Airport
Orlando International Airport
Philadelphia International Airport
Phoenix Sky Harbor International Airport
Pittsburgh International Airport
Portland International Airport
Raleigh-Durham International Airport
Ronald Reagan Washington National Airport
Sacramento International Airport
Salt Lake City International Airport
San Antonio International Airport
San Diego International Airport
San Francisco International Airport
Seattle-Tacoma International Airport
Tampa International Airport
Washington Dulles International Airport
William P. Hobby Airport
50

In [ ]:
sql_cmd = ("INSERT INTO many_airports (id ident type name latitude_deg longitude_deg" +
               "elevation_ft continent iso_country iso_region municipality scheduled_service " +
               "gps_code iata_code local_code home_link wikipedia_link keywords) VALUES " + 
               "('6523', '00A', 'heliport', 'Total Rf Heliport', '40.07080078125', '-74.9336013793945', '11', 'NA', 'US', 'US-PA', 'Bensalem', 'no', '00A', '', '00A', '', '', '')")
cursor.execute(sql_cmd)

In [62]:
type(airport[3].strip('"'))


Out[62]:
str

In [2]:
import pandas as pd
df = pd.read_csv('hw_6_data/ICAO_airports.csv')
temp = df.values

toy1 = loadtxt("hw_6_data/top_airports.csv", skiprows=1, delimiter=",", dtype=str)
toy2 = pd.read_csv("hw_6_data/top_airports.csv")
toy2 = toy2.values

print toy1[0]
print toy2[0]


['Atlanta' 'ATL' 'ATL' 'KATL'
 'Hartsfield-Jackson Atlanta International Airport' 'P-L' '43130585']
['Atlanta' 'ATL' 'ATL' 'KATL'
 'Hartsfield-Jackson Atlanta International Airport' 'P-L' 43130585]

In [136]:
csv.reader?

In [69]:
header = ["id","ident","type","name","latitude_deg","longitude_deg","elevation_ft","continent","iso_country","iso_region","municipality","scheduled_service","gps_code","iata_code","local_code","home_link","wikipedia_link","keywords"]
new = " ".join(header)
print len(header)
print new


18
id ident type name latitude_deg longitude_deg elevation_ft continent iso_country iso_region municipality scheduled_service gps_code iata_code local_code home_link wikipedia_link keywords

In [10]:
import pandas.io.sql as pd_sql
pd_sql.write_frame(df2, 'test_table_4', connection, if_exists='replace')

In [177]:
pd.concat?