In [28]:
import sqlite3
#from numpy import loadtxt
import pandas as pd
import pandas.io.sql as pd_sql
# Setup SQL
connection = sqlite3.connect(":memory:") # put it into RAM, but behave like it's an sqlite file living on disk
connection.text_factory = str
cursor = connection.cursor()
###################### Problem 1 ######################
# Load data and create SQL tables
df1 = pd.read_csv("hw_6_data/top_airports.csv")
pd_sql.write_frame(df1, 'top_airports', connection, if_exists='replace')
df2 = pd.read_csv("hw_6_data/ICAO_airports.csv")
pd_sql.write_frame(df2, 'many_airports', connection, if_exists='replace')
sql_cmd = """SELECT airport, icao, city, latitude_deg, longitude_deg FROM top_airports, many_airports
ON top_airports.icao = many_airports.ident
ORDER BY airport ASC"""
# Printout to show it worked
cursor.execute(sql_cmd)
db_info = array(cursor.fetchall())
print len(db_info), 'airports matched:'
print ''
for result in db_info:
for item in result:
print item, ' | ',
print ''
###################### Problem 2 ######################
sql_cmd = """CREATE TABLE weather (aid INTEGER PRIMARY KEY AUTOINCREMENT,
city TEXT, min_temp FLOAT, max_temp FLOAT, humidity FLOAT, precipitation FLOAT, cloud_cover FLOAT)"""
cursor.execute(sql_cmd)
Out[28]:
All cells below this point are saved scratch work and/or original failed attempts to import the data :(
In [9]:
db_info[0]
Out[9]:
In [ ]:
sql_cmd = """CREATE TABLE top_airports (aid INTEGER PRIMARY KEY AUTOINCREMENT,
city TEXT, faa TEXT, iata TEXT, icao TEXT, airport TEXT, role TEXT, enplanements INT)"""
cursor.execute(sql_cmd)
#top_airports_raw = df1.values
#for airport in top_airports_raw:
# airport[4] = airport[4].replace('/',' ').replace('-',' ')
# Load data into table
for airport in top_airports_raw:
sql_cmd = ("INSERT INTO top_airports (city, faa, iata, icao, airport, role, enplanements) VALUES " +
str(tuple(airport)))
cursor.execute(sql_cmd)
# Create table for all airports
sql_cmd = """CREATE TABLE many_airports (aid INTEGER PRIMARY KEY AUTOINCREMENT,
id INT, ident TEXT, type TEXT, name TEXT, latitude_deg FLOAT, longitude_deg FLOAT, elevation_ft INT,
continent TEXT, iso_country TEXT, iso_region TEXT, municipality TEXT, scheduled_service TEXT, gps_code TEXT,
iata_code TEXT, local_code TEXT, home_link TEXT, wikipedia_link TEXT, keywords TEXT)"""
cursor.execute(sql_cmd)
# Load data into python
#many_airports_raw = loadtxt("hw_6_data/ICAO_airports.csv", skiprows=1, delimiter=",", comments="%", dtype=str)
df2 = pd.read_csv("hw_6_data/ICAO_airports.csv")
many_airports_raw = df2.values
#for airport in many_airports_raw:
# airport[3] = airport[3].replace('/',' ').replace('-',' ')
# load data into table
for airport in many_airports_raw:
temp = []
for i in airport:
if isinstance(i,str):
i = i.strip('"') # avoid SQL syntax errors
temp.append(i)
sql_cmd = ("INSERT INTO many_airports (id, ident, type, name, latitude_deg, longitude_deg," +
"elevation_ft, continent, iso_country, iso_region, municipality, scheduled_service, " +
"gps_code, iata_code, local_code, home_link, wikipedia_link, keywords) VALUES " +
str(tuple(temp)))
cursor.execute(sql_cmd)
# Use LEFT JOIN to grab the desired data from just the top 50 airports
sql_cmd = """SELECT airport, city, latitude_deg, longitude_deg FROM top_airports, many_airports
ON top_airports.icao = many_airports.ident"""
sql_cmd = """SELECT airport, city, latitude_deg, longitude_deg FROM top_airports LEFT JOIN many_airports
ON top_airports.airport = many_airports.name"""
cursor.execute(sql_cmd)
db_info = array(cursor.fetchall())
print len(db_info)
for result in db_info:
for item in result:
print item, '|',
print ''
In [202]:
many_airports_raw[0]
Out[202]:
In [203]:
many_airports_raw[10][3]
Out[203]:
In [13]:
#sql_cmd = "SELECT id, name FROM many_airports WHERE id <= 6533 ORDER BY id DESC"
sql_cmd = "SELECT airport FROM top_airports ORDER BY airport ASC"
cursor.execute(sql_cmd)
db_info = array(cursor.fetchall())
for i in db_info:
print i[0]
print len(db_info)
In [ ]:
sql_cmd = ("INSERT INTO many_airports (id ident type name latitude_deg longitude_deg" +
"elevation_ft continent iso_country iso_region municipality scheduled_service " +
"gps_code iata_code local_code home_link wikipedia_link keywords) VALUES " +
"('6523', '00A', 'heliport', 'Total Rf Heliport', '40.07080078125', '-74.9336013793945', '11', 'NA', 'US', 'US-PA', 'Bensalem', 'no', '00A', '', '00A', '', '', '')")
cursor.execute(sql_cmd)
In [62]:
type(airport[3].strip('"'))
Out[62]:
In [2]:
import pandas as pd
df = pd.read_csv('hw_6_data/ICAO_airports.csv')
temp = df.values
toy1 = loadtxt("hw_6_data/top_airports.csv", skiprows=1, delimiter=",", dtype=str)
toy2 = pd.read_csv("hw_6_data/top_airports.csv")
toy2 = toy2.values
print toy1[0]
print toy2[0]
In [136]:
csv.reader?
In [69]:
header = ["id","ident","type","name","latitude_deg","longitude_deg","elevation_ft","continent","iso_country","iso_region","municipality","scheduled_service","gps_code","iata_code","local_code","home_link","wikipedia_link","keywords"]
new = " ".join(header)
print len(header)
print new
In [10]:
import pandas.io.sql as pd_sql
pd_sql.write_frame(df2, 'test_table_4', connection, if_exists='replace')
In [177]:
pd.concat?