In [1]:
# Run this cell to set up the notebook.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from client.api.notebook import Notebook
ok = Notebook('lab05.ok')
The Berkeley Boat Club wants to better organize their user data, and they've hired you to do it. Your first job is to implement code for relational algebra operators in python (unlike you, they don't know how to use pandas).
You may want to refer to these slides, to remember what each operation does. You may also want to refer to the pandas documentation.
Here are the Boat Club's databases. Your job is to implement a variety of unary and binary relational algebra operators.
In [2]:
young_sailors = pd.DataFrame({
"sid": [2701, 18869, 63940, 21869, 17436],
"sname": ["Jerry", "Morgan", "Danny", "Jack", "Dustin"],
"rating": [8, 6, 4, 9, 3],
"age": [25, 26, 21, 27, 22],
})
salty_sailors = pd.DataFrame({
"sid": [2701, 17436, 45433, 22689, 46535],
"sname": ["Jerry", "Dustin", "Balon", "Euron", "Victarion"],
"rating": [8, 3, 7, 10, 2],
"age": [25, 22, 39, 35, 37],
})
boats = pd.DataFrame({
"bid": [41116, 54505, 50041, 35168, 58324],
"bname": ["The Black Sparrow", "The Great Kraken", "The Prophetess", "Silence", "Iron Victory"],
"color": ["Black", "Orange", "Silver", "Red", "Grey"],
})
reservations = pd.DataFrame({
"sid": [21869, 45433, 18869, 22689, 21869, 17436, 63940, 45433, 21869, 18869],
"bid": [41116, 35168, 50041, 41116, 58324, 50041, 54505, 41116, 50041, 41116],
"day": ["3/1", "3/1", "3/2", "3/2", "3/2", "3/3", "3/3", "3/3", "3/3", "3/4"],
})
In [3]:
def project(df, columns):
return df[columns] #SOLUTION
In [4]:
project(salty_sailors, ["sname", "age"])
In [5]:
_ = ok.grade('qproject')
_ = ok.backup()
In [6]:
def select(df, condition):
return df[df.apply(condition, axis=1)] #SOLUTION
In [7]:
select(young_sailors, lambda x: x["rating"] > 6)
In [8]:
_ = ok.grade('qselect')
_ = ok.backup()
In [9]:
def union(df1, df2):
return pd.concat([df1, df2]).drop_duplicates() #SOLUTION
In [10]:
union(young_sailors, salty_sailors)
In [11]:
_ = ok.grade('qunion')
_ = ok.backup()
In [12]:
def intersection(df1, df2):
return pd.merge(df1, df2) #SOLUTION
In [13]:
intersection(young_sailors, salty_sailors)
In [14]:
_ = ok.grade('qintersection')
_ = ok.backup()
In [15]:
def difference(df1, df2):
return df1.where(df1.apply(lambda x: ~x.isin(df2[x.name]))).dropna()
In [16]:
difference(young_sailors, salty_sailors)
In [17]:
_ = ok.grade('qdifference')
_ = ok.backup()
In [18]:
def cross_product(df1, df2):
# add a column "tmp-key" of zeros to df1 and df2
df1 = pd.concat([df1, pd.Series(0, index=df1.index, name="tmp-key")], axis=1)
df2 = pd.concat([df2, pd.Series(0, index=df2.index, name="tmp-key")], axis=1)
# use Pandas merge functionality along with drop
# to compute outer product and remove extra column
return (pd
.merge(df1, df2, on="tmp-key")
.drop(["tmp-key"], axis=1)) #SOLUTION
In [19]:
cross_product(young_sailors, salty_sailors)
In [20]:
_ = ok.grade('qcross_product')
_ = ok.backup()
In [21]:
def theta_join(df1, df2, condition):
return select(cross_product(df1, df2), condition)
In [22]:
theta_join(young_sailors, salty_sailors, lambda x: x["age_x"] > x["age_y"])
In [23]:
_ = ok.grade('qtheta_join')
_ = ok.backup()
In [24]:
def natural_join(df1, df2, attr):
return select(cross_product(df1, df2), lambda x: x[attr+"_x"] == x[attr+"_y"])
In [25]:
all_sailors = union(young_sailors, salty_sailors)
sailor_reservtions = natural_join(all_sailors, reservations, "sid")
sailors_and_boats = natural_join(sailor_reservtions, boats, "bid")
project(sailors_and_boats, ["sname", "bname", "day"])
In [26]:
_ = ok.grade('qnatural_join')
_ = ok.backup()
In [ ]:
i_finished_the_lab = False
In [ ]:
_ = ok.grade('qcompleted')
_ = ok.backup()
In [ ]:
_ = ok.submit()
Now, run this code in your terminal to make a
git commit
that saves a snapshot of your changes in git. The last line of the cell
runs git push, which will send your work to your personal Github repo.
# Tell git to commit your changes to this notebook
git add sp17/lab/lab04/lab04.ipynb
# Tell git to make the commit
git commit -m "lab04 finished"
# Send your updates to your personal private repo
git push origin master
In [ ]: