In [1]:
import pandas as pd
academy = pd.read_csv('academy_awards.csv',encoding = 'ISO-8859-1')
academy.head(5)
Out[1]:
In [2]:
academy['Year'] = academy['Year'].str[:4].astype('int64')
academy.head(1)
Out[2]:
In [3]:
later_than_2000 = academy[academy['Year']>2000]
later_than_2000['Year'].unique()
Out[3]:
In [4]:
award_categories = ['Actor -- Leading Role','Actor -- Supporting Role','Actress -- Leading Role','Actress -- Supporting Role']
nominations = later_than_2000[later_than_2000["Category"].isin(award_categories)]
nominations['Category'].unique()
Out[4]:
In [5]:
replacements = { "NO": 0, "YES": 1 }
nominations["Won?"] = nominations["Won?"].map(replacements)
nominations["Won"] = nominations["Won?"]
drop_cols = ["Won?","Unnamed: 5", "Unnamed: 6","Unnamed: 7", "Unnamed: 8", "Unnamed: 9", "Unnamed: 10"]
final_nominations = nominations.drop(drop_cols, axis=1)
In [ ]:
additional_info_one = final_nominations["Additional Info"].str.rstrip("'}")
additional_info_two = additional_info_one.str.split(" {'")
movie_names = additional_info_two.str[0]
characters = additional_info_two.str[1]
final_nominations["Movie"] = movie_names
final_nominations["Character"] = characters
final_nominations = final_nominations.drop("Additional Info", axis=1)
final_nominations
In [ ]:
import sqlite3
conn = sqlite3.connect("nominations.db")
final_nominations.to_sql("nominations", conn, index=False)
In [ ]:
query_one = "pragma table_info(nominations);"
query_two = "select * from nominations limit 10;"
print(conn.execute(query_one).fetchall())
print(conn.execute(query_two).fetchall())
conn.close()