In [1]:
import pandas
# Read in a csv file with the same columns that the
# Lahman baseball data set has -- most importantly, there are columns
# called 'nameFirst' and 'nameLast'.
# (1) Write a function that reads a csv
# located at "path_to_csv" into a pandas dataframe
# and adds a new column called 'nameFull' with a player's full name.
#
# For example:
# for Hank Aaron, nameFull would be 'Hank Aaron',
#
# (2) Write the data in the pandas dataFrame to a new csv file located at
# path_to_new_csv
# WRITE YOUR CODE HERE
In [2]:
import pandas
import pandasql
# Read in our aadhaar_data csv to a pandas dataframe. Afterwards, we rename the columns
# by replacing spaces with underscores and setting all characters to lowercase, so the
# column names more closely resemble columns names one might find in a table.
aadhaar_data = pandas.read_csv('aadhaar_data.csv')
aadhaar_data.rename(columns = lambda x: x.replace(' ', '_').lower(), inplace=True)
# The possible columns to select from aadhaar data are:
# 1) registrar
# 2) enrolment_agency
# 3) state
# 4) district
# 5) sub_district
# 6) pin_code
# 7) gender
# 8) age
# 9) aadhaar_generated
# 10) enrolment_rejected
# 11) residents_providing_email,
# 12) residents_providing_mobile_number
# Select out the first 50 values for "registrar" and "enrolment_agency"
# in the aadhaar_data table using SQL syntax.
#
# The order of the select does matter. Make sure you select registrar then enrolment agency
# in your query.
q = """
-- YOUR QUERY HERE
"""
#Execute your SQL command against the pandas frame
# Note: the sqldf accepts: (1) a sql query string and (2) set of session/environment variables (locals() or globals())
aadhaar_solution = pandasql.sqldf(q.lower(), locals())
In [8]:
# Write a query that will select from the aadhaar_data table how many men and how
# many women over the age of 50 have had aadhaar generated for them in each district
#
# Note:
# (1) The SQL query keywords are case sensitive.
# Therefore, if you want to do a sum make sure you type 'sum' rather than 'SUM'.
#
# (2) Do not include a space between the function and argument(s).
# For example, count(some_field) will be interpreted correctly, count (some_field) will not.
# In your query, please list gender first and then district.
# aadhaar_generated is a column in the Aadhaar Data that denotes the number of Aadhaar approved.
If you need to use an aggregate function (such as sum, count, mean, max) use it in lower-case, for example: sum(some_field). Do not include a space between the function and argument(s). For example, count(some_field) will be interpreted correctly, count (some_field) will not.
q = ""
# Execute your SQL command against the pandas frame
aadhaar_solution = pandasql.sqldf(q.lower(), locals())
In [4]:
# Write a SQL query on a dataframe of weather data.
# The SQL query should return one column and
# one row - a count of the number of days in the dataframe where
# the rain column is equal to 1 (i.e., the number of days it
# rained).
# You might find SQL's count function useful for this exercise.
# You can read more about it here:
# https://dev.mysql.com/doc/refman/5.1/en/counting-rows.html
weather_data = pandas.read_csv('weather_underground.csv')
q = ""
#Execute your SQL command against the pandas frame
rainy_days = pandasql.sqldf(q.lower(), locals())
In [5]:
# Write a SQL query on a dataframe of
# weather data. The SQL query should return two columns and
# two rows - whether it was foggy or not (0 or 1) and the max
# maxtempi for that fog value (i.e., the maximum max temperature
# for both foggy and non-foggy days).
q = ""
#Execute your SQL command against the pandas frame
foggy_days = pandasql.sqldf(q.lower(), locals())
In [6]:
#Write a SQL query on a dataframe of weather data.
# The SQL query should return one column and
# one row - the average meantempi on days that are a Saturday
# or Sunday (i.e., the the average mean temperature on weekends).
# You can access the date in the dataframe via the 'date' column.
#
# Also, you can convert dates to days of the week via the 'strftime' keyword in SQL.
# For example, cast (strftime('%w', date) as integer) will return 0 if the date
# is a Sunday or 6 if the date is a Saturday.
q =""
#Execute your SQL command against the pandas frame
mean_temp_weekends = pandasql.sqldf(q.lower(), locals())
In [7]:
import pandas
import pandasql
# Write SQL query on a dataframe of weather data.
# More specifically you want to find the average
# minimum temperature on rainy days where the minimum temperature
# is greater than 55 degrees.
q = ""
#Execute your SQL command against the pandas frame
avg_min_temp_rainy = pandasql.sqldf(q.lower(), locals())