In [11]:
#Page 1
'''
Read the CSV file "US_births_1994-2003_CDC_NCHS.csv" into a string.
Split the string on the newline character ("\n").
Display the first 10 values in the resulting list.
'''
# Read the whole file inside a `with` block so the handle is closed as soon
# as the text has been loaded, then split that text on the newline character.
with open("US_births_1994-2003_CDC_NCHS.csv", 'r') as f:
    data = f.read().split("\n")
# Bare last expression: the notebook renders the first 10 entries of the list.
data[0:10]
Out[11]:
In [5]:
#Page 2
'''
Create a function named read_csv() that:
Takes a single, required argument, a string representing the file name of the CSV file.
Reads the file into a string, splits the string on the newline character ("\n"), and removes the header row. Assign this list to string_list and create an empty list named final_list.
Uses a for loop to:
Iterate over string_list,
Create an empty list named int_fields,
Splits each row on the comma delimiter (,) and assigns the resulting list to string_fields,
Converts each value in string_fields to an integer and appends to int_fields,
Appends int_fields to final_list.
Returns final_list.
Use the read_csv() function to read in the file "US_births_1994-2003_CDC_NCHS.csv" and assign the result to cdc_list.
Display the first 10 rows of cdc_list to confirm it's a list of lists, containing only integer values, and no header row.
'''
def read_csv(filename):
    """Read a CSV file of integers (with a header row) into a list of int rows.

    The file is read in full and split on the newline character; the first
    line (the header) is discarded. Every remaining non-blank line is split
    on "," and each field is converted with int().

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list containing one inner list of ints per data line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field on a non-blank line is not a valid integer.
    """
    # The context manager closes the handle even if reading fails.
    with open(filename, 'r') as csv_file:
        lines = csv_file.read().split("\n")

    string_list = lines[1:]  # drop the header row
    final_list = []
    for line in string_list:
        # A file that ends with a newline (or contains a stray blank line)
        # produces an empty string here; int('') would raise, so skip it.
        if not line.strip():
            continue
        string_fields = line.split(",")
        int_fields = [int(field) for field in string_fields]
        final_list.append(int_fields)
    return final_list
# Parse the births file into a list of integer rows, then print the first
# ten rows to check that the header is gone and every value is an int.
cdc_list = read_csv(filename="US_births_1994-2003_CDC_NCHS.csv")
print(cdc_list[:10])
In [9]:
#Page 3
'''
Create a function named month_births() that:
Takes a single, required argument, a list of lists.
Creates an empty dictionary, births_per_month, to store the monthly totals.
Uses a for loop to:
Iterate over the list of lists,
Extract the value in the month and births columns,
If the month value already exists as a key in births_per_month, the births value is added to the existing value,
If the month value doesn't exist as a key in births_per_month, it's created and the associated value is the births value.
After the loop, return the births_per_month dictionary.
Use the month_births() function to calculate the monthly totals for the dataset and assign the result to cdc_month_births. Display the dictionary.
'''
def month_births(list_Of_list):
    """Add up the births value of every row, grouped by the row's month value.

    Args:
        list_Of_list: Iterable of rows; index 1 of a row is read as the
            month and index 4 as the births value.

    Returns:
        dict mapping each month value encountered to the sum of the births
        values of the rows carrying it. Empty input gives an empty dict.
    """
    totals = {}
    for row in list_Of_list:
        month_key = row[1]
        birth_count = row[4]
        if month_key not in totals:
            # First time this month is seen: start its running total.
            totals[month_key] = birth_count
            continue
        totals[month_key] += birth_count
    return totals
# Aggregate the parsed rows by month and print the resulting totals.
cdc_month_births = month_births(list_Of_list=cdc_list)
print(cdc_month_births)
In [10]:
#Page 4
'''
Create a function named dow_births() that takes a single, required argument (a list of lists) and returns a dictionary containing the total number of births for each unique value of the day_of_week column.
Use the dow_births() function to return the day-of-week totals for the dataset and assign the result to cdc_day_births. Display the dictionary.
'''
def dow_births(list_Of_list):
    """Add up the births value of every row, grouped by day-of-week value.

    Args:
        list_Of_list: Iterable of rows; index 3 of a row is read as the
            day of week and index 4 as the births value.

    Returns:
        dict mapping each day-of-week value encountered to the sum of the
        births values of the rows carrying it. Empty input gives an empty dict.
    """
    totals = {}
    for record in list_Of_list:
        weekday, born = record[3], record[4]
        try:
            totals[weekday] += born
        except KeyError:
            # First row for this weekday: seed the total with its value.
            totals[weekday] = born
    return totals
# Aggregate the parsed rows by day of week and print the resulting totals.
cdc_day_births = dow_births(list_Of_list=cdc_list)
print(cdc_day_births)
In [13]:
#Page 5
'''
Create a function named calc_counts() that:
Takes two, required parameters:
data: a list of lists
column: the column number we want to calculate the totals for
Populates and returns a dictionary containing the total number of births for each unique value in the column at position column.
Use the calc_counts() function to:
Return the yearly totals for the dataset and assign the result to cdc_year_births.
Return the monthly totals for the dataset and assign the result to cdc_month_births.
Return the day-of-month totals for the dataset and assign the result to cdc_dom_births.
Return the day-of-week totals for the dataset and assign the result to cdc_dow_births.
'''
def calc_counts(data, column, births_column=4):
    """Total the births value of each row, grouped by the value at `column`.

    Args:
        data: Iterable of rows (indexable sequences).
        column: Index of the field whose distinct values become the keys
            of the result.
        births_column: Index of the field that is summed. Defaults to 4,
            the births position used by the other functions in this notebook.

    Returns:
        dict mapping each distinct value found at `column` to the sum of the
        `births_column` values of the rows carrying it. Empty `data` gives
        an empty dict.
    """
    births_by_column = {}
    for row in data:
        col_val = row[column]
        births = row[births_column]
        if col_val in births_by_column:
            births_by_column[col_val] += births
        else:
            # First occurrence of this value: start its running total.
            births_by_column[col_val] = births
    return births_by_column
# Column positions passed below: 0 = year, 1 = month, 2 = day of month,
# 3 = day of week.
cdc_year_births = calc_counts(cdc_list, column=0)
cdc_month_births = calc_counts(cdc_list, column=1)
cdc_dom_births = calc_counts(cdc_list, column=2)
cdc_dow_births = calc_counts(cdc_list, column=3)
# One print call; the three-newline separator reproduces the two blank lines
# that appeared between consecutive dictionaries.
print(
    cdc_year_births,
    cdc_month_births,
    cdc_dom_births,
    cdc_dow_births,
    sep="\n\n\n",
)
In [ ]: