In [1]:
# reviewing looping through a list
my_list = [1, 2, 3, 4, 5]
for value in my_list:
print(value ** 2, end=' ')
In [2]:
# reviewing range() to loop a set number of times
for value in range(5):
print((value + 1) ** 2, end=' ')
In [3]:
# testing values as we loop through a list
my_value = 2
for value in range(5):
if value == my_value:
print('found it')
In [4]:
# encapsulate with a function and find if some value appears in some list
def find_value(my_value, my_list):
is_found = False
for value in my_list:
if value == my_value:
is_found = True
return is_found
# now use our function
user_id = 3
user_ids = [1, 2, 3, 4, 5, 6]
if find_value(user_id, user_ids):
print('found it')
else:
print('did not find it')
In [5]:
# what if we want to raise all the numbers in a list to the 2nd power?
range(10) ** 2
In [6]:
# oops, error! instead, use list comprehension: do the operation element-wise
[ x ** 2 for x in range(10) ]
Out[6]:
In [7]:
# import python's csv module to open a csv file
import csv
In [8]:
# with guarantees the file will be closed even if we hit an exception
# rt means 'read' and 'text mode'
with open('rents_raw.csv', 'r', encoding='utf-8', newline=None) as csvfile:
my_csv = csv.reader(csvfile)
for row in my_csv:
print(row)
In [9]:
# the column headers are the first row in the data file
# use next to iterate our csv reader to the first row to grab the headers
with open('rents_raw.csv', 'r', encoding='utf-8', newline=None) as csvfile:
my_csv = csv.reader(csvfile)
headers = next(my_csv)
print(headers)
In [10]:
# what is the 1st column (zero-indexed) in our data set?
headers[1]
Out[10]:
In [11]:
# for each row in the data set, print the price column's value
with open('rents_raw.csv', 'r', encoding='utf-8', newline=None) as csvfile:
my_csv = csv.reader(csvfile)
for row in my_csv:
print(row[1])
In [12]:
# create a new list to contain the column of prices in the data set
prices = []
with open('rents_raw.csv', 'r', encoding='utf-8', newline=None) as csvfile:
my_csv = csv.reader(csvfile)
for row in my_csv:
prices.append(row[1])
prices
Out[12]:
This list has a couple of problems. First, it includes the header. Second, it's all strings even though prices are numeric data. Third, it contains some empty strings. We'll have to clean it up.
In [13]:
# to remove the first element of the list, we can just capture position 1 through the end of the list
prices = prices[1:]
prices
Out[13]:
In [14]:
# now let's convert the price strings to integers
for price in prices:
print(int(float(price)))
In [15]:
# you can't convert an empty string to a numeric type
for price in prices:
if not price == '':
print(int(float(price)))
else:
print('None')
In [16]:
# encapsulate this functionality inside a new function
def extract_int_price(price):
if not price == '':
return int(float(price))
else:
return None
In [17]:
# use our function to convert each element in the list of prices to an integer
for price in prices:
print(extract_int_price(price))
In [18]:
# rather than just printing each converted value, turn it into a new list called int_prices
int_prices = []
for price in prices:
int_prices.append(extract_int_price(price))
print(int_prices)
In [19]:
# do the same thing, using list comprehension
int_prices = [ extract_int_price(price) for price in prices ]
print(int_prices)
In [20]:
# replace any forward slashes in neighborhood name with a hyphen
with open('rents_raw.csv', 'r', encoding='utf-8', newline=None) as csvfile:
my_csv = csv.reader(csvfile)
next(my_csv) #skip the header row
for row in my_csv:
print(row[0].replace('/', '-')) #use string.replace() method
first create a couple of functions to do the cleaning
In [21]:
# create a new function to convert bedrooms from a string to an int
def extract_int_bedrooms(bedrooms):
if not bedrooms == '':
return int(float(bedrooms))
else:
return None
In [22]:
# create a new function to replace forward slashes and commas with hyphens
def clean_neighborhood(neighborhood_name):
# you can daisy chain multiple string.replace() methods
return neighborhood_name.replace('/', '-').replace(',', '')
In [23]:
# clean the data set by calling the cleaning functions and save the results to variables
rentals_cleaned = []
with open('rents_raw.csv', 'r', encoding='utf-8', newline=None) as csvfile:
my_csv = csv.reader(csvfile)
next(my_csv)
for row in my_csv:
neighborhood_cleaned = clean_neighborhood(row[0])
price_cleaned = extract_int_price(row[1])
bedrooms_cleaned = extract_int_bedrooms(row[2])
rentals_cleaned.append([neighborhood_cleaned, price_cleaned, bedrooms_cleaned])
# display our nested lists of data
rentals_cleaned
Out[23]:
Use rain.csv to calculate mean and maximum values in a column
In [24]:
# use round to round a number to a specified precision
round(3.14159265359, 5)
Out[24]:
In [25]:
with open('rain.csv', 'r', encoding='utf-8', newline=None) as csvfile:
# initialize a counter and variables to contain our descriptive stats
count = 0 #at the end, divide cumulative_sum by this to get the mean
cumulative_sum = 0 #our rolling sum
max_value = -1 #pick a really small number that's guaranteed to be less than the max
# open the file and skip the header row
my_csv = csv.reader(csvfile)
next(my_csv)
# loop through each data row
for row in my_csv:
# rainfall amount is in column 1, only process this row's value if not an empty string
if not row[1] == '':
# increment the counter and extract this row's rainfall as a float
count = count + 1
rainfall = float(row[1])
# add this row's rainfall to the cumulative sum
cumulative_sum = cumulative_sum + rainfall
# if this row's rainfall is greater than the current max value, update with the new max
if rainfall > max_value:
max_value = rainfall
# after looping through all the rows, divide the cumulative sum by the count and round to get the mean
mean_value = round(cumulative_sum / count, 1)
# print out the mean and max values
print('mean:', mean_value, 'inches')
print('max:', max_value, 'inches')
Now, how would you calculate the minimum?
In [ ]: