In [1]:
# Build the list of animal names that the loop examples below iterate over
animals = ['lion', 'tiger', 'crocodile', 'vulture', 'hippo']

In [2]:
# Show the full contents of the list
print(animals)


['lion', 'tiger', 'crocodile', 'vulture', 'hippo']

In [3]:
# Same five names as in the first cell, assigned again right before the loop demo
animals = ['lion', 'tiger', 'crocodile', 'vulture', 'hippo']

In [4]:
# Visit every entry of the list. `pass` does nothing, so the only lasting
# effect is that `creature` stays bound to the last element once the loop ends.
for creature in animals:
    pass

In [5]:
# `creature` is still defined after the loop has finished and holds the last item visited
print('The loop variable is now: ' + creature)


The loop variable is now: hippo

In [6]:
import os

In [7]:
# Create the folder that will receive the per-year csv files.
# exist_ok=True keeps this cell re-runnable: plain os.mkdir raises
# FileExistsError when the folder is already there from an earlier run.
os.makedirs('yearly_files', exist_ok=True)

In [10]:
import pandas as pd

# Read the complete survey table from disk
surveys_df = pd.read_csv('surveys.csv')

# Keep just the rows whose `year` column equals 2002
surveys2002 = surveys_df.loc[surveys_df['year'] == 2002]

# Save that subset as its own csv file inside yearly_files/
surveys2002.to_csv('yearly_files/surveys2002.csv')

In [11]:
# Look at the `year` column on its own
surveys_df['year']


Out[11]:
0        1977
1        1977
2        1977
3        1977
4        1977
5        1977
6        1977
7        1977
8        1977
9        1977
10       1977
11       1977
12       1977
13       1977
14       1977
15       1977
16       1977
17       1977
18       1977
19       1977
20       1977
21       1977
22       1977
23       1977
24       1977
25       1977
26       1977
27       1977
28       1977
29       1977
         ... 
35519    2002
35520    2002
35521    2002
35522    2002
35523    2002
35524    2002
35525    2002
35526    2002
35527    2002
35528    2002
35529    2002
35530    2002
35531    2002
35532    2002
35533    2002
35534    2002
35535    2002
35536    2002
35537    2002
35538    2002
35539    2002
35540    2002
35541    2002
35542    2002
35543    2002
35544    2002
35545    2002
35546    2002
35547    2002
35548    2002
Name: year, Length: 35549, dtype: int64

In [12]:
# List each distinct value that occurs in the `year` column
surveys_df['year'].unique()


Out[12]:
array([1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987,
       1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
       1999, 2000, 2001, 2002])

In [13]:
# Dry run: show the path each year's file would get, without writing anything
for year in surveys_df['year'].unique():
    filename = ''.join(['yearly_files/surveys', str(year), '.csv'])
    print(filename)


yearly_files/surveys1977.csv
yearly_files/surveys1978.csv
yearly_files/surveys1979.csv
yearly_files/surveys1980.csv
yearly_files/surveys1981.csv
yearly_files/surveys1982.csv
yearly_files/surveys1983.csv
yearly_files/surveys1984.csv
yearly_files/surveys1985.csv
yearly_files/surveys1986.csv
yearly_files/surveys1987.csv
yearly_files/surveys1988.csv
yearly_files/surveys1989.csv
yearly_files/surveys1990.csv
yearly_files/surveys1991.csv
yearly_files/surveys1992.csv
yearly_files/surveys1993.csv
yearly_files/surveys1994.csv
yearly_files/surveys1995.csv
yearly_files/surveys1996.csv
yearly_files/surveys1997.csv
yearly_files/surveys1998.csv
yearly_files/surveys1999.csv
yearly_files/surveys2000.csv
yearly_files/surveys2001.csv
yearly_files/surveys2002.csv

In [16]:
# Read the full survey table
surveys_df = pd.read_csv('surveys.csv')

for year in surveys_df['year'].unique():
    # Output path for this year, e.g. yearly_files/surveys1977.csv
    filename = 'yearly_files/surveys' + str(year) + '.csv'

    # Keep only this year's rows and save them to that path
    surveys_year = surveys_df.loc[surveys_df['year'] == year]
    surveys_year.to_csv(filename)

In [17]:
# NOTE: `year` is not assigned in this cell. It is whatever value the loop
# variable of an earlier `for year in ...` cell was left holding.
filename = 'yearly_files/surveys' + str(year) + '.csv'

In [18]:
def this_is_the_function_name(input_argument1, input_argument2):
    """
    Print both arguments, then return their product.

    input_argument1 --- first value to report and multiply
    input_argument2 --- second value to report and multiply
    """
    # Everything indented under the def line belongs to the function body.
    # First report what was passed in ...
    report = ('The function arguments are:', input_argument1, input_argument2,
              '(this is done inside the function!)')
    print(*report)

    # ... then hand the product back to the caller
    product = input_argument1 * input_argument2
    return product

In [19]:
# Call the function with 2 and 5 and keep the value it returns
product_of_inputs = this_is_the_function_name(2,5)


The function arguments are: 2 5 (this is done inside the function!)

In [20]:
# Print the stored return value from outside the function
print('Their product is:', product_of_inputs, '(this is done outside the function!)')


Their product is: 10 (this is done outside the function!)

In [23]:
def one_year_csv_writer(this_year, all_data):
    """
    Writes a csv file for data from a given year.

    The file is saved as yearly_files/function_surveys<this_year>.csv.
    The yearly_files folder is created first if it does not exist yet.

    this_year --- year for which data is extracted
    all_data --- DataFrame with multi-year data (needs a `year` column)
    """

    # Select data for the year
    surveys_year = all_data[all_data.year == this_year]

    # to_csv does not create missing folders, so make sure the target exists
    os.makedirs('yearly_files', exist_ok=True)

    # Write the new DataFrame to a csv file
    filename = 'yearly_files/function_surveys' + str(this_year) + '.csv'
    surveys_year.to_csv(filename)

In [24]:
# Write the 2002 subset of surveys_df using the function defined above
one_year_csv_writer(2002,surveys_df)

In [25]:
def yearly_data_csv_writer(start_year, end_year, all_data):
    """
    Write one csv file per year for an inclusive range of years.

    start_year --- first year to export
    end_year --- last year to export (included)
    all_data --- DataFrame with multi-year data
    """

    # range() excludes its stop value, hence the +1 so end_year is included
    years_to_export = range(start_year, end_year + 1)
    for current_year in years_to_export:
        one_year_csv_writer(current_year, all_data)

In [26]:
# Load the data into a DataFrame
surveys_df = pd.read_csv('surveys.csv')

In [27]:
# Create csv files
yearly_data_csv_writer(1977, 2002, surveys_df)

In [29]:
def yearly_data_arg_test(all_data, start_year = 1977, end_year = 2002):
    """
    Trimmed-down variant of yearly_data_csv_writer for trying out default argument values.

    all_data --- DataFrame with multi-year data (not read by this version)
    start_year --- the first year of data we want --- default: 1977
    end_year --- the last year of data we want --- default: 2002
    """
    # Nothing is written here; the two year bounds are simply handed back as a tuple
    year_bounds = (start_year, end_year)
    return year_bounds

# Supply both optional arguments positionally
start,end = yearly_data_arg_test (surveys_df, 1988, 1993)
print('Both optional arguments:\t', start, end)


Both optional arguments:	 1988 1993

In [30]:
# Leave out both optional arguments so the default values are used
start,end = yearly_data_arg_test (surveys_df)
print('Default values:\t\t\t', start, end)


Default values:			 1977 2002

In [31]:
def yearly_data_arg_test(all_data, start_year = None, end_year = None):
    """
    Modified from yearly_data_csv_writer to test default argument values!

    all_data --- DataFrame with multi-year data
    start_year --- the first year of data we want --- default: None - use the smallest year in all_data
    end_year --- the last year of data we want --- default: None - use the largest year in all_data

    Returns the tuple (start_year, end_year).
    """

    # Compare with None explicitly: `not start_year` is also True for 0, which
    # would silently replace a value the caller really passed in.
    if start_year is None:
        start_year = min(all_data.year)
    if end_year is None:
        end_year = max(all_data.year)

    return start_year, end_year

In [32]:
# Both optional arguments supplied, so all_data is not consulted for the bounds
start,end = yearly_data_arg_test (surveys_df, 1988, 1993)
print('Both optional arguments:\t', start, end)


Both optional arguments:	 1988 1993

In [34]:
# Neither optional argument supplied, so both bounds come from the year column of surveys_df
start,end = yearly_data_arg_test (surveys_df)
print('Default values:\t\t\t', start, end)


Default values:			 1977 2002

In [36]:
a = 5

if a < 0:
    # First test passed: a is below zero
    print('a is a negative number')
elif a > 0:
    # First test failed, second test passed: a is above zero
    print('a is a positive number')
else:
    # Both tests failed: a is neither below nor above zero
    print('a must be zero!')


a is a positive number

In [37]:
def yearly_data_arg_test(all_data, start_year = None, end_year = None):
    """
    Modified from yearly_data_csv_writer to test default argument values!

    all_data --- DataFrame with multi-year data
    start_year --- the first year of data we want --- default: None - use the smallest year in all_data
    end_year --- the last year of data we want --- default: None - use the largest year in all_data

    Returns the tuple (start_year, end_year).
    """

    # Only fall back to the data when the argument was left out. A truthiness
    # test (`not start_year`) would also discard a legitimately passed 0.
    if start_year is None:
        start_year = min(all_data.year)
    if end_year is None:
        end_year = max(all_data.year)

    return start_year, end_year

In [38]:
# Every statement sits at top level: the stray indentation that followed the
# first line made this cell fail with "IndentationError: unexpected indent".

# No optional arguments: both bounds are taken from the data
start,end = yearly_data_arg_test (surveys_df)
print('Default values:\t\t\t', start, end)

# Optional arguments given by position
start,end = yearly_data_arg_test (surveys_df, 1988, 1993)
print('No keywords:\t\t\t', start, end)

# Optional arguments given by keyword, in the order of the signature
start,end = yearly_data_arg_test (surveys_df, start_year = 1988, end_year = 1993)
print('Both keywords, in order:\t', start, end)

# Keyword arguments may be given in any order
start,end = yearly_data_arg_test (surveys_df, end_year = 1993, start_year = 1988)
print('Both keywords, flipped:\t\t', start, end)

# Only start_year given: end_year falls back to its default
start,end = yearly_data_arg_test (surveys_df, start_year = 1988)
print('One keyword, default end:\t', start, end)

# Only end_year given: start_year falls back to its default
start,end = yearly_data_arg_test (surveys_df, end_year = 1993)
print('One keyword, default start:\t', start, end)


Default values:			 1977 2002
No keywords:			 1988 1993
Both keywords, in order:	 1988 1993
Both keywords, flipped:		 1988 1993
One keyword, default end:	 1988 2002
One keyword, default start:	 1977 1993

In [ ]: