Load the data file `data/survey_-_cleaned.csv`, which contains the form responses to the Suomen Parhaat Boulderit 2018 survey, and drop duplicate responses.
In [1]:
    
import pandas as pd
import numpy as np
# Read the cleaned survey responses and discard repeated submissions in one
# chain. The first column (Timestamp) is left out of the comparison, so rows
# that agree on every other column count as duplicates.
spb2018_df = pd.read_csv("data/survey_-_cleaned.csv").pipe(
    lambda responses: responses.drop_duplicates(subset=list(responses.columns[1:]))
)
spb2018_df.head()
    
    Out[1]:
Create the boulders template file `data/boulders_-_template.csv`, which lists every unique boulder name found in the survey responses, sorted by name.
In [2]:
    
def create_boulders_template(
    survey_df=None,
    output_path="data/boulders_-_template.csv",
    name_columns=("Boulderin nimi", "Boulderin nimi.1", "Boulderin nimi.2"),
):
    """Write the sorted, de-duplicated boulder names to a CSV template file.

    The values of every column in ``name_columns`` are stacked into a single
    Series, missing values and repeated names are removed, and the remaining
    names are sorted and written to ``output_path`` without the index.

    Parameters
    ----------
    survey_df : pandas.DataFrame, optional
        Survey responses to read the names from. When omitted, the
        notebook-level ``spb2018_df`` is used, so a bare call behaves as before.
    output_path : str
        Destination of the CSV file.
    name_columns : sequence of str
        Columns of ``survey_df`` that hold boulder names.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If one of ``name_columns`` is not a column of ``survey_df``.
    """
    if survey_df is None:
        # Resolved at call time so that re-running the loading cell is picked up.
        survey_df = spb2018_df

    unique_boulder_names_s = (
        pd.concat([survey_df[column] for column in name_columns], ignore_index=True)
        .dropna()
        .drop_duplicates()
        .sort_values()
        .reset_index(drop=True)
    )
    # NOTE(review): `header` is left at the pandas default, which has differed
    # between pandas releases for Series.to_csv -- confirm what the consumer of
    # the template expects before pinning it.
    unique_boulder_names_s.to_csv(output_path, index=False)
    
# Generate data/boulders_-_template.csv from the survey responses loaded above.
create_boulders_template()