notebook.community

Edit and run



In [ ]:

    
# Import the libraries we need
from os import getcwd, listdir
from os.path import abspath, dirname, isfile, join, splitext
import pandas as pd
from pivottablejs import pivot_ui



In [ ]:

    
# Locate the folder that holds this query's csv output.
# It lives under the "data" directory that sits beside the current working
# directory's parent, i.e. <cwd>/../data/<output_folder_name>.
output_folder_name = 'q1_most_polluted_state'   # TODO: Update this if needed
data_root = join(getcwd(), '..', 'data')
# abspath() also collapses the '..' segment, yielding a normalised absolute path.
output_dir = abspath(join(data_root, output_folder_name))



In [ ]:

    
# Create a list of csv files in the output directory.
# Only regular files whose extension is exactly '.csv' are kept; each entry is
# the full path (output_dir joined with the file name).
# listdir() returns names in arbitrary order, so the paths are sorted to make
# the order in which the files are later merged reproducible between runs.
# NOTE: the merged 'q1_output.csv' produced further down is written into this
# same directory, so on a re-run it will be counted here as well.
csv_files = sorted(
    join(output_dir, f)
    for f in listdir(output_dir)
    if isfile(join(output_dir, f)) and splitext(f)[1] == '.csv'
)
print("{} csv files found".format(len(csv_files)))



In [ ]:

    
# Create a single csv file from the output csv files.
# The combined file is written to <output_dir>/q1_output.csv.
output_file = join(output_dir, 'q1_output.csv')

# The combined file lives in the same directory that was scanned for inputs.
# If this notebook has been run before, csv_files may therefore contain
# output_file itself; opening it with 'w' would truncate it and then read it
# back as a source. Leave it out of the inputs.
source_files = [path for path in csv_files if path != output_file]

# NOTE(review): the first line of every file after the first is dropped, which
# assumes each input starts with a header row. The later read_csv call uses
# header=None, which assumes there is no header row -- confirm which is true.
with open(output_file, 'w') as o_file:
    for num, source_file in enumerate(source_files):
        with open(source_file, 'r') as f:
            if num > 0:
                # Keep the first line only from the first file; next() with a
                # default tolerates an empty input file.
                next(f, None)
            # Stream line by line instead of loading the whole file in memory.
            for line in f:
                # An input whose last line lacks a newline would otherwise be
                # fused with the first row of the next file.
                o_file.write(line if line.endswith('\n') else line + '\n')



In [ ]:

    
# Load the combined csv into a Pandas DataFrame, supplying the column names
# explicitly rather than reading them from the file.
# NOTE(review): header=None makes pandas treat the file's first line as data;
# the merge step keeps the first line of the first input file as if it were a
# header -- confirm whether the input files actually carry a header row.
column_names = ['arithmetic_mean', 'parameter_name', 'state']
df = pd.read_csv(output_file, names=column_names, header=None)
# Last expression of the cell: shows the first rows of the frame.
df.head()



In [ ]:

    
# Report the DataFrame's dimensions: df.shape is (row count, column count).
rows_cols = df.shape
for template, count in zip(("Rows: {}", "Columns: {}"), rows_cols):
    print(template.format(count))



In [ ]:

    
# Create a PivotTable using pivottable.js
# The DataFrame is handed to pivot_ui (imported from pivottablejs); because the
# call is the last expression in the cell, whatever it returns is what the
# notebook displays as the cell's output.
pivot_ui(df)



In [ ]: