In [ ]:
# Import the libraries we need
from os import getcwd, listdir
from os.path import abspath, dirname, isfile, join, splitext
import pandas as pd
from pivottablejs import pivot_ui
In [ ]:
# Locate the folder that holds the csv output: <cwd>/../data/<output_folder_name>,
# collapsed to an absolute path so later cells do not depend on the '..' segment.
output_folder_name = 'q1_most_polluted_state' # TODO: Update this if needed
data_root = join(getcwd(), '..', 'data')
output_dir = abspath(join(data_root, output_folder_name))
In [ ]:
# Create a list of csv files in the output directory
# Only regular files whose extension is exactly '.csv' are kept; each entry is
# the full path (output_dir joined with the file name).
# os.listdir returns names in arbitrary order, so the list is sorted to make
# its order - and anything built from it in order - the same on every run.
csv_files = sorted(
    path
    for path in (join(output_dir, name) for name in listdir(output_dir))
    if isfile(path) and splitext(path)[1] == '.csv'
)
print("{} csv files found".format(len(csv_files)))
In [ ]:
# Create a single csv file from the output csv files
output_file = join(output_dir, 'q1_output.csv')

# The merged file is written into the same folder the inputs are read from, so
# after a first run it is itself one of the '.csv' files in csv_files. Opening
# it with 'w' truncates it, so reading it back as an input adds nothing, and if
# it happens to be first in the list the real first file loses its first line.
# Leave it out of the inputs.
source_files = [path for path in csv_files if abspath(path) != abspath(output_file)]

with open(output_file, 'w') as o_file:
    for num, csv_path in enumerate(source_files):
        with open(csv_path, 'r') as f:
            if num > 0:
                # Only the first file contributes its first line; it is dropped
                # from every later file.
                # NOTE(review): this assumes each input starts with a header
                # row - confirm, otherwise one data row per file is lost here.
                next(f, None)
            # Stream the remaining lines straight through instead of loading
            # the whole file into memory first.
            o_file.writelines(f)
In [ ]:
# Load the merged csv file into a Pandas DataFrame with explicit column names
column_names = ['arithmetic_mean', 'parameter_name', 'state']
# NOTE(review): header=None tells pandas that the file has no header row, so
# its first line is loaded as data. If the merged file does start with a header
# line, that line ends up as the first row of df - confirm which is intended.
df = pd.read_csv(output_file, names=column_names, header=None)
# Preview the first rows (shown as the cell's output)
df.head()
In [ ]:
# Report the size of the DataFrame: df.shape is a (rows, columns) pair
rows_cols = df.shape
row_count, col_count = rows_cols
print("Rows: {}".format(row_count))
print("Columns: {}".format(col_count))
In [ ]:
# Create a PivotTable using pivottable.js
# The whole DataFrame is handed to pivottablejs' pivot_ui. Because the call is
# the last expression in the cell, whatever it returns is what the notebook
# displays as the cell output.
# NOTE(review): presumably this renders an interactive pivot table and may also
# write an HTML file to disk - confirm against the pivottablejs documentation.
pivot_ui(df)
In [ ]: