In [5]:
import glob
import os

import pandas as pd
In [6]:
# Collect every .xlsx workbook in the files/ folder using the * wildcard.
# glob returns matches in arbitrary, OS-dependent order, so the list is sorted
# to keep the row order of the consolidated output reproducible across runs.
excel_files = sorted(glob.glob('files/*.xlsx'))
In [30]:
# Consolidate every workbook listed in excel_files into a single DataFrame,
# tagging each row with the name of the workbook it came from.
frames = []  # one DataFrame per workbook; concatenated once after the loop
for excel_file in excel_files:
    # Bare file name without folder or extension. basename/splitext keep dots
    # inside the name intact ('files/claims.2020.xlsx' -> 'claims.2020'), whereas
    # splitting on the first '.' would truncate it to 'claims'.
    file_name = os.path.splitext(os.path.basename(excel_file))[0]
    df = pd.read_excel(excel_file)  # parse the Excel file into a pandas DataFrame
    df['file_name'] = file_name  # keep track of which file each row came from
    frames.append(df)

# DataFrame.append was removed in pandas 2.0 and re-copied the accumulated data
# on every iteration; a single pd.concat is the supported replacement.
# df_all stays None when no workbooks were found, matching the previous behaviour.
df_all = pd.concat(frames) if frames else None
In [34]:
# Persist the consolidated frame as a CSV alongside the source workbooks.
# index=False keeps the DataFrame's row index out of the written file.
output_path = 'files/all_claims_files.csv'
df_all.to_csv(output_path, index=False)
In [ ]: