notebook.community

Edit and run



In [5]:

    
import glob
import os

import pandas as pd



In [6]:

    
# Collect every .xlsx workbook directly inside the files/ folder.
# glob returns matches in a filesystem-dependent order, so sort them to make the
# row order of the consolidated output reproducible from run to run.
excel_files = sorted(glob.glob('files/*.xlsx'))



In [30]:

    
# Read every workbook into its own DataFrame, tag each row with the workbook it
# came from, then combine everything with a single pd.concat call.
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# re-copied the accumulated rows on every iteration.
frames = []
for excel_file in excel_files:
    # Base name without directory or extension. Unlike split('.')[0][6:], this keeps
    # names that contain dots and does not depend on the length of the 'files/' prefix.
    file_name = os.path.splitext(os.path.basename(excel_file))[0]
    df = pd.read_excel(excel_file)  # parse the workbook (first sheet by default) into a DataFrame
    df['file_name'] = file_name  # record which file each row came from
    frames.append(df)

# pd.concat raises on an empty list, so keep the original "no files -> None" outcome.
df_all = pd.concat(frames) if frames else None



In [34]:

    
# Write the consolidated frame to a single CSV file.
# index=False leaves the DataFrame's row index out of the written file.
output_csv = 'files/all_claims_files.csv'
df_all.to_csv(output_csv, index=False)



In [ ]: