In [2]:
import os
import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
% matplotlib inline

In [3]:
# pandas options
pd.options.display.max_rows = 6

Convert our txt files to csv format (internally)

You will need to create a .\Groundwater-Composition-csv folder to store results in, which is used later on.


In [4]:
inFolder = r'..\Data\Groundwater-Composition2'
#os.listdir(inFolder)

csvFolder = r'..\Data\Groundwater-Composition-csv'
for file in os.listdir(inFolder):
    current_file = inFolder + '\\' + file
    outFile = csvFolder + '\\' + file
    with open(current_file, 'r') as in_file:
        lines = in_file.read().splitlines()
        stripped = [line.replace("\t",',').split(',') for line in lines]
        with open(outFile, 'w') as out_file:
            writer = csv.writer(out_file)
            writer.writerows(stripped)

Single File


In [3]:
inFile = r'..\Data\Groundwater-Composition-csv\B25A0857.txt'
header_names = "NITG-nr","Monster datum","Monster-nr","Monster apparatuur","Mengmonster","Bovenkant monster (cm tov MV)","Onderkant monster (cm tov MV)","Analyse datum","CO2 (mg/l)","CO3-- (mg/l)","Ca (mg/l)","Cl- (mg/l)","EC (uS/cm)","Fe (mg/l)","HCO3 (mg/l)","KLEUR (mgPt/l)","KMNO4V-O (mg/l)","Mg (mg/l)","Mn (mg/l)","NH4 (mg/l)","NH4-ORG (mg/l)","NO2 (mg/l)","NO3 (mg/l)","Na (mg/l)","NaHCO3 (mg/l)","SO4 (mg/l)","SiO2 (mg/l)","T-PO4 (mg/l)","TEMP-V (C)","TIJDH (mmol/l)","TOTH (mmol/l)","pH (-)"
df = pd.read_csv(inFile, skiprows=6, parse_dates=[1], sep=',', header=None, names=header_names)

In [4]:
df


Out[4]:
NITG-nr Monster datum Monster-nr Monster apparatuur Mengmonster Bovenkant monster (cm tov MV) Onderkant monster (cm tov MV) Analyse datum CO2 (mg/l) CO3-- (mg/l) ... NO3 (mg/l) Na (mg/l) NaHCO3 (mg/l) SO4 (mg/l) SiO2 (mg/l) T-PO4 (mg/l) TEMP-V (C) TIJDH (mmol/l) TOTH (mmol/l) pH (-)
0 NaN nan NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 KWALITEIT gegevens VLOEIBAAR nan NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 NITG-nr Monster datum Monster-nr Monster apparatuur Mengmonster Bovenkant monster (cm tov MV) Onderkant monster (cm tov MV) Analyse datum Al (mg/l) As (ug/l) ... KLEUR (mgPt/l) KMNO4V-O (mg/l) Mg (mg/l) Mn (mg/l) NH4 (mg/l) NH4-ORG (mg/l) NO2 (mg/l) NO3 (mg/l) Na (mg/l) NaHCO3 (mg/l)
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9 B25A0857 11-10-1994 T1989-00-3313 NaN nee 20800.0 21800.0 11-10-1994 NaN NaN ... NaN NaN 289 .33 45.6 NaN NaN .29 2268 NaN
10 B25A0857 11-10-1994 T1989-00-3313 NaN nee 20800.0 21800.0 11-10-1994 NaN .7 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
11 B25A0857 27-11-1969 C1969-11-1057 NaN nee 20800.0 21800.0 27-11-1969 NaN NaN ... 40 83 355 .55 50 .47 0 0 2860 0

12 rows × 32 columns


In [8]:
inFile = r'..\Data\Groundwater-Composition\B25A0857.txt'
header_names = 'NITG-nr', 'X-coord', 'Y-coord', 'Coordinaat systeem', 'Kaartblad', 'Bepaling locatie', 'Maaiveldhoogte (m tov NAP)', 'Bepaling maaiveldhoogte', 'OLGA-nr', 'RIVM-nr', 'Aantal analyses', 'Meetnet', 'Indeling'
df = pd.read_csv(
    inFile, 
    skiprows=2, 
    parse_dates=[1], 
    nrows=1, 
    delim_whitespace=True, 
    header=None, 
    names=header_names
)

In [9]:
df

Loading all the files

Now we can process all the files correctly. The following will look for our pre-processed files in .\Groundwater-Composition-csv.


In [5]:
data_header_names = "NITG-nr","Monster datum","Monster-nr","Monster apparatuur","Mengmonster","Bovenkant monster (cm tov MV)","Onderkant monster (cm tov MV)","Analyse datum","CO2 (mg/l)","CO3-- (mg/l)","Ca (mg/l)","Cl- (mg/l)","EC (uS/cm)","Fe (mg/l)","HCO3 (mg/l)","KLEUR (mgPt/l)","KMNO4V-O (mg/l)","Mg (mg/l)","Mn (mg/l)","NH4 (mg/l)","NH4-ORG (mg/l)","NO2 (mg/l)","NO3 (mg/l)","Na (mg/l)","NaHCO3 (mg/l)","SO4 (mg/l)","SiO2 (mg/l)","T-PO4 (mg/l)","TEMP-V (C)","TIJDH (mmol/l)","TOTH (mmol/l)","pH (-)"

header_header_names = 'NITG-nr', 'X-coord', 'Y-coord', 'Coordinaat systeem', 'Kaartblad', 'Bepaling locatie', 'Maaiveldhoogte (m tov NAP)', 'Bepaling maaiveldhoogte', 'OLGA-nr', 'RIVM-nr', 'Aantal analyses', 'Meetnet', 'Indeling'

In [6]:
def pre_read(file):
    i=0
    loc=0
    metarow,skiprows = 0, 0
    with open(file) as f:
        for line in f:
            if line[:7] == 'LOCATIE':
                loc=loc+1
                if loc==1:
                    metarow = i+1
                if loc==2:
                    skiprows = i+1
        i+=1
    return metarow,skiprows

In [8]:
df_list = []

for file in os.listdir(inFolder):
    current_file = csvFolder + '\\' + file
    # print(current_file)
    metarow,skip = pre_read(current_file)
    df = pd.read_csv(
        current_file,
        skiprows=skip,
        parse_dates=[1],
        sep=',',
        header=None,
        names=data_header_names
        )
    df_list.append(df)
    
    # print(df_list)
all_dfs = pd.concat(df_list)

In [9]:
all_dfs = all_dfs[all_dfs['NITG-nr'].str.contains("NITG-nr") == False]
all_dfs = all_dfs[all_dfs['NITG-nr'].str.contains("LOCATIE") == False]
all_dfs = all_dfs[all_dfs['NITG-nr'].str.contains("KWALITEIT") == False]
all_dfs = all_dfs[all_dfs['Monster apparatuur'].str.contains("Rijksdriehoeksmeting") == False]

In [10]:
all_dfs


Out[10]:
NITG-nr Monster datum Monster-nr Monster apparatuur Mengmonster Bovenkant monster (cm tov MV) Onderkant monster (cm tov MV) Analyse datum CO2 (mg/l) CO3-- (mg/l) ... NO3 (mg/l) Na (mg/l) NaHCO3 (mg/l) SO4 (mg/l) SiO2 (mg/l) T-PO4 (mg/l) TEMP-V (C) TIJDH (mmol/l) TOTH (mmol/l) pH (-)
6 B24H1244 02-09-2009 L2009-09-0117 Slangenpomp nee 10000.0 12000.0 02-09-2009 2 40 ... .17 <.05 25 <1 <.05 <1 41 14 6.4 470
6 B25D0639 21-07-2009 L2009-07-0018 Slangenpomp nee NaN NaN 21-07-2009 4 220 ... 9.1 <.05 2200 <1 <.05 <1 180 6.4 3 1700
7 B25D0639 21-07-2009 L2009-07-0014 Slangenpomp nee NaN NaN 21-07-2009 8 160 ... 11 <.05 290 <1 .08 <1 8.7 23 11 360
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
9 B25D0639 21-07-2009 L2009-07-0013 Slangenpomp nee NaN NaN 21-07-2009 6 130 ... 21 <.05 200 <1 .21 <1 9.1 18 8.5 690
10 B25E0884 22-07-2009 L2009-07-0019 Slangenpomp nee 8080.0 10080.0 22-07-2009 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
11 B25E0884 22-07-2009 L2009-07-0020 Slangenpomp nee 23040.0 25040.0 22-07-2009 NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

7 rows × 32 columns