In [2]:

    
import os
import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
% matplotlib inline



In [3]:

    
# pandas options
pd.options.display.max_rows = 6

Convert our txt files to csv format (internally)

You will need to create a .\Groundwater-Composition-csv folder to store results in, which is used later on.



In [4]:

    
inFolder = r'..\Data\Groundwater-Composition2'
#os.listdir(inFolder)

csvFolder = r'..\Data\Groundwater-Composition-csv'
for file in os.listdir(inFolder):
    current_file = inFolder + '\\' + file
    outFile = csvFolder + '\\' + file
    with open(current_file, 'r') as in_file:
        lines = in_file.read().splitlines()
        stripped = [line.replace("\t",',').split(',') for line in lines]
        with open(outFile, 'w') as out_file:
            writer = csv.writer(out_file)
            writer.writerows(stripped)

Single File



In [3]:

    
inFile = r'..\Data\Groundwater-Composition-csv\B25A0857.txt'
header_names = "NITG-nr","Monster datum","Monster-nr","Monster apparatuur","Mengmonster","Bovenkant monster (cm tov MV)","Onderkant monster (cm tov MV)","Analyse datum","CO2 (mg/l)","CO3-- (mg/l)","Ca (mg/l)","Cl- (mg/l)","EC (uS/cm)","Fe (mg/l)","HCO3 (mg/l)","KLEUR (mgPt/l)","KMNO4V-O (mg/l)","Mg (mg/l)","Mn (mg/l)","NH4 (mg/l)","NH4-ORG (mg/l)","NO2 (mg/l)","NO3 (mg/l)","Na (mg/l)","NaHCO3 (mg/l)","SO4 (mg/l)","SiO2 (mg/l)","T-PO4 (mg/l)","TEMP-V (C)","TIJDH (mmol/l)","TOTH (mmol/l)","pH (-)"
df = pd.read_csv(inFile, skiprows=6, parse_dates=[1], sep=',', header=None, names=header_names)



In [4]:

    
df









    Out[4]:






  
    
      
      NITG-nr
      Monster datum
      Monster-nr
      Monster apparatuur
      Mengmonster
      Bovenkant monster (cm tov MV)
      Onderkant monster (cm tov MV)
      Analyse datum
      CO2 (mg/l)
      CO3-- (mg/l)
      ...
      NO3 (mg/l)
      Na (mg/l)
      NaHCO3 (mg/l)
      SO4 (mg/l)
      SiO2 (mg/l)
      T-PO4 (mg/l)
      TEMP-V (C)
      TIJDH (mmol/l)
      TOTH (mmol/l)
      pH (-)
    
  
  
    
      0
      NaN
      nan
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      KWALITEIT gegevens VLOEIBAAR
      nan
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      NITG-nr
      Monster datum
      Monster-nr
      Monster apparatuur
      Mengmonster
      Bovenkant monster (cm tov MV)
      Onderkant monster (cm tov MV)
      Analyse datum
      Al (mg/l)
      As (ug/l)
      ...
      KLEUR (mgPt/l)
      KMNO4V-O (mg/l)
      Mg (mg/l)
      Mn (mg/l)
      NH4 (mg/l)
      NH4-ORG (mg/l)
      NO2 (mg/l)
      NO3 (mg/l)
      Na (mg/l)
      NaHCO3 (mg/l)
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      9
      B25A0857
      11-10-1994
      T1989-00-3313
      NaN
      nee
      20800.0
      21800.0
      11-10-1994
      NaN
      NaN
      ...
      NaN
      NaN
      289
      .33
      45.6
      NaN
      NaN
      .29
      2268
      NaN
    
    
      10
      B25A0857
      11-10-1994
      T1989-00-3313
      NaN
      nee
      20800.0
      21800.0
      11-10-1994
      NaN
      .7
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      11
      B25A0857
      27-11-1969
      C1969-11-1057
      NaN
      nee
      20800.0
      21800.0
      27-11-1969
      NaN
      NaN
      ...
      40
      83
      355
      .55
      50
      .47
      0
      0
      2860
      0
    
  

12 rows × 32 columns



In [8]:

    
inFile = r'..\Data\Groundwater-Composition\B25A0857.txt'
header_names = 'NITG-nr', 'X-coord', 'Y-coord', 'Coordinaat systeem', 'Kaartblad', 'Bepaling locatie', 'Maaiveldhoogte (m tov NAP)', 'Bepaling maaiveldhoogte', 'OLGA-nr', 'RIVM-nr', 'Aantal analyses', 'Meetnet', 'Indeling'
df = pd.read_csv(
    inFile, 
    skiprows=2, 
    parse_dates=[1], 
    nrows=1, 
    delim_whitespace=True, 
    header=None, 
    names=header_names
)



In [9]:

    
df

Loading all the files

Now we can process all the files correctly. The following will look for our pre-processed files in .\Groundwater-Composition-csv.



In [5]:

    
data_header_names = "NITG-nr","Monster datum","Monster-nr","Monster apparatuur","Mengmonster","Bovenkant monster (cm tov MV)","Onderkant monster (cm tov MV)","Analyse datum","CO2 (mg/l)","CO3-- (mg/l)","Ca (mg/l)","Cl- (mg/l)","EC (uS/cm)","Fe (mg/l)","HCO3 (mg/l)","KLEUR (mgPt/l)","KMNO4V-O (mg/l)","Mg (mg/l)","Mn (mg/l)","NH4 (mg/l)","NH4-ORG (mg/l)","NO2 (mg/l)","NO3 (mg/l)","Na (mg/l)","NaHCO3 (mg/l)","SO4 (mg/l)","SiO2 (mg/l)","T-PO4 (mg/l)","TEMP-V (C)","TIJDH (mmol/l)","TOTH (mmol/l)","pH (-)"

header_header_names = 'NITG-nr', 'X-coord', 'Y-coord', 'Coordinaat systeem', 'Kaartblad', 'Bepaling locatie', 'Maaiveldhoogte (m tov NAP)', 'Bepaling maaiveldhoogte', 'OLGA-nr', 'RIVM-nr', 'Aantal analyses', 'Meetnet', 'Indeling'



In [6]:

    
def pre_read(file):
    i=0
    loc=0
    metarow,skiprows = 0, 0
    with open(file) as f:
        for line in f:
            if line[:7] == 'LOCATIE':
                loc=loc+1
                if loc==1:
                    metarow = i+1
                if loc==2:
                    skiprows = i+1
        i+=1
    return metarow,skiprows



In [8]:

    
df_list = []

for file in os.listdir(inFolder):
    current_file = csvFolder + '\\' + file
    # print(current_file)
    metarow,skip = pre_read(current_file)
    df = pd.read_csv(
        current_file,
        skiprows=skip,
        parse_dates=[1],
        sep=',',
        header=None,
        names=data_header_names
        )
    df_list.append(df)
    
    # print(df_list)
all_dfs = pd.concat(df_list)



In [9]:

    
all_dfs = all_dfs[all_dfs['NITG-nr'].str.contains("NITG-nr") == False]
all_dfs = all_dfs[all_dfs['NITG-nr'].str.contains("LOCATIE") == False]
all_dfs = all_dfs[all_dfs['NITG-nr'].str.contains("KWALITEIT") == False]
all_dfs = all_dfs[all_dfs['Monster apparatuur'].str.contains("Rijksdriehoeksmeting") == False]



In [10]:

    
all_dfs









    Out[10]:






  
    
      
      NITG-nr
      Monster datum
      Monster-nr
      Monster apparatuur
      Mengmonster
      Bovenkant monster (cm tov MV)
      Onderkant monster (cm tov MV)
      Analyse datum
      CO2 (mg/l)
      CO3-- (mg/l)
      ...
      NO3 (mg/l)
      Na (mg/l)
      NaHCO3 (mg/l)
      SO4 (mg/l)
      SiO2 (mg/l)
      T-PO4 (mg/l)
      TEMP-V (C)
      TIJDH (mmol/l)
      TOTH (mmol/l)
      pH (-)
    
  
  
    
      6
      B24H1244
      02-09-2009
      L2009-09-0117
      Slangenpomp
      nee
      10000.0
      12000.0
      02-09-2009
      2
      40
      ...
      .17
      <.05
      25
      <1
      <.05
      <1
      41
      14
      6.4
      470
    
    
      6
      B25D0639
      21-07-2009
      L2009-07-0018
      Slangenpomp
      nee
      NaN
      NaN
      21-07-2009
      4
      220
      ...
      9.1
      <.05
      2200
      <1
      <.05
      <1
      180
      6.4
      3
      1700
    
    
      7
      B25D0639
      21-07-2009
      L2009-07-0014
      Slangenpomp
      nee
      NaN
      NaN
      21-07-2009
      8
      160
      ...
      11
      <.05
      290
      <1
      .08
      <1
      8.7
      23
      11
      360
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      9
      B25D0639
      21-07-2009
      L2009-07-0013
      Slangenpomp
      nee
      NaN
      NaN
      21-07-2009
      6
      130
      ...
      21
      <.05
      200
      <1
      .21
      <1
      9.1
      18
      8.5
      690
    
    
      10
      B25E0884
      22-07-2009
      L2009-07-0019
      Slangenpomp
      nee
      8080.0
      10080.0
      22-07-2009
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      11
      B25E0884
      22-07-2009
      L2009-07-0020
      Slangenpomp
      nee
      23040.0
      25040.0
      22-07-2009
      NaN
      NaN
      ...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

7 rows × 32 columns

	NITG-nr	Monster datum	Monster-nr	Monster apparatuur	Mengmonster	Bovenkant monster (cm tov MV)	Onderkant monster (cm tov MV)	Analyse datum	CO2 (mg/l)	CO3-- (mg/l)	...	NO3 (mg/l)	Na (mg/l)	NaHCO3 (mg/l)	SO4 (mg/l)	SiO2 (mg/l)	T-PO4 (mg/l)	TEMP-V (C)	TIJDH (mmol/l)	TOTH (mmol/l)	pH (-)
0	NaN	nan	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	KWALITEIT gegevens VLOEIBAAR	nan	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	NITG-nr	Monster datum	Monster-nr	Monster apparatuur	Mengmonster	Bovenkant monster (cm tov MV)	Onderkant monster (cm tov MV)	Analyse datum	Al (mg/l)	As (ug/l)	...	KLEUR (mgPt/l)	KMNO4V-O (mg/l)	Mg (mg/l)	Mn (mg/l)	NH4 (mg/l)	NH4-ORG (mg/l)	NO2 (mg/l)	NO3 (mg/l)	Na (mg/l)	NaHCO3 (mg/l)
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
9	B25A0857	11-10-1994	T1989-00-3313	NaN	nee	20800.0	21800.0	11-10-1994	NaN	NaN	...	NaN	NaN	289	.33	45.6	NaN	NaN	.29	2268	NaN
10	B25A0857	11-10-1994	T1989-00-3313	NaN	nee	20800.0	21800.0	11-10-1994	NaN	.7	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
11	B25A0857	27-11-1969	C1969-11-1057	NaN	nee	20800.0	21800.0	27-11-1969	NaN	NaN	...	40	83	355	.55	50	.47	0	0	2860	0

	NITG-nr	Monster datum	Monster-nr	Monster apparatuur	Mengmonster	Bovenkant monster (cm tov MV)	Onderkant monster (cm tov MV)	Analyse datum	CO2 (mg/l)	CO3-- (mg/l)	...	NO3 (mg/l)	Na (mg/l)	NaHCO3 (mg/l)	SO4 (mg/l)	SiO2 (mg/l)	T-PO4 (mg/l)	TEMP-V (C)	TIJDH (mmol/l)	TOTH (mmol/l)	pH (-)
6	B24H1244	02-09-2009	L2009-09-0117	Slangenpomp	nee	10000.0	12000.0	02-09-2009	2	40	...	.17	<.05	25	<1	<.05	<1	41	14	6.4	470
6	B25D0639	21-07-2009	L2009-07-0018	Slangenpomp	nee	NaN	NaN	21-07-2009	4	220	...	9.1	<.05	2200	<1	<.05	<1	180	6.4	3	1700
7	B25D0639	21-07-2009	L2009-07-0014	Slangenpomp	nee	NaN	NaN	21-07-2009	8	160	...	11	<.05	290	<1	.08	<1	8.7	23	11	360
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
9	B25D0639	21-07-2009	L2009-07-0013	Slangenpomp	nee	NaN	NaN	21-07-2009	6	130	...	21	<.05	200	<1	.21	<1	9.1	18	8.5	690
10	B25E0884	22-07-2009	L2009-07-0019	Slangenpomp	nee	8080.0	10080.0	22-07-2009	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
11	B25E0884	22-07-2009	L2009-07-0020	Slangenpomp	nee	23040.0	25040.0	22-07-2009	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN