In [2]:
import os
import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
% matplotlib inline
In [3]:
# Notebook-wide display setting: limit rendered DataFrames to six rows.
pd.set_option('display.max_rows', 6)
In [4]:
# Rewrite every tab-separated export found in inFolder as a comma-separated
# file with the same name inside csvFolder.
inFolder = r'..\Data\Groundwater-Composition2'
csvFolder = r'..\Data\Groundwater-Composition-csv'
for file in os.listdir(inFolder):
    current_file = os.path.join(inFolder, file)
    outFile = os.path.join(csvFolder, file)
    # Read the whole input first so its handle is closed before writing.
    with open(current_file, 'r') as in_file:
        lines = in_file.read().splitlines()
    # Tabs are turned into commas before splitting, so a line is cut on
    # both tabs and any commas it already contained.
    stripped = [line.replace("\t", ',').split(',') for line in lines]
    # The csv module requires newline='' (Python 3); without it each row is
    # terminated with '\r\r\n' on Windows, which shows up as blank rows.
    with open(outFile, 'w', newline='') as out_file:
        writer = csv.writer(out_file)
        writer.writerows(stripped)
In [3]:
# Column labels applied to the analysis table of one converted file.
header_names = (
    "NITG-nr",
    "Monster datum",
    "Monster-nr",
    "Monster apparatuur",
    "Mengmonster",
    "Bovenkant monster (cm tov MV)",
    "Onderkant monster (cm tov MV)",
    "Analyse datum",
    "CO2 (mg/l)",
    "CO3-- (mg/l)",
    "Ca (mg/l)",
    "Cl- (mg/l)",
    "EC (uS/cm)",
    "Fe (mg/l)",
    "HCO3 (mg/l)",
    "KLEUR (mgPt/l)",
    "KMNO4V-O (mg/l)",
    "Mg (mg/l)",
    "Mn (mg/l)",
    "NH4 (mg/l)",
    "NH4-ORG (mg/l)",
    "NO2 (mg/l)",
    "NO3 (mg/l)",
    "Na (mg/l)",
    "NaHCO3 (mg/l)",
    "SO4 (mg/l)",
    "SiO2 (mg/l)",
    "T-PO4 (mg/l)",
    "TEMP-V (C)",
    "TIJDH (mmol/l)",
    "TOTH (mmol/l)",
    "pH (-)",
)
inFile = r'..\Data\Groundwater-Composition-csv\B25A0857.txt'
# The first six lines are skipped and the file's own header is not used;
# column 1 ("Monster datum") is parsed as a date.
df = pd.read_csv(
    inFile,
    names=header_names,
    header=None,
    sep=',',
    skiprows=6,
    parse_dates=[1],
)
In [4]:
# Render the frame that was just read as this cell's output.
df
Out[4]:
In [8]:
# Read the single metadata row of one raw, whitespace-delimited export.
inFile = r'..\Data\Groundwater-Composition\B25A0857.txt'
header_names = (
    'NITG-nr',
    'X-coord',
    'Y-coord',
    'Coordinaat systeem',
    'Kaartblad',
    'Bepaling locatie',
    'Maaiveldhoogte (m tov NAP)',
    'Bepaling maaiveldhoogte',
    'OLGA-nr',
    'RIVM-nr',
    'Aantal analyses',
    'Meetnet',
    'Indeling',
)
# Two lines are skipped and exactly one row is read.
# No parse_dates here: column 1 of this table is 'X-coord', not a date, so
# date parsing would mangle the coordinate.
# sep=r'\s+' is the documented replacement for the deprecated
# delim_whitespace=True and splits on any run of whitespace.
df = pd.read_csv(
    inFile,
    skiprows=2,
    nrows=1,
    sep=r'\s+',
    header=None,
    names=header_names,
)
In [9]:
# Render the one-row metadata frame as this cell's output.
df
In [5]:
# Column labels for the analysis (data) table of an export.
data_header_names = (
    "NITG-nr",
    "Monster datum",
    "Monster-nr",
    "Monster apparatuur",
    "Mengmonster",
    "Bovenkant monster (cm tov MV)",
    "Onderkant monster (cm tov MV)",
    "Analyse datum",
    "CO2 (mg/l)",
    "CO3-- (mg/l)",
    "Ca (mg/l)",
    "Cl- (mg/l)",
    "EC (uS/cm)",
    "Fe (mg/l)",
    "HCO3 (mg/l)",
    "KLEUR (mgPt/l)",
    "KMNO4V-O (mg/l)",
    "Mg (mg/l)",
    "Mn (mg/l)",
    "NH4 (mg/l)",
    "NH4-ORG (mg/l)",
    "NO2 (mg/l)",
    "NO3 (mg/l)",
    "Na (mg/l)",
    "NaHCO3 (mg/l)",
    "SO4 (mg/l)",
    "SiO2 (mg/l)",
    "T-PO4 (mg/l)",
    "TEMP-V (C)",
    "TIJDH (mmol/l)",
    "TOTH (mmol/l)",
    "pH (-)",
)

# Column labels for the location-metadata table of an export.
header_header_names = (
    'NITG-nr',
    'X-coord',
    'Y-coord',
    'Coordinaat systeem',
    'Kaartblad',
    'Bepaling locatie',
    'Maaiveldhoogte (m tov NAP)',
    'Bepaling maaiveldhoogte',
    'OLGA-nr',
    'RIVM-nr',
    'Aantal analyses',
    'Meetnet',
    'Indeling',
)
In [6]:
def pre_read(file):
    """Find where the first two 'LOCATIE' marker lines of `file` end.

    The file is scanned line by line; a marker is any line whose first
    seven characters are 'LOCATIE'.

    Returns:
        (metarow, skiprows): the zero-based index of the line following the
        first marker and of the line following the second marker. A value
        stays 0 when the corresponding marker never occurs; markers after
        the second are ignored.
    """
    metarow = skiprows = 0
    markers_seen = 0
    with open(file) as handle:
        for index, text in enumerate(handle):
            if not text.startswith('LOCATIE'):
                continue
            markers_seen += 1
            if markers_seen == 1:
                metarow = index + 1
            elif markers_seen == 2:
                skiprows = index + 1
    return metarow, skiprows
In [8]:
# Load the data table of every converted file and stack them into one frame.
df_list = []
# File names come from the raw input folder; the files themselves are read
# from csvFolder under the same names.
for file in os.listdir(inFolder):
    current_file = os.path.join(csvFolder, file)
    # Only `skip` is used below; `metarow` is returned by pre_read as well.
    metarow, skip = pre_read(current_file)
    df = pd.read_csv(
        current_file,
        skiprows=skip,
        parse_dates=[1],  # column 1 is "Monster datum"
        sep=',',
        header=None,
        names=data_header_names,
    )
    df_list.append(df)
# Each frame keeps its own row index, so index labels repeat across files.
all_dfs = pd.concat(df_list)
In [9]:
# Remove rows in which the given column contains one of the marker strings.
# A row is kept only when the containment check is exactly False, so rows
# where the check yields a missing value are removed as well.
for _column, _marker in (
    ('NITG-nr', "NITG-nr"),
    ('NITG-nr', "LOCATIE"),
    ('NITG-nr', "KWALITEIT"),
    ('Monster apparatuur', "Rijksdriehoeksmeting"),
):
    all_dfs = all_dfs[all_dfs[_column].str.contains(_marker).eq(False)]
In [10]:
# Render the filtered, combined frame as this cell's output.
all_dfs
Out[10]: