In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
% matplotlib inline

In [2]:
# pandas options
pd.options.display.max_rows = 6

In [3]:
inFile = r'.\Groundwater-Levels\B24F0047001_0.csv'
df = pd.read_csv(inFile, skiprows=10, parse_dates=[2], index_col =2)

In [4]:
df


Out[4]:
LOCATIE FILTERNUMMER STAND (MV) BIJZONDERHEID
PEIL DATUM TIJD
1970-04-28 00:00:00 B24F0047 1 585 NaN
1970-05-14 00:00:00 B24F0047 1 589 NaN
1970-05-28 00:00:00 B24F0047 1 584 NaN
... ... ... ... ...
2013-06-27 16:08:00 B24F0047 1 485 NaN
2014-10-01 14:02:00 B24F0047 1 506 NaN
2015-09-10 12:02:00 B24F0047 1 535 NaN

781 rows × 4 columns


In [5]:
df['STAND (MV)'].plot()


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x1650fc1c780>

In [104]:


In [ ]:

Load all the files in a folder


In [6]:
correct_cols = ['Locatie', 'Filternummer','Peildatum', 'Stand (cm t.o.v. MP)',
       'Stand (cm t.o.v. MV)', 'Stand (cm t.o.v. NAP)', 'Bijzonderheid',
       'Opmerking', 'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10']

In [7]:
inFolder = r'.\Groundwater-Levels'
#os.listdir(inFolder)

In [8]:
def read_header(file):
    i=0
    loc=0
    with open(file) as f:
        for line in f:
            if line[:7]=='Locatie':
                loc = loc + 1
                if loc==1:
                    metarow = i
                if loc==2:
                    #print('over')
                    #print(i)
                    skiprows = i
                    break
            i=i+1
    return metarow,skiprows

In [9]:
def get_xy(file,metarow):
    with open(file) as f:
        lines = f.readlines()
    line = lines[metarow+1].split(',')
    return [int(x) for x in line[3:5]]

In [10]:
df_list = []
for file in os.listdir(inFolder):
    
    wellFile,ext = os.path.splitext(file)
    if wellFile[-2:] == '_1':
        print(file,end=', ')
        try:
            # read header 
            metarow,skiprows = read_header(inFolder + '\\' +file)
            (x,y) = get_xy(inFolder + '\\' +file,metarow)
            # read data
            df = pd.read_csv(inFolder + '\\' +file, skiprows=skiprows,index_col=False, parse_dates=[2],dayfirst=True)
            
            # check if the right columns were imported
            ll = [i for i, j in zip(df.columns.tolist(), correct_cols) if i != j ]
            if len(ll)==0: 
                # it's ok
                df['x'] = x
                df['y'] = y
                df_list.append(df)
                print('ok')                
            else:
                print('wrong columns')
        except:
            print('error')

# merge all the dataframes
all_dfs = pd.concat(df_list)


B24F0047001_1.csv, ok
B24F0047002_1.csv, ok
B24F0107001_1.csv, ok
B24H0302001_1.csv, error
B24H0371001_1.csv, ok
B24H0371004_1.csv, ok
B24H1244001_1.csv, ok
B25A0537001_1.csv, ok
B25A0538001_1.csv, ok
B25A0540001_1.csv, ok
B25A0541001_1.csv, ok
B25A0543001_1.csv, ok
B25A0549001_1.csv, ok
B25A0549002_1.csv, ok
B25A0554001_1.csv, ok
B25A0554002_1.csv, ok
B25A0556001_1.csv, ok
B25A0556002_1.csv, ok
B25A0556003_1.csv, ok
B25A0828001_1.csv, ok
B25A0828002_1.csv, ok
B25A0857001_1.csv, ok
B25A0857002_1.csv, ok
B25A1211001_1.csv, error
B25A1211002_1.csv, ok
B25A1211003_1.csv, ok
B25A1212001_1.csv, ok
B25A1212002_1.csv, ok
B25A1212003_1.csv, ok
B25A1212004_1.csv, ok
B25A1320001_1.csv, ok
B25A1320002_1.csv, ok
B25A1395001_1.csv, ok
B25A1395002_1.csv, ok
B25A1395003_1.csv, ok
B25A1395004_1.csv, ok
B25A1507001_1.csv, ok
B25A1507002_1.csv, ok
B25A1507003_1.csv, ok
B25A1507004_1.csv, ok
B25A1512001_1.csv, ok
B25A1517001_1.csv, ok
B25A1517002_1.csv, ok
B25A1542001_1.csv, ok
B25A1543001_1.csv, ok
B25A1546001_1.csv, ok
B25A1547001_1.csv, ok
B25A1548001_1.csv, ok
B25A1549001_1.csv, ok

In [11]:
# set index
all_dfs.set_index(['Locatie','Filternummer','Peildatum'],inplace=True)

In [12]:
all_dfs


Out[12]:
Stand (cm t.o.v. MP) Stand (cm t.o.v. MV) Stand (cm t.o.v. NAP) Bijzonderheid Opmerking Unnamed: 8 Unnamed: 9 Unnamed: 10 x y
Locatie Filternummer Peildatum
B24F0047 1 1970-04-28 584.0 585.0 195.0 NaN NaN NaN NaN NaN 99920 489815
1970-05-14 588.0 589.0 191.0 NaN NaN NaN NaN NaN 99920 489815
1970-05-28 583.0 584.0 196.0 NaN NaN NaN NaN NaN 99920 489815
... ... ... ... ... ... ... ... ... ... ... ... ...
B25A1549 1 1997-11-28 83.0 98.0 -88.0 NaN NaN NaN NaN NaN 105033 488347
1997-12-15 78.0 93.0 -83.0 NaN NaN NaN NaN NaN 105033 488347
1997-12-29 77.0 92.0 -82.0 NaN NaN NaN NaN NaN 105033 488347

71655 rows × 10 columns


In [13]:
# list the well numbers
all_dfs.index.get_level_values(0).unique()


Out[13]:
Index(['B24F0047', 'B24F0107', 'B24H0371', 'B24H1244', 'B25A0537', 'B25A0538',
       'B25A0540', 'B25A0541', 'B25A0543', 'B25A0549', 'B25A0554', 'B25A0556',
       'B25A0828', 'B25A0857', 'B25A1211', 'B25A1212', 'B25A1320', 'B25A1395',
       'B25A1507', 'B25A1512', 'B25A1517', 'B25A1542', 'B25A1543', 'B25A1546',
       'B25A1547', 'B25A1548', 'B25A1549'],
      dtype='object', name='Locatie')

In [14]:
# select only the interesting columns
all_depths = all_dfs[['Stand (cm t.o.v. NAP)','x','y']]
all_depths = all_depths.rename(columns={'Stand (cm t.o.v. NAP)':'depth'})

In [15]:
# some global stats
all_depths.describe().T


Out[15]:
count mean std min 25% 50% 75% max
depth 65890.0 -15.879314 142.031544 -529.0 -84.0 -55.0 28.0 437.0
x 71655.0 102711.619147 2142.434790 99793.0 100900.0 102490.0 104630.0 108260.0
y 71655.0 488596.966311 1589.758769 484691.0 487620.0 489400.0 489815.0 490467.0

In [16]:
# table with average depths
all_depths.mean(level=[0,1]).style


Out[16]:
depth x y
Locatie Filternummer
B24F0047 1 197.247 99920 489815
2 81.5479 99920 489815
B24F0107 1 251.756 99900 489140
B24H0371 1 nan 99863 484691
4 nan 99863 484691
B24H1244 1 369.262 99793 486217
B25A0537 1 -206.075 104060 489080
B25A0538 1 -144.565 102880 488460
B25A0540 1 -134.049 103874 487547
B25A0541 1 -157.606 104600 487620
B25A0543 1 -94.5175 102490 487750
B25A0549 1 -11.9168 102370 489650
2 -93.953 102370 489650
B25A0554 1 -42.3572 102610 489500
2 -94.577 102610 489500
B25A0556 1 -53.2376 102753 489402
2 -53.3555 102753 489402
3 -99.3167 102753 489402
B25A0828 1 -75.8625 104965 490467
2 -85.6342 104965 490467
B25A0857 1 -424.75 108260 487630
2 -421.981 108260 487630
B25A1211 2 -20.9013 101250 488000
3 19.0815 101250 488000
B25A1212 1 -95.3738 105750 489400
2 -118.529 105750 489400
3 -102.421 105750 489400
4 -98.8821 105750 489400
B25A1320 1 196.434 100900 489980
2 62.0134 100900 489980
B25A1395 1 -63.5259 102300 488350
2 -10.4911 102300 488350
3 -24.677 102300 488350
4 -24.9555 102300 488350
B25A1507 1 -28.7414 103710 487590
2 -28.1086 103710 487590
3 -25.702 103710 487590
4 -56.4917 103710 487590
B25A1512 1 -151.245 103499 489956
B25A1517 1 183.193 100625 490115
2 -51.7745 100625 490115
B25A1542 1 203.434 100030 489495
B25A1543 1 38.6626 100420 489500
B25A1546 1 -80.8556 104270 489030
B25A1547 1 -86.2222 104630 489600
B25A1548 1 -48.346 103870 488400
B25A1549 1 -93.8789 105033 488347

In [17]:
# plot on a map
means = all_depths.mean(level=[0,1])
means.plot.scatter(x='x',y='y',c='depth', cmap='plasma', s=50, figsize=(10,10))


Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x16512168cf8>

In [ ]: