2D Preprocessing the GlobColour Dataset

  • prepare dataset for LDS-fitting:
    • load all 5 variables and merge (interpolate) with the float dataset
    • load the distance to coast and merge (interpolate) with the float dataset
    • output the data on disk
    • (plan)if needed, split Nov-Dec, encoding the weekly number

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
%matplotlib inline
from matplotlib import pyplot as plt
from dask.diagnostics import ProgressBar
import seaborn as sns
from matplotlib.colors import LogNorm

In [41]:
# resampling frequency in number of days
freq=15

In [3]:
from tools.load_GlobColor_dataset import load_dataset
import importlib
importlib.reload(load_dataset)


Out[3]:
<module 'tools.load_GlobColor_dataset.load_dataset' from '/Users/vyan2000/work_linux/2Archive/myproject/20161024xray_oceancolor/ocean_color-master/tools/load_GlobColor_dataset/load_dataset.py'>

In [5]:
############### CHL1
ds_daily = load_dataset.load_chl1()
ds_daily.chlor_a.sel(time='2002-04-28').plot()


100%|██████████| 5538/5538 [01:06<00:00, 83.52it/s]
<xarray.Dataset>
Dimensions:    (lat: 553, lon: 721, time: 5538)
Coordinates:
  * lat        (lat) float32 28.0208 27.9792 27.9375 27.8958 27.8542 27.8125 ...
  * lon        (lon) float32 44.9792 45.0208 45.0625 45.1042 45.1458 45.1875 ...
  * time       (time) datetime64[ns] 2002-04-28 2002-04-29 2002-04-30 ...
Data variables:
    CHL1_mean  (time, lat, lon) float64 nan nan nan nan nan nan nan nan nan ...
Attributes:
    Conventions:                CF-1.4
    title:                      GlobColour daily merged MERIS/SeaWiFS product
    product_name:               L3m_20020428__665648402_4_AVW-MERSWF_CHL1_DAY...
    product_type:               day
    product_version:            2016.1
    product_level:              3
    parameter_code:             CHL1
    parameter:                  Chlorophyll concentration
    parameter_algo_list:        ,
    publication:                O'Reilly, J.E., and 24 Coauthors, 2000: SeaWi...
    site_name:                  665648402
    sensor_name:                WEIGHTED_AVERAGING
    sensor:                     Merged data - weighted mean
    sensor_name_list:           MER,SWF
    start_time:                 20020427T224343Z
    end_time:                   20020429T004709Z
    duration_time:              PT93807S
    period_start_day:           20020428
    period_end_day:             20020428
    period_duration_day:        P1D
    grid_type:                  Equirectangular
    spatial_resolution:         4.63831
    nb_equ_bins:                721
    registration:               5
    lat_step:                   0.0416667
    lon_step:                   0.0416667
    earth_radius:               6378.137
    max_north_grid:             28.0417
    max_south_grid:             5.0
    max_west_grid:              44.9583
    max_east_grid:              75.0
    northernmost_latitude:      26.7917
    southernmost_latitude:      5.0
    westernmost_longitude:      49.75
    easternmost_longitude:      75.0
    nb_grid_bins:               398713
    nb_bins:                    398713
    pct_bins:                   100.0
    nb_valid_bins:              62260
    pct_valid_bins:             15.6152
    software_name:              globcolour_l3_extract
    software_version:           2016.1
    institution:                ACRI
    processing_time:            20170723T001655Z
    netcdf_version:             4.3.3.1 of Jul  8 2016 18:15:50 $
    DPM_reference:              GC-UD-ACRI-PUG
    IODD_reference:             GC-UD-ACRI-PUG
    references:                 http://www.globcolour.info
    contact:                    service@globcolour.info
    copyright:                  Copyright ACRI-ST - GlobColour. GlobColour ha...
    history:                    20170723T001655Z: globcolour_l3_extract.sh -i...
    input_files:                S2002117220453.L2_GAC_OC.nc,S2002117234340.L2...
    input_files_reprocessings:  2014.0,2014.0,2014.0,2014.0,2014.0,2014.0,201...
Out[5]:
<matplotlib.collections.QuadMesh at 0x11afe0be0>

In [6]:
freq_resample = str(8) + 'D'
ds_8day = ds_daily.resample(freq_resample, dim='time')  # see the above for doc, test case, & default behavior
ds_8day


Out[6]:
<xarray.Dataset>
Dimensions:  (lat: 553, lon: 721, time: 695)
Coordinates:
  * lat      (lat) float32 28.0208 27.9792 27.9375 27.8958 27.8542 27.8125 ...
  * lon      (lon) float32 44.9792 45.0208 45.0625 45.1042 45.1458 45.1875 ...
  * time     (time) datetime64[ns] 2002-04-28 2002-05-06 2002-05-14 ...
Data variables:
    chlor_a  (time, lat, lon) float64 nan nan nan nan nan nan nan nan nan ...

In [7]:
# check data quality
both_datasets = [ds_8day, ds_daily]
print([(ds.nbytes / 1e6) for ds in both_datasets])


[2216.854936, 17664.630152]

In [8]:
def fix_bad_data(ds):
    # for some reason, the cloud / land mask is backwards on some data
    # this is obvious because there are chlorophyl values less than zero
    bad_data = ds.chlor_a.groupby('time').min() < 0
    # loop through and fix
    for n in np.nonzero(bad_data.values)[0]:
        data = ds.chlor_a[n].values 
        ds.chlor_a.values[n] = np.ma.masked_less(data, 0).filled(np.nan)

In [9]:
[fix_bad_data(ds) for ds in both_datasets]


Out[9]:
[None, None]

In [10]:
# Count the number of ocean data points
(ds_8day.chlor_a>0).sum(dim='time').plot()


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/xarray/core/variable.py:1164: RuntimeWarning: invalid value encountered in greater
  if not reflexive
Out[10]:
<matplotlib.collections.QuadMesh at 0x1214b8860>

In [11]:
#  find a mask for the land
ocean_mask = (ds_8day.chlor_a>0).sum(dim='time')>0
num_ocean_points = ocean_mask.sum().values
ocean_mask.plot()
plt.title('%g total ocean points' % num_ocean_points)


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/xarray/core/variable.py:1164: RuntimeWarning: invalid value encountered in greater
  if not reflexive
Out[11]:
<matplotlib.text.Text at 0x11bbb7828>

In [12]:
plt.figure(figsize=(8,6))
ds_daily.chlor_a.sel(time='2002-11-18',method='nearest').plot(norm=LogNorm())


Out[12]:
<matplotlib.collections.QuadMesh at 0x11f3e5e80>
/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/matplotlib/colors.py:1022: RuntimeWarning: invalid value encountered in less_equal
  mask |= resdat <= 0

In [13]:
ds_daily.groupby('time').count() # information from original data


Out[13]:
<xarray.Dataset>
Dimensions:  (time: 5538)
Coordinates:
  * time     (time) datetime64[ns] 2002-04-28 2002-04-29 2002-04-30 ...
Data variables:
    chlor_a  (time) int64 62260 52391 58822 43103 37195 55773 25428 19652 ...

In [14]:
ds_daily.chlor_a.groupby('time').count()/float(num_ocean_points)


Out[14]:
<xarray.DataArray 'chlor_a' (time: 5538)>
array([ 0.248381,  0.20901 ,  0.234666, ...,  0.003495,  0.020705,  0.034552])
Coordinates:
  * time     (time) datetime64[ns] 2002-04-28 2002-04-29 2002-04-30 ...

In [15]:
count_8day,count_daily = [ds.chlor_a.groupby('time').count()/float(num_ocean_points)
                            for ds in (ds_8day,ds_daily)]

plt.figure(figsize=(12,4))
count_8day.plot(color='k')
count_daily.plot(color='r')

plt.legend(['8 day','daily'])


Out[15]:
<matplotlib.legend.Legend at 0x1224d1518>

In [16]:
# Maps of individual days
target_date = '2003-02-15'
plt.figure(figsize=(8,6))
ds_8day.chlor_a.sel(time=target_date, method='nearest').plot(norm=LogNorm())


Out[16]:
<matplotlib.collections.QuadMesh at 0x89b9c1c88>
/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/matplotlib/colors.py:1022: RuntimeWarning: invalid value encountered in less_equal
  mask |= resdat <= 0

In [17]:
plt.figure(figsize=(8,6))
ds_daily.chlor_a.sel(time=target_date, method='nearest').plot(norm=LogNorm())


Out[17]:
<matplotlib.collections.QuadMesh at 0x60430aba8>
/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/matplotlib/colors.py:1022: RuntimeWarning: invalid value encountered in less_equal
  mask |= resdat <= 0

In [42]:
freq


Out[42]:
15

In [43]:
# next carry out interpolation starts here
freq_resample = str(freq) + 'D'
ds_resample = ds_daily.resample(freq_resample, dim='time')  # see the above for doc, test case, & default behavior
ds_resample


Out[43]:
<xarray.Dataset>
Dimensions:  (lat: 553, lon: 721, time: 371)
Coordinates:
  * lat      (lat) float32 28.0208 27.9792 27.9375 27.8958 27.8542 27.8125 ...
  * lon      (lon) float32 44.9792 45.0208 45.0625 45.1042 45.1458 45.1875 ...
  * time     (time) datetime64[ns] 2002-04-28 2002-05-13 2002-05-28 ...
Data variables:
    chlor_a  (time, lat, lon) float64 nan nan nan nan nan nan nan nan nan ...

In [44]:
plt.figure(figsize=(8,6))
ds_resample.chlor_a.sel(time=target_date, method='nearest').plot(norm=LogNorm())


Out[44]:
<matplotlib.collections.QuadMesh at 0x74f516780>
/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/matplotlib/colors.py:1022: RuntimeWarning: invalid value encountered in less_equal
  mask |= resdat <= 0

In [45]:
# check the range for the longitude
print(ds_resample.lon.min(),'\n' ,ds_resample.lat.min())


<xarray.DataArray 'lon' ()>
array(44.97917175292969) 
 <xarray.DataArray 'lat' ()>
array(5.020830154418945)

++++++++++++++++++++++++++++++++++++++++++++++

All GDP Floats

Load the float data

Map a (time, lon, lat) to a value on the cholorphlly value


In [22]:
from buyodata import buoydata
import os

In [23]:
fnamesAll = ['./gdp_float/buoydata_1_5000.dat','./gdp_float/buoydata_5001_10000.dat','./gdp_float/buoydata_10001_15000.dat','./gdp_float/buoydata_15001_jun17.dat']

In [24]:
# read them and cancatenate them into one DataFrame
dfAll = pd.concat([buoydata.read_buoy_data(f) for f in fnamesAll])  # around 4~5 minutes

#mask = df.time>='2002-07-04' # we only have data after this data for PAR
dfvvAll = dfAll[dfAll.time>='2002-07-04']

sum(dfvvAll.time<'2002-07-04') # recheck whether the time is


Out[24]:
0

In [25]:
# process the data so that the longitude are all >0
print('before processing, the minimum longitude is%f4.3 and maximum is %f4.3' % (dfvvAll.lon.min(), dfvvAll.lon.max()))
mask = dfvvAll.lon<0
dfvvAll.loc[mask].lon = dfvvAll.loc[mask].lon + 360
print('after processing, the minimum longitude is %f4.3 and maximum is %f4.3' % (dfvvAll.lon.min(),dfvvAll.lon.max()) )

dfvvAll.describe()


before processing, the minimum longitude is0.0000004.3 and maximum is 360.0000004.3
after processing, the minimum longitude is 0.0000004.3 and maximum is 360.0000004.3
Out[25]:
id lat lon temp ve vn spd var_lat var_lon var_tmp
count 2.389698e+07 2.371797e+07 2.371797e+07 2.219488e+07 2.368696e+07 2.368696e+07 2.368696e+07 2.389698e+07 2.389698e+07 2.389698e+07
mean 4.883353e+06 -2.187376e+00 2.120937e+02 1.973936e+01 3.363264e-01 4.785506e-01 2.602809e+01 7.490929e+00 7.491234e+00 7.122957e+01
std 1.623920e+07 3.431072e+01 9.804786e+01 8.370632e+00 2.510642e+01 2.044987e+01 1.927200e+01 8.622426e+01 8.622424e+01 2.572025e+02
min 2.578000e+03 -7.764700e+01 0.000000e+00 -1.685000e+01 -2.916220e+02 -2.576900e+02 0.000000e+00 5.268300e-07 -3.941600e-02 1.001300e-03
25% 5.341100e+04 -3.203800e+01 1.475780e+02 1.405900e+01 -1.396000e+01 -1.040500e+01 1.285300e+01 3.939500e-06 6.804800e-06 1.419500e-03
50% 7.872000e+04 -5.065000e+00 2.153410e+02 2.199700e+01 -4.570000e-01 2.100000e-01 2.169100e+01 8.430400e-06 1.366400e-05 1.683600e-03
75% 1.184620e+05 2.783100e+01 3.069600e+02 2.682800e+01 1.360500e+01 1.107800e+01 3.394800e+01 1.817300e-05 3.414100e-05 2.190100e-03
max 6.490986e+07 8.989900e+01 3.600000e+02 4.595000e+01 2.912850e+02 2.783220e+02 2.952150e+02 1.000000e+03 1.000000e+03 1.000000e+03

In [26]:
# Select only the arabian sea region
arabian_sea = (dfvvAll.lon > 45) & (dfvvAll.lon< 75) & (dfvvAll.lat> 5) & (dfvvAll.lat <28)
# arabian_sea = {'lon': slice(45,75), 'lat': slice(5,28)} # later use this longitude and latitude
floatsAll = dfvvAll.loc[arabian_sea]   # directly use mask
print('dfvvAll.shape is %s, floatsAll.shape is %s' % (dfvvAll.shape, floatsAll.shape) )


dfvvAll.shape is (23896980, 11), floatsAll.shape is (130171, 11)

In [27]:
# pands dataframe cannot do the resamplingn properly
# cause we are really indexing on ['time','id'], pandas.dataframe.resample cannot do this
# TypeError: Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, but got an instance of 'MultiIndex'

In [28]:
# dump the surface floater data from pandas.dataframe to xarray.dataset
floatsDSAll = xr.Dataset.from_dataframe(floatsAll.set_index(['time','id']) ) # set time & id as the index); use reset_index to revert this operation
floatsDSAll


Out[28]:
<xarray.Dataset>
Dimensions:  (id: 311, time: 18968)
Coordinates:
  * time     (time) datetime64[ns] 2002-07-04 2002-07-04T06:00:00 ...
  * id       (id) int64 7574 10206 10208 11089 15703 15707 27069 27139 28842 ...
Data variables:
    lat      (time, id) float64 nan 16.3 14.03 16.4 14.04 nan 20.11 nan ...
    lon      (time, id) float64 nan 66.23 69.48 64.58 69.51 nan 68.55 nan ...
    temp     (time, id) float64 nan nan nan 28.0 28.53 nan 28.93 nan 27.81 ...
    ve       (time, id) float64 nan 8.68 5.978 6.286 4.844 nan 32.9 nan ...
    vn       (time, id) float64 nan -13.18 -18.05 -7.791 -17.47 nan 15.81 ...
    spd      (time, id) float64 nan 15.78 19.02 10.01 18.13 nan 36.51 nan ...
    var_lat  (time, id) float64 nan 0.0002661 5.01e-05 5.018e-05 5.024e-05 ...
    var_lon  (time, id) float64 nan 0.0006854 8.851e-05 9.018e-05 8.968e-05 ...
    var_tmp  (time, id) float64 nan 1e+03 1e+03 0.003733 0.0667 nan 0.001683 ...

In [46]:
print(floatsDSAll.dims)
# resample on the xarray.dataset onto two-day frequency
floatsDSAll_resample =floatsDSAll.resample(freq_resample, dim='time')
print(floatsDSAll_resample.dims)   # downsampling on the 'time' dimension 17499/9/4=  around 486
floatsDSAll_resample


Frozen(SortedKeysDict({'time': 18968, 'id': 311}))
Frozen(SortedKeysDict(OrderedDict([('time', 366), ('id', 311)])))
Out[46]:
<xarray.Dataset>
Dimensions:  (id: 311, time: 366)
Coordinates:
  * time     (time) datetime64[ns] 2002-07-04 2002-07-19 2002-08-03 ...
  * id       (id) int64 7574 10206 10208 11089 15703 15707 27069 27139 28842 ...
Data variables:
    lat      (time, id) float64 nan 16.27 13.55 15.66 13.61 nan 19.97 nan ...
    lon      (time, id) float64 nan 66.66 70.2 65.25 70.17 nan 70.05 nan ...
    temp     (time, id) float64 nan nan nan 27.77 28.59 nan 28.92 nan 27.23 ...
    ve       (time, id) float64 nan 6.14 11.37 9.377 10.19 nan 25.86 nan ...
    vn       (time, id) float64 nan 0.3577 -5.286 -14.1 -4.513 nan -5.424 ...
    spd      (time, id) float64 nan 7.395 15.01 18.7 13.97 nan 27.87 nan ...
    var_lat  (time, id) float64 nan 0.001451 6.257e-05 6.695e-05 5.535e-05 ...
    var_lon  (time, id) float64 nan 0.00561 0.0001185 0.0001289 0.000102 nan ...
    var_tmp  (time, id) float64 nan 1e+03 1e+03 0.003614 0.08862 nan ...

In [47]:
# transfer it back to pandas.dataframe for plotting
floatsDFAll_resample = floatsDSAll_resample.to_dataframe()
floatsDFAll_resample
floatsDFAll_resample = floatsDFAll_resample.reset_index()
floatsDFAll_resample
# visualize the subsamping of floats around arabian region
fig, ax  = plt.subplots(figsize=(12,10))
floatsDFAll_resample.plot(kind='scatter', x='lon', y='lat', c='temp', cmap='RdBu_r', edgecolor='none', ax=ax)


Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x11f1c0048>

In [48]:
# get the value for the chlorophyll for each data entry
floatsDFAll_resample_timeorder = floatsDFAll_resample.sort_values(['time','id'],ascending=True)
floatsDFAll_resample_timeorder[:20] # check whether it is time ordered!!
# should we drop nan to speed up??


Out[48]:
id time lat lon temp ve vn spd var_lat var_lon var_tmp
0 7574 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
366 10206 2002-07-04 16.265717 66.663800 NaN 6.140233 0.357733 7.394967 0.001451 0.005610 1000.000000
732 10208 2002-07-04 13.549633 70.195217 NaN 11.373300 -5.285617 15.006967 0.000063 0.000118 1000.000000
1098 11089 2002-07-04 15.657150 65.248067 27.773283 9.376883 -14.097033 18.695917 0.000067 0.000129 0.003614
1464 15703 2002-07-04 13.611350 70.165200 28.590333 10.194983 -4.513033 13.965250 0.000055 0.000102 0.088623
1830 15707 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2196 27069 2002-07-04 19.969700 70.048350 28.916267 25.855350 -5.424417 27.865400 0.000057 0.000106 0.001731
2562 27139 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2928 28842 2002-07-04 18.350883 60.961600 27.226833 5.825783 -9.921900 16.832533 0.000149 0.000362 0.003382
3294 34159 2002-07-04 13.394633 60.516650 NaN 31.603317 16.559017 36.755683 0.000061 0.000116 1000.000000
3660 34173 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
4026 34210 2002-07-04 5.882953 56.749953 26.354721 -5.144814 -18.675465 26.752744 0.000066 0.000129 0.003705
4392 34211 2002-07-04 7.797533 69.070367 28.430017 19.858683 -14.960467 27.234933 0.000053 0.000098 0.003538
4758 34212 2002-07-04 6.519433 66.877317 28.568833 34.703000 1.993683 42.610483 0.000055 0.000102 0.003553
5124 34223 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
5490 34310 2002-07-04 5.023286 70.029000 28.954857 8.074714 -0.021000 10.808286 0.000056 0.000103 0.003749
5856 34311 2002-07-04 9.730864 69.980455 28.593818 -2.896714 -11.504476 13.484762 0.000061 0.000114 0.003594
6222 34312 2002-07-04 9.638095 65.167048 28.129857 7.005500 -15.195200 17.403650 0.000075 0.000154 0.003670
6588 34314 2002-07-04 5.116600 54.903200 26.905500 -6.718778 -5.381333 16.793444 0.000047 0.000087 0.003738
6954 34315 2002-07-04 5.162294 59.998824 28.303294 -17.022625 5.554625 28.943938 0.000052 0.000094 0.003575

In [49]:
print(floatsDFAll_resample_timeorder.lon.shape)
print(floatsDFAll_resample_timeorder.lon.dropna().shape )

############ interpolation starts from here
# to understand the float data better
# a: Look into the floatsDFAll_9Dtimeorder data in more details
# check the nan counts in each id
# plot the trajectory of {time, lat, lon, temperature,} for each float id,  
# this steps helps to understand the float dataset and if there is a need, improve it.

# b: take the float data as it is, and do the interpolation, whenever there is a nan value use the nearest neigbhours....
# check whether the quality of interpolataion is improved, if not, then have to fall back to to task 1

# c: vectorization

# DataFrame panel data 
# floatsDFAll_9Dtimeorder.set_index(['id','time']) 
# the inverse operation # floatsDFAll_9Dtimeorder.reset_index()
# look into the data
print(floatsDFAll_resample_timeorder[100:105])
# so far there is no need to convert it into a panel 
# floatsDFAll_9DtimeorderPanel = floatsDFAll_9Dtimeorder.to_panel

# plot the temperature for one float, the temperature do have a trend
#maskid = (floatsDFAll_resample_timeorder.id == 63069) & (floatsDFAll_resample_timeorder.time > '2007-01-01') & (floatsDFAll_resample_timeorder.time < '2009-01-01')
maskid = (floatsDFAll_resample_timeorder.id == 63069)
print(floatsDFAll_resample_timeorder[maskid].dropna(subset=['id', 'lat', 'lon', 'time']) )
floatsDFAll_resample_timeorder[maskid].dropna(subset=['id', 'lat', 'lon', 'time']).plot(x='time', y ='temp')

# set of all float ids
print(floatsDFAll_resample_timeorder.id.unique())

# this is a float that explains the need for temperature data
maskid2 = floatsDFAll_resample_timeorder.id == 10208
print(floatsDFAll_resample_timeorder[maskid2].head())


(113826,)
(2566,)
          id       time  lat  lon  temp  ve  vn  spd  var_lat  var_lon  \
36600  62558 2002-07-04  NaN  NaN   NaN NaN NaN  NaN      NaN      NaN   
36966  63036 2002-07-04  NaN  NaN   NaN NaN NaN  NaN      NaN      NaN   
37332  63067 2002-07-04  NaN  NaN   NaN NaN NaN  NaN      NaN      NaN   
37698  63068 2002-07-04  NaN  NaN   NaN NaN NaN  NaN      NaN      NaN   
38064  63069 2002-07-04  NaN  NaN   NaN NaN NaN  NaN      NaN      NaN   

       var_tmp  
36600      NaN  
36966      NaN  
37332      NaN  
37698      NaN  
38064      NaN  
          id       time        lat        lon       temp         ve  \
38189  63069 2007-08-22  15.798379  58.013000  25.771362  45.319667   
38190  63069 2007-09-06  17.658433  59.207817  26.384367   3.862417   
38191  63069 2007-09-21  16.433267  61.175617  27.712067   7.830650   
38192  63069 2007-10-06  16.330617  60.592183  28.309833  -4.990567   
38193  63069 2007-10-21  19.042383  61.008650  28.125817   5.503000   
38194  63069 2007-11-05  18.880117  61.267933  28.218967   3.265300   
38195  63069 2007-11-20  16.803200  61.537933  27.157950  -0.842867   
38196  63069 2007-12-05  14.707333  60.352500  26.446283 -22.301833   
38197  63069 2007-12-20  15.612200  56.282917  25.156883 -34.859217   
38198  63069 2008-01-04  13.515217  51.348033  25.630450 -51.461750   
38199  63069 2008-01-19  12.486889  46.915644  25.859156 -37.580333   
38200  63069 2008-02-03  11.060667  45.168424  25.571485  -0.614545   

              vn        spd   var_lat   var_lon   var_tmp  
38189   1.987018  54.823982  0.000020  0.000042  0.003654  
38190  12.117300  41.143817  0.000017  0.000036  0.001901  
38191 -16.066867  23.843750  0.000018  0.000039  0.001854  
38192  18.453617  23.674983  0.000010  0.000018  0.001972  
38193  12.149783  16.783567  0.000010  0.000019  0.002038  
38194 -12.378483  14.491983  0.000012  0.000023  0.002118  
38195 -22.047533  23.004433  0.000008  0.000015  0.001806  
38196  -4.578383  26.139633  0.000012  0.000023  0.001792  
38197  -0.469817  49.507017  0.000013  0.000027  0.002139  
38198 -11.676050  73.437817  0.000012  0.000026  0.001931  
38199 -14.802867  53.443956  0.000017  0.000040  0.002127  
38200  16.681121  22.760788  0.000013  0.000027  0.001834  
[    7574    10206    10208    11089    15703    15707    27069    27139
    28842    34159    34173    34210    34211    34212    34223    34310
    34311    34312    34314    34315    34374    34708    34709    34710
    34714    34716    34718    34719    34720    34721    34722    34723
    36530    36537    37192    37194    37200    37213    37214    37641
    39204    40084    40273    40552    41317    42519    43598    43740
    43743    43744    43746    43748    43995    45748    46470    46471
    46472    53352    53358    53363    53364    53365    53366    53367
    53404    54017    54038    54371    57939    57940    59213    59215
    59362    59363    59365    59366    59367    59368    59369    59371
    59372    59373    59394    60492    62193    62195    62196    62197
    62198    62199    62200    62201    62202    62207    62552    62553
    62554    62555    62556    62557    62558    63036    63067    63068
    63069    63070    63071    63072    63073    63074    63075    63076
    63926    63928    63929    63930    63934    63935    70695    70696
    70697    70699    70952    71138    71139    71140    71141    71142
    71158    71159    71160    71161    71162    72633    72634    72638
    73076    73077    73079    75137    75138    75140    75141    75142
    79184    79185    79188    79322    81824    81828    81851    81938
    81996    81997    82625    83499    88651    88652    88659    88671
    90485    90513    92626    92627    92630    92631    98672    98673
    98674    98675    98676    98679   101609   101833   109290   109377
   109382   109402   109404   109544   109551   114553   114556   114559
   114575   114873   114874   114875   114876   114917   114945   114948
   116006   116184   116187   116212   116345   116463   116464   116465
   116466   116467   116468   126933   126935   126950   127055   127406
   127429   132485   133654   133659   135776   135780   135781   135782
   135784   135785   135786   135787   135788   135789   135790   145074
   145716   145717   145719   145731   147127   147136   147140   147142
   147144   147145   147148  2134712  2343739  2444350  3098671  3098678
  3098682 60073460 60074440 60077450 60150420 60454500 60656200 60657200
 60658190 60659110 60659120 60659190 60659200 60940960 60940970 60941960
 60941970 60942960 60942970 60943960 60943970 60944960 60944970 60945970
 60946960 60947960 60947970 60948960 60950430 61873850 62321420 62321990
 62322970 62323420 63157510 63157520 63158530 63254830 63254850 63254860
 63255180 63255200 63255860 63258870 63258880 63258900 63258950 63259180
 63259190 63259200 63259230 63259240 63259260 63347940 63348680 63348700
 63348720 63348750 63351000 63353030 63354000 63354010 63354040 63355030
 63355040 63355050 64111550 64113560 64113600 64115560 64117500]
        id       time        lat        lon  temp         ve         vn  \
732  10208 2002-07-04  13.549633  70.195217   NaN  11.373300  -5.285617   
733  10208 2002-07-19  12.458050  70.996300   NaN   0.988950 -23.323367   
734  10208 2002-08-03   9.475817  71.234250   NaN   8.418333 -15.787333   
735  10208 2002-08-18   7.935850  72.611817   NaN  11.693867 -19.061967   
736  10208 2002-09-02   6.446661  73.541915   NaN   0.523155  -4.246793   

           spd   var_lat   var_lon  var_tmp  
732  15.006967  0.000063  0.000118   1000.0  
733  26.479183  0.000065  0.000124   1000.0  
734  21.439450  0.000066  0.000128   1000.0  
735  26.718767  0.000069  0.000137   1000.0  
736  19.433897  0.000060  0.000113   1000.0  

In [50]:
################
# test case 1: take a single entry (southeast corner for valid values)
row_case1 =  pd.DataFrame(data = {'time':'2002-07-13 00:00:00', 'id': 10206, 'lon':74.7083358765, 'lat':5.20833349228},index=[1])
print(row_case1)

################
# test case 2
# take a {time-list, id-list, lon-list, lat-list}, index-list 
# carry out the interpolation
#row_case2 =  pd.DataFrame(data = {'time':['2002-07-13 00:00:00','2002-07-22 00:00:00'] , 'id': [10206, 10206], 'lon':[74.7083358765, 74.6250076294], 'lat':[5.20833349228, 5.29166173935]},index=[2,3])
#print(row_case2)
################
# test case 3
row_case2 =  pd.DataFrame(data = {'time':['2002-07-13 00:00:00', '2002-07-22 00:00:00', '2002-07-13 00:00:00'] , 'id': [10206, 10206, 10206], 'lon':[74.7083358765, 74.6250076294,74.7083358765], 'lat':[5.20833349228, 5.29166173935, 5.20833349228]},index=[1,2,3])
print(row_case2)



####
## get the indices of time, lat, lon
idx_time = ds_resample.indexes['time']
idx_lat = ds_resample.indexes['lat']
idx_lon = ds_resample.indexes['lon']

#### 
#interpolation on the time dimension
time_len = len(row_case2.time.values)
xtime_test = list([ np.datetime64(row_case2.time.values[i]) for i in range(0,time_len)  ] )  # for delta 
print('\n xtime_test \n', xtime_test)

'''caution: cannot do this inside the function get_loc,
see https://github.com/pandas-dev/pandas/issues/3488
'''
itime_nearest = [idx_time.get_loc(xtime_test[i], method='nearest') for i in range(0, time_len)]
print('\n itime_nearest \n', itime_nearest)  # [1,2]

xtime_nearest =  ds_resample.time[itime_nearest].values  #  ['2002-07-13T00:00:00.000000000' '2002-07-22T00:00:00.000000000']
print('\n xtime_nearest\n', xtime_nearest)  # ['2002-07-13T00:00:00.000000000' '2002-07-22T00:00:00.000000000']
print('xtime_nearest', type(xtime_nearest)) # xtime_nearest <class 'numpy.ndarray'> # time_nearest <class 'numpy.datetime64'>

# the time distance in days
delta_xtime = (xtime_test - xtime_nearest) / np.timedelta64(1, 'D')
print('\n delta_xtime in days \n', delta_xtime)
print(type(delta_xtime))

itime_next = [itime_nearest[i]+1 if  delta_xtime[i] >=0  else itime_nearest[i]-1  for i in range(0, time_len) ]
print('\n itime_next \n',itime_next)  # [2, 3]

# find the next coordinate values
xtime_next = ds_resample.time[itime_next].values
print('\n xtime_next \n', xtime_next) # ['2002-07-22T00:00:00.000000000' '2002-07-31T00:00:00.000000000']

# prepare for the Tri-linear interpolation
base_time = (xtime_next - xtime_nearest) / np.timedelta64(1, 'D')  # [ 9.  9.]
print('\n base_time \n', base_time)
w_time = delta_xtime / base_time  
print('\n w_time \n', w_time) # [ 0.  0.]


#### 
#interpolation on the lat dimension
xlat_test = row_case2.lat.values + 0.06   # base [ 5.20833349  5.29166174] # cell distance around .8, use .2 & .6 as two tests
print('\n xlat_test \n', xlat_test)       # xlat_test [ 5.26833349  5.35166174]

ilat_nearest = [idx_lat.get_loc(xlat_test[i], method='nearest') for i in range(0, time_len)]
print('\n ilat_nearest \n', ilat_nearest) # [272, 271]

xlat_nearest = ds_resample.lat[ilat_nearest].values  
print('\n xlat_nearest \n', xlat_nearest) # [ 5.29166174  5.37499762]

delta_xlat = xlat_test - xlat_nearest
print("\n delta_xlat \n",delta_xlat)      #  [-0.02332825 -0.02333588]


# the nearest index is on the right; but order of the latitude is different, it is descending
ilat_next = [ilat_nearest[i]-1 if  delta_xlat[i] >=0  else ilat_nearest[i]+1  for i in range(0, time_len) ]
print('\n ilat_next \n', ilat_next)  # [273, 272]

# find the next coordinates value
xlat_next = ds_resample.lat[ilat_next].values
print('\n xlat_next \n', xlat_next)  # [ 5.20833349  5.29166174]

# prepare for the Tri-linear interpolation
w_lat = delta_xlat / (xlat_next - xlat_nearest)
print('\n w_lat \n', w_lat) # [ 0.27995605  0.28002197]

#### 
#interpolation on the lon dimension
xlon_test = row_case2.lon.values +0.06 # base [74.7083358765, 74.6250076294] # cell distance around .8, use .2 & .6 as two tests
print('\n xlon_test \n', xlon_test)  # [ 74.76833588  74.68500763]

ilon_nearest = [idx_lon.get_loc(xlon_test[i], method='nearest') for i in range(0, time_len)]
print('\n ilon_nearest \n', ilon_nearest) # [357, 356]

xlon_nearest = ds_resample.lon[ilon_nearest].values  
print('\n xlon_nearest \n', xlon_nearest) # [ 74.79166412  74.70833588]

delta_xlon = xlon_test - xlon_nearest     
print("\n delta_xlon \n", delta_xlon)     #  [-0.02332825 -0.02332825]

ilon_next = [ilon_nearest[i]+1 if  delta_xlon[i] >=0  else ilon_nearest[i]-1  for i in range(0, time_len) ]
print('\n ilon_next \n',ilon_next)  # [356, 355]

# find the next coordinate values
xlon_next = ds_resample.lon[ilon_next].values
print("\n xlon_next \n", xlon_next) # [ 74.70833588  74.62500763]

# prepare for the Tri-linear interpolation
w_lon = delta_xlon / (xlon_next - xlon_nearest)
print("\n w_lon \n", w_lon) # [ 0.27995605  0.27995605]

####
# local Tensor product for Trilinear interpolation
# caution: nan values, store as "list_of_array to 2d_array" first, then sum

# no casting to list needed here, inputs are already lists
tmp = np.array([
         ds_resample.chlor_a.isel_points(time=itime_nearest, lat=ilat_nearest, lon=ilon_nearest).values,
         ds_resample.chlor_a.isel_points(time=itime_nearest, lat=ilat_nearest, lon=ilon_next).values,
         ds_resample.chlor_a.isel_points(time=itime_nearest, lat=ilat_next, lon=ilon_nearest).values,
         ds_resample.chlor_a.isel_points(time=itime_nearest, lat=ilat_next, lon=ilon_next).values,
         ds_resample.chlor_a.isel_points(time=itime_next, lat=ilat_nearest, lon=ilon_nearest).values,
         ds_resample.chlor_a.isel_points(time=itime_next, lat=ilat_nearest, lon=ilon_next).values,
         ds_resample.chlor_a.isel_points(time=itime_next, lat=ilat_next, lon=ilon_nearest).values,
         ds_resample.chlor_a.isel_points(time=itime_next, lat=ilat_next, lon=ilon_next).values ])

weights =  np.array([(1-w_time)*(1-w_lat)*(1-w_lon), 
                     (1-w_time)*(1-w_lat)*w_lon,
                     (1-w_time)*w_lat*(1-w_lon), 
                     (1-w_time)*w_lat*w_lon,
                        w_time*(1-w_lat)*(1-w_lon),
                        w_time*(1-w_lat)*w_lon,
                        w_time*w_lat*(1-w_lon),
                        w_time*w_lat*w_lon ])


# how to deal with "nan" values, fill in missing values for the np.array tmpAll 
# or fill the mean values to the unweighted array
# http://stackoverflow.com/questions/18689235/numpy-array-replace-nan-values-with-average-of-columns

print('\n neighbouring tensor used \n', tmp)
'''
 neighbouring tensor used 
 [[        nan  0.181841  ]
 [ 0.245878           nan]
 [        nan         nan]
 [        nan         nan]
 [ 0.19680101         nan]
 [        nan         nan]
 [        nan         nan]
 [ 0.18532801         nan]]
'''

# column min: (nan+0.245878 + nan + nan + 0.19680101 + nan +  nan + 0.18532801)/8 = 0.20933567333
col_mean = np.nanmean(tmp, axis=0)
print('\n its mean along axis 0(column) \n', col_mean)  #  [ 0.20933567  0.181841  ]


# filling the missing values.
inds = np.where(np.isnan(tmp))
print('\n nan index\n', inds)
tmp[inds]=np.take(col_mean, inds[1])
print('\n values after the fill \n', tmp)

print('\n weighting tensor used \n', weights)

print("weights.shape", weights.shape) # (8, 3)
print("tmp.shape", tmp.shape)  # (8, 3)

nrow_w, ncol_w = weights.shape
nrow_t, ncol_t = tmp.shape
assert nrow_w == nrow_t, "the row count of weights and values are not the same!"
assert ncol_w == ncol_t, "the row count of weights and values are not the same!"
print('\n tensor product\n', np.dot(weights[:,0], tmp[:,0]) ) # 0.216701896135 should be [ 0.2167019]

# new interpolation process of the Chl_a
chl_new = np.empty(ncol_w)
for i in range(0, ncol_w, 1):
    chl_new[i] =  np.dot(weights[:,i], tmp[:,i])

print('chl_newInt',  chl_new) #  [ 0.2167019  0.181841   0.2167019]
# validate by 1D array
# val = np.array([0.20933567, 0.245878,  0.20933567,
#                0.20933567, 0.19680101, 0.20933567,
#               0.20933567,0.18532801]) 
# np.dot(val, weights) # 0.21670189702309739


## output xarray.dataarray of points, see examples below
# this is the way how xarray.Dataset works
# if you want a xarray.DataArray, first generate a xarray.Dataset, then select DataArray from there.
chl_newInt = xr.Dataset({'chl': (['points'], np.random.randn(3))},
                        coords={
                                'time':(['points'],['2002-07-13 00:00:00', '2002-07-22 00:00:00', '2002-07-13 00:00:00']) , 
                                'id': (['points'], [10206, 10206, 10206]), 
                                'lon': (['points'], [74.7083358765, 74.6250076294,74.7083358765]),
                                'lat':(['points'], [5.20833349228, 5.29166173935, 5.20833349228]), 
                                'points': (['points'], [0,1,2])}) # this way the dims is set to point

print('\n',chl_newInt.chl)
'''
### Task: output xarray.dataarray as points
## example 1
arr = xr.DataArray(np.random.rand(4,3), [('time', pd.date_range('2000-01-01', periods=4)), ('space', ['IA', 'IL', 'IN'])] )
print("first example",arr)
print("\n \n")

## example2 -- concrete xr.DataArray
data = np.random.rand(4, 3)
locs = ['IA', 'IL', 'IN']
times = pd.date_range('2000-01-01', periods=4)
brr = xr.DataArray(data, coords={'time': times, 'space': locs, 'const': 42, \
                          'ranking': ('space', [1, 2, 3])}, \
             dims=['time', 'space'])
print("second example",brr)
'''

'''
### the target output
#the output generated by the xarray.DataSet => xarray.DataArray
<xarray.DataArray 'chlor_a' (points: 147112)>
array([ nan,  nan,  nan, ...,  nan,  nan,  nan])
Coordinates:
    time     (points) datetime64[ns] 2002-07-04 2002-07-04 2002-07-04 ...
    lon      (points) float64 74.96 66.54 69.88 65.04 69.88 74.96 69.46 ...
    lat      (points) float64 27.96 16.21 13.62 16.04 13.62 27.96 20.04 ...
  * points   (points) int64 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 ...
  
### the solution approach we tried to is: first,  generate a xarray.DataSet
                                          second, generte a xarray.DataArray from the xarray DataSet if needed.
 
# 1. looking at this example, DataArray seems to be working.

# works here
da_test1 = xr.DataArray(np.random.rand(3), dims=['x'],
                        coords={'x': np.array([10206, 10206, 10206]), 'y':  2} )
print('\n da_test1', da_test1)

##########
# but not works here
da_test1 = xr.DataArray(np.random.rand(3), dims=['x'],
                        coords={'x': np.array([10206, 10206, 10206]), 'y':  np.array([10206, 10206, 10206])} )
print('\n da_test1', da_test1)

# err's
##########

##########
# works here
da_test1 = xr.DataArray(np.random.rand(3), dims=['x'],
                        coords={'x': np.array([10206, 10206, 10206]), 'y':  2} )
print('\n da_test1', da_test1)

# da_test1 <xarray.DataArray (x: 3)>
# array([ 0.90386212,  0.21516239,  0.44707272])
# Coordinates:
#   * x        (x) int64 10206 10206 10206
#     y        int64 2
##########

##########
# so we do this
# this is the way how xarray.Dataset works
# if you want a xarray.DataArray, first generate a xarray.Dataset, then select DataArray from there.
chl_newInt = xr.Dataset({'chl': (['id'], np.random.randn(3))},
                        coords={ 
                           'id':  (['id'], np.array([10206, 10206, 10206])), 
                           'lon': (['id'], np.array([74.7083358765, 74.6250076294,74.7083358765])), 
                           'lat': (['id'], np.array([5.20833349228, 5.29166173935, 5.20833349228]))})
print('chl_newInt', chl_newInt) # generate the xarray.DataSet
print('\n \n')
print('chl_newInt.chl', chl_newInt.chl) # xarray.DataSet contains many xarray.DataArray


#chl_newInt <xarray.Dataset>
#Dimensions:  (id: 3)
#Coordinates:
#    lon      (id) float64 74.71 74.63 74.71
#  * id       (id) int64 10206 10206 10206
#    lat      (id) float64 5.208 5.292 5.208
#Data variables:
#    chl      (id) float64 0.783 -0.9714 -0.3206



#chl_newInt.chl <xarray.DataArray 'chl' (id: 3)>
#array([ 0.78301614, -0.97144208, -0.3206447 ])
#Coordinates:
#    lon      (id) float64 74.71 74.63 74.71
#  * id       (id) int64 10206 10206 10206
#    lat      (id) float64 5.208 5.292 5.208

'''
print()


      id       lat        lon                 time
1  10206  5.208333  74.708336  2002-07-13 00:00:00
      id       lat        lon                 time
1  10206  5.208333  74.708336  2002-07-13 00:00:00
2  10206  5.291662  74.625008  2002-07-22 00:00:00
3  10206  5.208333  74.708336  2002-07-13 00:00:00

 xtime_test 
 [numpy.datetime64('2002-07-13T00:00:00'), numpy.datetime64('2002-07-22T00:00:00'), numpy.datetime64('2002-07-13T00:00:00')]

 itime_nearest 
 [5, 6, 5]

 xtime_nearest
 ['2002-07-12T00:00:00.000000000' '2002-07-27T00:00:00.000000000'
 '2002-07-12T00:00:00.000000000']
xtime_nearest <class 'numpy.ndarray'>

 delta_xtime in days 
 [ 1. -5.  1.]
<class 'numpy.ndarray'>

 itime_next 
 [6, 5, 6]

 xtime_next 
 ['2002-07-27T00:00:00.000000000' '2002-07-12T00:00:00.000000000'
 '2002-07-27T00:00:00.000000000']

 base_time 
 [ 15. -15.  15.]

 w_time 
 [ 0.06666667  0.33333333  0.06666667]

 xlat_test 
 [ 5.26833349  5.35166174  5.26833349]

 ilat_nearest 
 [546, 544, 546]

 xlat_nearest 
 [ 5.27083015  5.35416365  5.27083015]

 delta_xlat 
 [-0.00249666 -0.00250191 -0.00249666]

 ilat_next 
 [547, 545, 547]

 xlat_next 
 [ 5.22916365  5.31249666  5.22916365]

 w_lat 
 [ 0.05992012  0.06004532  0.05992012]

 xlon_test 
 [ 74.76833588  74.68500763  74.76833588]

 ilon_nearest 
 [715, 713, 715]

 xlon_nearest 
 [ 74.77084351  74.68750763  74.77084351]

 delta_xlon 
 [-0.00250763 -0.0025     -0.00250763]

 ilon_next 
 [714, 712, 714]

 xlon_next 
 [ 74.72917175  74.64584351  74.72917175]

 w_lon 
 [ 0.06017576  0.06000366  0.06017576]

 neighbouring tensor used 
 [[ 0.21930429  0.21265694  0.21930429]
 [ 0.2433444   0.21827146  0.2433444 ]
 [ 0.22944275  0.16807152  0.22944275]
 [ 0.23756409         nan  0.23756409]
 [ 0.21491265  0.1794105   0.21491265]
 [        nan         nan         nan]
 [        nan         nan         nan]
 [        nan  0.15022902         nan]]

 its mean along axis 0(column) 
 [ 0.22891364  0.18572789  0.22891364]

 nan index
 (array([3, 5, 5, 5, 6, 6, 6, 7, 7]), array([1, 0, 1, 2, 0, 1, 2, 0, 2]))

 values after the fill 
 [[ 0.21930429  0.21265694  0.21930429]
 [ 0.2433444   0.21827146  0.2433444 ]
 [ 0.22944275  0.16807152  0.22944275]
 [ 0.23756409  0.18572789  0.23756409]
 [ 0.21491265  0.1794105   0.21491265]
 [ 0.22891364  0.18572789  0.22891364]
 [ 0.22891364  0.18572789  0.22891364]
 [ 0.22891364  0.15022902  0.22891364]]

 weighting tensor used 
 [[  8.24609203e-01   5.89035972e-01   8.24609203e-01]
 [  5.27986856e-02   3.76004821e-02   5.27986856e-02]
 [  5.25600891e-02   3.76282528e-02   5.25600891e-02]
 [  3.36535611e-03   2.40195933e-03   3.36535611e-03]
 [  5.89006573e-02   2.94517986e-01   5.89006573e-02]
 [  3.77133468e-03   1.88002411e-02   3.77133468e-03]
 [  3.75429208e-03   1.88141264e-02   3.75429208e-03]
 [  2.40382579e-04   1.20097966e-03   2.40382579e-04]]
weights.shape (8, 3)
tmp.shape (8, 3)

 tensor product
 0.220983857493
chl_newInt [ 0.22098386  0.20024613  0.22098386]

 <xarray.DataArray 'chl' (points: 3)>
array([-0.803608, -0.260642,  0.663399])
Coordinates:
    id       (points) int64 10206 10206 10206
  * points   (points) int64 0 1 2
    time     (points) <U19 '2002-07-13 00:00:00' '2002-07-22 00:00:00' ...
    lat      (points) float64 5.208 5.292 5.208
    lon      (points) float64 74.71 74.63 74.71


In [51]:
### output benchmark
### output the dataset ds_9day and output the dataframe  
#ds_9day.to_netcdf("ds_9day.nc")

#row_case4 = pd.DataFrame(data={'time':list(floatsDFAll_9Dtimeorder.time), 'lon':list(floatsDFAll_9Dtimeorder.lon), 'lat':list(floatsDFAll_9Dtimeorder.lat), 'id':list(floatsDFAll_9Dtimeorder.id) } )
##print('\n before dropping nan \n', row_case4)
## process to drop nan in any of the columns [id], [lat], [lon], [time]
#row_case4 = row_case4.dropna(subset=['id', 'lat', 'lon', 'time'], how = 'any') # these four fields are critical
#row_case4.to_csv("row_case4.csv")

In [52]:
floatsDFAll_resample_timeorder


Out[52]:
id time lat lon temp ve vn spd var_lat var_lon var_tmp
0 7574 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
366 10206 2002-07-04 16.265717 66.663800 NaN 6.140233 0.357733 7.394967 0.001451 0.005610 1000.000000
732 10208 2002-07-04 13.549633 70.195217 NaN 11.373300 -5.285617 15.006967 0.000063 0.000118 1000.000000
1098 11089 2002-07-04 15.657150 65.248067 27.773283 9.376883 -14.097033 18.695917 0.000067 0.000129 0.003614
1464 15703 2002-07-04 13.611350 70.165200 28.590333 10.194983 -4.513033 13.965250 0.000055 0.000102 0.088623
1830 15707 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2196 27069 2002-07-04 19.969700 70.048350 28.916267 25.855350 -5.424417 27.865400 0.000057 0.000106 0.001731
2562 27139 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2928 28842 2002-07-04 18.350883 60.961600 27.226833 5.825783 -9.921900 16.832533 0.000149 0.000362 0.003382
3294 34159 2002-07-04 13.394633 60.516650 NaN 31.603317 16.559017 36.755683 0.000061 0.000116 1000.000000
3660 34173 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
4026 34210 2002-07-04 5.882953 56.749953 26.354721 -5.144814 -18.675465 26.752744 0.000066 0.000129 0.003705
4392 34211 2002-07-04 7.797533 69.070367 28.430017 19.858683 -14.960467 27.234933 0.000053 0.000098 0.003538
4758 34212 2002-07-04 6.519433 66.877317 28.568833 34.703000 1.993683 42.610483 0.000055 0.000102 0.003553
5124 34223 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
5490 34310 2002-07-04 5.023286 70.029000 28.954857 8.074714 -0.021000 10.808286 0.000056 0.000103 0.003749
5856 34311 2002-07-04 9.730864 69.980455 28.593818 -2.896714 -11.504476 13.484762 0.000061 0.000114 0.003594
6222 34312 2002-07-04 9.638095 65.167048 28.129857 7.005500 -15.195200 17.403650 0.000075 0.000154 0.003670
6588 34314 2002-07-04 5.116600 54.903200 26.905500 -6.718778 -5.381333 16.793444 0.000047 0.000087 0.003738
6954 34315 2002-07-04 5.162294 59.998824 28.303294 -17.022625 5.554625 28.943938 0.000052 0.000094 0.003575
7320 34374 2002-07-04 NaN NaN NaN NaN NaN NaN NaN NaN NaN
7686 34708 2002-07-04 10.333167 61.099483 27.384050 22.553517 4.269317 24.302933 0.000058 0.000110 0.001807
8052 34709 2002-07-04 6.396520 52.252040 26.204160 0.462760 46.740240 81.157560 0.000079 0.000162 0.001818
8418 34710 2002-07-04 12.618183 49.966633 31.137717 -15.991800 -6.248550 46.052550 0.000046 0.000087 0.001837
8784 34714 2002-07-04 13.767567 65.507833 27.870250 34.244133 1.335217 35.355967 0.000059 0.000110 0.001815
9150 34716 2002-07-04 7.695783 66.931733 28.781367 25.851967 2.107750 34.797783 0.000057 0.000106 0.001772
9516 34718 2002-07-04 14.420783 73.395800 28.827417 16.290550 -39.532167 44.367833 0.000055 0.000102 0.001691
9882 34719 2002-07-04 16.656500 71.694917 28.910950 14.472783 -20.354250 26.786467 0.000060 0.000112 0.001666
10248 34720 2002-07-04 13.938883 69.771467 28.658633 11.449783 -12.078667 20.477317 0.000061 0.000114 0.001750
10614 34721 2002-07-04 16.653933 65.645233 27.876133 5.294917 -11.221400 13.610417 0.000058 0.000109 0.001740
... ... ... ... ... ... ... ... ... ... ... ...
103211 63255200 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
103577 63255860 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
103943 63258870 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
104309 63258880 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
104675 63258900 2017-06-30 10.611667 67.725333 29.285333 11.653500 -5.334000 12.912000 0.000794 0.000473 0.002321
105041 63258950 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
105407 63259180 2017-06-30 9.708250 71.031750 29.490500 17.048000 -12.979667 21.434667 0.000538 0.000273 0.002068
105773 63259190 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
106139 63259200 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
106505 63259230 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
106871 63259240 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
107237 63259260 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
107603 63347940 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
107969 63348680 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
108335 63348700 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
108701 63348720 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
109067 63348750 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
109433 63351000 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
109799 63353030 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
110165 63354000 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
110531 63354010 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
110897 63354040 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
111263 63355030 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
111629 63355040 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
111995 63355050 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
112361 64111550 2017-06-30 15.278385 67.633154 28.739692 14.127583 -23.375000 27.887500 0.000003 0.000005 0.001705
112727 64113560 2017-06-30 13.827154 62.739077 27.976923 0.487667 -8.357417 10.514917 0.000107 0.000049 0.001705
113093 64113600 2017-06-30 NaN NaN NaN NaN NaN NaN NaN NaN NaN
113459 64115560 2017-06-30 16.871250 73.274250 28.602000 -0.189000 16.893333 17.142667 0.000003 0.000005 0.001753
113825 64117500 2017-06-30 6.149231 71.412077 29.282000 3.511583 -21.195083 22.225833 0.000003 0.000005 0.001705

113826 rows × 11 columns


In [53]:
#### Interface modification
# Generalized interface
# -- using dataframe to generalize the interpolation operation
# -- dataset for resampling and difference along dim='time', 
#   with coord = {'time','id'}


#### the approach using Linear Interpolations with 3D tensors
# Keyword Arguments
# approach 1 
# each of the indexers component might be ordered differently
#############
# def sel_points_multilinear(dset, dims='points', out = 'str', **indexers):
## test case
# def sel_points_multilinear(ds_9day, dims = 'points', out ='chlor_a', 
#                            time = list(['2002-07-13 00:00:00']),
#                            lat = list([5]),  lon = list([70]) ):
############
# e.g. time-ascending, lat-descending, need to tell 'time' from 'lat'
# use different parameters for inputs
## approach 2 
## from dataframe to dataset
## input:
##     dframe: list of {time}, {lan}, {lon}, {id}. bounds-aware
##     dset:   carry out the interpolation use dset data structure and its component 
## output:
##     a list or dataframe with chl_newInt.chl

# remember to take log_e instead of log_10

# clean up this notebook, seperate, clean, and take notes

# test case 4: use the full real data
#del(interpolate)
#del(sel_points_multilinear)
# froms dir import src  # to call src.functions
from tools.time_lat_lon_interpolate import interpolate
importlib.reload(interpolate)

# process to drop nan in any of the columns [id], [lat], [lon], [time]
row_case4 = floatsDFAll_resample_timeorder.dropna(subset=['id', 'lat', 'lon', 'time'], how = 'any') # these four fields are critical
# print('\n after dropping nan \n', row_case4)
result_out4 = interpolate.sel_points_multilinear_time_lat_lon(ds_resample, row_case4, dims = 'points', col_name ='chlor_a')
print('\n *** after the interpolation *** \n', result_out4)
# important: keep the id, since the dataframe has been modified in a bound-aware way in the function
print('\n *** this two length should be equal *** %d >= %d?' %(len(row_case4.index), len(result_out4.index) ) )


/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/numpy/lib/nanfunctions.py:703: RuntimeWarning: Mean of empty slice
  warnings.warn("Mean of empty slice", RuntimeWarning)
/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/pandas/core/indexing.py:296: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
/Users/vyan2000/local/miniconda3/envs/condapython3/lib/python3.5/site-packages/pandas/core/indexing.py:476: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
 *** after the interpolation *** 
               id       time        lat        lon       temp         ve  \
366        10206 2002-07-04  16.265717  66.663800        NaN   6.140233   
732        10208 2002-07-04  13.549633  70.195217        NaN  11.373300   
1098       11089 2002-07-04  15.657150  65.248067  27.773283   9.376883   
1464       15703 2002-07-04  13.611350  70.165200  28.590333  10.194983   
2196       27069 2002-07-04  19.969700  70.048350  28.916267  25.855350   
2928       28842 2002-07-04  18.350883  60.961600  27.226833   5.825783   
3294       34159 2002-07-04  13.394633  60.516650        NaN  31.603317   
4026       34210 2002-07-04   5.882953  56.749953  26.354721  -5.144814   
4392       34211 2002-07-04   7.797533  69.070367  28.430017  19.858683   
4758       34212 2002-07-04   6.519433  66.877317  28.568833  34.703000   
5490       34310 2002-07-04   5.023286  70.029000  28.954857   8.074714   
5856       34311 2002-07-04   9.730864  69.980455  28.593818  -2.896714   
6222       34312 2002-07-04   9.638095  65.167048  28.129857   7.005500   
6588       34314 2002-07-04   5.116600  54.903200  26.905500  -6.718778   
6954       34315 2002-07-04   5.162294  59.998824  28.303294 -17.022625   
7686       34708 2002-07-04  10.333167  61.099483  27.384050  22.553517   
8052       34709 2002-07-04   6.396520  52.252040  26.204160   0.462760   
8418       34710 2002-07-04  12.618183  49.966633  31.137717 -15.991800   
8784       34714 2002-07-04  13.767567  65.507833  27.870250  34.244133   
9150       34716 2002-07-04   7.695783  66.931733  28.781367  25.851967   
9516       34718 2002-07-04  14.420783  73.395800  28.827417  16.290550   
9882       34719 2002-07-04  16.656500  71.694917  28.910950  14.472783   
10248      34720 2002-07-04  13.938883  69.771467  28.658633  11.449783   
10614      34721 2002-07-04  16.653933  65.645233  27.876133   5.294917   
10980      34722 2002-07-04  10.935983  70.908850  28.703450   7.445617   
11346      34723 2002-07-04  16.587100  66.599800  28.407783   5.319900   
86010    2134712 2002-07-04   8.795967  64.194267  28.258667  12.196000   
367        10206 2002-07-19  16.072700  67.886683        NaN  14.527167   
733        10208 2002-07-19  12.458050  70.996300        NaN   0.988950   
1099       11089 2002-07-19  14.965100  66.350767  27.791150   6.747217   
...          ...        ...        ...        ...        ...        ...   
85276     147144 2017-06-15  14.572667  70.179567  29.493000  10.190933   
99550   62321990 2017-06-15  13.434167  60.508167  28.278083  16.173300   
100648  63157510 2017-06-15   7.810517  59.133933  28.565683  27.087900   
101380  63158530 2017-06-15   7.241083  57.466850  29.115433  -7.467850   
102478  63254860 2017-06-15   6.650636  57.272091  29.173409 -13.571667   
102844  63255180 2017-06-15   5.942217  62.901967  29.877033   6.626267   
103576  63255860 2017-06-15   8.232364  56.526273  28.334909   0.638300   
104674  63258900 2017-06-15  11.394667  66.441133  29.527950  19.424917   
105040  63258950 2017-06-15   9.665528  67.081306  29.651694   4.542086   
105406  63259180 2017-06-15  10.315883  70.122550  29.755983  11.165400   
105772  63259190 2017-06-15   8.182300  71.503133  29.743433   2.719310   
106504  63259230 2017-06-15  11.710174  69.002391  29.589565   6.724391   
108700  63348720 2017-06-15   7.832875  58.230143  28.273804  31.217091   
109066  63348750 2017-06-15   5.647766  66.040532  30.124574   2.377617   
112360  64111550 2017-06-15  16.753833  67.083500  29.548500   4.374650   
112726  64113560 2017-06-15  14.357517  62.191567  28.389650  13.057833   
113458  64115560 2017-06-15  16.855283  72.553533  29.724217  10.948783   
113824  64117500 2017-06-15   6.810550  71.186017  29.476617   2.463900   
84545     147140 2017-06-30  11.557667  61.136333  28.039000  83.892000   
85277     147144 2017-06-30  14.291000  70.923667  29.260333  17.542500   
99551   62321990 2017-06-30  12.863000  61.727800  27.980800  24.212500   
100649  63157510 2017-06-30   8.170833  60.425000  28.591667  25.888400   
101381  63158530 2017-06-30   7.562400  57.591200  28.432600   2.102750   
102845  63255180 2017-06-30   6.457000  63.455750  29.492250  26.794667   
104675  63258900 2017-06-30  10.611667  67.725333  29.285333  11.653500   
105407  63259180 2017-06-30   9.708250  71.031750  29.490500  17.048000   
112361  64111550 2017-06-30  15.278385  67.633154  28.739692  14.127583   
112727  64113560 2017-06-30  13.827154  62.739077  27.976923   0.487667   
113459  64115560 2017-06-30  16.871250  73.274250  28.602000  -0.189000   
113825  64117500 2017-06-30   6.149231  71.412077  29.282000   3.511583   

               vn        spd   var_lat   var_lon      var_tmp   chlor_a  
366      0.357733   7.394967  0.001451  0.005610  1000.000000       NaN  
732     -5.285617  15.006967  0.000063  0.000118  1000.000000       NaN  
1098   -14.097033  18.695917  0.000067  0.000129     0.003614       NaN  
1464    -4.513033  13.965250  0.000055  0.000102     0.088623       NaN  
2196    -5.424417  27.865400  0.000057  0.000106     0.001731       NaN  
2928    -9.921900  16.832533  0.000149  0.000362     0.003382       NaN  
3294    16.559017  36.755683  0.000061  0.000116  1000.000000  0.321245  
4026   -18.675465  26.752744  0.000066  0.000129     0.003705  0.328691  
4392   -14.960467  27.234933  0.000053  0.000098     0.003538  0.096233  
4758     1.993683  42.610483  0.000055  0.000102     0.003553  0.094689  
5490    -0.021000  10.808286  0.000056  0.000103     0.003749  0.109962  
5856   -11.504476  13.484762  0.000061  0.000114     0.003594  0.115337  
6222   -15.195200  17.403650  0.000075  0.000154     0.003670  0.165898  
6588    -5.381333  16.793444  0.000047  0.000087     0.003738  0.338628  
6954     5.554625  28.943938  0.000052  0.000094     0.003575  0.177327  
7686     4.269317  24.302933  0.000058  0.000110     0.001807  0.412506  
8052    46.740240  81.157560  0.000079  0.000162     0.001818  0.501909  
8418    -6.248550  46.052550  0.000046  0.000087     0.001837  0.224649  
8784     1.335217  35.355967  0.000059  0.000110     0.001815  0.344161  
9150     2.107750  34.797783  0.000057  0.000106     0.001772  0.110550  
9516   -39.532167  44.367833  0.000055  0.000102     0.001691  0.419638  
9882   -20.354250  26.786467  0.000060  0.000112     0.001666       NaN  
10248  -12.078667  20.477317  0.000061  0.000114     0.001750  0.236139  
10614  -11.221400  13.610417  0.000058  0.000109     0.001740       NaN  
10980  -16.828850  20.732050  0.000061  0.000115     0.001779       NaN  
11346   -1.815867  10.339867  0.000064  0.000124     0.001786       NaN  
86010  -20.248500  29.976417  0.000085  0.000176     0.001942  0.175341  
367     -8.716150  19.262550  0.001116  0.004011  1000.000000  0.137352  
733    -23.323367  26.479183  0.000065  0.000124  1000.000000  0.099238  
1099     6.272917  13.490517  0.000059  0.000109     0.003728  0.324946  
...           ...        ...       ...       ...          ...       ...  
85276   -2.238083  14.281700  0.000347  0.000176     0.002037       NaN  
99550   -6.508667  18.012117  0.002924  0.002439     0.001867       NaN  
100648   6.130500  34.657567  0.000010  0.000008     0.001766  0.314935  
101380   6.872683  28.816750  0.000076  0.000041     0.001765  0.144886  
102478 -13.335286  21.792000  0.002290  0.001733     0.001853  0.191329  
102844   6.505583  14.081183  0.000066  0.000034     0.001887  0.078646  
103576   0.823200   1.099600  0.018849  0.019519     0.002027  0.260779  
104674  -5.209017  26.490717  0.000199  0.000106     0.001803       NaN  
105040 -28.119771  31.811457  0.000415  0.000222     0.001851  0.104562  
105406  -7.142583  15.643117  0.000918  0.000631     0.001846  0.082649  
105772  -7.233862   8.656517  0.007749  0.007112     0.001956  0.112472  
106504 -17.843696  21.039565  0.005488  0.004817     0.001931  0.041524  
108700 -17.404636  39.430727  0.008580  0.008565     0.001927  0.223120  
109066 -11.912617  15.776319  0.000015  0.000010     0.001807  0.093046  
112360 -12.741350  17.871400  0.000004  0.000006     0.001684       NaN  
112726  -6.607233  16.582283  0.000069  0.000034     0.001684       NaN  
113458  -1.390750  16.621250  0.000003  0.000005     0.001686       NaN  
113824  -6.642417  15.401850  0.000003  0.000005     0.001684  0.124498  
84545  -12.843000  85.019000  0.000032  0.000014     0.001585  0.424789  
85277  -15.481500  23.507500  0.000129  0.000055     0.001716       NaN  
99551  -13.379500  27.666750  0.008339  0.007574     0.002022       NaN  
100649   5.273200  27.029800  0.000015  0.000010     0.001986  0.129658  
101381 -33.419500  33.622250  0.000004  0.000006     0.001870  0.149060  
102845  -4.388000  27.262333  0.000476  0.000268     0.002092  0.130959  
104675  -5.334000  12.912000  0.000794  0.000473     0.002321       NaN  
105407 -12.979667  21.434667  0.000538  0.000273     0.002068       NaN  
112361 -23.375000  27.887500  0.000003  0.000005     0.001705       NaN  
112727  -8.357417  10.514917  0.000107  0.000049     0.001705       NaN  
113459  16.893333  17.142667  0.000003  0.000005     0.001753       NaN  
113825 -21.195083  22.225833  0.000003  0.000005     0.001705  0.135096  

[2559 rows x 12 columns]

 *** this two length should be equal *** 2566 >= 2559?

In [54]:
# output the dataframe result_out4
var1 = "chl"
outdir_prefix = "./data_globcolour/output.data.interpolate/" + "df_Globcolor_"
outdir = outdir_prefix + var1 + "_" + str(freq) + "d.csv"
result_out4.to_csv(outdir)
print(pd.read_csv(outdir,index_col=0))

### plot for id 125776, which will be fit by LDS
plt.figure(figsize=(8,6))
result_out4[result_out4.id == 135776].plot(x='time', y ='chlor_a', title=('id - %d' % 135776) )
plt.show();
plt.close("all")


              id        time        lat        lon       temp         ve  \
366        10206  2002-07-04  16.265717  66.663800        NaN   6.140233   
732        10208  2002-07-04  13.549633  70.195217        NaN  11.373300   
1098       11089  2002-07-04  15.657150  65.248067  27.773283   9.376883   
1464       15703  2002-07-04  13.611350  70.165200  28.590333  10.194983   
2196       27069  2002-07-04  19.969700  70.048350  28.916267  25.855350   
2928       28842  2002-07-04  18.350883  60.961600  27.226833   5.825783   
3294       34159  2002-07-04  13.394633  60.516650        NaN  31.603317   
4026       34210  2002-07-04   5.882953  56.749953  26.354721  -5.144814   
4392       34211  2002-07-04   7.797533  69.070367  28.430017  19.858683   
4758       34212  2002-07-04   6.519433  66.877317  28.568833  34.703000   
5490       34310  2002-07-04   5.023286  70.029000  28.954857   8.074714   
5856       34311  2002-07-04   9.730864  69.980455  28.593818  -2.896714   
6222       34312  2002-07-04   9.638095  65.167048  28.129857   7.005500   
6588       34314  2002-07-04   5.116600  54.903200  26.905500  -6.718778   
6954       34315  2002-07-04   5.162294  59.998824  28.303294 -17.022625   
7686       34708  2002-07-04  10.333167  61.099483  27.384050  22.553517   
8052       34709  2002-07-04   6.396520  52.252040  26.204160   0.462760   
8418       34710  2002-07-04  12.618183  49.966633  31.137717 -15.991800   
8784       34714  2002-07-04  13.767567  65.507833  27.870250  34.244133   
9150       34716  2002-07-04   7.695783  66.931733  28.781367  25.851967   
9516       34718  2002-07-04  14.420783  73.395800  28.827417  16.290550   
9882       34719  2002-07-04  16.656500  71.694917  28.910950  14.472783   
10248      34720  2002-07-04  13.938883  69.771467  28.658633  11.449783   
10614      34721  2002-07-04  16.653933  65.645233  27.876133   5.294917   
10980      34722  2002-07-04  10.935983  70.908850  28.703450   7.445617   
11346      34723  2002-07-04  16.587100  66.599800  28.407783   5.319900   
86010    2134712  2002-07-04   8.795967  64.194267  28.258667  12.196000   
367        10206  2002-07-19  16.072700  67.886683        NaN  14.527167   
733        10208  2002-07-19  12.458050  70.996300        NaN   0.988950   
1099       11089  2002-07-19  14.965100  66.350767  27.791150   6.747217   
...          ...         ...        ...        ...        ...        ...   
85276     147144  2017-06-15  14.572667  70.179567  29.493000  10.190933   
99550   62321990  2017-06-15  13.434167  60.508167  28.278083  16.173300   
100648  63157510  2017-06-15   7.810517  59.133933  28.565683  27.087900   
101380  63158530  2017-06-15   7.241083  57.466850  29.115433  -7.467850   
102478  63254860  2017-06-15   6.650636  57.272091  29.173409 -13.571667   
102844  63255180  2017-06-15   5.942217  62.901967  29.877033   6.626267   
103576  63255860  2017-06-15   8.232364  56.526273  28.334909   0.638300   
104674  63258900  2017-06-15  11.394667  66.441133  29.527950  19.424917   
105040  63258950  2017-06-15   9.665528  67.081306  29.651694   4.542086   
105406  63259180  2017-06-15  10.315883  70.122550  29.755983  11.165400   
105772  63259190  2017-06-15   8.182300  71.503133  29.743433   2.719310   
106504  63259230  2017-06-15  11.710174  69.002391  29.589565   6.724391   
108700  63348720  2017-06-15   7.832875  58.230143  28.273804  31.217091   
109066  63348750  2017-06-15   5.647766  66.040532  30.124574   2.377617   
112360  64111550  2017-06-15  16.753833  67.083500  29.548500   4.374650   
112726  64113560  2017-06-15  14.357517  62.191567  28.389650  13.057833   
113458  64115560  2017-06-15  16.855283  72.553533  29.724217  10.948783   
113824  64117500  2017-06-15   6.810550  71.186017  29.476617   2.463900   
84545     147140  2017-06-30  11.557667  61.136333  28.039000  83.892000   
85277     147144  2017-06-30  14.291000  70.923667  29.260333  17.542500   
99551   62321990  2017-06-30  12.863000  61.727800  27.980800  24.212500   
100649  63157510  2017-06-30   8.170833  60.425000  28.591667  25.888400   
101381  63158530  2017-06-30   7.562400  57.591200  28.432600   2.102750   
102845  63255180  2017-06-30   6.457000  63.455750  29.492250  26.794667   
104675  63258900  2017-06-30  10.611667  67.725333  29.285333  11.653500   
105407  63259180  2017-06-30   9.708250  71.031750  29.490500  17.048000   
112361  64111550  2017-06-30  15.278385  67.633154  28.739692  14.127583   
112727  64113560  2017-06-30  13.827154  62.739077  27.976923   0.487667   
113459  64115560  2017-06-30  16.871250  73.274250  28.602000  -0.189000   
113825  64117500  2017-06-30   6.149231  71.412077  29.282000   3.511583   

               vn        spd   var_lat   var_lon      var_tmp   chlor_a  
366      0.357733   7.394967  0.001451  0.005610  1000.000000       NaN  
732     -5.285617  15.006967  0.000063  0.000118  1000.000000       NaN  
1098   -14.097033  18.695917  0.000067  0.000129     0.003614       NaN  
1464    -4.513033  13.965250  0.000055  0.000102     0.088623       NaN  
2196    -5.424417  27.865400  0.000057  0.000106     0.001731       NaN  
2928    -9.921900  16.832533  0.000149  0.000362     0.003382       NaN  
3294    16.559017  36.755683  0.000061  0.000116  1000.000000  0.321245  
4026   -18.675465  26.752744  0.000066  0.000129     0.003705  0.328691  
4392   -14.960467  27.234933  0.000053  0.000098     0.003538  0.096233  
4758     1.993683  42.610483  0.000055  0.000102     0.003553  0.094689  
5490    -0.021000  10.808286  0.000056  0.000103     0.003749  0.109962  
5856   -11.504476  13.484762  0.000061  0.000114     0.003594  0.115337  
6222   -15.195200  17.403650  0.000075  0.000154     0.003670  0.165898  
6588    -5.381333  16.793444  0.000047  0.000087     0.003738  0.338628  
6954     5.554625  28.943938  0.000052  0.000094     0.003575  0.177327  
7686     4.269317  24.302933  0.000058  0.000110     0.001807  0.412506  
8052    46.740240  81.157560  0.000079  0.000162     0.001818  0.501909  
8418    -6.248550  46.052550  0.000046  0.000087     0.001837  0.224649  
8784     1.335217  35.355967  0.000059  0.000110     0.001815  0.344161  
9150     2.107750  34.797783  0.000057  0.000106     0.001772  0.110550  
9516   -39.532167  44.367833  0.000055  0.000102     0.001691  0.419638  
9882   -20.354250  26.786467  0.000060  0.000112     0.001666       NaN  
10248  -12.078667  20.477317  0.000061  0.000114     0.001750  0.236139  
10614  -11.221400  13.610417  0.000058  0.000109     0.001740       NaN  
10980  -16.828850  20.732050  0.000061  0.000115     0.001779       NaN  
11346   -1.815867  10.339867  0.000064  0.000124     0.001786       NaN  
86010  -20.248500  29.976417  0.000085  0.000176     0.001942  0.175341  
367     -8.716150  19.262550  0.001116  0.004011  1000.000000  0.137352  
733    -23.323367  26.479183  0.000065  0.000124  1000.000000  0.099238  
1099     6.272917  13.490517  0.000059  0.000109     0.003728  0.324946  
...           ...        ...       ...       ...          ...       ...  
85276   -2.238083  14.281700  0.000347  0.000176     0.002037       NaN  
99550   -6.508667  18.012117  0.002924  0.002439     0.001867       NaN  
100648   6.130500  34.657567  0.000010  0.000008     0.001766  0.314935  
101380   6.872683  28.816750  0.000076  0.000041     0.001765  0.144886  
102478 -13.335286  21.792000  0.002290  0.001733     0.001853  0.191329  
102844   6.505583  14.081183  0.000066  0.000034     0.001887  0.078646  
103576   0.823200   1.099600  0.018849  0.019519     0.002027  0.260779  
104674  -5.209017  26.490717  0.000199  0.000106     0.001803       NaN  
105040 -28.119771  31.811457  0.000415  0.000222     0.001851  0.104562  
105406  -7.142583  15.643117  0.000918  0.000631     0.001846  0.082649  
105772  -7.233862   8.656517  0.007749  0.007112     0.001956  0.112472  
106504 -17.843696  21.039565  0.005488  0.004817     0.001931  0.041524  
108700 -17.404636  39.430727  0.008580  0.008565     0.001927  0.223120  
109066 -11.912617  15.776319  0.000015  0.000010     0.001807  0.093046  
112360 -12.741350  17.871400  0.000004  0.000006     0.001684       NaN  
112726  -6.607233  16.582283  0.000069  0.000034     0.001684       NaN  
113458  -1.390750  16.621250  0.000003  0.000005     0.001686       NaN  
113824  -6.642417  15.401850  0.000003  0.000005     0.001684  0.124498  
84545  -12.843000  85.019000  0.000032  0.000014     0.001585  0.424789  
85277  -15.481500  23.507500  0.000129  0.000055     0.001716       NaN  
99551  -13.379500  27.666750  0.008339  0.007574     0.002022       NaN  
100649   5.273200  27.029800  0.000015  0.000010     0.001986  0.129658  
101381 -33.419500  33.622250  0.000004  0.000006     0.001870  0.149060  
102845  -4.388000  27.262333  0.000476  0.000268     0.002092  0.130959  
104675  -5.334000  12.912000  0.000794  0.000473     0.002321       NaN  
105407 -12.979667  21.434667  0.000538  0.000273     0.002068       NaN  
112361 -23.375000  27.887500  0.000003  0.000005     0.001705       NaN  
112727  -8.357417  10.514917  0.000107  0.000049     0.001705       NaN  
113459  16.893333  17.142667  0.000003  0.000005     0.001754       NaN  
113825 -21.195083  22.225833  0.000003  0.000005     0.001705  0.135096  

[2559 rows x 12 columns]
<matplotlib.figure.Figure at 0x120c852e8>

In [55]:
#### now interpolate the distance to the coast
from tools.time_lat_lon_interpolate import interpolate
importlib.reload(interpolate)
from tools import distance
importlib.reload(distance)


result_dist = interpolate.sel_points_multilinear_dist_lat_lon(result_out4, dims='points', col_name='dist') # interpolation

print('\n *** after the interpolation *** \n', result_dist)
# important: keep the id, since the dataframe has been modified in a bound-aware way in the function
print('\n *** this two length should be equal *** %d >= %d?' %(len(result_out4.index), len(result_dist.index) ) )


after processing, the minimum longitude is 0.0200004.3 and maximum is 359.9800004.3
dist_db.shape is (40500000, 3), dist_db_arabian.shape is (431250, 3)
 ******** Interpolation of distance using (lat, lon) ******* 


 *** after the interpolation *** 
               id       time        lat        lon       temp         ve  \
366        10206 2002-07-04  16.265717  66.663800        NaN   6.140233   
732        10208 2002-07-04  13.549633  70.195217        NaN  11.373300   
1098       11089 2002-07-04  15.657150  65.248067  27.773283   9.376883   
1464       15703 2002-07-04  13.611350  70.165200  28.590333  10.194983   
2196       27069 2002-07-04  19.969700  70.048350  28.916267  25.855350   
2928       28842 2002-07-04  18.350883  60.961600  27.226833   5.825783   
3294       34159 2002-07-04  13.394633  60.516650        NaN  31.603317   
4026       34210 2002-07-04   5.882953  56.749953  26.354721  -5.144814   
4392       34211 2002-07-04   7.797533  69.070367  28.430017  19.858683   
4758       34212 2002-07-04   6.519433  66.877317  28.568833  34.703000   
5490       34310 2002-07-04   5.023286  70.029000  28.954857   8.074714   
5856       34311 2002-07-04   9.730864  69.980455  28.593818  -2.896714   
6222       34312 2002-07-04   9.638095  65.167048  28.129857   7.005500   
6588       34314 2002-07-04   5.116600  54.903200  26.905500  -6.718778   
6954       34315 2002-07-04   5.162294  59.998824  28.303294 -17.022625   
7686       34708 2002-07-04  10.333167  61.099483  27.384050  22.553517   
8052       34709 2002-07-04   6.396520  52.252040  26.204160   0.462760   
8418       34710 2002-07-04  12.618183  49.966633  31.137717 -15.991800   
8784       34714 2002-07-04  13.767567  65.507833  27.870250  34.244133   
9150       34716 2002-07-04   7.695783  66.931733  28.781367  25.851967   
9516       34718 2002-07-04  14.420783  73.395800  28.827417  16.290550   
9882       34719 2002-07-04  16.656500  71.694917  28.910950  14.472783   
10248      34720 2002-07-04  13.938883  69.771467  28.658633  11.449783   
10614      34721 2002-07-04  16.653933  65.645233  27.876133   5.294917   
10980      34722 2002-07-04  10.935983  70.908850  28.703450   7.445617   
11346      34723 2002-07-04  16.587100  66.599800  28.407783   5.319900   
86010    2134712 2002-07-04   8.795967  64.194267  28.258667  12.196000   
367        10206 2002-07-19  16.072700  67.886683        NaN  14.527167   
733        10208 2002-07-19  12.458050  70.996300        NaN   0.988950   
1099       11089 2002-07-19  14.965100  66.350767  27.791150   6.747217   
...          ...        ...        ...        ...        ...        ...   
85276     147144 2017-06-15  14.572667  70.179567  29.493000  10.190933   
99550   62321990 2017-06-15  13.434167  60.508167  28.278083  16.173300   
100648  63157510 2017-06-15   7.810517  59.133933  28.565683  27.087900   
101380  63158530 2017-06-15   7.241083  57.466850  29.115433  -7.467850   
102478  63254860 2017-06-15   6.650636  57.272091  29.173409 -13.571667   
102844  63255180 2017-06-15   5.942217  62.901967  29.877033   6.626267   
103576  63255860 2017-06-15   8.232364  56.526273  28.334909   0.638300   
104674  63258900 2017-06-15  11.394667  66.441133  29.527950  19.424917   
105040  63258950 2017-06-15   9.665528  67.081306  29.651694   4.542086   
105406  63259180 2017-06-15  10.315883  70.122550  29.755983  11.165400   
105772  63259190 2017-06-15   8.182300  71.503133  29.743433   2.719310   
106504  63259230 2017-06-15  11.710174  69.002391  29.589565   6.724391   
108700  63348720 2017-06-15   7.832875  58.230143  28.273804  31.217091   
109066  63348750 2017-06-15   5.647766  66.040532  30.124574   2.377617   
112360  64111550 2017-06-15  16.753833  67.083500  29.548500   4.374650   
112726  64113560 2017-06-15  14.357517  62.191567  28.389650  13.057833   
113458  64115560 2017-06-15  16.855283  72.553533  29.724217  10.948783   
113824  64117500 2017-06-15   6.810550  71.186017  29.476617   2.463900   
84545     147140 2017-06-30  11.557667  61.136333  28.039000  83.892000   
85277     147144 2017-06-30  14.291000  70.923667  29.260333  17.542500   
99551   62321990 2017-06-30  12.863000  61.727800  27.980800  24.212500   
100649  63157510 2017-06-30   8.170833  60.425000  28.591667  25.888400   
101381  63158530 2017-06-30   7.562400  57.591200  28.432600   2.102750   
102845  63255180 2017-06-30   6.457000  63.455750  29.492250  26.794667   
104675  63258900 2017-06-30  10.611667  67.725333  29.285333  11.653500   
105407  63259180 2017-06-30   9.708250  71.031750  29.490500  17.048000   
112361  64111550 2017-06-30  15.278385  67.633154  28.739692  14.127583   
112727  64113560 2017-06-30  13.827154  62.739077  27.976923   0.487667   
113459  64115560 2017-06-30  16.871250  73.274250  28.602000  -0.189000   
113825  64117500 2017-06-30   6.149231  71.412077  29.282000   3.511583   

               vn        spd   var_lat   var_lon      var_tmp   chlor_a  \
366      0.357733   7.394967  0.001451  0.005610  1000.000000       NaN   
732     -5.285617  15.006967  0.000063  0.000118  1000.000000       NaN   
1098   -14.097033  18.695917  0.000067  0.000129     0.003614       NaN   
1464    -4.513033  13.965250  0.000055  0.000102     0.088623       NaN   
2196    -5.424417  27.865400  0.000057  0.000106     0.001731       NaN   
2928    -9.921900  16.832533  0.000149  0.000362     0.003382       NaN   
3294    16.559017  36.755683  0.000061  0.000116  1000.000000  0.321245   
4026   -18.675465  26.752744  0.000066  0.000129     0.003705  0.328691   
4392   -14.960467  27.234933  0.000053  0.000098     0.003538  0.096233   
4758     1.993683  42.610483  0.000055  0.000102     0.003553  0.094689   
5490    -0.021000  10.808286  0.000056  0.000103     0.003749  0.109962   
5856   -11.504476  13.484762  0.000061  0.000114     0.003594  0.115337   
6222   -15.195200  17.403650  0.000075  0.000154     0.003670  0.165898   
6588    -5.381333  16.793444  0.000047  0.000087     0.003738  0.338628   
6954     5.554625  28.943938  0.000052  0.000094     0.003575  0.177327   
7686     4.269317  24.302933  0.000058  0.000110     0.001807  0.412506   
8052    46.740240  81.157560  0.000079  0.000162     0.001818  0.501909   
8418    -6.248550  46.052550  0.000046  0.000087     0.001837  0.224649   
8784     1.335217  35.355967  0.000059  0.000110     0.001815  0.344161   
9150     2.107750  34.797783  0.000057  0.000106     0.001772  0.110550   
9516   -39.532167  44.367833  0.000055  0.000102     0.001691  0.419638   
9882   -20.354250  26.786467  0.000060  0.000112     0.001666       NaN   
10248  -12.078667  20.477317  0.000061  0.000114     0.001750  0.236139   
10614  -11.221400  13.610417  0.000058  0.000109     0.001740       NaN   
10980  -16.828850  20.732050  0.000061  0.000115     0.001779       NaN   
11346   -1.815867  10.339867  0.000064  0.000124     0.001786       NaN   
86010  -20.248500  29.976417  0.000085  0.000176     0.001942  0.175341   
367     -8.716150  19.262550  0.001116  0.004011  1000.000000  0.137352   
733    -23.323367  26.479183  0.000065  0.000124  1000.000000  0.099238   
1099     6.272917  13.490517  0.000059  0.000109     0.003728  0.324946   
...           ...        ...       ...       ...          ...       ...   
85276   -2.238083  14.281700  0.000347  0.000176     0.002037       NaN   
99550   -6.508667  18.012117  0.002924  0.002439     0.001867       NaN   
100648   6.130500  34.657567  0.000010  0.000008     0.001766  0.314935   
101380   6.872683  28.816750  0.000076  0.000041     0.001765  0.144886   
102478 -13.335286  21.792000  0.002290  0.001733     0.001853  0.191329   
102844   6.505583  14.081183  0.000066  0.000034     0.001887  0.078646   
103576   0.823200   1.099600  0.018849  0.019519     0.002027  0.260779   
104674  -5.209017  26.490717  0.000199  0.000106     0.001803       NaN   
105040 -28.119771  31.811457  0.000415  0.000222     0.001851  0.104562   
105406  -7.142583  15.643117  0.000918  0.000631     0.001846  0.082649   
105772  -7.233862   8.656517  0.007749  0.007112     0.001956  0.112472   
106504 -17.843696  21.039565  0.005488  0.004817     0.001931  0.041524   
108700 -17.404636  39.430727  0.008580  0.008565     0.001927  0.223120   
109066 -11.912617  15.776319  0.000015  0.000010     0.001807  0.093046   
112360 -12.741350  17.871400  0.000004  0.000006     0.001684       NaN   
112726  -6.607233  16.582283  0.000069  0.000034     0.001684       NaN   
113458  -1.390750  16.621250  0.000003  0.000005     0.001686       NaN   
113824  -6.642417  15.401850  0.000003  0.000005     0.001684  0.124498   
84545  -12.843000  85.019000  0.000032  0.000014     0.001585  0.424789   
85277  -15.481500  23.507500  0.000129  0.000055     0.001716       NaN   
99551  -13.379500  27.666750  0.008339  0.007574     0.002022       NaN   
100649   5.273200  27.029800  0.000015  0.000010     0.001986  0.129658   
101381 -33.419500  33.622250  0.000004  0.000006     0.001870  0.149060   
102845  -4.388000  27.262333  0.000476  0.000268     0.002092  0.130959   
104675  -5.334000  12.912000  0.000794  0.000473     0.002321       NaN   
105407 -12.979667  21.434667  0.000538  0.000273     0.002068       NaN   
112361 -23.375000  27.887500  0.000003  0.000005     0.001705       NaN   
112727  -8.357417  10.514917  0.000107  0.000049     0.001705       NaN   
113459  16.893333  17.142667  0.000003  0.000005     0.001753       NaN   
113825 -21.195083  22.225833  0.000003  0.000005     0.001705  0.135096   

               dist  
366      644.346876  
732      304.915617  
1098     790.995091  
1464     312.036955  
2196     105.763836  
2928     312.718966  
3294     637.078686  
4026     761.139172  
4392     427.708647  
4758     644.256384  
5490     316.339631  
5856     254.118974  
6222     777.066104  
6588     628.158223  
6954    1012.615255  
7686     756.055492  
8052     306.502480  
8418     103.261978  
8784     763.459988  
9150     642.504705  
9516      85.027550  
9882     170.756169  
10248    367.784917  
10614    680.264624  
10980    131.953916  
11346    620.209000  
86010    898.206857  
367      581.752175  
733      160.061930  
1099     733.142554  
...             ...  
85276    388.223010  
99550    633.297339  
100648   725.658656  
101380   667.943026  
102478   715.916792  
102844  1085.649383  
103576   522.599171  
104674   617.336774  
105040   571.323295  
105406   232.404475  
105772   167.192022  
106504   342.140474  
108700   658.910708  
109066   742.014936  
112360   574.311951  
112726   692.122499  
113458    77.225451  
113824   173.402129  
84545    726.254721  
85277    326.533641  
99551    772.348381  
100649   805.220865  
101381   643.899278  
102845  1022.681205  
104675   481.403226  
105407   141.373115  
112361   623.755105  
112727   775.259018  
113459     2.169704  
113825   145.314631  

[2559 rows x 13 columns]

 *** this two length should be equal *** 2559 >= 2559?

In [56]:
from tools import distance
importlib.reload(distance)

print("\n ******* Visualization on 'distance to coast' ******* \n" )
distance.spatial_plots_distance(result_dist)


 ******* Visualization on 'distance to coast' ******* 

Out[56]:
0

In [57]:
# output the dataframe result_dist
vardist = "dist"
outdir_prefix = "./data_globcolour/output.data.interpolate/" + "df_Globcolor_"
outdir = outdir_prefix + var1 + vardist + "_" + str(freq) + "d.csv"
result_dist.to_csv(outdir)
print(pd.read_csv(outdir,index_col=0))


              id        time        lat        lon       temp         ve  \
366        10206  2002-07-04  16.265717  66.663800        NaN   6.140233   
732        10208  2002-07-04  13.549633  70.195217        NaN  11.373300   
1098       11089  2002-07-04  15.657150  65.248067  27.773283   9.376883   
1464       15703  2002-07-04  13.611350  70.165200  28.590333  10.194983   
2196       27069  2002-07-04  19.969700  70.048350  28.916267  25.855350   
2928       28842  2002-07-04  18.350883  60.961600  27.226833   5.825783   
3294       34159  2002-07-04  13.394633  60.516650        NaN  31.603317   
4026       34210  2002-07-04   5.882953  56.749953  26.354721  -5.144814   
4392       34211  2002-07-04   7.797533  69.070367  28.430017  19.858683   
4758       34212  2002-07-04   6.519433  66.877317  28.568833  34.703000   
5490       34310  2002-07-04   5.023286  70.029000  28.954857   8.074714   
5856       34311  2002-07-04   9.730864  69.980455  28.593818  -2.896714   
6222       34312  2002-07-04   9.638095  65.167048  28.129857   7.005500   
6588       34314  2002-07-04   5.116600  54.903200  26.905500  -6.718778   
6954       34315  2002-07-04   5.162294  59.998824  28.303294 -17.022625   
7686       34708  2002-07-04  10.333167  61.099483  27.384050  22.553517   
8052       34709  2002-07-04   6.396520  52.252040  26.204160   0.462760   
8418       34710  2002-07-04  12.618183  49.966633  31.137717 -15.991800   
8784       34714  2002-07-04  13.767567  65.507833  27.870250  34.244133   
9150       34716  2002-07-04   7.695783  66.931733  28.781367  25.851967   
9516       34718  2002-07-04  14.420783  73.395800  28.827417  16.290550   
9882       34719  2002-07-04  16.656500  71.694917  28.910950  14.472783   
10248      34720  2002-07-04  13.938883  69.771467  28.658633  11.449783   
10614      34721  2002-07-04  16.653933  65.645233  27.876133   5.294917   
10980      34722  2002-07-04  10.935983  70.908850  28.703450   7.445617   
11346      34723  2002-07-04  16.587100  66.599800  28.407783   5.319900   
86010    2134712  2002-07-04   8.795967  64.194267  28.258667  12.196000   
367        10206  2002-07-19  16.072700  67.886683        NaN  14.527167   
733        10208  2002-07-19  12.458050  70.996300        NaN   0.988950   
1099       11089  2002-07-19  14.965100  66.350767  27.791150   6.747217   
...          ...         ...        ...        ...        ...        ...   
85276     147144  2017-06-15  14.572667  70.179567  29.493000  10.190933   
99550   62321990  2017-06-15  13.434167  60.508167  28.278083  16.173300   
100648  63157510  2017-06-15   7.810517  59.133933  28.565683  27.087900   
101380  63158530  2017-06-15   7.241083  57.466850  29.115433  -7.467850   
102478  63254860  2017-06-15   6.650636  57.272091  29.173409 -13.571667   
102844  63255180  2017-06-15   5.942217  62.901967  29.877033   6.626267   
103576  63255860  2017-06-15   8.232364  56.526273  28.334909   0.638300   
104674  63258900  2017-06-15  11.394667  66.441133  29.527950  19.424917   
105040  63258950  2017-06-15   9.665528  67.081306  29.651694   4.542086   
105406  63259180  2017-06-15  10.315883  70.122550  29.755983  11.165400   
105772  63259190  2017-06-15   8.182300  71.503133  29.743433   2.719310   
106504  63259230  2017-06-15  11.710174  69.002391  29.589565   6.724391   
108700  63348720  2017-06-15   7.832875  58.230143  28.273804  31.217091   
109066  63348750  2017-06-15   5.647766  66.040532  30.124574   2.377617   
112360  64111550  2017-06-15  16.753833  67.083500  29.548500   4.374650   
112726  64113560  2017-06-15  14.357517  62.191567  28.389650  13.057833   
113458  64115560  2017-06-15  16.855283  72.553533  29.724217  10.948783   
113824  64117500  2017-06-15   6.810550  71.186017  29.476617   2.463900   
84545     147140  2017-06-30  11.557667  61.136333  28.039000  83.892000   
85277     147144  2017-06-30  14.291000  70.923667  29.260333  17.542500   
99551   62321990  2017-06-30  12.863000  61.727800  27.980800  24.212500   
100649  63157510  2017-06-30   8.170833  60.425000  28.591667  25.888400   
101381  63158530  2017-06-30   7.562400  57.591200  28.432600   2.102750   
102845  63255180  2017-06-30   6.457000  63.455750  29.492250  26.794667   
104675  63258900  2017-06-30  10.611667  67.725333  29.285333  11.653500   
105407  63259180  2017-06-30   9.708250  71.031750  29.490500  17.048000   
112361  64111550  2017-06-30  15.278385  67.633154  28.739692  14.127583   
112727  64113560  2017-06-30  13.827154  62.739077  27.976923   0.487667   
113459  64115560  2017-06-30  16.871250  73.274250  28.602000  -0.189000   
113825  64117500  2017-06-30   6.149231  71.412077  29.282000   3.511583   

               vn        spd   var_lat   var_lon      var_tmp   chlor_a  \
366      0.357733   7.394967  0.001451  0.005610  1000.000000       NaN   
732     -5.285617  15.006967  0.000063  0.000118  1000.000000       NaN   
1098   -14.097033  18.695917  0.000067  0.000129     0.003614       NaN   
1464    -4.513033  13.965250  0.000055  0.000102     0.088623       NaN   
2196    -5.424417  27.865400  0.000057  0.000106     0.001731       NaN   
2928    -9.921900  16.832533  0.000149  0.000362     0.003382       NaN   
3294    16.559017  36.755683  0.000061  0.000116  1000.000000  0.321245   
4026   -18.675465  26.752744  0.000066  0.000129     0.003705  0.328691   
4392   -14.960467  27.234933  0.000053  0.000098     0.003538  0.096233   
4758     1.993683  42.610483  0.000055  0.000102     0.003553  0.094689   
5490    -0.021000  10.808286  0.000056  0.000103     0.003749  0.109962   
5856   -11.504476  13.484762  0.000061  0.000114     0.003594  0.115337   
6222   -15.195200  17.403650  0.000075  0.000154     0.003670  0.165898   
6588    -5.381333  16.793444  0.000047  0.000087     0.003738  0.338628   
6954     5.554625  28.943938  0.000052  0.000094     0.003575  0.177327   
7686     4.269317  24.302933  0.000058  0.000110     0.001807  0.412506   
8052    46.740240  81.157560  0.000079  0.000162     0.001818  0.501909   
8418    -6.248550  46.052550  0.000046  0.000087     0.001837  0.224649   
8784     1.335217  35.355967  0.000059  0.000110     0.001815  0.344161   
9150     2.107750  34.797783  0.000057  0.000106     0.001772  0.110550   
9516   -39.532167  44.367833  0.000055  0.000102     0.001691  0.419638   
9882   -20.354250  26.786467  0.000060  0.000112     0.001666       NaN   
10248  -12.078667  20.477317  0.000061  0.000114     0.001750  0.236139   
10614  -11.221400  13.610417  0.000058  0.000109     0.001740       NaN   
10980  -16.828850  20.732050  0.000061  0.000115     0.001779       NaN   
11346   -1.815867  10.339867  0.000064  0.000124     0.001786       NaN   
86010  -20.248500  29.976417  0.000085  0.000176     0.001942  0.175341   
367     -8.716150  19.262550  0.001116  0.004011  1000.000000  0.137352   
733    -23.323367  26.479183  0.000065  0.000124  1000.000000  0.099238   
1099     6.272917  13.490517  0.000059  0.000109     0.003728  0.324946   
...           ...        ...       ...       ...          ...       ...   
85276   -2.238083  14.281700  0.000347  0.000176     0.002037       NaN   
99550   -6.508667  18.012117  0.002924  0.002439     0.001867       NaN   
100648   6.130500  34.657567  0.000010  0.000008     0.001766  0.314935   
101380   6.872683  28.816750  0.000076  0.000041     0.001765  0.144886   
102478 -13.335286  21.792000  0.002290  0.001733     0.001853  0.191329   
102844   6.505583  14.081183  0.000066  0.000034     0.001887  0.078646   
103576   0.823200   1.099600  0.018849  0.019519     0.002027  0.260779   
104674  -5.209017  26.490717  0.000199  0.000106     0.001803       NaN   
105040 -28.119771  31.811457  0.000415  0.000222     0.001851  0.104562   
105406  -7.142583  15.643117  0.000918  0.000631     0.001846  0.082649   
105772  -7.233862   8.656517  0.007749  0.007112     0.001956  0.112472   
106504 -17.843696  21.039565  0.005488  0.004817     0.001931  0.041524   
108700 -17.404636  39.430727  0.008580  0.008565     0.001927  0.223120   
109066 -11.912617  15.776319  0.000015  0.000010     0.001807  0.093046   
112360 -12.741350  17.871400  0.000004  0.000006     0.001684       NaN   
112726  -6.607233  16.582283  0.000069  0.000034     0.001684       NaN   
113458  -1.390750  16.621250  0.000003  0.000005     0.001686       NaN   
113824  -6.642417  15.401850  0.000003  0.000005     0.001684  0.124498   
84545  -12.843000  85.019000  0.000032  0.000014     0.001585  0.424789   
85277  -15.481500  23.507500  0.000129  0.000055     0.001716       NaN   
99551  -13.379500  27.666750  0.008339  0.007574     0.002022       NaN   
100649   5.273200  27.029800  0.000015  0.000010     0.001986  0.129658   
101381 -33.419500  33.622250  0.000004  0.000006     0.001870  0.149060   
102845  -4.388000  27.262333  0.000476  0.000268     0.002092  0.130959   
104675  -5.334000  12.912000  0.000794  0.000473     0.002321       NaN   
105407 -12.979667  21.434667  0.000538  0.000273     0.002068       NaN   
112361 -23.375000  27.887500  0.000003  0.000005     0.001705       NaN   
112727  -8.357417  10.514917  0.000107  0.000049     0.001705       NaN   
113459  16.893333  17.142667  0.000003  0.000005     0.001754       NaN   
113825 -21.195083  22.225833  0.000003  0.000005     0.001705  0.135096   

               dist  
366      644.346876  
732      304.915617  
1098     790.995091  
1464     312.036955  
2196     105.763836  
2928     312.718966  
3294     637.078686  
4026     761.139172  
4392     427.708647  
4758     644.256384  
5490     316.339631  
5856     254.118974  
6222     777.066104  
6588     628.158223  
6954    1012.615255  
7686     756.055492  
8052     306.502480  
8418     103.261978  
8784     763.459988  
9150     642.504705  
9516      85.027550  
9882     170.756169  
10248    367.784917  
10614    680.264624  
10980    131.953916  
11346    620.209000  
86010    898.206857  
367      581.752175  
733      160.061930  
1099     733.142554  
...             ...  
85276    388.223010  
99550    633.297339  
100648   725.658656  
101380   667.943026  
102478   715.916792  
102844  1085.649383  
103576   522.599171  
104674   617.336774  
105040   571.323295  
105406   232.404475  
105772   167.192022  
106504   342.140474  
108700   658.910708  
109066   742.014936  
112360   574.311951  
112726   692.122499  
113458    77.225451  
113824   173.402129  
84545    726.254721  
85277    326.533641  
99551    772.348381  
100649   805.220865  
101381   643.899278  
102845  1022.681205  
104675   481.403226  
105407   141.373115  
112361   623.755105  
112727   775.259018  
113459     2.169704  
113825   145.314631  

[2559 rows x 13 columns]

In [ ]:


In [ ]: