In [1]:
% matplotlib inline
import pandas as pd
import glob
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy
import numpy as np
mpl.rcParams['ytick.labelsize'] = 14
mpl.rcParams['xtick.labelsize'] = 14
mpl.rcParams['font.size'] = 14
In [2]:
GRLM = "/home2/svimal/Github/Nina/data/lake_610/610_GRLM10.txt"; print GRLM
MODIS = "/home2/svimal/Github/Nina/data/lake_610/MODIS_dates_and_241_area.txt"; print MODIS
df_grlm = pd.read_csv(GRLM, skiprows=43, delim_whitespace=True, names="mission,cycle,date,hour,minute,lake_height,error,mean(decibels),IonoCorrection,TropCorrection".split(","), engine='python', index_col=False)
df_grlm.head(5)
Out[2]:
In [3]:
df_grlm = pd.read_csv(GRLM, skiprows=43, delim_whitespace=True, names="mission,cycle,date,hour,minute,lake_height,error,mean(decibels),IonoCorrection,TropCorrection".split(","), engine='python', index_col=False)
def get_year(date): return int(str(date)[0:4])
def get_month(date): return int(str(date)[4:6])
def get_day(date): return int(str(date)[6:])
df_grlm['year'] = df_grlm['date'].apply(get_year)
df_grlm['month'] = df_grlm['date'].apply(get_month)
df_grlm['day'] = df_grlm['date'].apply(get_day)
df_grlm = df_grlm.where(df_grlm.minute < 61 ) # remove lines that do not have time
df_grlm = df_grlm.where(df_grlm.lake_height < 900 ) # remove entries that do not have lake-height
In [4]:
df_grlm.lake_height.plot(); plt.title("Actual data without resampling"); plt.ylabel("Variation (m)")
Out[4]:
In [5]:
df_grlm.lake_height.interpolate().plot(); plt.title("Interpolated Actual data without resampling"); plt.ylabel("Variation (m)")
Out[5]:
In [6]:
df = df_grlm
df[["year", "month", "day", "hour", "minute"]] = df[["year", "month", "day", "hour", "minute"]].fillna(0).astype(int)
df['Time'] = df.year.astype(str).str.cat(df.month.astype(str).astype(str), sep='-').str.cat(df.day.astype(str), sep='-')\
.str.cat(df.hour.astype(str).astype(str), sep='-').str.cat(df.minute.astype(str).astype(str), sep='-')
df = df.where(df.year>10) # to ger rid of all the nan values
df.index = pd.to_datetime(pd.Series(df["Time"]), format="%Y-%m-%d-%H-%M");
print df.index[0:3], df.index[-3:]
In [7]:
df["lake_height"].resample("M").mean().plot(); plt.title("Mean Monthly Altimetry"); plt.ylabel("Variation (m)")
Out[7]:
In [8]:
df["lake_height"].resample("A").mean().plot(); plt.title("Mean Annual Altimetry"); plt.ylabel("Variation (m)")
Out[8]:
In [9]:
"""import scipy.io
mat = scipy.io.loadmat('/home2/svimal/Github/Nina/data/lake_610/ts_GLWD1_241_h22v04.mat')
print mat.keys()
import numpy as np
np.shape(mat["miss_ts"].T), np.shape(mat["ts_water"])
miss_ts = np.squeeze(mat["miss_ts"].T)
ts_water = np.squeeze(mat["ts_water"])
miss_ts[0:3], ts_water[0:3]
#mat["miss_ts"][0][0:5], mat["ts_water"][0:5]"""
Out[9]:
In [10]:
df_modis = pd.read_csv(MODIS, names=["Dates", "Area"], sep="\t",engine='python', index_col=False)
#print df_modis.Dates.head(10),df_modis.Dates.tail(10)
print pd.date_range('1993-01-01', periods=len(df_modis), freq='10D')
print df_modis.Dates.head(1), df_modis.Dates.tail(1)
df_modis.index = pd.date_range('1993-01-01', periods=len(df_modis), freq='10D')
print df_modis.head(2)
df_modis.Area.plot(); plt.title("MODIS data from file " +str(MODIS).split("/")[-1]); plt.ylabel("MODIS Area")
Out[10]:
In [11]:
df_glrm_daily = df["lake_height"].resample("D").mean().interpolate()
df_modis_daily = df_modis["Area"].resample("D").mean().interpolate()
start = max(df_glrm_daily.index[0], df_modis_daily.index[0])
end = min(df_glrm_daily.index[-1], df_modis_daily.index[-1])
print start, end
df_glrm_subset = df_glrm_daily[(df_glrm_daily.index >= start) & (df_glrm_daily.index <= end)]
df_modis_subset = df_modis_daily[(df_modis_daily.index >= start) & (df_modis_daily.index <= end)]
print len(df_glrm_subset), len(df_modis_subset)
In [12]:
fig, ax1 = plt.subplots()
t = df_glrm_subset.resample("W").mean().interpolate().index
s1 = df_glrm_subset.resample("W").mean().interpolate().tolist()
ax1.plot(t, s1, 'b-'); ax1.set_xlabel('time (s)')
# Make the y-axis label, ticks and tick labels match the line color.
ax1.set_ylabel('GLRM_10 Altimetry', color='b')
ax1.tick_params('y', colors='b'); ax2 = ax1.twinx()
s2 = df_modis_subset.resample("W").mean().interpolate().tolist()
ax2.plot(t, s2, 'r-'); ax2.set_ylabel('MODIS Surface Area', color='r')
ax2.tick_params('y', colors='r'); fig.tight_layout(); plt.title( "GRLM & MODIS Overlapping period (Weekly)"); plt.show()
In [13]:
cor = numpy.corrcoef(df_glrm_subset.resample("W").mean().interpolate().tolist(),
df_modis_subset.resample("W").mean().interpolate().tolist())
print "correlation coefficient is: " , cor[0][1]
In [19]:
fig, ax1 = plt.subplots()
t = df_glrm_subset.resample("A").mean().interpolate().index
s1 = df_glrm_subset.resample("A").mean().interpolate().tolist()
ax1.plot(t, s1, 'b-'); ax1.set_xlabel('time (s)')
# Make the y-axis label, ticks and tick labels match the line color.
ax1.set_ylabel('GLRM_10 Altimetry', color='b')
ax1.tick_params('y', colors='b'); ax2 = ax1.twinx()
s2 = df_modis_subset.resample("A").mean().interpolate().tolist()
ax2.plot(t, s2, 'r-'); ax2.set_ylabel('MODIS Surface Area', color='r')
ax2.tick_params('y', colors='r'); fig.tight_layout(); plt.title( "GRLM & MODIS Overlapping period"); plt.show()
In [20]:
cor = numpy.corrcoef(df_glrm_subset.resample("A").mean().interpolate().tolist(),
df_modis_subset.resample("A").mean().interpolate().tolist())
print("correlation coefficient is: " , cor[0][1])
In [40]:
storage_changes = []
for i in range(len(s1)-1):
del_height = s1[i+1]-s1[i]
avg_area = (s2[i+1] + s2[i])/2
storage_change = del_height*avg_area
storage_changes.append(storage_change)
plt.plot(t[0:-1], storage_changes); plt.title("Storage (Volume) Change"); plt.ylabel("Volume Change (${m^3}$)")
Out[40]: