In [698]:
% matplotlib inline
import pandas as pd
import glob
import matplotlib.pyplot as plt
In [699]:
# Path of the whitespace-delimited text file holding the lake-height records.
GRLM = "345_GRLM10.txt"
print(GRLM)  # single-argument call form prints the same text on Python 2 and 3

# Labels assigned to the file's fields, in file order.
GRLM_COLUMNS = [
    "mission", "cycle", "date", "hour", "minute", "lake_height", "error",
    "mean(decibels)", "IonoCorrection", "TropCorrection",
]

# The first 43 lines are skipped before parsing (presumably a text header -- confirm
# against the file). sep=r"\s+" splits on runs of whitespace exactly like the
# deprecated delim_whitespace=True did.
df_grlm = pd.read_csv(
    GRLM,
    skiprows=43,
    sep=r"\s+",
    names=GRLM_COLUMNS,
    engine='python',
    index_col=False,
)
df_grlm.head(5)
Out[699]:
In [700]:
# Re-read the raw file so the column edits and masking below always start from
# freshly parsed data, even when this cell is run more than once.
df_grlm = pd.read_csv(
    GRLM,
    skiprows=43,
    sep=r"\s+",  # same splitting as delim_whitespace=True, which newer pandas deprecates
    names=[
        "mission", "cycle", "date", "hour", "minute", "lake_height", "error",
        "mean(decibels)", "IonoCorrection", "TropCorrection",
    ],
    engine='python',
    index_col=False,
)
def get_year(date):
    """Return the year held in the first four characters of ``date``.

    ``date`` is converted with ``str`` first, so the integer 20080722
    yields 2008.
    """
    text = str(date)
    return int(text[:4])
def get_month(date):
    """Return the month held in characters five and six of ``date``.

    ``date`` is converted with ``str`` first, so the integer 20080722
    yields 7.
    """
    text = str(date)
    return int(text[4:6])
def get_day(date):
    """Return the day held in the characters after the sixth of ``date``.

    ``date`` is converted with ``str`` first, so the integer 20080722
    yields 22. A whole-valued float such as 20080722.0 (what pandas produces
    when the column also holds NaN) is reduced to an int beforehand;
    otherwise its text form would end in ".0" and the tail "22.0" could not
    be parsed.

    Raises:
        ValueError: if the trailing characters are not an integer literal
            (for example when ``date`` is NaN).
    """
    if isinstance(date, float) and date.is_integer():
        date = int(date)
    return int(str(date)[6:])
# Split the packed date column into separate calendar columns.
df_grlm['year'] = df_grlm['date'].map(get_year)
df_grlm['month'] = df_grlm['date'].map(get_month)
df_grlm['day'] = df_grlm['date'].map(get_day)

# Blank out unusable records. where() keeps every row position and sets rejected
# rows to NaN in all columns; it does not drop them.
has_time = df_grlm['minute'] < 61          # rows that carry a time of day
has_height = df_grlm['lake_height'] < 900  # rows that carry a lake-height value
df_grlm = df_grlm.where(has_time & has_height)
In [701]:
# Lake height as stored in df_grlm, plotted against the frame's row index.
ax = df_grlm.lake_height.plot()
ax.set_title("Actual data without resampling")
ax.set_ylabel("Variation (m)")
Out[701]:
In [702]:
# Same series with its NaN entries filled by interpolate() before plotting.
filled_height = df_grlm.lake_height.interpolate()
ax = filled_height.plot()
ax.set_title("Interpolated Actual data without resampling")
ax.set_ylabel("Variation (m)")
Out[702]:
In [703]:
# Work on a copy: a plain ``df = df_grlm`` only aliases the frame, so the column
# edits below would silently rewrite df_grlm as well.
df = df_grlm.copy()

time_parts = ["year", "month", "day", "hour", "minute"]
# Masked rows hold NaN in these columns; zero-fill them so every field can be cast
# to int and rendered as text without a trailing ".0".
df[time_parts] = df[time_parts].fillna(0).astype(int)

# Join the fields into "Y-M-D-H-M" text matching the format string used below.
df['Time'] = (
    df.year.astype(str)
    .str.cat(df.month.astype(str), sep='-')
    .str.cat(df.day.astype(str), sep='-')
    .str.cat(df.hour.astype(str), sep='-')
    .str.cat(df.minute.astype(str), sep='-')
)

# Drop the zero-filled rows so the datetime index holds no NaT labels.
# (Masking with where() would keep those rows and index them by NaT.)
df = df[df.year > 10]
df.index = pd.to_datetime(df["Time"], format="%Y-%m-%d-%H-%M")

# Show the first and last three timestamps as a sanity check.
print("{} {}".format(df.index[0:3], df.index[-3:]))
In [704]:
# Average lake height per "M" resampling bin (pandas' month-end frequency alias).
monthly_mean = df["lake_height"].resample("M").mean()
ax = monthly_mean.plot()
ax.set_title("Mean Monthly Altimetry")
ax.set_ylabel("Variation (m)")
Out[704]:
In [705]:
# Average lake height per "A" resampling bin (pandas' year-end frequency alias).
annual_mean = df["lake_height"].resample("A").mean()
ax = annual_mean.plot()
ax.set_title("Mean Annual Altimetry")
ax.set_ylabel("Variation (m)")
Out[705]:
In [706]:
# Single-column file; its values are labelled "Area".
df_modis = pd.read_csv('MODIS_t.txt', names=["Area"], engine='python', index_col=False)

# Tab-separated date fields, assumed to line up row-for-row with MODIS_t.txt
# (TODO confirm). Only the first three columns are used; the remaining three get
# unique placeholder names because pandas rejects duplicate entries in ``names``.
df_time = pd.read_csv(
    'DV.txt',
    sep="\t",
    names=["Year", "Month", "Day", "unused_4", "unused_5", "unused_6"],
    engine='python',
    index_col=False,
)
df_time['Time'] = (
    df_time.Year.astype(str)
    .str.cat(df_time.Month.astype(str), sep='-')
    .str.cat(df_time.Day.astype(str), sep='-')
)

# Blank rows without a plausible year. where() is used rather than dropping rows so
# df_time keeps the same length as df_modis; blanked rows become NaT in the index.
df_time = df_time.where(df_time.Year > 10)

# The dates are attached by position, so the two files must have the same row count.
if len(df_time) != len(df_modis):
    raise ValueError(
        "DV.txt has {} rows but MODIS_t.txt has {}; cannot pair dates with areas".format(
            len(df_time), len(df_modis)
        )
    )
df_modis.index = pd.to_datetime(df_time["Time"], format="%Y-%m-%d")
In [707]:
# Surface-area series against its date index.
ax = df_modis.plot()
ax.set_title("MODIS data - Surface Area")
ax.set_ylabel("Surface Area (sq.m.?)")
Out[707]:
In [708]:
# Daily means of lake height, with empty days filled by interpolation.
df_glrm_subset = df["lake_height"].resample("D").mean().interpolate()

# Keep days after 2008-07-22 (exclusive) up to and including 2015-08-13.
after_start = df_glrm_subset.index > '2008-07-22'
until_end = df_glrm_subset.index <= '2015-08-13'
df_glrm_subset = df_glrm_subset[after_start & until_end]

ax = df_glrm_subset.plot()
ax.legend()
ax.set_title("Subset of Altimetry")
ax.set_ylabel("Variation (m)")
df_glrm_subset.index
Out[708]:
In [709]:
# Daily means of the area series, with empty days filled by interpolation.
df_modis_daily = df_modis["Area"].resample("D").mean()
df_modis_daily = df_modis_daily.interpolate()

# Keep days after 2008-07-22 (exclusive) up to and including 2015-08-13.
after_start = df_modis_daily.index > '2008-07-22'
until_end = df_modis_daily.index <= '2015-08-13'
df_modis_subset = df_modis_daily[after_start & until_end]

df_modis_subset.plot()
df_modis_subset.index
Out[709]:
In [710]:
# QA: print a run of consecutive calendar days as long as the MODIS subset, so the
# last date and the length shown can be compared with the subset's own index.
# The subsets are cut with ``index > '2008-07-22'``, so their first possible day is
# 2008-07-23; starting the range on the 22nd would shift every date by one day.
# The ISO date spelling also avoids any day-first / month-first ambiguity.
print(pd.date_range('2008-07-23', periods=len(df_modis_subset), freq='D'))
In [711]:
# Check that the altimetry and MODIS subsets hold the same number of samples.
# A single-argument print call produces the same "N M" text on Python 2 and 3.
print("{} {}".format(len(df_glrm_subset), len(df_modis_subset)))
In [712]:
import numpy

# Weekly means of both daily series, with empty weeks filled by interpolation.
weekly_height = df_glrm_subset.resample("W").mean().interpolate()
weekly_area = df_modis_subset.resample("W").mean().interpolate()

# Pair the series on their shared weekly timestamps and discard incomplete pairs:
# corrcoef needs equal-length inputs and returns NaN if any value is NaN.
paired = pd.concat([weekly_height, weekly_area], axis=1, join="inner").dropna()

# 2x2 correlation matrix; the off-diagonal entry is the height/area coefficient.
cor = numpy.corrcoef(paired.iloc[:, 0].values, paired.iloc[:, 1].values)
print("correlation coefficient is:  {}".format(cor[0][1]))