In [1]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('font', size=15)
mpl.rc('figure', figsize=(8, 5))
import numpy as np
import scipy.signal as sig
import keras
from keras.layers import Input, Dense, Activation, Dropout
from keras.models import Model
from keras.models import load_model
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from keras import regularizers
from keras.initializers import glorot_normal, glorot_uniform
from keras.optimizers import Adam
from keras import backend
from mpl_toolkits.basemap import Basemap
from matplotlib.path import Path
In [2]:
# fix random seed for reproducibility
np.random.seed(7)
# Load data and exclude NaN values (filter below)
#data = np.genfromtxt('IRISwest.txt')
data = np.genfromtxt('IRIS.txt')
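# The comment above promises NaN exclusion, but no filter followed; a minimal
# sketch, assuming missing entries show up as NaN after genfromtxt:
data = data[~np.isnan(data).any(axis=1)]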
In [3]:
##### Maybe if we cut out the IRIS P waves we can get better results #####
# Going to choose 6000 m/s as the cutoff velocity
#Velocity = np.divide(data[:,13], np.subtract(data[:,17], data[:,1]))
##print(Velocity)
#pwaveomit = 6000
#data = data[Velocity < pwaveomit]
print(len(data))
In [4]:
# To cut out non-land points (requires Basemap land polygons) #
#eq_lat2 = data[:,11]
#eq_lon2 = data[:,12]
#map1 = Basemap(projection='aeqd', lon_0 = 10, lat_0 = 50, resolution='h')
#lats = eq_lat2  #[:100]
#lons = eq_lon2
#x, y = map1(lons, lats)
#locations = np.c_[x, y]
#polygons = [Path(p.boundary) for p in map1.landpolygons]
#result = np.zeros(len(locations), dtype=bool)
#for polygon in polygons:
#    result |= polygon.contains_points(locations)
###eq_lat1=lats[result]
###eq_lon1=lons[result]
#print (len(data))
###print (result)
#data =data[result]
#print (len(data))
In [5]:
# Extract X and y and divide into train and val sets
X = data[:, [2, 11, 12, 13, 14, 15]]
Z = np.log10(data[:, 18])
print(X.shape)
#y = np.subtract(data[:,17], data[:,1])  # alternative target: raw travel time
y = np.divide(data[:,13], np.subtract(data[:,17], data[:,1]))  # IRIS: average velocity = distance / travel time
print(y)
# Cut out earthquakes with a ground velocity less than 1e-6 m/s
mask = Z > -6.0
y = y[mask]
X = X[mask]
data = data[mask]
distance = data[:,13]
# Normalize features and target to zero mean, unit variance
# (applied to the raw values here, not their logarithms)
X -= np.mean(X, axis=0)
X /= np.std(X, axis=0)
mean_y = np.mean(y, axis=0)
stdv_y = np.std(y, axis=0)
y = (y - mean_y)/stdv_y
# Shuffle, then divide into train and val sets
mask = np.random.permutation(X.shape[0])  # reproducible: np.random.seed(7) was set above
X = X[mask]
y = y[mask]
distance = distance[mask]
tfrac = int(0.8*y.size)
X_train = X[:tfrac]
y_train = y[:tfrac]
X_val = X[tfrac:]
y_val = y[tfrac:]
distance_val = distance[tfrac:]
print('X_train shape: {}'.format(X_train.shape))
print('y_train shape: {}'.format(y_train.shape))
print('X_val shape: {}'.format(X_val.shape))
print('y_val shape: {}'.format(y_val.shape))
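In [ ]:
# Optional sketch (not in the original run): persist the target-normalization
# statistics so predictions on new data can be de-normalized consistently at
# inference time. The filename 'norm_stats.npz' is a placeholder.
np.savez('norm_stats.npz', mean_y=mean_y, stdv_y=stdv_y)
saved = np.load('norm_stats.npz')
print(saved['mean_y'], saved['stdv_y'])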
In [6]:
# Root mean square error metric (uncomment and pass metrics=[rmse] to compile)
#def rmse(y_val, y_pred):
#    return backend.sqrt(backend.mean(backend.square(y_pred - y_val), axis=-1))
In [7]:
def QuakeNet(input_shape, lr=1e-3, reg=0.00, dropout=0.5):
    # original defaults: (input_shape, lr=1e-3, reg=0.00, dropout=0.0)
    X_input = Input(input_shape)
    # Seven fully connected hidden layers of 64 units with ReLU activations
    X = X_input
    for _ in range(7):
        X = Dense(64, kernel_regularizer=regularizers.l2(reg),
                  activation='relu')(X)
    X = Dropout(rate=dropout)(X)
    X = Dense(1, kernel_regularizer=regularizers.l2(reg))(X)
    model = Model(inputs=X_input, outputs=X, name='QuakeNet')
    model.compile(optimizer=Adam(lr=lr), loss='mse')  #metrics=[rmse]
    return model
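In [ ]:
# Optional sketch (not used in the original run): early stopping could curb
# overfitting over the 400-epoch fit below. EarlyStopping is a standard Keras
# callback; the patience value here is an untuned assumption.
from keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=25, restore_best_weights=True)
# Usage: model.fit(..., callbacks=[early_stop])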
In [8]:
input_shape = (X_train.shape[1], )
model = QuakeNet(input_shape=input_shape)
model.summary()
In [9]:
stats = model.fit(X_train, y_train, epochs=400, batch_size=250, validation_data=(X_val, y_val))
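In [ ]:
# A minimal follow-up: plot the training and validation loss recorded by
# model.fit above (History.history has 'loss' and 'val_loss' keys because
# validation_data was supplied).
plt.plot(stats.history['loss'], label='train MSE')
plt.plot(stats.history['val_loss'], label='val MSE')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()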
In [ ]:
model.save('allirisavtoat.hdf5')
# load_model was already imported above from keras.models
model = load_model('allirisavtoat.hdf5')
In [ ]:
y_pred = model.predict(X_val)  # any new data normalized the same way could be substituted for X_val
# Inverse-normalize
y_val = y_val*stdv_y + mean_y
y_pred = y_pred*stdv_y + mean_y
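In [ ]:
# A quick sanity check (not in the original notebook): RMSE of the
# de-normalized predictions, in the same units as y (average velocity).
rmse_val = np.sqrt(np.mean((y_pred.flatten() - y_val)**2))
print('Validation RMSE: {:.4g}'.format(rmse_val))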
In [ ]:
# To cross-validate the model itself rather than a single set of predictions.
# Note: cross_val_score expects a scikit-learn estimator, so the Keras model
# would first need wrapping (e.g. in KerasRegressor).
#kfold = KFold(n_splits=10, shuffle=True, random_state=7)
#results = cross_val_score(model, X_val, y_val, cv=kfold)
#print(results)
#print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
In [ ]:
print(y_val.shape)
y_pred = y_pred.flatten()
print(y_pred)
fig, ax = plt.subplots()
print(distance_val.shape)
# Convert average velocities back to arrival (travel) times: t = distance / v
y_val = (1/y_val)*distance_val
y_pred = (1/y_pred)*distance_val
y_val = abs(y_val)
y_pred = abs(y_pred)
# Compute limits after taking abs so the 1:1 line spans the plotted data
v_min = min(np.min(y_val), np.min(y_pred))
v_max = max(np.max(y_val), np.max(y_pred))
x = np.linspace(v_min, v_max, 1000)
ax.plot(y_val, y_pred, '.')
ax.plot(x, x)
ax.set(xlabel='True arrival time [s]', ylabel='Predicted arrival time [s]')
fig.tight_layout()
plt.savefig('allirisAVtoAT.png', dpi=300, bbox_inches='tight')
plt.show()
In [ ]:
x = np.linspace(v_min, v_max, 1000)
fig2, ax = plt.subplots()
# Log-spaced bins; np.logspace needs an integer bin count
nbins = int(np.sqrt(15000))  #12279
x_bins = np.logspace(np.log10(y_val.min()), np.log10(y_val.max()), nbins)
y_bins = np.logspace(np.log10(y_pred.min()), np.log10(y_pred.max()), nbins)
H, xedges, yedges = np.histogram2d(y_val, y_pred, bins=[x_bins, y_bins])
h = ax.pcolormesh(xedges, yedges, H.T)
#ax.set_xscale('log')
#ax.set_yscale('log')
ax.axis([yedges.min(), yedges.max(), yedges.min(), yedges.max()])
ax.set(xlabel='Actual arrival times [s]', ylabel='Predicted arrival times [s]',
       title='Actual vs. predicted arrival times')
cbar = plt.colorbar(h, ax=ax)
ax.plot(x, x, c='r', linewidth=.5)
cbar.set_label('Counts', rotation=270, labelpad=9)
fig2.tight_layout()
ax.set(adjustable='box', aspect='equal')
plt.savefig('allirisAVtoATdensity.png', dpi=300, bbox_inches='tight')
plt.show()
In [ ]:
# Relative error of the predicted arrival times
z = np.array(abs((y_val - y_pred)/y_val))
print(z)
print(z.shape)
print(np.min(z))
print(np.max(z))
print (np.average(z))
plt.hist(z, bins=30, range=[0, 1.0], facecolor='blue', alpha=0.5)
#plt.ylim(0, 24000)
plt.xlabel('(Predicted-Actual)/Actual error')
plt.ylabel('Counts')
plt.title('Relative error of predicted arrival times')
plt.savefig('allirisAVtoAThist.png', dpi=300, bbox_inches='tight')
plt.show()
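In [ ]:
# A short follow-up sketch (not in the original): the mean relative error above
# is sensitive to the long tail, so report the median and 90th percentile too.
print('Median relative error: {:.3f}'.format(np.median(z)))
print('90th percentile: {:.3f}'.format(np.percentile(z, 90)))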
In [ ]:
# To check whether the target is uniformly distributed (the input data, not the predictions)
#weights = (np.ones_like(y_pred)/float(len(y_pred)))*100
#bins = 100
#plt.hist(y, bins=bins, facecolor='blue', alpha=0.5)
#plt.xlabel('EQ ground velocities [m/s]')
In [ ]:
# Requires compiling the model with metrics=[rmse] (see the rmse cell above)
#plt.plot(stats.history['rmse'])
#plt.xlabel('Epoch')
#plt.ylabel('RMSE')
#plt.title('Root mean square error for average velocity')
#plt.savefig('iwAVATrmse1.png', dpi=300, bbox_inches='tight')
#plt.show()