In [1]:
%matplotlib inline
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import eqcat.parsers.isf_catalogue_reader as icr
import eqcat.catalogue_query_tools as cqt
In [2]:
# Parse the raw ISF text file.  The keyword list is handed to the reader as
# its rejection criteria.  (Open question from the original author: why do we
# bother making an hdf5?)
raw_file_name = "Marmaries-catalogue1.txt"
base = os.path.basename(raw_file_name)
stem, _extension = os.path.splitext(base)
db_file_name = stem + '.hdf5'
rejection_keywords = [
    "mining", "geothermal", "explosion", "quarry",
    "reservoir", "induced", "rockburst",
]
reader = icr.ISFReader(raw_file_name, rejection_keywords=rejection_keywords)
catalogue = reader.read_file("TUR", "ISC")
In [3]:
# Summarize why events were rejected.  Only runs while the database file has
# not been built yet and the reader actually rejected something.
if not os.path.isfile(db_file_name) and len(reader.rejected_catalogue) > 0:
    rejected_events = reader.rejected_catalogue.events
    num_rejections = len(rejected_events)
    # One counter per keyword.  Sizing the counters by the number of rejected
    # events (as zip(keywords, [0] * num_rejections) did) silently drops
    # keywords whenever there are fewer rejections than keywords, and the
    # increment below then raises KeyError.
    keyword_counts = dict.fromkeys(rejection_keywords, 0)
    for event in rejected_events:
        comment = event.comment.lower()  # lower-case once per event
        for keyword in rejection_keywords:
            if keyword.lower() in comment:
                keyword_counts[keyword] += 1
    # An event matching several keywords contributes to each of them, so this
    # total can exceed num_rejections.
    num_keywords_found = sum(keyword_counts.values())
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Note: %d rejection keywords found in %d rejected events."
          % (num_keywords_found, num_rejections))
    print(keyword_counts)
In [4]:
# Write the database file on the first run only; later runs reuse it.
db_already_built = os.path.isfile(db_file_name)
if not db_already_built:
    _ = catalogue.build_dataframe(db_file_name)
db1 = cqt.CatalogueDB(db_file_name)
In [5]:
# Show the first and last rows of the origins table in one frame.
origins_preview = [db1.origins.head(), db1.origins.tail()]
pd.concat(origins_preview)
Out[5]:
In [6]:
# Show the first and last rows of the magnitudes table in one frame.
magnitudes_preview = [db1.magnitudes.head(), db1.magnitudes.tail()]
pd.concat(magnitudes_preview)
Out[6]:
In [7]:
# Compare the table sizes with the number of distinct event IDs.
# (The bare `db1.origins` expression that used to open this cell displayed
# nothing, because only a cell's last expression is rendered.)
uniqueIDs = set(db1.origins["eventID"])
# The double space after each colon reproduces the text the Python 2
# `print "label: ", value` form emitted; single-argument print(...) behaves
# the same on Python 2 and 3.
print("Number of magnitudes:  %d" % len(db1.magnitudes))
print("Number of origins:  %d" % len(db1.origins))
print("Number of unique events:  %d" % len(uniqueIDs))
In [8]:
# Map limits: trim the extreme tails of the coordinate distributions and
# round outward to whole degrees.  Latitude trims twice the fraction that
# longitude does.
outlier_fraction = 0.001
lon_tail = outlier_fraction
lat_tail = 2*outlier_fraction
longitudes = db1.origins.longitude
latitudes = db1.origins.latitude
llon = np.floor(longitudes.quantile(lon_tail))
ulon = np.ceil(longitudes.quantile(1 - lon_tail))
llat = np.floor(latitudes.quantile(lat_tail))
ulat = np.ceil(latitudes.quantile(1 - lat_tail))
map_config = dict(
    llon=llon, ulon=ulon, llat=llat, ulat=ulat,
    parallel=5.0, meridian=5.0, resolution="l",
)
cqt.plot_catalogue_map(map_config, db1)
In [9]:
# Map only the events selected by the depth-range filter.
depth_window = (100, 600)
selector = cqt.CatalogueSelector(db1)
turkey_catalogue = selector.select_within_depth_range(*depth_window)
cqt.plot_catalogue_map(map_config, turkey_catalogue)
In [10]:
# Map only the events selected by the magnitude-range filter.
# NOTE: this rebinds `turkey_catalogue`, replacing the depth selection.
magnitude_window = (5, 8)
selector = cqt.CatalogueSelector(db1)
turkey_catalogue = selector.select_within_magnitude_range(*magnitude_window)
cqt.plot_catalogue_map(map_config, turkey_catalogue)
In [15]:
# Per-agency statistics keyed by agency name.  count_magnitudes() reads the
# 'Magnitudes' entry of each agency (magnitude-type label -> count).
agency_magnitude_stats = cqt.get_agency_magtype_statistics(db1, quiet=True)
In [12]:
def count_magnitudes(mag, agency_magnitude_stats):
    """Count, per agency, the magnitudes reported under one magnitude type.

    The type label is compared case-insensitively over its full length, so
    'Mw', 'MW', 'mw' and 'mW' are pooled while 'Mww' stays a separate type.

    Args:
        mag: magnitude-type label, e.g. 'Mw' (any case, any length).
        agency_magnitude_stats: mapping of agency name to a dict of
            statistics.  Agencies whose dict has a 'Magnitudes' key are
            inspected; that entry maps magnitude-type label to a count.

    Returns:
        dict mapping agency name to the pooled count, restricted to agencies
        whose pooled count is greater than 1.
    """
    wanted = mag.lower()
    has_mag = {}
    for agency, stats in agency_magnitude_stats.items():
        if 'Magnitudes' not in stats:
            continue
        n = sum(count for label, count in stats['Magnitudes'].items()
                if label.lower() == wanted)
        # Threshold kept from the original: an agency with a single
        # magnitude of this type is left out.
        if n > 1:
            has_mag[agency] = n
    return has_mag
In [13]:
# Number of agencies reporting each magnitude type.  One loop replaces five
# copy-pasted lines; the double space after the colon reproduces the text the
# Python 2 `print 'Mw: ', n` form emitted.
for mag_type in ('Mw', 'Md', 'Ms', 'Ml', 'Mb'):
    n_agencies = len(count_magnitudes(mag_type, agency_magnitude_stats))
    print("%s:  %d" % (mag_type, n_agencies))
In [14]:
# Agencies reporting MW, largest count first.
agency_counts = count_magnitudes('MW', agency_magnitude_stats)
count_rows = agency_counts.items()
sorted(count_rows, key=lambda row: row[1], reverse=True)
Out[14]:
In [15]:
# Pair NEIC MS with IDC MS magnitudes (no_case=True).  The trailing ';'
# keeps the notebook from rendering the call's return value.
cqt.get_agency_magnitude_pairs(db1,('NEIC', 'MS'),('IDC', 'MS'),no_case=True);
In [16]:
# round 1: pair ISK ML with each candidate reference MW solution.
# MW_ref is reused later to decide which targets count as reference MW.
MW_ref = [
    ('MED_RCMT', 'MW'),
    ('HRVD', 'MW'),
    ('ZUR_RMT', 'MW'),
    ('GCMT', 'MW'),
    ('NEIC', 'MW'),
]
check = ('ISK', 'ML')
for reference_pair in MW_ref:
    cqt.get_agency_magnitude_pairs(db1, check, reference_pair, no_case=True)
In [17]:
# round 2: pair IDC MS with each MS agency.
# NOTE(review): the list contains ('IDC', 'MS') itself, so the first
# iteration pairs IDC MS with IDC MS -- confirm this is intended.
MS_ref = [
    ('IDC', 'MS'),
    ('ISC', 'MS'),
    ('ISCJB', 'MS'),
    ('MOS', 'MS'),
    ('NEIC', 'MS'),
]
check = ('IDC', 'MS')
for reference_pair in MS_ref:
    cqt.get_agency_magnitude_pairs(db1, check, reference_pair)
In [18]:
# round 3: pair CSEM MD with each MD agency.
MD_ref = [
    ('ATH', 'MD'),
    ('ISK', 'MD'),
]
check = ('CSEM', 'MD')
for reference_pair in MD_ref:
    cqt.get_agency_magnitude_pairs(db1, check, reference_pair)
In [19]:
# round 4: pair CSEM ML with each ML agency.
ML_ref = [
    ('ISK', 'ML'),
    ('ATH', 'ML'),
    ('THE', 'ML'),
    ('DDA', 'ML'),
]
check = ('CSEM', 'ML')
for reference_pair in ML_ref:
    cqt.get_agency_magnitude_pairs(db1, check, reference_pair)
In [20]:
# Pairs of reference MW solutions checked against each other.  Each entry is
# [(agency, magnitude type), (agency, magnitude type)]; the trailing comment
# records the event count noted during review.
# NOTE(review): no visible cell reads `references` -- confirm it is still needed.
references = [
# considered "reference" MW after verification
[('MED_RCMT', 'MW'), ('HRVD', 'MW')], # 218 events
[('ZUR_RMT', 'MW'), ('HRVD', 'MW')], # 84 events
[('GCMT', 'MW'), ('MED_RCMT', 'MW')], # 53 events
[('NEIC', 'MW'), ('MED_RCMT', 'MW')], # 40 events
[('NEIC', 'MW'), ('GCMT', 'MW')], # 29 events
]
# Conversion chain: each entry is [source (agency, type), target (agency, type)].
# The code-generation loop emits one `from_<agency>_<type>` function per entry,
# named after the source pair.  A target that is not in MW_ref is chained
# through the target's own generated function.
comparisons = [
# MW => reference MW
[('NIC', 'MW'), ('MED_RCMT', 'MW')], # 85 overlap => 561 events
[('CSEM', 'MW'), ('NIC', 'MW')], # 238 overlap => 287 events
# MS => reference MW
[('IDC', 'MS'), ('MED_RCMT', 'MW')], # 196 overlap => 1279 events
[('ISC', 'MS'), ('MED_RCMT', 'MW')], # 149 overlap => 762 events
[('ISCJB', 'MS'), ('MED_RCMT', 'MW')], # 79 overlap => 403 events
[('MOS', 'MS'), ('MED_RCMT', 'MW')], # 64 overlap => 291 events
[('BJI', 'MS'), ('MED_RCMT', 'MW')], # 125 overlap => 241 events
[('CSEM', 'MS'), ('MED_RCMT', 'MW')], # 51 overlap => 128 events
# NOTE(review): ('NEIC', 'MS') is the source of the next two entries, so the
# generator emits `from_NEIC_MS` twice and the second definition shadows the
# first -- decide which target should be kept.
[('NEIC', 'MS'), ('MED_RCMT', 'MW')], # 29 overlap => 65 events
[('NEIC', 'MS'), ('HRVD', 'MW')], # 23 overlap => 65 events
# MD => reference MW
[('ATH', 'MD'), ('MED_RCMT', 'MW')], # 87 overlap => 14988 events
[('ISK', 'MD'), ('MED_RCMT', 'MW')], # 55 overlap => 17210 events
[('HLW', 'MD'), ('MED_RCMT', 'MW')], # 40 overlap => 510 events
[('DDA', 'MD'), ('MED_RCMT', 'MW')], # 23 overlap => 6172 events
[('GII', 'MD'), ('MED_RCMT', 'MW')], # 34 overlap => 241 events
# MD => MD => reference MW
[('CSEM', 'MD'), ('ISK', 'MD')], # 8512 overlap => 10863 events
# MB => reference MW
[('ISC', 'MB'), ('MED_RCMT', 'MW')], # 182 overlap => 3953 events
[('IDC', 'MB'), ('MED_RCMT', 'MW')], # 205 overlap => 2455 events
[('NEIC', 'MB'), ('MED_RCMT', 'MW')], # 182 overlap => 1849 events
[('ISCJB', 'MB'), ('MED_RCMT', 'MW')], # 91 overlap => 1161 events
[('MOS', 'MB'), ('MED_RCMT', 'MW')], # 189 overlap => 936 events
[('NIC', 'MB'), ('MED_RCMT', 'MW')], # 128 overlap => 689 events
[('BJI', 'MB'), ('MED_RCMT', 'MW')], # 161 overlap => 602 events
[('CSEM', 'MB'), ('MED_RCMT', 'MW')], # 120 overlap => 425 events
# ML => reference MW
[('ISK', 'ML'), ('MED_RCMT', 'MW')], # 162 overlap => 6545 events
[('ATH', 'ML'), ('MED_RCMT', 'MW')], # 175 overlap => 6365 events
[('THE', 'ML'), ('MED_RCMT', 'MW')], # 172 overlap => 4637 events
[('DDA', 'ML'), ('MED_RCMT', 'MW')], # 73 overlap => 2999 events
[('IDC', 'ML'), ('MED_RCMT', 'MW')], # 167 overlap => 2050 events
[('NIC', 'ML'), ('MED_RCMT', 'MW')], # 129 overlap => 1147 events
[('HLW', 'ML'), ('MED_RCMT', 'MW')], # 55 overlap => 589 events
# ML => ML => reference MW
[('CSEM', 'ML'), ('ISK', 'ML')], # 1969 overlap => 2999 events
# Template for a new entry:
# [("XXX","MM"), ("YYY", "NN")], # ZZZZ overlap => WWWW events
]
In [21]:
# Sanity check that tuple equality finds a pair inside MW_ref: the first
# entry always equals itself, so this is True for any non-empty MW_ref.
any(candidate == MW_ref[0] for candidate in MW_ref)
Out[21]:
In [33]:
# Emit Python source text for one conversion function, plus a companion
# `_sigma` function, per entry of `comparisons`.  The output is meant to be
# pasted into a module; the chained sigma uses math.sqrt, so that module must
# import math.
# NOTE(review): the '"""' lines are printed without indentation and the
# "return" lines with a single leading space, exactly as before.  Check the
# indentation of the generated text before using it.
for pair in comparisons:
    from_agency = pair[0][0]
    from_mag = pair[0][1]
    to_agency = pair[1][0]
    to_mag = pair[1][1]
    # Evaluate once: a target listed in MW_ref is a reference MW, anything
    # else is chained through the target's own generated function.
    target_is_reference = pair[1] in MW_ref

    # start function definition
    print('def from_%s_%s(M):' % (from_agency, from_mag))
    print('"""')
    # Anything get_agency_magnitude_pairs prints lands between the two '"""'
    # lines, i.e. inside the generated docstring.
    query, cat = cqt.get_agency_magnitude_pairs(
        db1, pair[0], pair[1], no_case=True)
    agency_counts = count_magnitudes(from_mag, agency_magnitude_stats)
    # count_magnitudes leaves out agencies with at most one magnitude of this
    # type, so look the agency up with a default instead of raising KeyError.
    # ("yeild" is kept as-is so the generated text does not change.)
    print('Potential yeild %d magnitudes' % agency_counts.get(from_agency, 0))
    # do regression
    regressor = cqt.CatalogueRegressor(query)
    results = regressor.run_regression("polynomial", [0, 1])
    print('"""')

    # finish generating function definitions
    if target_is_reference:
        print(" return %.3f + %.3f*M" % (
            results.beta[0], results.beta[1]))
    else:
        print(" return from_%s_%s(%.3f + %.3f*M)" % (
            to_agency, to_mag, results.beta[0], results.beta[1]))
    # print("") emits a blank line on Python 2 and 3; a bare `print` is a
    # no-op expression on Python 3.
    print("")

    print('def from_%s_%s_sigma(M):' % (from_agency, from_mag))
    _, _, st_dev = regressor.retrieve_model()
    if target_is_reference:
        print(" return %.3f" % st_dev)
    else:
        print(" return math.sqrt(%.3f**2 + from_%s_%s_sigma(M)**2)" % (
            st_dev, to_agency, to_mag))
    print("")
In [31]:
# Uses the most recent `regressor` binding, which is assigned inside the
# code-generation loop; after that loop it belongs to the last comparison.
print(regressor.standard_deviation)
In [25]:
# Visual review: for every comparison pair, map the paired events, fit the
# same first-order polynomial and show the model density.
for pair in comparisons:
    print("")
    pair_query, pair_catalogue = cqt.get_agency_magnitude_pairs(
        db1, pair[0], pair[1], no_case=True)
    cqt.plot_catalogue_map(map_config, pair_catalogue)
    pair_regressor = cqt.CatalogueRegressor(pair_query)
    # The return value is not needed here; the summary is read back from
    # pair_regressor.results on the next line.
    pair_regressor.run_regression("polynomial", [0, 1])
    pair_regressor.results.pprint()
    pair_regressor.plot_model_density(overlay=False, sample=0)
    sigma_text = str(pair_regressor.standard_deviation.round(3))
    print("Model standard deviation is " + sigma_text)