In [1]:
%matplotlib inline
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import eqcat.parsers.isf_catalogue_reader as icr
import eqcat.catalogue_query_tools as cqt
from IPython.utils import io
In [2]:
# Read the raw ISF catalogue.  The HDF5 file name is derived from the input
# file so later cells can cache the parsed catalogue on disk and skip
# re-parsing on subsequent runs.
raw_file_name = "Marmaries-catalogue1.txt"
base = os.path.basename(raw_file_name)
stem, _ext = os.path.splitext(base)
db_file_name = stem + '.hdf5'
# Events whose ISF comment matches any of these keywords are diverted by the
# parser into reader.rejected_catalogue (summarized in the next cell).
rejection_keywords = ["mining", "geothermal", "explosion", "quarry",
                      "reservoir", "induced", "rockburst"]
reader = icr.ISFReader(raw_file_name, rejection_keywords=rejection_keywords)
catalogue = reader.read_file("TUR", "ISC")
In [3]:
# summarize reasons for rejections
if not os.path.isfile(db_file_name) and len(reader.rejected_catalogue) > 0:
num_rejections = len(reader.rejected_catalogue.events)
keyword_counts = dict(zip(rejection_keywords, [0]*num_rejections))
for event in reader.rejected_catalogue.events:
for keyword in rejection_keywords:
if keyword.lower() in event.comment.lower():
keyword_counts[keyword] = keyword_counts[keyword] + 1
num_keywords_found = sum(keyword_counts.values())
print "Note: %d rejection keywords found in %d rejected events." % \
(num_keywords_found, num_rejections)
print keyword_counts
In [4]:
# Build the HDF5 database from the parsed catalogue, but only if it is not
# already on disk -- later runs reuse the cached file.
have_db = os.path.isfile(db_file_name)
if not have_db:
    _ = catalogue.build_dataframe(db_file_name)
db1 = cqt.CatalogueDB(db_file_name)
In [5]:
db1.origins
temp = db1.origins["eventID"]
temp = list(temp)
uniqueIDs = set(temp)
print "Number of magnitudes: ", len(db1.magnitudes)
print "Number of origins: ", len(db1.origins)
print "Number of unique events: ", len(uniqueIDs)
In [ ]:
# Mine agency/magnitude-scale pairs sharing at least `overlap_threshhold`
# common events.  The miner prints its table to stdout, so capture that
# output and persist it to disk for the parsing cells below.
overlap_threshhold = 50
agency_mag_file_name = os.path.splitext(base)[0] + ('_%d_AgencyPairs.txt'
                                                    % overlap_threshhold)
# BUG FIX: `agency_magnitude_stats` was used below without ever being
# defined anywhere in the notebook (NameError).  Presumably it comes from
# cqt.get_agency_magnitude_statistics -- TODO confirm against the eqcat API.
agency_magnitude_stats = cqt.get_agency_magnitude_statistics(db1)
with io.capture_output() as captured:
    cqt.mine_agency_magnitude_combinations(
        db1, agency_magnitude_stats, overlap_threshhold,
        no_case=True, quiet=True)
# BUG FIX: the original cell ended with a bare dangling `with` (a syntax
# error) and never wrote the captured table, even though the next cell
# reads `agency_mag_file_name` back from disk via np.genfromtxt.
with open(agency_mag_file_name, 'w') as pair_file:
    pair_file.write(captured.stdout)
In [ ]:
# Load the mined agency pairs back from disk.  Every column is a
# fixed-width byte string except column 8, the shared-event count (int).
pair_dtype = "S64,S64,S64,S64,S64,S64,S64,i,S64"
pairs = np.genfromtxt(agency_mag_file_name, dtype=pair_dtype)
In [ ]:
# Have a peek at the first few mined pair records (raw byte-string fields).
pairs[:5]
In [ ]:
# parse the data and look at sets
agencies1 = [pair[1][1:-1].replace('_','') for pair in pairs]
mags1 = [pair[2][:-1].upper() for pair in pairs]
agencies2 = [pair[4][1:-1].replace('_','') for pair in pairs]
mags2 = [pair[5][:-1].upper() for pair in pairs]
nums_shared = [pair[7] for pair in pairs]
print set(mags1)
print set(mags2)
print list(set(mags1) - set(mags2))
print list(set(mags2) - set(mags1))
print set(agencies1)
print set(agencies2)
print list(set(agencies1) - set(agencies2))
print list(set(agencies2) - set(agencies1))
In [ ]:
# prioritize magnitudes and print pairs in order of size of overlap
mag_priority = ['MW','MS','MD','MB','ML']
tex_format = '%s, %s <- [%d] -> %s, %s'
data = zip(agencies1, mags1, agencies2, mags2, nums_shared)
data = list(set(data))
data = sorted(data, key=lambda pair: pair[4], reverse=True)
for i, m1 in enumerate(mag_priority):
for m2 in mag_priority[i:]:
for agency1, mag1, agency2, mag2, n_shared in data:
if (mag1 == m1 and mag2 == m2) or (mag1 == m2 and mag2 == m1):
n1 = agency_magnitude_stats[agency1]['Magnitudes'][mag1]
n2 = agency_magnitude_stats[agency2]['Magnitudes'][mag2]
if mag_priority.index(m1) >= mag_priority.index(m2):
print tex_format % (agency1, mag1, n_shared,agency2, mag2)
else:
print tex_format % ( gency2, mag2, n_shared,
agency1, mag1)
In [ ]: