In [1]:
import os
import pickle

In [10]:
class Attribute(object):
    def __init__(self, name, dtype, width, fmt, null):
        self.name = name
        self.dtype = dtype
        self.width = width
        self.fmt = fmt
        self.null = null
#         print("Attribute:", self.name, self.dtype, self.width, self.fmt, self.null)

class Relation(object):
    def __init__(self, name, fields):
        self.name = name
        self.fields = fields
#         print("Relation:", self.name, self.fields)

def parse_chunk(chunk):
    if len(chunk.split()) == 0:
        return(None)
    if chunk.split()[0] == "Attribute":
        return(parse_attribute(chunk))
    elif chunk.split()[0] == "Relation":
        return(parse_relation(chunk))

def parse_attribute(chunk):
    chunk = [line.strip() for line in chunk.strip().split("\n")]
    name, dtype, width, fmt, null = None, None, None, None, None
    for line in chunk:
        if len(line.split()) == 0:
            continue
        if line.split()[0] == "Attribute":
            name = line.split()[1]
        elif line.split()[0] in ("Real", "Integer", "String", "Time", "YearDay") and dtype is None:
            if line.split()[0] == "Real":
                dtype = float
            elif line.split()[0] == "Integer":
                dtype = int
            elif line.split()[0] == "String":
                dtype = str
            elif line.split()[0] == "Time":
                dtype = float
            elif line.split()[0] == "YearDay":
                dtype = int
            width = int("".join(line.split()[1:]).strip("()"))
        elif line.split()[0] == "Format":
            fmt = "".join(line.split()[1:]).strip("()\"")
        elif line.split()[0] == "Null":
            null = "".join(line.split()[1:]).strip("()\"")
    return(Attribute(name, dtype, width, fmt, null))

def parse_relation(chunk):
    chunk = [line.strip() for line in chunk.strip().split("\n")]
    name, fields = None, None
    for line in chunk:
        if len(line.split()) == 0:
            continue
        if line.split()[0] == "Relation":
            name = line.split()[1]
        elif line.split()[0] == "Fields":
            fields = " ".join(line.split()[1:]).strip("()").split()
    return(Relation(name, fields))

def _parse_relation(chunk):
    chunk = [line.strip() for line in chunk.strip().split("\n")]
    name, fields = None, None
    i = 0
    while i < len(chunk):
        line = chunk[i]
        if len(line.split()) == 0:
            continue
        if line.split()[0] == "Relation":
            name = line.split()[1]
        elif line.split()[0] == "Fields":
            fields = " ".join(line.split()[1:]).strip("()").split()
            while line.strip()[-1] != ")":
                fields += " ".join(line.split()[1:]).strip("()").split()
                i += 1
                line = chunk[i]
            print(fields)
    return(Relation(name, fields))

In [11]:
with open("/opt/antelope/5.8/data/schemas/css3.0") as inf:
    data = inf.read().split(";")
chunks = [parse_chunk(chunk) for chunk in data]
schema = {"Attributes": {obj.name: {"dtype": obj.dtype,
                                    "width": obj.width,
                                    "format": obj.fmt,
                                    "null": obj.null}
                         for obj in chunks
                         if isinstance(obj, Attribute)},
          "Relations": {obj.name: obj.fields for obj in chunks
                                             if isinstance(obj, Relation)}
          }
for attr in schema["Attributes"]:
    attrib = schema["Attributes"][attr]
    attrib["null"]

In [69]:
print("found", len(schema["Attributes"]), "attributes...")
print("found", len(schema["Relations"]), "relations...")
with open("/Users/malcolcw/Source/seismic-python/seispy/data/schemas/css3.0.pkl", "wb") as outf:
    print("pickling schema...")
    pickle.dump(schema, outf)


found 421 attributes...
found 66 relations...
pickling schema...

Extension tables


In [17]:
ext = {"Attributes": {},
       "Relations": {}}
for file in sorted(os.listdir("/opt/antelope/5.8/contrib/data/schemas/css3.0.ext")):
    # This is a hack to avoid a couple troublesome files
    if file in ("tortrk", "dlsite", "staq330"):
        continue
    with open("/opt/antelope/5.8/contrib/data/schemas/css3.0.ext/%s" % file) as inf:
        data = inf.read().split(";")
    chunks = [parse_chunk(chunk) for chunk in data]
    for obj in chunks:
        if obj is None:
            continue
        # Raise an exception if Attribute is being re-defined
        if isinstance(obj, Attribute):
            if obj.name in ext["Attributes"]:
                if ext["Attributes"][obj.name]["dtype"] != obj.dtype\
                        or ext["Attributes"][obj.name]["width"] != obj.width\
                        or ext["Attributes"][obj.name]["format"] != obj.fmt\
                        or ext["Attributes"][obj.name]["null"] != obj.dtype(obj.null):
                    raise(ValueError(obj.name, file))
            ext["Attributes"][obj.name] = {"dtype": obj.dtype,
                                           "width": obj.width,
                                           "format": obj.fmt,
                                           "null": obj.dtype(obj.null) if obj.null is not None else None}
        elif isinstance(obj, Relation):
            # Raise an exception if a Relation is being re-defined
            if obj.name in ext["Relations"]:
                if ext["Relations"][obj.name] != obj.fields:
                    raise(ValueError(obj.name))
            ext["Relations"][obj.name] = obj.fields

Set some ill-defined null values


In [56]:
ext["Attributes"]["model"]["null"] = "-"
ext["Attributes"]["dlcalseq"]["null"] = "-"
ext["Attributes"]["dlcalseqt"]["null"] = "-"
# ext["Attributes"]["xlow"]["null"] = -99999.999
# ext["Attributes"]["xhigh"]["null"] = -99999.999
# ext["Attributes"]["ylow"]["null"] = -99999.999
# ext["Attributes"]["yhigh"]["null"] = -99999.999
# ext["Attributes"]["zlow"]["null"] = -99999.999
# ext["Attributes"]["zhigh"]["null"] = -99999.999
ext["Attributes"]["qgridtype"]["null"] = "-"

# schema["Attributes"]["coterr"]["null"] =  -99.9
# schema["Attributes"]["claerr"]["null"] =  -99.9
# schema["Attributes"]["cloerr"]["null"] =  -99.9
# schema["Attributes"]["cdperr"]["null"] =  -99.9
# schema["Attributes"]["durat"]["null"] =   -99.9
# schema["Attributes"]["nslpb"]["null"] =  -99
# schema["Attributes"]["nrlpb"]["null"] =  -99
# schema["Attributes"]["tmnlpb"]["null"] =  -99.9
# schema["Attributes"]["nsmw"]["null"] =  -99
# schema["Attributes"]["nrmw"]["null"] =  -99
# schema["Attributes"]["tmnmw"]["null"] =  -99.9
# schema["Attributes"]["dused"]["null"] =  "-"
# schema["Attributes"]["mrrerr"]["null"] =  99.99
# schema["Attributes"]["mtterr"]["null"] =  99.99
# schema["Attributes"]["mfferr"]["null"] =  99.99
# schema["Attributes"]["mrterr"]["null"] =  99.99
# schema["Attributes"]["mrferr"]["null"] =  99.99
# schema["Attributes"]["mtferr"]["null"] =  99.99
                     
# ext["Attributes"]["newsnet"] = {"dtype": str, "format": "%8s", "null": "-", "width": 8}
schema["Attributes"]["mexpon"]["null"] = -99
schema["Attributes"]["bestdc"]["null"] = -9.99
schema["Attributes"]["naxplg"]["null"] = 999.9
schema["Attributes"]["naxazm"]["null"] = 999.9

In [27]:
with open("/Users/malcolcw/Source/seismic-python/seispy/data/schemas/css3.0.ext.pkl", "wb") as outf:
    print("pickling schema...")
    pickle.dump(ext, outf)


pickling schema...

In [45]:
import seispy

In [46]:
schema = seispy.pandas.io.schema.get_schema("css3.0")

In [49]:
schema["Relations"]["adoption"]


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-49-04d52322981d> in <module>()
----> 1 schema["Relations"]["adoption"]

KeyError: 'adoption'

In [51]:
for field in schema["Relations"]["moment"]:
    print("schema[\"Attributes\"][\""+field+"\"][\"null\"] = ", schema["Attributes"][field]["null"], schema["Attributes"][field]["format"])


schema["Attributes"]["orid"]["null"] =  -1 %8ld
schema["Attributes"]["mexpon"]["null"] =  None %3ld
schema["Attributes"]["mrr"]["null"] =  99.99 %5.2f
schema["Attributes"]["mtt"]["null"] =  99.99 %5.2f
schema["Attributes"]["mff"]["null"] =  99.99 %5.2f
schema["Attributes"]["mrt"]["null"] =  99.99 %5.2f
schema["Attributes"]["mrf"]["null"] =  99.99 %5.2f
schema["Attributes"]["mtf"]["null"] =  99.99 %5.2f
schema["Attributes"]["mrrerr"]["null"] =  -9.99 %5.2f
schema["Attributes"]["mtterr"]["null"] =  -9.99 %5.2f
schema["Attributes"]["mfferr"]["null"] =  -9.99 %5.2f
schema["Attributes"]["mrterr"]["null"] =  -9.99 %5.2f
schema["Attributes"]["mrferr"]["null"] =  -9.99 %5.2f
schema["Attributes"]["mtferr"]["null"] =  -9.99 %5.2f
schema["Attributes"]["taxval"]["null"] =  99.99 %6.2f
schema["Attributes"]["taxplg"]["null"] =  999.9 %5.1f
schema["Attributes"]["taxazm"]["null"] =  999.9 %5.1f
schema["Attributes"]["paxval"]["null"] =  99.99 %5.2f
schema["Attributes"]["paxplg"]["null"] =  999.9 %5.1f
schema["Attributes"]["paxazm"]["null"] =  999.9 %5.1f
schema["Attributes"]["naxval"]["null"] =  99.99 %5.2f
schema["Attributes"]["naxplg"]["null"] =  AxisAzimuthandPlunge %5.1f
schema["Attributes"]["naxazm"]["null"] =  AxisAzimuthandPlunge %5.1f
schema["Attributes"]["bestdc"]["null"] =  None %5.2f
schema["Attributes"]["str1"]["null"] =  999.9 %5.1f
schema["Attributes"]["dip1"]["null"] =  999.9 %5.1f
schema["Attributes"]["rake1"]["null"] =  9999.9 %6.1f
schema["Attributes"]["str2"]["null"] =  999.9 %5.1f
schema["Attributes"]["dip2"]["null"] =  999.9 %5.1f
schema["Attributes"]["rake2"]["null"] =  9999.9 %6.1f
schema["Attributes"]["dused"]["null"] =  - %-10s
schema["Attributes"]["auth"]["null"] =  - %-15s
schema["Attributes"]["commid"]["null"] =  -1 %8ld
schema["Attributes"]["lddate"]["null"] =  -9999999999.99900 %17.5f

In [72]:
del(schema["Relations"]["wfedit"])

In [73]:
print("found", len(schema["Attributes"]), "attributes...")
print("found", len(schema["Relations"]), "relations...")
with open("/Users/malcolcw/Source/seismic-python/seispy/data/schemas/css3.0.pkl", "wb") as outf:
    print("pickling schema...")
    pickle.dump(schema, outf)


found 421 attributes...
found 64 relations...
pickling schema...

In [74]:
schema["Relations"]["wfdisc"]


Out[74]:
['sta',
 'chan',
 'time',
 'wfid',
 'chanid',
 'jdate',
 'endtime',
 'nsamp',
 'samprate',
 'calib',
 'calper',
 'instype',
 'segtype',
 'datatype',
 'clip',
 'dir',
 'dfile',
 'foff',
 'commid',
 'lddate']

In [ ]: