In [1]:
import os
import pickle
In [10]:
class Attribute(object):
    """One ``Attribute`` definition parsed from a schema file.

    Plain value holder: every constructor argument is stored unchanged on
    the instance (``name``, ``dtype``, ``width``, ``fmt``, ``null``).
    """

    def __init__(self, name, dtype, width, fmt, null):
        # No validation or conversion happens here; values are kept verbatim.
        self.name, self.dtype, self.width = name, dtype, width
        self.fmt, self.null = fmt, null
class Relation(object):
    """One ``Relation`` definition parsed from a schema file.

    Keeps the relation ``name`` and its ``fields`` value exactly as passed in.
    """

    def __init__(self, name, fields):
        # The fields object is stored as-is (not copied).
        self.fields = fields
        self.name = name
def parse_chunk(chunk):
    """Route one schema chunk to the parser that matches its first token.

    Returns the result of ``parse_attribute`` when the first
    whitespace-separated token is ``Attribute``, the result of
    ``parse_relation`` when it is ``Relation``, and ``None`` for a blank
    chunk or any other leading token.
    """
    tokens = chunk.split()
    if not tokens:
        return None
    keyword = tokens[0]
    if keyword == "Attribute":
        return parse_attribute(chunk)
    if keyword == "Relation":
        return parse_relation(chunk)
    # Anything else is not a definition this notebook cares about.
    return None
def parse_attribute(chunk):
    """Parse the text of one ``Attribute`` definition into an ``Attribute``.

    Parameters
    ----------
    chunk : str
        Text of a single definition whose first token is ``Attribute``.

    Returns
    -------
    Attribute
        ``name`` is the token following ``Attribute``.  ``dtype`` is the
        Python type mapped from the type keyword (``Real``/``Time`` ->
        ``float``, ``Integer``/``YearDay`` -> ``int``, ``String`` -> ``str``)
        and ``width`` is the integer that follows that keyword.  ``fmt`` and
        ``null`` are the remainder of the ``Format`` / ``Null`` lines with
        all whitespace removed and surrounding parentheses and double quotes
        stripped.  A clause that never appears is left as ``None``.

    Raises
    ------
    IndexError
        If the ``Attribute`` header line carries no name.
    ValueError
        If the text after the type keyword is not an integer width.
    """
    dtypes = {"Real": float, "Integer": int, "String": str,
              "Time": float, "YearDay": int}
    name = dtype = width = fmt = null = None
    for line in chunk.strip().split("\n"):
        tokens = line.split()
        if not tokens:
            continue
        keyword, rest = tokens[0], tokens[1:]
        # The first occurrence of each clause wins.  Later lines of the same
        # chunk may be free text that merely starts with one of these
        # keywords, and must not clobber what was already parsed.
        if keyword == "Attribute":
            if name is None:
                name = rest[0]
        elif keyword in dtypes:
            if dtype is None:
                dtype = dtypes[keyword]
                width = int("".join(rest).strip("()"))
        elif keyword == "Format":
            if fmt is None:
                fmt = "".join(rest).strip("()\"")
        elif keyword == "Null":
            if null is None:
                null = "".join(rest).strip("()\"")
    return Attribute(name, dtype, width, fmt, null)
def parse_relation(chunk):
    """Parse the text of one ``Relation`` definition into a ``Relation``.

    Parameters
    ----------
    chunk : str
        Text of a single definition whose first token is ``Relation``.

    Returns
    -------
    Relation
        ``name`` is the token following ``Relation``; ``fields`` is the list
        of whitespace-separated names in the ``Fields ( ... )`` clause, or
        ``None`` when the chunk has no ``Fields`` line.

    Raises
    ------
    IndexError
        If the ``Relation`` header line carries no name.

    Notes
    -----
    A ``Fields`` clause that opens its parenthesis on the ``Fields`` line but
    closes it on a later line is followed across lines until the first line
    containing ``)``.  Only the first ``Relation`` / ``Fields`` line is
    honoured, so later free-text lines that start with either word cannot
    overwrite the parsed values.
    """
    lines = [line.strip() for line in chunk.strip().split("\n")]
    name, fields = None, None
    for index, line in enumerate(lines):
        tokens = line.split()
        if not tokens:
            continue
        if tokens[0] == "Relation":
            if name is None:
                name = tokens[1]
        elif tokens[0] == "Fields" and fields is None:
            text = " ".join(tokens[1:])
            # Pull in continuation lines while the parenthesis is still open.
            follow = index + 1
            while "(" in text and ")" not in text and follow < len(lines):
                text = " ".join([text] + lines[follow].split())
                follow += 1
            fields = text.strip("()").split()
    return Relation(name, fields)
def _parse_relation(chunk):
chunk = [line.strip() for line in chunk.strip().split("\n")]
name, fields = None, None
i = 0
while i < len(chunk):
line = chunk[i]
if len(line.split()) == 0:
continue
if line.split()[0] == "Relation":
name = line.split()[1]
elif line.split()[0] == "Fields":
fields = " ".join(line.split()[1:]).strip("()").split()
while line.strip()[-1] != ")":
fields += " ".join(line.split()[1:]).strip("()").split()
i += 1
line = chunk[i]
print(fields)
return(Relation(name, fields))
In [11]:
# Build the core css3.0 schema dictionary from the Antelope schema file:
#   schema["Attributes"][name] -> {"dtype", "width", "format", "null"}
#   schema["Relations"][name]  -> the relation's parsed `fields`
CSS30_SCHEMA_PATH = "/opt/antelope/5.8/data/schemas/css3.0"
with open(CSS30_SCHEMA_PATH) as inf:
    # The file is cut at every ";" and each piece is parsed on its own.
    data = inf.read().split(";")
# parse_chunk yields None for pieces that are neither an Attribute nor a
# Relation; the isinstance filters below drop those.
chunks = [parse_chunk(chunk) for chunk in data]
schema = {
    # "null" is stored exactly as parse_attribute returned it (not cast to dtype).
    "Attributes": {obj.name: {"dtype": obj.dtype,
                              "width": obj.width,
                              "format": obj.fmt,
                              "null": obj.null}
                   for obj in chunks
                   if isinstance(obj, Attribute)},
    "Relations": {obj.name: obj.fields
                  for obj in chunks
                  if isinstance(obj, Relation)},
}
In [69]:
# Report how many definitions `schema` holds, then write it to css3.0.pkl
# under seispy/data/schemas.
SCHEMA_PICKLE_PATH = "/Users/malcolcw/Source/seismic-python/seispy/data/schemas/css3.0.pkl"
for section, suffix in (("Attributes", "attributes..."), ("Relations", "relations...")):
    print("found", len(schema[section]), suffix)
with open(SCHEMA_PICKLE_PATH, "wb") as outf:
    print("pickling schema...")
    pickle.dump(schema, outf)
In [17]:
# Merge every css3.0 extension schema file into one dictionary with the same
# layout as the core schema, except that "null" is cast to the attribute's
# dtype.  A definition that reappears with different content is an error.
EXT_SCHEMA_DIR = "/opt/antelope/5.8/contrib/data/schemas/css3.0.ext"
ext = {"Attributes": {},
       "Relations": {}}
for fname in sorted(os.listdir(EXT_SCHEMA_DIR)):
    # This is a hack to avoid a couple troublesome files
    if fname in ("tortrk", "dlsite", "staq330"):
        continue
    with open(os.path.join(EXT_SCHEMA_DIR, fname)) as inf:
        data = inf.read().split(";")
    chunks = [parse_chunk(chunk) for chunk in data]
    for obj in chunks:
        # parse_chunk returns None for anything that is not a definition;
        # neither isinstance branch matches None, so those are skipped.
        if isinstance(obj, Attribute):
            # Cast the null once, guarding a missing Null clause, and use the
            # same value for both the redefinition check and the stored entry
            # so an identical redefinition never looks like a conflict.
            null = obj.dtype(obj.null) if obj.null is not None else None
            entry = {"dtype": obj.dtype,
                     "width": obj.width,
                     "format": obj.fmt,
                     "null": null}
            # Raise an exception if Attribute is being re-defined
            if obj.name in ext["Attributes"]:
                previous = ext["Attributes"][obj.name]
                if any(previous[key] != entry[key] for key in entry):
                    raise ValueError(obj.name, fname)
            ext["Attributes"][obj.name] = entry
        elif isinstance(obj, Relation):
            # Raise an exception if a Relation is being re-defined
            if obj.name in ext["Relations"] and ext["Relations"][obj.name] != obj.fields:
                raise ValueError(obj.name)
            ext["Relations"][obj.name] = obj.fields
In [56]:
# Hand-set null values for selected attributes.
#
# NOTE(review): the cell that pickles `schema` to css3.0.pkl sits above this
# one, and a later cell rebinds `schema` from seispy, so in top-to-bottom
# order the `schema` overrides below may never reach the pickle -- confirm
# the intended execution order.

# Extension attributes whose null is forced to "-".
for _ext_attr in ("model", "dlcalseq", "dlcalseqt", "qgridtype"):
    ext["Attributes"][_ext_attr]["null"] = "-"

# Disabled overrides (commented out in the original cell), kept for reference:
#   ext:    xlow, xhigh, ylow, yhigh, zlow, zhigh -> -99999.999
#           newsnet = {"dtype": str, "format": "%8s", "null": "-", "width": 8}
#   schema: coterr, claerr, cloerr, cdperr, durat, tmnlpb, tmnmw -> -99.9
#           nslpb, nrlpb, nsmw, nrmw -> -99
#           dused -> "-"
#           mrrerr, mtterr, mfferr, mrterr, mrferr, mtferr -> 99.99

# Core-schema attributes whose null is replaced with a numeric value.
for _core_attr, _null_value in (("mexpon", -99),
                                ("bestdc", -9.99),
                                ("naxplg", 999.9),
                                ("naxazm", 999.9)):
    schema["Attributes"][_core_attr]["null"] = _null_value
In [27]:
# Write the merged extension schema dict to css3.0.ext.pkl under
# seispy/data/schemas.
EXT_PICKLE_PATH = "/Users/malcolcw/Source/seismic-python/seispy/data/schemas/css3.0.ext.pkl"
with open(EXT_PICKLE_PATH, "wb") as outf:
    print("pickling schema...")
    pickle.dump(ext, outf)
In [45]:
import seispy
In [46]:
# Rebind `schema` to whatever seispy returns for "css3.0"; the dict built by
# the parsing cell is no longer reachable under this name afterwards.
# NOTE(review): presumably this loads the css3.0.pkl written earlier -- confirm.
schema = seispy.pandas.io.schema.get_schema("css3.0")
In [49]:
# Display the entry stored for the "adoption" relation.
schema["Relations"]["adoption"]
In [51]:
# For each field of the "moment" relation, print a ready-to-paste assignment
# prefix followed by the attribute's current null value and format.
for field in schema["Relations"]["moment"]:
    attribute = schema["Attributes"][field]
    prefix = "".join(("schema[\"Attributes\"][\"", field, "\"][\"null\"] = "))
    print(prefix, attribute["null"], attribute["format"])
In [72]:
# Remove the "wfedit" relation from the schema before it is pickled again.
del schema["Relations"]["wfedit"]
In [73]:
# Report the current definition counts and overwrite css3.0.pkl under
# seispy/data/schemas with the current `schema`.
SCHEMA_PICKLE_PATH = "/Users/malcolcw/Source/seismic-python/seispy/data/schemas/css3.0.pkl"
for section, suffix in (("Attributes", "attributes..."), ("Relations", "relations...")):
    print("found", len(schema[section]), suffix)
with open(SCHEMA_PICKLE_PATH, "wb") as outf:
    print("pickling schema...")
    pickle.dump(schema, outf)
In [74]:
# Display the entry stored for the "wfdisc" relation.
schema["Relations"]["wfdisc"]
Out[74]:
In [ ]: