This notebook was used to develop functionality that is now in pmagpy/data_model3.py. Examples of how to use the data_model3 module can be found in the "Importing datamodel module" section below. In general, the data model is imported into the GUIs to provide column names, controlled and suggested vocabularies, and validations for column values.
In [1]:
# import req'd modules
import json
import os
import pandas as pd
from pandas import DataFrame, Series
import numpy as np
import pmagpy.builder2 as builder
In [2]:
# json is the same format that the MagIC data model comes in
# turn a json string into Python:
json_string = '{"first_name": "Guido", "last_name":"Rossum"}'
parsed = json.loads(json_string)
# turn Python into a json string
d = {'hello': 'hi', 'so long': 'goodbye'}
dumped = json.dumps(d)
# store json in a file -- dump the object itself, not the already-dumped
# string (dumping `dumped` would double-encode and force two loads on read);
# `with` closes the handles, which the original left open
with open('stored.json', 'w') as outfile:
    json.dump(d, outfile)
# read the json file back into Python
with open('stored.json', 'r') as infile:
    loaded = json.load(infile)
loaded
Out[2]:
In [3]:
# parsing unicode
# NOTE: `unicode` is a Python 2 builtin (under Python 3 the equivalent is
# bytes.decode(errors='ignore')). With errors='ignore' the non-ascii bytes
# here (two UTF-8 encoded non-breaking spaces, 0xc2 0xa0) are dropped.
unicode('\r\n\n\xc2\xa0\xc2\xa0', errors='ignore')
Out[3]:
In [4]:
# the code in this block has been incorporated into data_model3.py
def get_data_model():
    """
    Read the MagIC data model from pmagpy/data_model/data_model.json
    (relative to this notebook's directory) and return it as a pandas
    DataFrame.

    Raises IOError if the json file is not found.
    """
    model_file = os.path.join('..', '..', 'pmagpy', 'data_model', 'data_model.json')
    # read the whole file and close the handle promptly; the original
    # left the file open and re-joined readlines() with '\n', which
    # doubled every newline
    with open(model_file, 'r') as f:
        string = f.read()
    # `unicode` is a Python 2 builtin; errors='ignore' drops any bytes
    # the default codec cannot decode
    raw = json.loads(unicode(string, errors='ignore'))
    full = DataFrame(raw)
    return full
# try to load the data model from disk; if the json file isn't where we
# expect (e.g. notebook run from a different directory), set `skip` so
# the dependent cells below become no-ops
try:
    full = get_data_model()
except IOError:
    skip = True
    print 'Skip this block'
else:
    skip = False
if not skip:
    DataFrame(full['tables']['locations'])
    location = DataFrame(full['tables']['locations']['columns'])
    # transpose so the MagIC column names become the row index
    location = location.transpose()
    #full['tables']['locations'].pop('columns')
    #full['tables']['locations']
    # don't really need anything that isn't in ['tables'][table]['columns']
    location[:3]
    full_df = get_data_model()
    data_model = {}
    levels = ['specimens', 'samples', 'sites', 'locations', 'criteria']
    # build a {table_name: DataFrame} lookup, one transposed frame per table
    for level in levels:
        df = DataFrame(full_df['tables'][level]['columns'])
        data_model[level] = df.transpose()
    data_model['sites']
In [5]:
# examples of pulling different pieces of data out of the data model
if not skip:
    # all rows (column definitions) whose group is 'Age'
    age_columns = location[location['group'] == 'Age']
    age_columns
In [6]:
if not skip:
    # get a particular column definition by label; .loc replaces the
    # long-deprecated (and since removed) .ix indexer for label lookup
    location.loc['age_high']
In [7]:
if not skip:
    # get validations for a particular column
    # (.loc replaces the deprecated .ix label indexer)
    validations = location.loc['age_high']['validations']
    validations
In [8]:
if not skip:
    # every distinct group name used by the locations table
    pd.unique(location['group'])
In [9]:
if not skip:
    # all column definitions belonging to a single group
    group = 'Direction'
    in_group = location['group'] == group
    location[in_group]
In [10]:
if not skip:
    # get all column labels for locations; parenthesized single-argument
    # print behaves identically under Python 2 and 3
    print(list(location.index))
    # check whether specific columns are flagged required()
    # (.loc replaces the deprecated .ix label indexer)
    print('required()' in location.loc['location']['validations'])
    print('required()' in location.loc['continent_ocean']['validations'])
In [11]:
if not skip:
    # get list of unique groups for location (parenthesized print is
    # valid in both Python 2 and 3)
    print(location['group'].unique())
    # sort column names by group
    location.sort_values('group').head()
In [12]:
if not skip:
    # get headers the way we do them in the current builder.py
    # (not sure we will actually want to do it like this as we update
    # magic_gui.py and pmag_gui.py)
    # a column counts as "required" if its validations mention required()
    cond = location['validations'].map(lambda x: 'required()' in str(x))
    reqd_loc_headers = [str(i) for i in location[cond].index]
    # location.index is the same index object the original reached via
    # location['validations'].index
    all_loc_headers = [str(i) for i in location.index if i not in reqd_loc_headers]
    headers = [[], reqd_loc_headers, all_loc_headers]  # this is basically how self.headers is organized now in builder.py
    print(headers)
In [13]:
if not skip:
    # compare the required and optional header lists; each set
    # difference shows what is in one list but not the other
    required_set = set(headers[1])
    optional_set = set(headers[2])
    required_set - optional_set
    optional_set - required_set
In [14]:
# the functionality prototyped above now lives in pmagpy.data_model3
import pmagpy.data_model3 as dm
# reload is a Python 2 builtin; it picks up any edits made to the module
reload(dm)
data_model = dm.DataModel()
# .dm maps table name -> DataFrame of that table's column definitions
data_model.dm['locations'].head()
Out[14]:
In [15]:
import pmagpy.data_model3 as data_model
In [16]:
dir(data_model)
reload(data_model)  # reload is a Python 2 builtin
model_container = data_model.DataModel()
# NB: rebinds `dm` (previously the data_model3 module alias) to the
# container's dict of per-table DataFrames
dm = model_container.dm
locs = dm['locations']
#dm['locations']['validations'].str.join(", ")
# flatten each validations list into one comma-separated string
locs['str_validations'] = locs['validations'].str.join(", ")
# raw string: \( and \) are regex escapes, not (invalid) string escapes
locs[locs['str_validations'].str.contains(r"required\(\)").fillna(False)]
Out[16]:
In [17]:
# list the column groups defined for the sites table (parenthesized
# print is valid in both Python 2 and 3)
print(model_container.get_groups('sites'))
#print type(model_container.get_headers('sites', 'Age'))
In [18]:
# make sure various pieces of the controlled vocabulary stuff works
import pmagpy.controlled_vocabularies3 as cv
import pmagpy.data_model3 as dm
import numpy as np
reload(dm)  # reload is a Python 2 builtin
reload(cv)
#print dir(cv)
#print dir(cv.vocab)
vc = cv.Vocabulary()
# method codes come back as a (all_codes, code_types) pair
all_codes, code_types = vc.get_meth_codes()
vc.get_tiered_meth_category('other', all_codes, code_types)
data = vc.get_controlled_vocabularies()
# presumably the no-network fallback, judging by the name -- confirm
vc.get_tiered_meth_category_offline()
vc.get_all_vocabulary()
def get_cv_from_list(lst):
    """
    Pull the controlled-vocabulary name out of a list of validation
    strings, e.g. ['cv("geologic_classes")'] -> 'geologic_classes'.

    Returns None when no cv() entry is present, or when lst is not
    iterable / contains non-strings (e.g. NaN for columns that have
    no validations).
    """
    try:
        for validation in lst:
            if "cv(" not in validation:
                continue
            # strip the leading 'cv("' and the trailing '")'
            return validation[4:-2]
    except TypeError:
        pass
    return None
# rebind data_model to a fresh DataModel instance
data_model = dm.DataModel()
dir(data_model)
data_model.dm['sites']
site_dm = data_model.dm['sites']
# tag every column with the name of its controlled vocabulary (or None)
site_dm['vocab_name'] = site_dm['validations'].apply(get_cv_from_list)
site_dm[['vocab_name', 'validations']][site_dm['vocab_name'].notnull()]
dir(vc)
# .loc replaces the deprecated .ix label indexer; parenthesized print
# works in both Python 2 and 3
print(site_dm.loc['geologic_classes']['validations'])
vc.vocabularies['geologic_classes'][:5]
vc.vocabularies['age_unit']
vc.vocabularies
Out[18]:
In [19]:
# fetch the controlled vocabularies on their own
import pmagpy.controlled_vocabularies3 as cv
import pmagpy.data_model3 as dm
import numpy as np
import pandas as pd
reload(dm)  # reload is a Python 2 builtin
reload(cv)
#print dir(cv)
#print dir(cv.vocab)
vocab = cv.Vocabulary()
vocabulary = vocab.get_controlled_vocabularies()
# vocabulary supports .head(), so it behaves like a pandas object
vocabulary.head()
Out[19]:
In [20]:
# exercise the magic <-> SPD mapping helpers
from pmagpy.mapping import map_magic
reload(map_magic)  # reload is a Python 2 builtin
dir(map_magic)
# NOTE(review): pops 'specimen_YT' out of the map before the demo --
# presumably to exclude that key; confirm intent
x = map_magic.magic2spd_map.pop('specimen_YT')
# parenthesized single-argument print works in Python 2 and 3
print(map_magic.mapping.__doc__)  # (map_magic.magic2spd_map
# build a dummy dict with one integer per magic key, then remap it
d = dict(zip(map_magic.magic2spd_map.keys(), range(len(map_magic.magic2spd_map.keys()))))
print(d)
d2 = map_magic.mapping(d, map_magic.magic2spd_map)
print(d2)
In [21]:
#print vc.possible_vocabularies
# display the age_unit controlled vocabulary; `vc` was created in the
# controlled-vocabularies cell above
vc.vocabularies['age_unit']
Out[21]:
In [22]:
# working on criteria for lisa
import os
import pmagpy.new_builder as nb
wdir = os.path.join('..', "3_0", "McMurdo")
contribution = nb.Contribution(wdir, read_tables=['criteria'])
crit_container = contribution.tables['criteria']
crit_data = crit_container.df
# fish out all the relevant rows; na=False treats missing criterion
# names as non-matches (same effect as the original `== True` trick)
crit_data = crit_data[crit_data['criterion'].str.contains('IE-', na=False)]
crit_dict = dict(crit_data['criterion_value'])
# keep only the part of the key after the table prefix,
# e.g. 'specimens.int_mad' -> 'int_mad'
# NOTE(review): assumes every key contains a '.' -- confirm
{key.split(".")[1]: value for key, value in crit_dict.items()}
Out[22]:
In [ ]: