This notebook was used to develop functionality that is now in pmagpy/data_model3.py. Examples of how to use the data_model3 module can be found in the "Importing datamodel module" section below. In general, the data model is imported into the GUIs to provide column names, controlled and suggested vocabularies, and validations for column values.

Getting started


In [1]:
# import req'd modules
import io
import json
import os

import numpy as np
import pandas as pd
from pandas import DataFrame, Series

import pmagpy.builder2 as builder

Playing with json & unicode


In [2]:
# json is the same format that the MagIC data model comes in

# turn a json string into a Python object:
json_string = '{"first_name": "Guido", "last_name":"Rossum"}'
parsed = json.loads(json_string)

# turn a Python object into a json string
d = {'hello': 'hi', 'so long': 'goodbye'}
dumped = json.dumps(d)

# store a Python object as json in a file.
# NB: dump the object itself, not the already-dumped string --
# json.dump(dumped, ...) would double-encode, so reading the file back
# would require two rounds of json.loads.  `with` closes the handle.
with open('stored.json', 'w') as outfile:
    json.dump(d, outfile)

# read the json file back into a Python object
with open('stored.json', 'r') as infile:
    loaded = json.load(infile)
loaded


Out[2]:
{u'hello': u'hi', u'so long': u'goodbye'}

In [3]:
# parsing unicode (Python 2 only: the `unicode` builtin was removed in Python 3)
# decode a byte string, silently dropping bytes invalid in the default
# (ascii) codec -- here the utf-8 bytes of two non-breaking spaces
# (\xc2\xa0) are stripped, leaving u'\r\n\n' (see Out[3] below)
unicode('\r\n\n\xc2\xa0\xc2\xa0', errors='ignore')


Out[3]:
u'\r\n\n'

Reading the data model into pandas


In [4]:
# the code in this block has been incorporated into data_model3.py

def get_data_model(model_file=None):
    """Read the MagIC data model json file into a pandas DataFrame.

    Parameters
    ----------
    model_file : str, optional
        Path to a data model json file.  Defaults to
        ../../pmagpy/data_model/data_model.json relative to the cwd.

    Returns
    -------
    pandas.DataFrame
        One column per top-level key of the json (e.g. 'tables').
    """
    if model_file is None:
        model_file = os.path.join('..', '..', 'pmagpy', 'data_model', 'data_model.json')
    # io.open decodes while reading and never raises on bad bytes
    # (replaces the old two-step unicode(string, errors='ignore')).
    # This also fixes '\n'.join(f.readlines()), which doubled every
    # newline because readlines() keeps each line's trailing '\n',
    # and guarantees the file handle is closed.
    with io.open(model_file, 'r', encoding='utf-8', errors='ignore') as f:
        raw = json.loads(f.read())
    return DataFrame(raw)

    
# guard: if the data model json file isn't present, set `skip` so the
# demo cells below become no-ops instead of raising (Python 2 print)
try:
    full = get_data_model()
except IOError:
    skip = True
    print 'Skip this block'
else:
    skip = False
    
if not skip:
    # NB: only a cell's last expression is displayed, so these
    # intermediate frames are built but not shown
    DataFrame(full['tables']['locations'])
    # transpose so each row is one column of the locations table
    location = DataFrame(full['tables']['locations']['columns'])
    location = location.transpose()
    #full['tables']['locations'].pop('columns')
    #full['tables']['locations']
    # don't really need anything that isn't in ['tables'][table]['columns']
    location[:3]

    # re-read the file and build a {table name: DataFrame} lookup
    full_df = get_data_model()

    data_model = {}
    levels = ['specimens', 'samples', 'sites', 'locations', 'criteria']
    for level in levels:
        df = DataFrame(full_df['tables'][level]['columns'])
        data_model[level] = df.transpose()

    data_model['sites']

Extracting info from the data model


In [5]:
# how to get various different data from the data model

if not skip:
    # select every location column whose metadata group is 'Age'
    age_columns = location[location['group'] == 'Age']
    age_columns

In [6]:
if not skip:
    # get the metadata row for a single column by its label
    # (.loc replaces the deprecated .ix indexer, removed in modern pandas)
    location.loc['age_high']

In [7]:
if not skip:
    # get the validations defined for a particular column
    # (.loc with (row, column) replaces the deprecated .ix indexer
    # and avoids chained indexing)
    validations = location.loc['age_high', 'validations']
    validations

In [8]:
if not skip:
    # list the distinct metadata groups used by the locations table
    unique_groups = location['group'].unique()
    unique_groups

In [9]:
if not skip:
    # pull every column whose metadata group matches the chosen one
    group = 'Direction'
    in_group = location['group'] == group
    location[in_group]

In [10]:
if not skip:
    # get all column labels for locations
    print list(location.index)
    print 'required()' in location.ix['location']['validations']
    print 'required()' in location.ix['continent_ocean']['validations']


[u'age', u'age_high', u'age_low', u'age_sigma', u'age_unit', u'analysts', u'citations', u'conglomerate_test', u'contact_test', u'continent_ocean', u'country', u'criteria', u'description', u'dir_alpha95', u'dir_dec', u'dir_inc', u'dir_k', u'dir_k_ratio', u'dir_n_samples', u'dir_n_sites', u'dir_n_specimens', u'dir_polarity', u'dir_r', u'dir_tilt_correction', u'elevation_high', u'elevation_low', u'expedition_description', u'expedition_leg', u'expedition_name', u'expedition_ship', u'expedition_url', u'experiments', u'external_database_ids', u'fold_test', u'fold_test_significance', u'formations', u'geologic_classes', u'geological_province_sections', u'int_abs', u'int_abs_sigma', u'int_abs_sigma_perc', u'int_n_samples', u'int_n_sites', u'int_n_specimens', u'lat_lon_precision', u'lat_n', u'lat_s', u'lithologies', u'location', u'location_alternatives', u'location_type', u'lon_e', u'lon_w', u'members', u'method_codes', u'ocean_sea', u'padm', u'padm_n_sites', u'padm_sigma', u'paleolat', u'paleolat_sigma', u'paleolon', u'paleolon_sigma', u'pdm', u'pdm_n_sites', u'pdm_sigma', u'pis', u'plate_blocks', u'pole_alpha95', u'pole_antipodal_angle', u'pole_bc_q', u'pole_comp_name', u'pole_conf', u'pole_dm', u'pole_dp', u'pole_k', u'pole_lat', u'pole_lon', u'pole_n_sites', u'pole_r', u'pole_reversed_perc', u'pole_vv_q', u'region', u'result_name', u'result_quality', u'result_type', u'reversal_test', u'rock_magnetic_test', u'rotation_sequence', u'samples', u'scientists', u'sites', u'software_packages', u'specimens', u'tectonic_settings', u'terranes', u'village_city']
True
False

In [11]:
if not skip:
    # get list of unique groups for location
    print location['group'].unique()

    #sort column names by group
    location.sort_values('group').head()


[u'Age' u'Metadata' u'Result' u'Direction' u'Geography' u'Expedition'
 u'Names' u'Geology' u'Paleointensity' u'Location' u'PADM' u'Paleoposition'
 u'PDM' u'Pole']

In [12]:
if not skip:
    # get headers the way we do them in the current builder.py
    # not sure we will actually want to do it like this as we update magic_gui.py and pmag_gui.py
    cond = location['validations'].map(lambda x: 'required()' in str(x))

    reqd_loc_headers = [str(i) for i in location[cond].index]
    all_loc_headers = [str(i) for i in location['validations'].index if i not in reqd_loc_headers]
    headers = [[], reqd_loc_headers, all_loc_headers] # this is basically how self.headers is organizaed now in builder.py

    print headers


[[], ['age_unit', 'geologic_classes', 'lat_n', 'lat_s', 'lithologies', 'location', 'location_type', 'lon_e', 'lon_w'], ['age', 'age_high', 'age_low', 'age_sigma', 'analysts', 'citations', 'conglomerate_test', 'contact_test', 'continent_ocean', 'country', 'criteria', 'description', 'dir_alpha95', 'dir_dec', 'dir_inc', 'dir_k', 'dir_k_ratio', 'dir_n_samples', 'dir_n_sites', 'dir_n_specimens', 'dir_polarity', 'dir_r', 'dir_tilt_correction', 'elevation_high', 'elevation_low', 'expedition_description', 'expedition_leg', 'expedition_name', 'expedition_ship', 'expedition_url', 'experiments', 'external_database_ids', 'fold_test', 'fold_test_significance', 'formations', 'geological_province_sections', 'int_abs', 'int_abs_sigma', 'int_abs_sigma_perc', 'int_n_samples', 'int_n_sites', 'int_n_specimens', 'lat_lon_precision', 'location_alternatives', 'members', 'method_codes', 'ocean_sea', 'padm', 'padm_n_sites', 'padm_sigma', 'paleolat', 'paleolat_sigma', 'paleolon', 'paleolon_sigma', 'pdm', 'pdm_n_sites', 'pdm_sigma', 'pis', 'plate_blocks', 'pole_alpha95', 'pole_antipodal_angle', 'pole_bc_q', 'pole_comp_name', 'pole_conf', 'pole_dm', 'pole_dp', 'pole_k', 'pole_lat', 'pole_lon', 'pole_n_sites', 'pole_r', 'pole_reversed_perc', 'pole_vv_q', 'region', 'result_name', 'result_quality', 'result_type', 'reversal_test', 'rock_magnetic_test', 'rotation_sequence', 'samples', 'scientists', 'sites', 'software_packages', 'specimens', 'tectonic_settings', 'terranes', 'village_city']]

In [13]:
if not skip:
    # confirm the required and optional header lists don't overlap
    # (only the last expression is displayed)
    set(headers[1]).difference(headers[2])
    set(headers[2]).difference(headers[1])

Importing datamodel module


In [14]:
# import the finished module that grew out of the exploration above
import pmagpy.data_model3 as dm
reload(dm)  # Python 2 builtin; re-executes the module to pick up edits

# DataModel's .dm attribute maps table name -> DataFrame of column
# metadata, one row per column (see the rendered output below)
data_model = dm.DataModel()
data_model.dm['locations'].head()


Out[14]:
description examples group label notes position previous_columns type unit urls validations
age Location inferred age NaN Age Inferred Age NaN 38 [{u'column': u'average_age', u'table': u'pmag_... Number Custom NaN [requiredUnless("age_low","age_high"), require...
age_high Location inferred age, High range NaN Age Inferred Age High NaN 41 [{u'column': u'average_age_high', u'table': u'... Number Custom NaN [min("age_low"), requiredUnless("age")]
age_low Location inferred age, Low range NaN Age Inferred Age Low NaN 40 [{u'column': u'average_age_low', u'table': u'p... Number Custom NaN [max("age_high"), requiredUnless("age")]
age_sigma Location inferred age, Uncertainty NaN Age Inferred Age Sigma Standard error or standard deviation at one sigma 39 [{u'column': u'average_age_sigma', u'table': u... Number Custom NaN [min(0)]
age_unit Location inferred age, Unit NaN Age Inferred Age Unit NaN 42 [{u'column': u'average_age_unit', u'table': u'... String NaN NaN [cv("age_unit"), required()]

Using the data_model module


In [15]:
import pmagpy.data_model3 as data_model

In [16]:
dir(data_model)
reload(data_model)

model_container = data_model.DataModel()

# NB: this rebinds `dm` (the module alias used above) to the
# {table name: DataFrame} dict -- kept as-is to match the notebook flow
dm = model_container.dm
locs = dm['locations']
#dm['locations']['validations'].str.join(", ")
# flatten each list of validations into one comma-separated string
locs['str_validations'] = locs['validations'].str.join(", ")
# rows whose validations include required(); use a raw string so the
# escaped parens reach the regex engine intact ("\(" is an invalid
# string escape in Python 3), and fillna(False) so rows with no
# validations (NaN) are excluded from the boolean mask
locs[locs['str_validations'].str.contains(r"required\(\)").fillna(False)]


Out[16]:
description examples group label notes position previous_columns type unit urls validations str_validations
age_unit Location inferred age, Unit NaN Age Inferred Age Unit NaN 42 [{u'column': u'average_age_unit', u'table': u'... String NaN NaN [cv("age_unit"), required()] cv("age_unit"), required()
geologic_classes Colon-delimited list of geologic classes NaN Geology Geologic Classes NaN 18 [{u'column': u'location_class', u'table': u'er... List NaN NaN [cv("class"), required()] cv("class"), required()
lat_n Northernmost latitude of the collection of sites NaN Geography Northernmost Latitude NaN 27 [{u'column': u'location_begin_lat', u'table': ... Number Degrees NaN [min(-90), max(90), min("lat_s"), required()] min(-90), max(90), min("lat_s"), required()
lat_s Southernmost latitude of the collection of sites NaN Geography Southernmost Latitude NaN 26 [{u'column': u'location_begin_lat', u'table': ... Number Degrees NaN [min(-90), max(90), max("lat_n"), required()] min(-90), max(90), max("lat_n"), required()
lithologies Colon-delimited list of lithologies or archeol... NaN Geology Lithologies NaN 19 [{u'column': u'location_lithology', u'table': ... List NaN NaN [cv("lithology"), required()] cv("lithology"), required()
location Name for location, dredge or drill site [San Francisco Volcanic Province, Dredge AMAT0... Names Location Name NaN 1 [{u'column': u'er_location_name', u'table': u'... String NaN NaN [required()] required()
location_type Location type NaN Location Location Type NaN 6 [{u'column': u'location_type', u'table': u'er_... String NaN NaN [cv("location_type"), required()] cv("location_type"), required()
lon_e Easternmost longitude of the collection of sites NaN Geography Easternmost Longitude NaN 29 [{u'column': u'location_begin_lon', u'table': ... Number Degrees NaN [min(0), max(360), required()] min(0), max(360), required()
lon_w Westernmost longitude of the collection of sites NaN Geography Westernmost Longitude NaN 28 [{u'column': u'location_begin_lon', u'table': ... Number Degrees NaN [min(0), max(360), required()] min(0), max(360), required()

In [17]:
# list the metadata groups defined for the sites table (Python 2 print)
print model_container.get_groups('sites')
#print type(model_container.get_headers('sites', 'Age'))


[u'Age', u'Metadata', u'Anisotropy', u'Geology', u'Result', u'Geography', u'Direction', u'Names', u'Site', u'Paleointensity', u'Magnetization', u'Paleoposition', u'VADM', u'VDM', u'VGP']

Controlled vocabularies


In [18]:
# make sure various pieces of the controlled vocabulary stuff works

import pmagpy.controlled_vocabularies3 as cv
import pmagpy.data_model3 as dm
import numpy as np
# pick up in-progress edits to both modules (Python 2 builtin reload)
reload(dm)
reload(cv)
#print dir(cv)


#print dir(cv.vocab)
# NOTE: these Vocabulary calls fetch method codes and vocabularies from
# earthref.org (see the "-I- ..." log lines in this cell's output)
vc = cv.Vocabulary()
all_codes, code_types = vc.get_meth_codes()
vc.get_tiered_meth_category('other', all_codes, code_types)
data = vc.get_controlled_vocabularies()

# offline / aggregate variants of the same lookups
vc.get_tiered_meth_category_offline()
vc.get_all_vocabulary()

def get_cv_from_list(lst):
    """Return the controlled-vocabulary name referenced by a list of
    validation strings, or None if there isn't one.

    Parameters
    ----------
    lst : list of str, or NaN
        Validation strings such as 'cv("age_unit")' or 'required()'.
        Non-iterable input (e.g. the NaN pandas uses for missing
        validations) yields None.

    Returns
    -------
    str or None
        The vocabulary name, e.g. 'age_unit'.
    """
    try:
        for validation in lst:
            # match only genuine cv("...") entries; the old
            # '"cv(" in i' substring test also fired on scv("...")
            # (suggested vocabularies) and returned a mangled name
            if validation.startswith('cv("') and validation.endswith('")'):
                return validation[4:-2]
    except (TypeError, AttributeError):
        # lst was not iterable (typically NaN) or held non-strings
        return None
    return None

data_model = dm.DataModel()
dir(data_model)
data_model.dm['sites']
site_dm = data_model.dm['sites']
site_dm['vocab_name'] = site_dm['validations'].apply(get_cv_from_list)
site_dm[['vocab_name', 'validations']][site_dm['vocab_name'].notnull()]


dir(vc)
print site_dm.ix['geologic_classes']['validations']
vc.vocabularies['geologic_classes'][:5]
vc.vocabularies['age_unit']
vc.vocabularies


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
[u'cv("class")', u'required()']
Out[18]:
age_unit                 [Ga, Ka, Ma, Years AD (+/-), Years BP, Years C...
alteration_grade           [Altered, High, Mild, Severe, Trace, Unaltered]
alteration_type          [Acid Leaching, Acid Oxidation, Acid Sulphate,...
aniso_s_unit             [SI, Am^2, bulk in measurements table, Normali...
aniso_type                                         [AMS, AARM, ATRM, AIRM]
assemblage               [Aggregate, In Situ, Mineral Separate, Polycry...
mineral_assemblage       [Aggregate, In Situ, Mineral Separate, Polycry...
int_scat                                  [True, False, true, false, 0, 1]
is_reviewed                               [True, False, true, false, 0, 1]
is_validated                              [True, False, true, false, 0, 1]
geologic_classes         [Archeologic, Extraterrestrial, Extrusive, Ign...
conglomerate_test                 [+, -, G+, G-, Go, IG+, IG-, IGo, ND, o]
contact_test                      [+, -, C+, C-, Co, IC+, IC-, ICo, ND, o]
continent_ocean          [Africa, Antarctica, Artic Ocean, Asia, Atlant...
country                  [Afghanistan, Albania, Algeria, American Samoa...
criterion_operation      [<, <=, =, >, >=, begins with, ends with, does...
int_corr                                                            [c, u]
orientation_quality                                                 [b, g]
quality                                                             [b, g]
result_quality                                                      [b, g]
result_type                                                   [a, i, m, s]
external_database_ids    [20D, ARCHEO00, ARCHEOINTB, ARCH_AU, ARCH_BU, ...
fold_test                [+, -, F+, F-, Fo, ND, RF+, RF-, RFo, SF+, SF-...
lithologies              [Acapulcoite Primitive Achondrite, Achondrite,...
location_type            [Archeological Site, Core, Drill Site, Laborat...
magic_version                                    [2.2, 2.3, 2.4, 2.5, 3.0]
standard                                                            [s, u]
mineral_class            [Actinolite, Adularia, Aegirine, Albite, Allan...
critical_temp_mineral    [Alteration, Antiferromagnetic, Biogenic, Cant...
mineral_type             [Alteration, Antiferromagnetic, Biogenic, Cant...
dir_nrm_origin                                                      [p, s]
ocean_sea                [Adriatic Sea, Aegean Sea, Alboran Sea, Arabia...
plate_blocks             [Adriatic Plate, Aegean Sea Plate, African Pla...
dir_polarity                                               [e, i, n, r, t]
reversal_test                            [+, -, ND, R-, Ra, Rb, Rc, Ro, o]
rock_magnetic_test                                                 [M, ND]
material_type            [Annealed, Ball Milled, Biogenic, Ceramic, Che...
tectonic_settings        [Accreted Terrain, Accretionary Orogen, Accret...
critical_temp_type       [Alteration, Blocking, Curie, Ferrimagnetic Py...
texture                  [Agmatitic, Amygdaloidal, Angular, Antiperthit...
timescale_eon                          [Archean, Phanerozoic, Proterozoic]
timescale_epoch          [Cisuralian, Early Cambrian, Early Cretaceous,...
timescale_era            [Cenozoic, Eoarchean, Mesoarchean, Mesoprotero...
timescale_period         [Calymmian, Cambrian, Carboniferous, Cretaceou...
timescale_stage          [Aalenian, Aeronian, Albian, Anisian, Aptian, ...
geologic_types           [Baked Clay, Baked Contact, Baked Mud, Baked R...
dtype: object

In [19]:
import pmagpy.controlled_vocabularies3 as cv
import pmagpy.data_model3 as dm
import numpy as np
import pandas as pd
# pick up in-progress edits to both modules (Python 2 builtin reload)
reload(dm)
reload(cv)
#print dir(cv)


#print dir(cv.vocab)
# fetches the controlled vocabularies from earthref.org (see the
# "-I- ..." log line in this cell's output) and returns them as a
# Series mapping vocabulary name -> list of allowed values
vocab = cv.Vocabulary()
vocabulary = vocab.get_controlled_vocabularies()
vocabulary.head()


-I- Importing controlled vocabularies from https://earthref.org
Out[19]:
age_unit            [Ga, Ka, Ma, Years AD (+/-), Years BP, Years C...
alteration_grade      [Altered, High, Mild, Severe, Trace, Unaltered]
alteration_type     [Acid Leaching, Acid Oxidation, Acid Sulphate,...
aniso_s_unit        [SI, Am^2, bulk in measurements table, Normali...
aniso_type                                    [AMS, AARM, ATRM, AIRM]
dtype: object

In [20]:
from pmagpy.mapping import map_magic
reload(map_magic)
dir(map_magic)

x = map_magic.magic2spd_map.pop('specimen_YT')
print map_magic.mapping.__doc__#(map_magic.magic2spd_map
d = dict(zip(map_magic.magic2spd_map.keys(), range(len(map_magic.magic2spd_map.keys()))))
print d
d2 = map_magic.mapping(d, map_magic.magic2spd_map)
print d2


    takes in a dictionary and a mapping which contains new key names,
    and returns a new dictionary with the updated key names, i.e.:
    dictionary = {'a': 1, 'b': 2, 'c': 3}
    mapping = {'a': 'aa', 'c': 'cc'}
    mapped_dictionary = mapping(dictionary, mapping)
    mapped_dictionary = {'aa': 1, b, 2, 'cc': 3}
    
{'specimen_theta': 0, 'fail_ptrm_beta_box_scatter': 1, 'specimen_int_dang': 2, 'specimen_dec': 37, 'specimen_mdev': 5, 'specimen_drat': 52, 'lab_dc_field': 55, 'specimen_k_prime_sse': 49, 'specimen_frac': 8, 'measurement_step_max': 10, 'specimen_PCA_sigma_max': 12, 'specimen_PCA_sigma_min': 53, 'specimen_drats': 40, 'specimen_PCA_sigma_int': 39, 'specimen_b_sigma': 4, 'specimen_ptrms_inc': 17, 'specimen_r_sq': 23, 'specimen_mdrat': 58, 'specimen_dac': 19, 'specimen_dck': 20, 'specimen_gamma': 21, 'specimen_scat_bounding_line_high': 11, 'specimen_int_n': 18, 'specimen_z_md': 24, 'specimen_ac_n': 61, 'specimen_scat_bounding_line_low': 25, 'specimen_inc': 9, 'specimen_int_alpha': 41, 'specimen_int_ptrm_tail_n': 29, 'specimen_cdrat': 30, 'specimen_maxdev': 31, 'specimen_int_uT': 26, 'specimen_int_crm': 7, 'specimen_b': 34, 'specimen_cm_y': 35, 'specimen_cm_x': 36, 'specimen_ptrm': 3, 'specimen_int_mad_anc': 38, 'specimen_fvds': 15, 'specimen_md': 22, 'specimen_b_beta': 16, 'specimen_k_sse': 42, 'specimen_ptrms_dec': 43, 'specimen_gmax': 27, 'fail_arai_beta_box_scatter': 46, 'specimen_g': 45, 'fail_tail_beta_box_scatter': 33, 'specimen_int_ptrm_n': 48, 'specimen_int_mad': 32, 'specimen_f': 47, 'specimen_ptrms_mad': 51, 'specimen_ptrms_angle': 6, 'specimen_dpal': 44, 'specimen_q': 14, 'specimen_tail_drat': 54, 'specimen_dt': 28, 'measurement_step_min': 56, 'specimen_PCA_v1': 57, 'specimen_k_prime': 59, 'specimen_scat': 60, 'specimen_z': 13, 'specimen_coeff_det_sq': 62, 'specimen_k': 50, 'specimen_dtr': 63}
{'MAD_Anc': 38, 'R_corr2': 62, 'fail_ptrm_beta_box_scatter': 1, 'ptrms_angle_Free': 6, 'scat_bounding_line_low': 25, 'PCA_sigma_int_Free': 39, 'PCA_sigma_max_Free': 12, 'max_ptrm_check': 3, 'pTRM_MAD_Free': 51, 'lab_dc_field': 55, 'specimen_int_crm': 7, 'PCA_sigma_min_Free': 53, 'B_anc': 26, 'alpha': 41, 'delta_pal': 44, 'DRAT_tail': 54, 'specimen_dt': 28, 'Inc_Free': 9, 'R_det2': 23, 'best_fit_vector_Free': 57, 'specimen_fvds': 15, 'delta_AC': 19, 'specimen_g': 45, 'delta_CK': 20, 'specimen_k_prime_SSE': 49, 'y_Arai_mean': 35, 'MD_VDS': 22, 'specimen_int_n': 18, 'theta': 0, 'MAD_Free': 32, 'n_ptrm': 48, 'DRAT': 52, 'FRAC': 8, 'x_Arai_mean': 36, 'CDRAT': 30, 'Z': 13, 'Dec_Free': 37, 'mean_DEV': 5, 'tmin': 56, 'delta_TR': 63, 'SCAT': 60, 'DRATS': 40, 'n_add': 61, 'specimen_k_prime': 59, 'specimen_b_sigma': 4, 'fail_tail_beta_box_scatter': 33, 'max_DEV': 31, 'specimen_b': 34, 'fail_arai_beta_box_scatter': 46, 'mean_DRAT': 58, 'IZZI_MD': 24, 'tmax': 10, 'specimen_f': 47, 'specimen_q': 14, 'specimen_b_beta': 16, 'ptrms_inc_Free': 17, 'n_tail': 29, 'GAP-MAX': 27, 'SSE': 42, 'DANG': 2, 'ptrms_dec_Free': 43, 'specimen_k': 50, 'gamma': 21, 'scat_bounding_line_high': 11}

In [21]:
#print vc.possible_vocabularies
# the controlled vocabulary (list of allowed values) for age_unit
vc.vocabularies['age_unit']


Out[21]:
[u'Ga',
 u'Ka',
 u'Ma',
 u'Years AD (+/-)',
 u'Years BP',
 u'Years Cal AD (+/-)',
 u'Years Cal BP']

In [22]:
# working on criteria for Lisa

import os
import pmagpy.new_builder as nb

wdir = os.path.join('..', "3_0", "McMurdo")
contribution = nb.Contribution(wdir, read_tables=['criteria'])
crit_container = contribution.tables['criteria']
crit_data = crit_container.df
# fish out all the relevant (intensity) criteria; na=False is the
# documented way to treat missing criterion names as non-matches --
# the old '== True' comparison only worked because NaN == True is False
crit_data = crit_data[crit_data['criterion'].str.contains('IE-', na=False)]
# map each column name (the part after "table.") to its threshold value
crit_dict = dict(crit_data['criterion_value'])
{key.split(".")[1]: value for key, value in crit_dict.items()}


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
Out[22]:
{'int_b_beta': '0.1',
 'int_dang': '10.0',
 'int_frac': '0.78',
 'int_mad': '5.0',
 'int_n_ptrm': '2.0',
 'int_n_specimens': '3.0',
 'int_scat': 'True',
 'int_sigma': '6e-06',
 'int_sigma_perc': '15.0'}

In [ ]: