This notebook was used for developing a script to translate MagIC format files from the 2.5 data format to the 3.0 format. This functionality is now implemented in Pmag GUI.

Getting started


In [1]:
import pmagpy.new_builder as nb
from pmagpy import ipmag
import os
import json
import numpy as np
import sys

import pandas as pd
import numpy as np
from pandas import DataFrame
from pmagpy import builder2 as builder
from pmagpy import validate_upload2 as validate_upload
from pmagpy import pmag
from pmagpy.mapping import map_magic
from pmagpy import pmag

# Re-import the PmagPy modules being developed alongside this notebook so that
# edits made on disk are picked up without restarting the kernel
# (`reload` here is the Python 2 builtin).
for dev_module in (nb, map_magic):
    reload(dev_module)

# Absolute, symlink-resolved path of the McMurdo 2.5-format directory,
# located relative to the directory the notebook is run from.
mcmurdo_relpath = os.path.join("..", "2_5", "McMurdo")
WD = os.path.realpath(mcmurdo_relpath)

Lowest level: convert 2.5 measurement records --> 3.0 measurement records


In [2]:
# Convert magic_measurements (2.5) to measurements (3.0).
# First unpack the Lawrence et al. (2009) data file from MagIC into the
# McMurdo directory (used as both the -WD and -ID argument).

!download_magic.py -f zmab0100049tmp03.txt -WD ../2_5/McMurdo -ID ../2_5/McMurdo


['f', 'F', 'A', 'WD', 'ID', 'Fsa', 'Fsi'] 7
-I- using default for arg: F
-
-I- using default for arg: A
-
-I- using default for arg: Fsa
-
-I- using default for arg: Fsi
-
-I- using default for arg: O
-
1  records written to file  ../2_5/McMurdo/er_locations.txt
140  records written to file  ../2_5/McMurdo/er_sites.txt
1418  records written to file  ../2_5/McMurdo/er_samples.txt
1046  records written to file  ../2_5/McMurdo/er_specimens.txt
99  records written to file  ../2_5/McMurdo/er_ages.txt
31  records written to file  ../2_5/McMurdo/er_citations.txt
4  records written to file  ../2_5/McMurdo/er_mailinglist.txt
431  records written to file  ../2_5/McMurdo/er_images.txt
25470  records written to file  ../2_5/McMurdo/magic_measurements.txt
1313  records written to file  ../2_5/McMurdo/pmag_specimens.txt
207  records written to file  ../2_5/McMurdo/pmag_sites.txt
177  records written to file  ../2_5/McMurdo/pmag_results.txt
8  records written to file  ../2_5/McMurdo/pmag_criteria.txt
10  records written to file  ../2_5/McMurdo/rmag_hysteresis.txt
19  records written to file  ../2_5/McMurdo/rmag_anisotropy.txt
19  records written to file  ../2_5/McMurdo/rmag_results.txt
46  records written to file  ../2_5/McMurdo/magic_methods.txt

In [3]:
# Read in the data model 2.5 measurements file.
# pmag.magic_read hands back two values, unpacked here as the records and the
# file type; the path is assembled with os.path.join rather than a hard-coded
# "/" so it matches how paths are built elsewhere in this notebook.
data2, filetype = pmag.magic_read(os.path.join(WD, 'magic_measurements.txt'))
# Single-argument print: same output under the Python 2 print statement and
# the Python 3 print function.
print("{} {}".format(filetype, len(data2)))


magic_measurements 25470

In [4]:
# Step through the 2.5 records, passing each one to map_magic.convert_meas
# with the 'magic3' target, and collect the converted records in order.
NewMeas = [map_magic.convert_meas('magic3', rec) for rec in data2]

# Write the converted records out as a 'measurements' file next to the input.
# The path is built with os.path.join instead of a hard-coded "/" separator.
# The call is left as the last expression so its return value is displayed.
pmag.magic_write(os.path.join(WD, 'measurements.txt'), NewMeas, 'measurements')


25470  records written to file  /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/measurements.txt
Out[4]:
(True, '/Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/measurements.txt')

Convert 2.5 measurements file --> 3.0 measurements file


In [5]:
# Reload the modules under development so that on-disk edits take effect
# without restarting the kernel.
reload(nb)
# Last expression of the cell: the value returned by reload (the module
# object) is what the notebook displays as this cell's output.
reload(pmag)


Out[5]:
<module 'pmagpy.pmag' from '/Users/nebula/Python/PmagPy/pmagpy/pmag.pyc'>

In [6]:
# Relative (unresolved) path to the McMurdo 2.5-format directory; this
# replaces the absolute WD defined at the top of the notebook.
WD = os.path.join(os.pardir, "2_5", "McMurdo")

# Disabled clean-up step: uncomment to delete any existing specimens, samples,
# sites, and locations files in WD (presumably to start the conversion from a
# clean directory -- confirm before enabling).
#for dtype in ['specimens', 'samples', 'sites', 'locations']:
#    filename = os.path.join(WD, '{}.txt'.format(dtype))
#    if os.path.exists(filename):
#        os.remove(filename)

In [7]:
# Convert the magic_measurements file only (meas_only=True).
# WD is passed for both directory arguments, so the 3.0 measurements file is
# written alongside the 2.5 input.
new_meas, upgraded, no_upgrade = pmag.convert_directory_2_to_3(
    "magic_measurements.txt",
    WD,
    WD,
    meas_only=True
)


25470  records written to file  ../2_5/McMurdo/measurements.txt
-I- 3.0 format measurements file was successfully created: ../2_5/McMurdo/measurements.txt

In [8]:
# Build a contribution from WD, reading in only the converted measurements table.
con = nb.Contribution(WD, read_tables=['measurements'])

# Use the name columns of the measurement table to generate the
# specimen-through-location tables.
con.propagate_measurement_info()

# Preview the sample table that was created from the measurement info.
samples_table = con.tables['samples']
samples_table.df.head()


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
making new specimen file
-I- writing specimens data to /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/specimens.txt
making new sample file
-I- writing samples data to /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/samples.txt
making new site file
-I- writing sites data to /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/sites.txt
making new location file
-I- writing locations data to /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/locations.txt
Out[8]:
sample site
mc01f mc01f mc01
mc02c mc02c mc02
mc02e mc02e mc02
mc02g mc02g mc02
mc03b mc03b mc03

In [9]:
# convert a pandas DataFrame to the standard PmagPy formats:
# either a dict of dicts or a list of dicts, each corresponding to one table row

def convert_to_pmag_data_list(df, lst_or_dict):
    """
    Convert a pandas DataFrame into one of the standard PmagPy record formats.

    Each row of the table becomes one plain dict mapping column name to the
    value in that row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to convert. May be empty and may contain repeated index labels.
    lst_or_dict : str
        "lst" returns a list of row dicts, in the row order of `df`.
        Any other value returns a dict of row dicts keyed by index label.

    Returns
    -------
    list of dict, or dict of dict
        In the dict form, a repeated index label can only appear once as a
        key; the last row carrying that label is the one kept.
    """
    # Walk the rows directly instead of going through dict(df.T): with
    # repeated index labels df.T[label] is a DataFrame rather than a single
    # row, and a dict does not guarantee row order on Python 2.
    records = [(label, dict(row)) for label, row in df.iterrows()]
    if lst_or_dict == "lst":
        return [record for _, record in records]
    return dict(records)

    
# Demonstrate both output formats on the first few rows of the sites table:
# the dict-of-dicts form first, then the list-of-dicts form.
site_df = con.tables['sites'].df.head()
for output_format in ("dict", "lst"):
    print(convert_to_pmag_data_list(site_df, output_format))


{'mc01': {'site': 'mc01', 'location': 'McMurdo'}, 'mc02': {'site': 'mc02', 'location': 'McMurdo'}, 'mc03': {'site': 'mc03', 'location': 'McMurdo'}, 'mc04': {'site': 'mc04', 'location': 'McMurdo'}, 'mc06': {'site': 'mc06', 'location': 'McMurdo'}}
[{'site': 'mc01', 'location': 'McMurdo'}, {'site': 'mc02', 'location': 'McMurdo'}, {'site': 'mc03', 'location': 'McMurdo'}, {'site': 'mc04', 'location': 'McMurdo'}, {'site': 'mc06', 'location': 'McMurdo'}]

Convert 2.5 specimens files --> 3.0 specimens file


In [10]:
import pmagpy.mapping.map_magic as mm
import pmagpy.new_builder as nb
# Pick up any on-disk edits to the modules under development.
for dev_module in (mm, nb, pmag):
    reload(dev_module)


wdir = os.path.join("..", "2_5", "McMurdo")

# Take the er_*.txt and pmag_*.txt files for one table type, combine them,
# convert the result to 3.0, and write it out to the same directory.

dtype = "specimens"
map_dict = mm.spec_magic2_2_magic3_map
pmag.convert_and_combine_2_to_3(dtype, map_dict, input_dir=wdir, output_dir=wdir)

# Read the freshly written 3.0 file back in and preview its first rows.
converted_fname = "{}.txt".format(dtype)
converted_table = nb.MagicDataFrame(os.path.join(wdir, converted_fname))
converted_table.df.head()


-I- overwriting /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/specimens.txt
Out[10]:
analysts citations location sample site specimen experiments method_codes software_packages meas_step_max ... int_n_ptrm int_treat_dc_field lithologies dir_mad_free int_md dir_n_measurements int_q int_rsc dir_tilt_correction geologic_types
specimen
mc01a Lisa Tauxe This study McMurdo mc01a mc01 mc01a mc01a-LP-PI-TRM-LP-PI-ALT-PTRM-LP-PI-ZI LP-DIR-T:SO-SIGHT:DE-BFP:DA-DIR-GEO pmagpy-2.58 823 ... None None None 2.6 None 11 None None 0 None
mc01a None This study McMurdo mc01a mc01 mc01a None None None None ... None None Trachyte None None None None None None Lava Flow
mc01a Lisa Tauxe This study McMurdo mc01a mc01 mc01a mc01a-LP-PI-TRM-LP-PI-ALT-PTRM-LP-PI-ZI LP-PI-TRM:LP-PI-ALT-PTRM:LP-PI-ZI:IE-TT pmagpy-2.60 823 ... 5 4e-05 None None -1 None 9.5 0.9523 -1 None
mc01b Lisa Tauxe This study McMurdo mc01b mc01 mc01b mc01b-LP-PI-TRM-LP-PI-ALT-PTRM-LP-PI-ZI LP-DIR-T:SO-SIGHT:DE-BFL:DA-DIR-GEO pmagpy-2.58 773 ... None None None 1.7 None 8 None None 0 None
mc01b None This study McMurdo mc01b mc01 mc01b None None None None ... None None Trachyte None None None None None None Lava Flow

5 rows × 40 columns

Convert 2.5 directory --> 3.0 directory


In [11]:
# Convert the measurements file plus any specimen, sample, site, or location
# files present in wdir to 3.0.
# Other MagIC format files are not handled yet; they come back in not_upgraded.

new_meas, upgraded, not_upgraded = pmag.convert_directory_2_to_3(
    'magic_measurements.txt', wdir, wdir
)
upgraded_names = ', '.join(upgraded)
not_upgraded_names = ', '.join(not_upgraded)
print('upgraded files: {}'.format(upgraded_names))
print('files that could not be upgraded: {}'.format(not_upgraded_names))


25470  records written to file  ../2_5/McMurdo/measurements.txt
-I- 3.0 format measurements file was successfully created: ../2_5/McMurdo/measurements.txt
-I- overwriting /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/specimens.txt
-I- overwriting /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/samples.txt
-I- overwriting /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/sites.txt
-I- overwriting /Users/nebula/Python/PmagPy/data_files/2_5/McMurdo/locations.txt
upgraded files: measurements.txt, specimens.txt, samples.txt, sites.txt, locations.txt
files that could not be upgraded: er_ages.txt, er_images.txt, pmag_criteria.txt, pmag_results.txt, rmag_anisotropy.txt, rmag_hysteresis.txt, rmag_results.txt

Cast all columns to correct dtype


In [12]:
import pmagpy.new_builder as nb
import pmagpy.data_model3 as data_model
# Load the Megiddo 3.0 contribution, handing it an explicitly constructed
# data model instead of relying on the Contribution default.
megiddo_dmodel = data_model.DataModel()
con = nb.Contribution('../3_0/Megiddo', dmodel=megiddo_dmodel)


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
-W- No such file: /Users/nebula/Python/PmagPy/data_files/3_0/Megiddo/images.txt

In [13]:
# Data-model table for 'sites': indexed by column name, with a 'type' column.
site_dm = con.data_model.dm['sites']

# Mirror the index into a regular 'name' column.
# NOTE(review): no copy is taken, so this appears to add the column to the
# table held by the contribution's data model itself -- confirm that is intended.
site_dm['name'] = site_dm.index

# Preview the name/type pairs.
site_dm.loc[:, ['name', 'type']].head()


Out[13]:
name type
age age Number
age_high age_high Number
age_low age_low Number
age_sigma age_sigma Number
age_unit age_unit String

In [14]:
# Gather every distinct value of the 'type' column across all tables in the
# data model. The result is a set, so the printed order is arbitrary.
dtypes = set().union(
    *[con.data_model.dm[dm_name]['type'].unique() for dm_name in con.data_model.dm]
)
print(", ".join(dtypes))


String, Dictionary, Text, List, Number, Flag, Timestamp, Integer, Matrix

In [15]:
# Cast each column of the sites table to the type that the sites data model
# (site_dm) declares for it. The table is modified in place.
site_df = con.tables['sites'].df
for col_name in site_df.columns:
    dtype = site_dm.loc[col_name, 'type']
    column = site_df[col_name]
    if dtype == 'Number':
        site_df[col_name] = column.astype(float)
    elif dtype == 'Integer':
        # A column can't have dtype int and hold np.nan/None values, so
        # missing entries are filled with 0 before the cast.
        site_df[col_name] = column.fillna(0).astype(int)
    elif dtype == 'String':
        # A column can't have dtype str and hold np.nan/None values;
        # astype(str) converts every entry, missing ones included.
        site_df[col_name] = column.astype(str)

# Spot-check the resulting dtype of one Number, one Integer, and one other column.
for col in ['age', 'dir_n_samples', 'criteria']:
    print("{} : {}".format(col, site_df[col].dtype))


age : float64
dir_n_samples : int64
criteria : object

In [16]:
# Pick up any on-disk edits to pmag.
reload(pmag)
# Disabled experiment:
#pmag.convert_measfile_2_to_3('magic_measurements.txt', '2_5/McMurdo')

# Read the Megiddo sites file three ways in turn: through MagicDataFrame,
# through pmag.magic_read (result not kept), and through plain pandas.
fname = os.path.join("..", '3_0', 'Megiddo', 'sites.txt')
df = nb.MagicDataFrame(fname).df
pmag.magic_read(fname)
# skiprows=[0] drops the first line of the file before the column header
# (presumably the MagIC table-name line -- confirm against the file).
df = pd.read_table(fname, skiprows=[0])

# Show how the 'age' column looks once cast to str.
age_as_str = df['age'].astype(str)
age_as_str.head()


Out[16]:
0    -740
1    -740
2    -850
3    -950
4    -950
Name: age, dtype: object