This notebook demonstrates how to make and use a Python Contribution object. A Contribution is built by reading in MagIC tables from a single directory. Those tables are then stored in pandas DataFrames.

Getting started


In [1]:
# do basic imports and unpack McMurdo data

from pmagpy import ipmag
reload(ipmag)
from pmagpy import pmag
from pmagpy import new_builder as nb
from pmagpy import data_model3
import os
import pandas as pd
import numpy as np
from pandas import DataFrame
from pmagpy.new_builder import Contribution

wdir = os.path.join("..", "3_0", "McMurdo")
#infile = os.path.join(wdir, "lawrence09.v30.txt")
#infile = os.path.join(wdir, "mcmurdo3-with-upgrade.txt") 
#ipmag.download_magic(infile, overwrite=True, dir_path=wdir)

Several ways of creating a contribution


In [2]:
reload(nb)

# test out various ways of creating a contribution

#class Contribution(object):
#    """                                                                                                                   
#    A Contribution is a collection of MagicDataFrames,                                                                    
#    each of which corresponds to one MagIC table.                                                                         
#    The Contribution object also has methods for                                                                          
#    manipulating one or more tables in the contribution --                                                                
#    for example, renaming a site.                                                                                         
#    """
#    def __init__(self, directory, read_tables='all',
#                 custom_filenames=None, single_file=None):



# make contribution reading in all default filenames from working directory
wdir = os.path.join("..", "3_0", "McMurdo")
con = nb.Contribution(wdir)
print 'tables created:', con.tables.keys()
print '-'

# make contribution with some custom filenames
con = nb.Contribution(wdir, custom_filenames={'specimens': 'custom_specimens.txt'})
print 'tables created:', con.tables.keys()
print '-'

# make contribution with custom filenames, and only read in the specimen table to start
con = Contribution(wdir, read_tables=['specimens'], custom_filenames={'sites': 'custom_sites.txt',
                                                                      'specimens': 'custom_specimens.txt'})
print 'tables created:', con.tables.keys()
print '-'

# make contribution with a single, mystery file (can be any datatype)
con = nb.Contribution(wdir, single_file='sites.txt')
print 'tables created:', con.tables.keys()
print '-'


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
tables created: ['measurements', 'ages', 'sites', 'locations', 'samples', 'criteria', 'images', 'contribution', 'specimens']
-
tables created: ['measurements', 'ages', 'sites', 'locations', 'samples', 'criteria', 'images', 'contribution', 'specimens']
-
tables created: ['specimens']
-
tables created: ['sites']
-

In [3]:
# make McMurdo contribution, starting with specimens table

reload(nb)

con = nb.Contribution(wdir, read_tables=['specimens'], custom_filenames={'specimens': 'custom_specimens.txt', 'samples': 'custom_samples.txt',
                                                                         'sites': 'custom_sites.txt'})

print con.filenames
print con.tables.keys()


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
{'measurements': 'measurements.txt', 'ages': 'ages.txt', 'sites': 'custom_sites.txt', 'locations': 'locations.txt', 'samples': 'custom_samples.txt', 'criteria': 'criteria.txt', 'images': 'images.txt', 'contribution': 'contribution.txt', 'specimens': 'custom_specimens.txt'}
['specimens']

In [4]:
# then, add another table to the contribution
# here, we are providing data type but no filename
# this works because we already gave the custom sample filename when we created the contribution
# so the contribution already knows where to look (con.filenames)
con.add_magic_table('samples')
print con.tables.keys()


['specimens', 'samples']

In [5]:
# add another table to the same contribution
# this time, provide a filename but no data type

con.add_magic_table(dtype="unknown", fname="criteria.txt")
# criteria table now included
print con.tables.keys()


['specimens', 'samples', 'criteria']

Functionality with a contribution


In [6]:
# create full McMurdo contribution

reload(nb)

con = nb.Contribution(wdir, custom_filenames={'specimens': 'specimens.txt', 'samples': 'samples.txt',
                                             'sites': 'sites.txt'})


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org

In [7]:
con.tables['locations'].df[['sites']]


Out[7]:
sites
location
McMurdo None
McMurdo mc03 : mc04 : mc06 : mc07 : mc08 : mc09 : mc10...
McMurdo mc102 : mc103 : mc105 : mc109 : mc110 : mc112 ...
McMurdo mc02 : mc03 : mc04 : mc06 : mc07 : mc08 : mc09...
McMurdo mc09 : mc105 : mc109 : mc111 : mc113 : mc115 :...

Rename an item


In [8]:
reload(nb)
# rename one of the Contribution's sites
con.rename_item('sites', 'mc03', 'extra_special_site')
con.tables['sites'].df.ix[['extra_special_site']]
# all rows previously named 'mc01' are now named 'extra_special_site'


Out[8]:
age age_sigma age_unit analysts citations criteria description dir_alpha95 dir_comp_name dir_dec ... vadm_n_samples vadm_sigma vdm vdm_n_samples vdm_sigma vgp_dm vgp_dp vgp_lat vgp_lon vgp_n_samples
site
extra_special_site None None None None This study None 10-m thick basalt flow, NE of Scott Base, Hut ... None None None ... None None None None None None None None None None
extra_special_site 0.348 0.004 Ma Lisa Tauxe This study DE-SPEC Direction included in Pmag_Results. 2.3 A 352 ... None None None None None None None None None None
extra_special_site 0.348 0.004 Ma Kristin Lawrence This study DE-SITE Site VGPA comp: (geog. coord). 2.3 None 352 ... None None None None None 3.8 4.4 87.1 123.1 6
extra_special_site None None None None None None None None None None ... None None None None None None None None None None

4 rows × 47 columns


In [9]:
# additionally, 'mc03' has been replaced in the location table under site_names
#con.tables['locations'].df.ix[["Osler Volcanics, Nipigon Strait, Lower Reversed"]][['site_names']]
con.tables['locations'].df[['sites']]#, 'sites_list']]


Out[9]:
sites
location
McMurdo None
McMurdo extra_special_site:mc04:mc06:mc07:mc08:mc09:mc...
McMurdo mc102 : mc103 : mc105 : mc109 : mc110 : mc112 ...
McMurdo mc02:extra_special_site:mc04:mc06:mc07:mc08:mc...
McMurdo mc09 : mc105 : mc109 : mc111 : mc113 : mc115 :...

Propagate data from one table into another


In [10]:
# normally, each table only has one relationship up (i.e., a measurement table will have specimen name, but not sample name)
# sometimes, you need to access location_name at the site level (for example)
# this function propagates names down through any available tables
# the code snippet below won't work if the Contribution can't access the sample and site files!


reload(nb)

con = nb.Contribution(wdir, custom_filenames={'specimens': 'custom_specimens.txt', 'samples': 'custom_samples.txt',
                                             'sites': 'custom_sites.txt'})

con.propagate_name_down('location', 'specimens')

# specimens table now has sample, site, and location_names
con.tables['specimens'].df[['specimen', 'sample', 'site', 'location']].head()


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
Out[10]:
specimen sample site location
specimen
mc01a mc01a mc01a mc01 McMurdo
mc01a mc01a mc01a mc01 McMurdo
mc01b mc01b mc01b mc01 McMurdo
mc01b mc01b mc01b mc01 McMurdo
mc01c mc01c mc01c mc01 McMurdo

In [11]:
# this function propagates values from arbitrary columns down
# i.e., get sample-level azimuth into the measurements table
# note: this will NOT work with names (specimen, sample, etc.).  
# for those relationships, use the above function: propagate_name_down

reload(nb)
con = nb.Contribution(wdir, custom_filenames={'specimens': 'custom_specimens.txt', 'samples': 'custom_samples.txt',
                                             'sites': 'custom_sites.txt'})

meas_container = con.tables['measurements']
meas_df = meas_container.df

meas_df = con.propagate_cols_down(['azimuth', 'dip', 'fake_col'], 'measurements', 'samples')
meas_df.head()[['azimuth', 'dip']]


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
-W- Column 'fake_col' isn't in samples table, skipping it
Out[11]:
azimuth dip
measurement
mc01f-LP-DIR-AF1 94 -55
mc01f-LP-DIR-AF2 94 -55
mc01f-LP-DIR-AF3 94 -55
mc01f-LP-DIR-AF4 94 -55
mc01f-LP-DIR-AF5 94 -55

Writing out a MagIC file


In [12]:
con = nb.Contribution(wdir, custom_filenames={'specimens': 'custom_specimens.txt', 'samples': 'custom_samples.txt',
                                             'sites': 'custom_sites.txt'})
samp_container = con.tables['samples']
samp_container.write_magic_file(custom_name='_samples.txt', dir_path='../3_0/McMurdo')

samp_container.df


-I- writing samples data to /Users/nebula/Python/PmagPy/data_files/3_0/McMurdo/_samples.txt
Out[12]:
azimuth azimuth_dec_correction citations description dip geologic_classes geologic_types lat lithologies lon method_codes orientation_flag sample site
sample
mc01a 260 0 This study Archived samples from 1965, 66 expeditions. -57 Extrusive:Igneous Lava Flow -77.85 Trachyte 166.64 SO-SIGHT:FS-FD g mc01a mc01
mc01b 189 0 This study Archived samples from 1965, 66 expeditions. -63 Extrusive:Igneous Lava Flow -77.85 Trachyte 166.64 SO-SIGHT:FS-FD g mc01b mc01
mc01c 183 0 This study Archived samples from 1965, 66 expeditions. -30 Extrusive:Igneous Lava Flow -77.85 Trachyte 166.64 SO-SIGHT:FS-FD g mc01c mc01
mc01d 133 0 This study Archived samples from 1965, 66 expeditions. -57 Extrusive:Igneous Lava Flow -77.85 Trachyte 166.64 SO-SIGHT:FS-FD g mc01d mc01
mc01e 91 0 This study Archived samples from 1965, 66 expeditions. -51 Extrusive:Igneous Lava Flow -77.85 Trachyte 166.64 SO-SIGHT:FS-FD g mc01e mc01
mc01f 94 0 This study Archived samples from 1965, 66 expeditions. -55 Extrusive:Igneous Lava Flow -77.85 Trachyte 166.64 SO-SIGHT:FS-FD g mc01f mc01
mc01g 69 0 This study Archived samples from 1965, 66 expeditions. -56 Extrusive:Igneous Lava Flow -77.85 Trachyte 166.64 SO-SIGHT:FS-FD g mc01g mc01
mc01h 157 0 This study Archived samples from 1965, 66 expeditions. -41 Extrusive:Igneous Lava Flow -77.85 Trachyte 166.64 SO-SIGHT:FS-FD g mc01h mc01
mc02a 190 0 This study Archived samples from 1965, 66 expeditions. -50 Extrusive:Igneous Lava Flow -77.85 Basalt 166.69 SO-SIGHT:FS-FD g mc02a mc02
mc02b 231 0 This study Archived samples from 1965, 66 expeditions. -59 Extrusive:Igneous Lava Flow -77.85 Basalt 166.69 SO-SIGHT:FS-FD g mc02b mc02
mc02c 220 0 This study Archived samples from 1965, 66 expeditions. -58 Extrusive:Igneous Lava Flow -77.85 Basalt 166.69 SO-SIGHT:FS-FD g mc02c mc02
mc02d 246 0 This study Archived samples from 1965, 66 expeditions. -68 Extrusive:Igneous Lava Flow -77.85 Basalt 166.69 SO-SIGHT:FS-FD g mc02d mc02
mc02e 235 0 This study Archived samples from 1965, 66 expeditions ## ... -43 Extrusive:Igneous Lava Flow -77.85 Basalt 166.69 SO-SIGHT:FS-FD b mc02e mc02
mc02f 182 0 This study Archived samples from 1965, 66 expeditions. -50 Extrusive:Igneous Lava Flow -77.85 Basalt 166.69 SO-SIGHT:FS-FD g mc02f mc02
mc02g 197 0 This study Archived samples from 1965, 66 expeditions. -32 Extrusive:Igneous Lava Flow -77.85 Basalt 166.69 SO-SIGHT:FS-FD g mc02g mc02
mc02h 298 0 This study Archived samples from 1965, 66 expeditions. -48 Extrusive:Igneous Lava Flow -77.85 Basalt 166.69 SO-SIGHT:FS-FD g mc02h mc02
mc03a 306 0 This study Archived samples from 1965, 66 expeditions. -68 Extrusive:Igneous Lava Flow -77.84 Basalt 166.76 SO-SIGHT:FS-FD g mc03a mc03
mc03b 287 0 This study Archived samples from 1965, 66 expeditions. -66 Extrusive:Igneous Lava Flow -77.84 Basalt 166.76 SO-SIGHT:FS-FD g mc03b mc03
mc03c 291 0 This study Archived samples from 1965, 66 expeditions. -76 Extrusive:Igneous Lava Flow -77.84 Basalt 166.76 SO-SIGHT:FS-FD g mc03c mc03
mc03d 250 0 This study Archived samples from 1965, 66 expeditions. -58 Extrusive:Igneous Lava Flow -77.84 Basalt 166.76 SO-SIGHT:FS-FD g mc03d mc03
mc03e 309 0 This study Archived samples from 1965, 66 expeditions. -68 Extrusive:Igneous Lava Flow -77.84 Basalt 166.76 SO-SIGHT:FS-FD g mc03e mc03
mc03f 305 0 This study Archived samples from 1965, 66 expeditions. -86 Extrusive:Igneous Lava Flow -77.84 Basalt 166.76 SO-SIGHT:FS-FD g mc03f mc03
mc03g 288 0 This study Archived samples from 1965, 66 expeditions. -41 Extrusive:Igneous Lava Flow -77.84 Basalt 166.76 SO-SIGHT:FS-FD g mc03g mc03
mc03h 274 0 This study Archived samples from 1965, 66 expeditions. -58 Extrusive:Igneous Lava Flow -77.84 Basalt 166.76 SO-SIGHT:FS-FD g mc03h mc03
mc04a 91 0 This study Archived samples from 1965, 66 expeditions. -43 Intrusive:Igneous Volcanic Dike -77.84 Basalt 166.7 SO-SIGHT:FS-FD g mc04a mc04
mc04b 86 0 This study Archived samples from 1965, 66 expeditions. -50 Intrusive:Igneous Volcanic Dike -77.84 Basalt 166.7 SO-SIGHT:FS-FD g mc04b mc04
mc04c 121 0 This study Archived samples from 1965, 66 expeditions. -33 Extrusive:Igneous Volcanic Dike -77.84 Basalt 166.7 SO-SIGHT:FS-FD g mc04c mc04
mc04d 92 0 This study Archived samples from 1965, 66 expeditions. -55 Extrusive:Igneous Volcanic Dike -77.84 Basalt 166.7 SO-SIGHT:FS-FD g mc04d mc04
mc04e 124 0 This study Archived samples from 1965, 66 expeditions. -68 Intrusive:Igneous Volcanic Dike -77.84 Basalt 166.7 SO-SIGHT:FS-FD g mc04e mc04
mc04f 151 0 This study Archived samples from 1965, 66 expeditions. -39 Intrusive:Igneous Volcanic Dike -77.84 Basalt 166.7 SO-SIGHT:FS-FD g mc04f mc04
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
mc44c 102 0 This study Archived samples from 1965, 66 expeditions. -25 Extrusive:Igneous Lava Flow -78.36 Basalt 164.26 SO-SIGHT:FS-FD g mc44c mc44
mc44d 145 0 This study Archived samples from 1965, 66 expeditions. -26 Extrusive:Igneous Lava Flow -78.36 Basalt 164.26 SO-SIGHT:FS-FD g mc44d mc44
mc44e 102 0 This study Archived samples from 1965, 66 expeditions. -14 Extrusive:Igneous Lava Flow -78.36 Basalt 164.26 SO-SIGHT:FS-FD g mc44e mc44
mc44f 67 0 This study Archived samples from 1965, 66 expeditions. -17 Extrusive:Igneous Lava Flow -78.36 Basalt 164.26 SO-SIGHT:FS-FD g mc44f mc44
mc44g 179 0 This study Archived samples from 1965, 66 expeditions. -17 Extrusive:Igneous Lava Flow -78.36 Basalt 164.26 SO-SIGHT:FS-FD g mc44g mc44
mc44h 232 0 This study Archived samples from 1965, 66 expeditions. -20 Extrusive:Igneous Lava Flow -78.36 Basalt 164.26 SO-SIGHT:FS-FD g mc44h mc44
mc48a 138 0 This study Archived samples from 1965, 66 expeditions. -53 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc48a mc48
mc48b 131 0 This study Archived samples from 1965, 66 expeditions. -40 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc48b mc48
mc48c 156 0 This study Archived samples from 1965, 66 expeditions. -46 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc48c mc48
mc48d 228 0 This study Archived samples from 1965, 66 expeditions. -49 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc48d mc48
mc48e 151 0 This study Archived samples from 1965, 66 expeditions. -54 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc48e mc48
mc48f 133 0 This study Archived samples from 1965, 66 expeditions. -55 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc48f mc48
mc48g 240 0 This study Archived samples from 1965, 66 expeditions. -53 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc48g mc48
mc48h 175 0 This study Archived samples from 1965, 66 expeditions. -53 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc48h mc48
mc49a 248 0 This study Archived samples from 1965, 66 expeditions. -60 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc49a mc49
mc49b 208 0 This study Archived samples from 1965, 66 expeditions. -73 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc49b mc49
mc49c 206 0 This study Archived samples from 1965, 66 expeditions. -31 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc49c mc49
mc49d 271 0 This study Archived samples from 1965, 66 expeditions. -53 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc49d mc49
mc49e 311 0 This study Archived samples from 1965, 66 expeditions. -19 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc49e mc49
mc49f 206 0 This study Archived samples from 1965, 66 expeditions. -34 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc49f mc49
mc49g 257 0 This study Archived samples from 1965, 66 expeditions. -51 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc49g mc49
mc49h 250 0 This study Archived samples from 1965, 66 expeditions. -37 Extrusive:Igneous Lava Flow -78.24 Basalt 163.36 SO-SIGHT:FS-FD g mc49h mc49
mc50a 19 0 This study Archived samples from 1965, 66 expeditions. -28 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50a mc50
mc50b 48 0 This study Archived samples from 1965, 66 expeditions. -16 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50b mc50
mc50c 140 0 This study Archived samples from 1965, 66 expeditions. -43 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50c mc50
mc50d 90 0 This study Archived samples from 1965, 66 expeditions. -32 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50d mc50
mc50e 139 0 This study Archived samples from 1965, 66 expeditions. -41 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50e mc50
mc50f 148 0 This study Archived samples from 1965, 66 expeditions. -46 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50f mc50
mc50g 155 0 This study Archived samples from 1965, 66 expeditions. -18 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50g mc50
mc50h 148 0 This study Archived samples from 1965, 66 expeditions. -23 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50h mc50

1418 rows × 14 columns

Adding an empty dataframe to a contribution


In [13]:
# columns is a list whichever columns you want
dtype = "specimens"
cols = ["col_name1", "col_name2"]
data_container = nb.MagicDataFrame(dtype=dtype, columns=cols)

# or:
dtype = "specimens"
groups = ["Age", "Metadata"]
data_container = nb.MagicDataFrame(dtype=dtype, groups=groups)
# and then:
con.tables[dtype] = data_container

con.tables[dtype].df

con.add_empty_magic_table('fake', col_names=['col1', 'col2'])
con.add_empty_magic_table('images', col_names=['col1', 'col2'])
con.tables['images'].df


-W- fake is not a valid MagIC table name
-I- Valid table names are: measurements, specimens, samples, sites, locations, contribution, criteria, ages, images
Out[13]:
col1 col2

Adding a new item (i.e., a sample)


In [14]:
reload(nb)
wdir = os.path.join("..", "3_0", "McMurdo")
con = nb.Contribution(wdir)
con.tables.keys()


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
Out[14]:
['measurements',
 'ages',
 'sites',
 'locations',
 'samples',
 'criteria',
 'images',
 'contribution',
 'specimens']

In [15]:
# add a new sample
site_name = con.tables['sites'].df.index[0]
samp_name = 'new_sample'
data = {'sample': samp_name, 'site': site_name}

con.add_item('samples', data, samp_name)
con.tables['samples'].df.tail()


Out[15]:
azimuth azimuth_dec_correction citations description dip geologic_classes geologic_types lat lithologies lon method_codes orientation_flag sample site cooling_rate
sample
mc50e 139 0.0 This study Archived samples from 1965, 66 expeditions. -41 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50e mc50 1e+10
mc50f 148 0.0 This study Archived samples from 1965, 66 expeditions. -46 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50f mc50 1e+10
mc50g 155 0.0 This study Archived samples from 1965, 66 expeditions. -18 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50g mc50 1e+10
mc50h 148 0.0 This study Archived samples from 1965, 66 expeditions. -23 Extrusive:Igneous Lava Flow -78.25 Basalt 163.22 SO-SIGHT:FS-FD g mc50h mc50 1e+10
new_sample None NaN None None None None None None None None None None new_sample mc01 None

Mucking around with measurements


In [16]:
con = nb.Contribution(wdir, custom_filenames={'specimens': 'custom_specimens.txt', 'samples': 'custom_samples.txt',
                                             'sites': 'custom_sites.txt'})

meas_container = con.tables['measurements']
meas_data = meas_container.df

meas_data['treatment'] = meas_data['treat_ac_field'].where(cond=meas_data['treat_ac_field'] != "0", other=meas_data['treat_temp'])
meas_data[['treatment', 'treat_ac_field', 'treat_temp']]

meas_data['treat_ac_field'].ix[0] = None
meas_data['treat_ac_field'] = meas_data['treat_ac_field'].astype(float)

meas_data[['treatment', 'treat_ac_field', 'treat_temp']].head()


/usr/local/lib/python2.7/site-packages/pandas/core/indexing.py:140: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
Out[16]:
treatment treat_ac_field treat_temp
measurement
mc01f-LP-DIR-AF1 0 NaN 273
mc01f-LP-DIR-AF2 0.005 0.0050 273
mc01f-LP-DIR-AF3 0.0075 0.0075 273
mc01f-LP-DIR-AF4 0.01 0.0100 273
mc01f-LP-DIR-AF5 0.0125 0.0125 273

Dealing with criteria


In [17]:
#  grab a copy of the criteria and sites table to play with
criteria = con.tables['criteria'].df.copy()
sites = con.tables['sites'].df.copy()
locations = con.tables['locations'].df.copy()
specimens = con.tables['specimens'].df.copy()
samples = con.tables['samples'].df.copy()

In [18]:
criteria.index


Out[18]:
Index([u'sites.int_n_specimens', u'sites.int_sigma', u'sites.int_sigma_perc',
       u'specimens.int_b_beta', u'specimens.int_frac', u'specimens.int_dang',
       u'specimens.int_mad', u'specimens.int_n_ptrm', u'specimens.int_scat',
       u'specimens.dir_mad_free', u'specimens.dir_alpha95',
       u'specimens.dir_n_measurements', u'samples.dir_alpha95',
       u'sites.dir_alpha95', u'sites.dir_n_samples',
       u'sites.dir_n_specimens_lines', u'sites.dir_k', u'sites.dir_polarity',
       u'sites.dir_polarity'],
      dtype='object', name=u'table_column')

In [19]:
cond = criteria.index.str.contains('sample')
samp_crit = criteria[cond].copy()

In [20]:
# get all criteria for samples

# only criteria with 'samples' in table_column_name
cond = criteria.index.str.contains('sample')
samp_crit = criteria[cond].copy()
# remove table name from index
if len(samp_crit):
    samp_crit.index = samp_crit.index.str.replace('samples.', '')
    samp_crit.index.name = 'column_name'


cols = samp_crit.index
list(cols)
cols = list(cols)

samp_crit


Out[20]:
citations criterion criterion_operation criterion_value definition table_column
column_name
dir_alpha95 This study DE-SAMP <= 180 acceptance criteria for study samples.dir_alpha95
sites.dir_n_samples This study DE-SITE >= 5 acceptance criteria for study sites.dir_n_samples

In [21]:
#sites.head().ix[cols]

In [22]:
# create string --> operator conversion    
import operator
ops = {"<": operator.lt, ">": operator.gt, "==": operator.eq, "<=": operator.le, ">=": operator.gt}

# function for applying criteria

In [23]:
# create full McMurdo contribution

reload(nb)

con = nb.Contribution(wdir, custom_filenames={'specimens': 'specimens.txt', 'samples': 'samples.txt',
                                             'sites': 'sites.txt'})


criteria = con.tables['criteria'].df.copy()




def apply_crit(series, crit_series):#, criteria_type):
    """
    Apply 1 criterion (i.e., 1 row of the criteria table) to another table.
    Return series with boolean values for whether the row passes.
    """
    col_name = crit_series.name
    # if there's no value, pass == True
    if col_name not in series:
        return True
    if not series[col_name]:
        return True
        #return "{} not in row".format(col_name)
    # if there is a value, test that it is within correct limits
    crit_name = crit_series['criterion']
    crit_value = float(crit_series['criterion_value'])
    op_str = crit_series['criterion_operation']
    op = ops[op_str]
    value = float(series[col_name])
    result = op(value, crit_value)
    return result



def add_criteria_named(category_name, dtype):
    df = con.tables[dtype].df
    criteria_subset = criteria[criteria['criterion'] == category_name]
    criteria_subset.index = criteria_subset.index.str.replace(dtype + '.', '')
    pass_col_names = []
    for crit_name, crit_row in criteria_subset.iterrows():
        #print 'crit_name', crit_name
        col_name = category_name + "_" + crit_name + "_pass"
        #print 'col_name', col_name
        pass_col_names.append(col_name)
        df[col_name] = df.apply(apply_crit, args=(crit_row,), axis=1)
    return pass_col_names
    

#DE_SPEC = criteria[criteria['criterion'] == 'DE-SPEC']
#DE_SPEC.index = DE_SPEC.index.str.replace('specimens.', '')
#pass_col_names = []
#for crit_name, crit_row in DE_SPEC.iterrows():
#    #print 'crit_name', crit_name
#    col_name = 'DE-SPEC_' + crit_name + "_pass"
#    #print 'col_name', col_name
#    pass_col_names.append(col_name)
#    specimens[col_name] = specimens.apply(apply_crit, args=(crit_row,), axis=1)
    
    
dtype = 'specimens'
criteria_name = 'IE-SPEC'
pass_col = criteria_name + "_pass"
pass_col_names = add_criteria_named(criteria_name, dtype)
print pass_col_names
df = con.tables[dtype].df


col_names = df.columns[df.columns.str.contains(criteria_name)]
df[df[col_names].all(1)]#[col_names]


## all specimens that pass all DE-SPEC criteria
#df[df[pass_col]].index
#df.head()
df.head()[col_names]


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
['IE-SPEC_int_b_beta_pass', 'IE-SPEC_int_frac_pass', 'IE-SPEC_int_dang_pass', 'IE-SPEC_int_mad_pass', 'IE-SPEC_int_n_ptrm_pass', 'IE-SPEC_int_scat_pass']
Out[23]:
IE-SPEC_int_b_beta_pass IE-SPEC_int_frac_pass IE-SPEC_int_dang_pass IE-SPEC_int_mad_pass IE-SPEC_int_n_ptrm_pass IE-SPEC_int_scat_pass
specimen
mc01a True True True True True True
mc01a True True True True True True
mc01b True True True True True True
mc01b True True True True True True
mc01c True True True True True True

In [24]:
reload(nb)
import pmagpy.pmag
reload(pmagpy.pmag)
con = nb.Contribution(wdir)#, custom_filenames={'specimens': 'custom_specimens.txt', 'samples': 'custom_samples.txt',
                                                                             #'sites': 'custom_sites.txt'})



con.tables


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
Out[24]:
{'ages': <pmagpy.new_builder.MagicDataFrame at 0x10c5ea910>,
 'contribution': <pmagpy.new_builder.MagicDataFrame at 0x111394510>,
 'criteria': <pmagpy.new_builder.MagicDataFrame at 0x1108f7450>,
 'images': <pmagpy.new_builder.MagicDataFrame at 0x10c7e5050>,
 'locations': <pmagpy.new_builder.MagicDataFrame at 0x111429550>,
 'measurements': <pmagpy.new_builder.MagicDataFrame at 0x110331210>,
 'samples': <pmagpy.new_builder.MagicDataFrame at 0x1101a4dd0>,
 'sites': <pmagpy.new_builder.MagicDataFrame at 0x110f23c90>,
 'specimens': <pmagpy.new_builder.MagicDataFrame at 0x111e9a0d0>}

Ways of dropping rows in dataframes -- non-unique index is a problem


In [25]:
site_container = con.tables['sites']
site_df = con.tables['sites'].df
#thingee = set([0, 4])
#site_df.index[list(thingee)]

to_drop = [0, 4]
#site_df.drop(site_df.iloc[list(thingee)], inplace=True)
# this doesn't work, because it drops extra values with the same index value (mc01)
#site_df.drop(to_drop, inplace=True)

# this works
df = site_df.iloc[sorted(set(range(len(site_df))) - set([0, 4]))]

# this works
df = site_df.iloc[[i for i in range(len(site_df)) if i not in to_drop]]

# this works
site_df = site_df.reset_index(drop=True).drop(to_drop).set_index('site')
site_df.columns[site_df.columns.str.contains('age')]
site_df.columns[site_df.columns.str.contains("age($|_).*")]


# Solution in new_builder:
site_container.delete_row(1)
site_container.delete_row(1).head()


/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:20: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.
Out[25]:
age age_sigma age_unit analysts citations criteria description dir_alpha95 dir_comp_name dir_dec ... vadm_n_samples vadm_sigma vdm vdm_n_samples vdm_sigma vgp_dm vgp_dp vgp_lat vgp_lon vgp_n_samples
site
mc01 1.18 0.005 Ma Lisa Tauxe This study IE-SPEC Trachyte flow, N flank Observation hill, Hut P... None None None ... None None None None None None None None None None
mc01 None None None None None None None None None None ... None None None None None None None None None None
mc02 None None None None This study None Upper basalt flow, S. flank Crater Hill, Hut P... None None None ... None None None None None None None None None None
mc02 0.33 0.01 Ma Lisa Tauxe This study DE-SPEC Direction included in Pmag_Results. 2.1 A 328.6 ... None None None None None None None None None None
mc02 0.33 0.01 Ma Kristin Lawrence This study DE-SITE Site VGPA comp: (geog. coord). 2.1 None 328.6 ... None None None None None 2.5 4.1 79 101.2 6

5 rows × 47 columns

Convert 2.5 files --> 3.0 files


In [26]:
# See data_model_conversion.ipynb

Extract minimum/maximum lat/lon by location from sites table


In [27]:
# get minimum/maximum latitude/longitude grouped by location

# set up
wdir = os.path.join("..", '3_0', 'McMurdo')
con = nb.Contribution(wdir, single_file='sites.txt')
site_container = con.tables['sites']
site_df = site_container.df
# Fill in some values
site_container.df['lon'] = ''
site_container.df.iloc[1] = pd.Series({'lon': '2', 'location': 'McMurdo'})
site_container.df.iloc[2] = pd.Series({'location': 'McMurdo2', 'lat': '14.2'})
# fill in string values with None or np.nan
site_container.df['lon'] = np.where(site_container.df['lon'].str.len(), site_container.df['lon'], None)
site_container.df['lat'] = np.where(site_container.df['lat'].str.len(), site_container.df['lat'], None)
# group lat/lon by location name
print 'about to group'

site_container.df['lon'] = site_container.df['lon'].astype(float)

grouped_lon = site_container.df[['lon', 'location']].dropna().groupby('location')
grouped_lat = site_container.df[['lat', 'location']].dropna().groupby('location')

#grouped_lon = site_container.df['lon'].astype(float).dropna().groupby(site_container.df['location'])
#grouped_lat = site_container.df['lat'].astype(float).dropna().groupby(site_container.df['location'])
# get output
print "max longitude:"
print grouped_lon.max()
print ''
print 'min latitude:'
print grouped_lat.min()


about to group
max longitude:
          lon
location     
McMurdo   2.0

min latitude:
           lat
location      
McMurdo  -78.4
McMurdo2  14.2

In [28]:
site_container.df[['lon', 'location']].dropna(subset=['lon']).groupby('location')


Out[28]:
<pandas.core.groupby.DataFrameGroupBy object at 0x10c4294d0>

Propagating values from one df to another


In [29]:
reload(nb)


wdir = os.path.join("..", '3_0', 'McMurdo')
con = nb.Contribution(wdir)#, custom_filenames={'specimens': 'custom_specimens.txt', 'samples': 'custom_samples.txt',


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org

In [30]:
print 'lithologies in sample table?', 'lithologies' in con.tables['samples'].df.columns
con.tables['sites'].df.lithologies = 'litho1'
con.tables['sites'].df.iloc[3] = pd.Series({'site': 'mc04', 'lithologies': 'litho2'})
con.tables['sites'].df.head()[['lithologies']]


lithologies in sample table? True
Out[30]:
lithologies
site
mc01 litho1
mc01 litho1
mc01 litho1
mc01 litho2
mc02 litho1

In [31]:
con.tables['samples'].df['lithologies'] = None

con.tables['samples'].df.loc['mc01f', 'lithologies'] = 'litho3'
con.tables['samples'].df.head()[['lithologies']]


Out[31]:
lithologies
sample
mc01a None
mc01b None
mc01c None
mc01d None
mc01e None

In [32]:
# you should be able to run this cell over and over again without generate extra lithologies columns
# this is a fix!

for_propagation = ['lithologies']
con.propagate_cols_down(for_propagation, 'samples', 'sites')

con.tables['samples'].df.head()


site already in samples
Out[32]:
azimuth azimuth_dec_correction citations description dip geologic_classes geologic_types lat lithologies lon method_codes orientation_flag sample site cooling_rate
sample
mc01a 260 0 This study Archived samples from 1965, 66 expeditions. -57 Extrusive:Igneous Lava Flow -77.85 litho1 166.64 SO-SIGHT:FS-FD g mc01a mc01 1e+10
mc01a 260 0 This study Archived samples from 1965, 66 expeditions. -57 Extrusive:Igneous Lava Flow -77.85 litho1 166.64 SO-SIGHT:FS-FD g mc01a mc01 1e+10
mc01a 260 0 This study Archived samples from 1965, 66 expeditions. -57 Extrusive:Igneous Lava Flow -77.85 litho1 166.64 SO-SIGHT:FS-FD g mc01a mc01 1e+10
mc01a 260 0 This study Archived samples from 1965, 66 expeditions. -57 Extrusive:Igneous Lava Flow -77.85 litho2 166.64 SO-SIGHT:FS-FD g mc01a mc01 1e+10
mc01b 189 0 This study Archived samples from 1965, 66 expeditions. -63 Extrusive:Igneous Lava Flow -77.85 litho1 166.64 SO-SIGHT:FS-FD g mc01b mc01 1e+10

Scratch


In [33]:
site_df.iloc[0, 1] = np.nan
site_df.iloc[1, 1] = None
site_df.location.str.split()[:5]


Out[33]:
site
mc01     [McMurdo]
mc01     [McMurdo]
mc01    [McMurdo2]
mc01          None
mc02     [McMurdo]
Name: location, dtype: object

Timing


In [34]:
import timeit
t = timeit.Timer('char in text', setup='text = "sample string"; char = "g"')
t.timeit()

t = timeit.Timer('if None: pass')
print t.timeit()
# approximately 0.04
t = timeit.Timer('if isinstance(x, type(None)): pass', setup='x=None')
print t.timeit()
# approximately 0.4


t = timeit.Timer('x = None')
print t.timeit(10000)

t = timeit.Timer('x = pd.DataFrame()', setup='import pandas as pd')
print t.timeit(10000)


0.0241508483887
0.143810987473
0.000504016876221
2.55077600479

Cleanup


In [35]:
!rm sites.txt samples.txt specimens.txt measurements.txt ages.txt contribution.txt images.txt criteria.txt locations.txt
!rm *.png
!rm *.jpg


rm: sites.txt: No such file or directory
rm: samples.txt: No such file or directory
rm: specimens.txt: No such file or directory
rm: measurements.txt: No such file or directory
rm: ages.txt: No such file or directory
rm: contribution.txt: No such file or directory
rm: images.txt: No such file or directory
rm: criteria.txt: No such file or directory
rm: locations.txt: No such file or directory
rm: *.png: No such file or directory
rm: *.jpg: No such file or directory

In [ ]: