This notebook demonstrates how to use the Python MagicDataFrame object. A MagicDataFrame contains the data from one MagIC-format table and provides functionality for accessing and editing that data.

Getting started


In [1]:
from pmagpy import new_builder as nb
from pmagpy import ipmag
import os
import json
import numpy as np
import sys

import pandas as pd
from pandas import DataFrame
from pmagpy import pmag

working_dir = os.path.join("..", "3_0", "Osler")

Creating a MagicDataFrame


In [2]:
reload(nb)


#class MagicDataFrame(object):
#    """                                                                                                                   
#    Each MagicDataFrame corresponds to one MagIC table.                                                                   
#    The MagicDataFrame object consists of a pandas DataFrame,                                                             
#    and assorted methods for manipulating that DataFrame.                                                                 
#    """ 
#    def __init__(self, magic_file=None, columns=None, dtype=None):
#        """                                                                                                               
#        Provide either a magic_file name or a dtype.                                                                           
#        List of columns is optional,                                                                                      
#        and will only be used if magic_file == None                                                                       
#        """


fname = os.path.join("..", '3_0', 'Osler', 'sites.txt')
# the MagicDataFrame object:
site_container = nb.MagicDataFrame(magic_file=fname)
# the actual pandas DataFrame:
site_df = site_container.df
# show the first 5 site records
site_df[:5]
# FAILS
#print site_df.fillna.__doc__
#site_df.fillna(value=None)
#FAILS
#print site_df.replace.__doc__
#site_df.replace(np.nan, None)
#FAILS
#site_df[site_df.astype(str) == ""] = None
#site_df[site_df.where(site_df.astype(str) == "").notnull()] = None
# WORKS!
#site_df.where(site_df.notnull(), None)
site_df.head()


Out[2]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... lat lithologies location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples
site
1 10 135 This study None None 293.1 34.5 517 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1 None None None
1 10 135 This study None None 289.8 43.6 517 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1 None None None
1 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 1 289.8 43.6 None 1 None ... 48.6264 None Osler Volcanics, Nipigon Strait, Upper Normal 271.894 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 1 31.6516 185.575 1
2 12 140 This study None None 290.6 31.9 243 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 2 None None None
2 12 140 This study None None 285.7 42 243 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 2 None None None

5 rows × 23 columns


In [ ]:


In [3]:
# make an empty MagicDataFrame with 'Age' and 'Metadata' headers

reload(nb)
fname = os.path.join("..", '3_0', 'Osler', 'sites.txt')
# the MagicDataFrame object:
site_container = nb.MagicDataFrame(dtype='sites', groups=['Age', 'Metadata'])
# the actual pandas DataFrame:
site_df = site_container.df
# show the (empty) dataframe
site_df


Out[3]:
age age_high age_low age_sigma age_unit analysts criteria description external_database_ids instrument_codes rotation_sequence scientists software_packages
site

Indexing and selecting data


In [4]:
fname = os.path.join('..', '3_0', 'Osler', 'sites.txt')
# the MagicDataFrame object:
site_container = nb.MagicDataFrame(fname)
# the actual pandas DataFrame:
site_df = site_container.df

In [5]:
# all sites with site_name (index) of '1'
# will return a smaller DataFrame (or a Series if there is only 1 row with that index)
site_container.df.ix['1']


Out[5]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... lat lithologies location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples
site
1 10 135 This study None None 293.1 34.5 517 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1 None None None
1 10 135 This study None None 289.8 43.6 517 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1 None None None
1 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 1 289.8 43.6 None 1 None ... 48.6264 None Osler Volcanics, Nipigon Strait, Upper Normal 271.894 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 1 31.6516 185.575 1

3 rows × 23 columns


In [6]:
# index by position (using an integer), will always return a single record as Series
# in this case, get the second record
site_container.df.iloc[1]


Out[6]:
bed_dip                                                           10
bed_dip_direction                                                135
citations                                                 This study
conglomerate_test                                               None
description                                                     None
dir_dec                                                        289.8
dir_inc                                                         43.6
dir_k                                                            517
dir_n_samples                                                      5
dir_polarity                                                       n
dir_tilt_correction                                              100
geologic_classes                                           Extrusive
geologic_types                                             Lava Flow
lat                                                             None
lithologies                                                   Basalt
location               Osler Volcanics, Nipigon Strait, Upper Normal
lon                                                             None
method_codes                          FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
result_type                                                     None
site                                                               1
vgp_lat                                                         None
vgp_lon                                                         None
vgp_n_samples                                                   None
Name: 1, dtype: object

In [7]:
# return all sites with the description column filled in
cond = site_container.df['description'].notnull()
site_container.df[cond].head()


Out[7]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... lat lithologies location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples
site
1 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 1 289.8 43.6 None 1 None ... 48.6264 None Osler Volcanics, Nipigon Strait, Upper Normal 271.894 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 1 31.6516 185.575 1
2 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 2 285.7 42 None 1 None ... 48.6372 None Osler Volcanics, Nipigon Strait, Upper Normal 271.914 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 2 28.1101 187.515 1
3 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 3 307.4 37.9 None 1 None ... 48.6408 None Osler Volcanics, Nipigon Strait, Upper Normal 271.928 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 3 40.2606 167.886 1
4 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 4 302.8 29.9 None 1 None ... 48.6557 None Osler Volcanics, Nipigon Strait, Upper Normal 271.959 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 4 33.4602 167.499 1
5 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 5 294.7 42.5 None 1 None ... 48.6615 None Osler Volcanics, Nipigon Strait, Upper Normal 271.958 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 5 34.3093 181.257 1

5 rows × 23 columns


In [8]:
# get list of all sites with the same location_name
name = site_df.iloc[0].location
site_df[site_df['location'] == name][['location']]


Out[8]:
location
site
1 Osler Volcanics, Nipigon Strait, Upper Normal
1 Osler Volcanics, Nipigon Strait, Upper Normal
1 Osler Volcanics, Nipigon Strait, Upper Normal
2 Osler Volcanics, Nipigon Strait, Upper Normal
2 Osler Volcanics, Nipigon Strait, Upper Normal
2 Osler Volcanics, Nipigon Strait, Upper Normal
3 Osler Volcanics, Nipigon Strait, Upper Normal
3 Osler Volcanics, Nipigon Strait, Upper Normal
3 Osler Volcanics, Nipigon Strait, Upper Normal
4 Osler Volcanics, Nipigon Strait, Upper Normal
4 Osler Volcanics, Nipigon Strait, Upper Normal
4 Osler Volcanics, Nipigon Strait, Upper Normal
5 Osler Volcanics, Nipigon Strait, Upper Normal
5 Osler Volcanics, Nipigon Strait, Upper Normal
5 Osler Volcanics, Nipigon Strait, Upper Normal

In [9]:
# grab out declinations & inclinations

# get di block, providing the index (slicing the dataframe will be done in the function)
print site_container.get_di_block(do_index=True, item_names=['1', '2'], tilt_corr='100')

# get di block, providing a slice of the DataFrame
print site_container.get_di_block(site_container.df.loc[['1', '2']])


[[289.8, 43.6], [285.7, 42.0]]
[[289.8, 43.6], [285.7, 42.0]]

In [10]:
# Get names of all sites with a particular method code
# (returns a pandas Series with the site name and method code)
site_container.get_records_for_code('DE-K', incl=True)['method_codes'].head()


Out[10]:
site
2    DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
3    DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
3    DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
4    DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
4    DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
Name: method_codes, dtype: object

In [11]:
# Get names of all sites WITHOUT a particular method code
site_container.get_records_for_code('DE-K', incl=False)['method_codes'].head()


Out[11]:
site
1    FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
1    FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
1        DE-DI:FS-LOC-GOOGLE:LP-DC2
2    FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM
2        DE-DI:FS-LOC-GOOGLE:LP-DC2
Name: method_codes, dtype: object

Changing values


In [12]:
# update all sites named '1' to have a 'bed_dip' of 22  (.loc works in place)
site_df.loc['1', 'bed_dip'] = '22'
site_df.loc['1']


Out[12]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... lat lithologies location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples
site
1 22 135 This study None None 293.1 34.5 517 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1 None None None
1 22 135 This study None None 289.8 43.6 517 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1 None None None
1 22 None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 1 289.8 43.6 None 1 None ... 48.6264 None Osler Volcanics, Nipigon Strait, Upper Normal 271.894 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 1 31.6516 185.575 1

3 rows × 23 columns


In [13]:
# update any site's value for 'conglomerate_test' to 25 if that value was previously null
site_container.df['conglomerate_test'] = np.where(site_container.df['conglomerate_test'].isnull(), 25, \
                                                  site_container.df['conglomerate_test'])
site_container.df[:5]


Out[13]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... lat lithologies location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples
site
1 22 135 This study 25 None 293.1 34.5 517 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1 None None None
1 22 135 This study 25 None 289.8 43.6 517 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1 None None None
1 22 None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 1 289.8 43.6 None 1 None ... 48.6264 None Osler Volcanics, Nipigon Strait, Upper Normal 271.894 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 1 31.6516 185.575 1
2 12 140 This study 25 None 290.6 31.9 243 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 2 None None None
2 12 140 This study 25 None 285.7 42 243 5 n ... None Basalt Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 2 None None None

5 rows × 23 columns


In [14]:
# new_builder function to update a row (by row number)

ind = 1
row_data = {"bed_dip": "new_value", "new_col": "new_value"}
site_container.update_row(ind, row_data)
site_df.head()[["bed_dip", "new_col", "site"]]


Out[14]:
bed_dip new_col site
site
1 22 None 1.0
1 new_value new_value NaN
1 22 None 1.0
2 12 None 2.0
2 12 None 2.0

In [15]:
site_df.head()[['site', 'new_col', 'citations']]


Out[15]:
site new_col citations
site
1 1.0 None This study
1 NaN new_value None
1 1.0 None Cox & Doell 1960:McElhinny & McFadden 2000
2 2.0 None This study
2 2.0 None This study

In [16]:
# new builder function to update a record
# finds self.df row based on a condition
# then updates that row with new_data
# then deletes any other rows that also meet that condition

site_name = "1"
col_val = "new_value"
# data to add:
new_data = {"citations": "new citation"}
# condition to find row
cond1 = site_df.index.str.contains(site_name) == True
cond2 = site_df['new_col'] == col_val
condition = (cond1 & cond2)
# update record
site_container.update_record(site_name, new_data, condition)
site_df.head()[["citations", "new_col"]]


Out[16]:
citations new_col
site
1 This study None
1 new citation new_value
1 Cox & Doell 1960:McElhinny & McFadden 2000 None
10 Cox & Doell 1960:McElhinny & McFadden 2000 None
10 This study None

In [17]:
# initialize a new site with a name but no values, add it to site table   
site_container.add_blank_row('blank_site')
site_container.df = site_container.df
site_container.df.tail()


Out[17]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples new_col num
site
8 18 150 This study 25 None 144.3 -47 679 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 8.0 None None None None 86.0
9 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 9 110 -56.6 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.941 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 9.0 39.325 196.488 1 None 87.0
9 20 145 This study 25 None 110 -56.6 321 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 9.0 None None None None 88.0
9 20 145 This study 25 None 121 -39.1 321 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 9.0 None None None None 89.0
blank_site NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 25 columns


In [18]:
# copy a site from the site DataFrame, 
#change a few values, 
#then add the new site to the site DataFrame
new_site = site_container.df.ix[2]
new_site['bed_dip'] = "other"
new_site.name = 'new_site'
site_container.df = site_container.df.append(new_site)
site_container.df.tail()


/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[18]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples new_col num
site
9 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 9 110 -56.6 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.941 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 9.0 39.325 196.488 1 None 87.0
9 20 145 This study 25 None 110 -56.6 321 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 9.0 None None None None 88.0
9 20 145 This study 25 None 121 -39.1 321 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 9.0 None None None None 89.0
blank_site NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
new_site other None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 1 289.8 43.6 None 1.0 None ... Osler Volcanics, Nipigon Strait, Upper Normal 271.894 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 1.0 31.6516 185.575 1 None 2.0

5 rows × 25 columns


In [19]:
# remove a row
site_container.delete_row(3)
# this deletes the 4th row
site_df.head()


Out[19]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples new_col num
site
1 22 135 This study 25 None 293.1 34.5 517 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1.0 None None None None 0.0
1 new_value None new citation None None None None None NaN None ... None None None None NaN None None None new_value 1.0
1 22 None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 1 289.8 43.6 None 1.0 None ... Osler Volcanics, Nipigon Strait, Upper Normal 271.894 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 1.0 31.6516 185.575 1 None 2.0
10 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 10 128.2 -42 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.93 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 10.0 42.8965 169.934 1 None 3.0
10 30 134 This study 25 None 128.2 -42 59 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 10.0 None None None None 4.0

5 rows × 25 columns


In [20]:
# get rid of all rows with index "1" or "2"
site_df.drop(["1", "2"])


Out[20]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples new_col num
site
10 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 10 128.2 -42 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.93 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 10.0 42.8965 169.934 1 None 3.0
10 30 134 This study 25 None 128.2 -42 59 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 10.0 None None None None 4.0
10 30 134 This study 25 None 129.7 -12.1 59 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 10.0 None None None None 5.0
11 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 11 88.3 -59.1 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.928 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 11.0 27.7951 211.8 1 None 6.0
11 30 132 This study 25 None 88.3 -59.1 456 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 11.0 None None None None 7.0
11 30 132 This study 25 None 106.7 -33.9 456 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 11.0 None None None None 8.0
12 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 12 118.6 -48.9 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.922 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 12.0 40.3666 183.176 1 None 9.0
12 40 132 This study 25 None 118.6 -48.9 240 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 12.0 None None None None 10.0
12 40 132 This study 25 None 123.2 -9.6 240 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 12.0 None None None None 11.0
13 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 13 128.1 -38 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.915 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 13.0 40.7569 167.304 1 None 12.0
13 35 120 This study 25 None 126.4 -3.3 886 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 13.0 None None None None 13.0
13 35 120 This study 25 None 128.1 -38 886 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 13.0 None None None None 14.0
14 20 120 This study 25 None 108.2 -55.8 121 6.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 14.0 None None None None 15.0
14 20 120 This study 25 None 111.9 -36.2 121 6.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 14.0 None None None None 16.0
14 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 14 108.2 -55.8 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.917 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 14.0 37.6671 196.753 1 None 17.0
15 30 115 This study 25 None 117.6 -16.1 402 6.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 15.0 None None None None 18.0
15 30 115 This study 25 None 118.5 -46 402 6.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 15.0 None None None None 19.0
15 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 15 118.5 -46 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.902 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 15.0 38.6895 180.853 1 None 20.0
16 45 110 This study 25 None 107.7 -9.3 125 6.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 16.0 None None None None 21.0
16 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 16 106 -54.2 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.898 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 16.0 35.262 196.544 1 None 22.0
16 45 110 This study 25 None 106 -54.2 125 6.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 16.0 None None None None 23.0
17 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 17 93.7 -54.1 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.887 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 17.0 27.4921 204.126 1 None 24.0
17 25 110 This study 25 None 93.7 -54.1 362 4.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 17.0 None None None None 25.0
17 25 110 This study 25 None 99.1 -29.8 362 4.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 17.0 None None None None 26.0
18 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 18 116 -53 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.835 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 18.0 41.0179 188.806 1 None 27.0
18 12 155 This study 25 None 116 -53 224 4.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 18.0 None None None None 28.0
18 12 155 This study 25 None 123.8 -43.2 224 4.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 18.0 None None None None 29.0
19 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 19 131.7 -64.9 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.826 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 19.0 58.0163 197.301 1 None 30.0
19 12 133 This study 25 None 132.1 -53 871 4.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 19.0 None None None None 31.0
19 12 133 This study 25 None 131.7 -64.9 871 4.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 19.0 None None None None 32.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
28 7 130 This study 25 None 132.3 -47.2 1039 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 28.0 None None None None 61.0
28 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 28 132.6 -54.1 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.899 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 28.0 52.6137 177.838 1 None 62.0
29 7 130 This study 25 None 129.1 -54.5 1149 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 29.0 None None None None 63.0
29 7 130 This study 25 None 129.3 -47.5 1149 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 29.0 None None None None 64.0
29 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 29 129.1 -54.5 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.897 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 29.0 50.5405 181.098 1 None 65.0
3 14 146 This study 25 None 310 24.6 2485 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 3.0 None None None None 66.0
3 14 146 This study 25 None 307.4 37.9 2485 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 3.0 None None None None 67.0
3 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 3 307.4 37.9 None 1.0 None ... Osler Volcanics, Nipigon Strait, Upper Normal 271.928 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 3.0 40.2606 167.886 1 None 68.0
30 6 129 This study 25 None 98.7 -75.8 267 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 30.0 None None None None 69.0
30 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 30 80.5 -80.4 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.782 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 30.0 42.4939 246.402 1 None 70.0
30 6 129 This study 25 None 80.5 -80.4 267 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 30.0 None None None None 71.0
4 12 135 This study 25 None 303.9 18.2 331 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 4.0 None None None None 72.0
4 12 135 This study 25 None 302.8 29.9 331 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 4.0 None None None None 73.0
4 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 4 302.8 29.9 None 1.0 None ... Osler Volcanics, Nipigon Strait, Upper Normal 271.959 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 4.0 33.4602 167.499 1 None 74.0
5 10 135 This study 25 None 297.3 33.1 130 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 5.0 None None None None 75.0
5 10 135 This study 25 None 294.7 42.5 130 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 5.0 None None None None 76.0
5 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 5 294.7 42.5 None 1.0 None ... Osler Volcanics, Nipigon Strait, Upper Normal 271.958 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 5.0 34.3093 181.257 1 None 77.0
6 28 131 This study 25 None 106.1 -44.2 70 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 6.0 None None None None 78.0
6 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 6 106.1 -44.2 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.948 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 6.0 29.5409 188.653 1 None 79.0
6 28 131 This study 25 None 112.6 -18.1 70 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 6.0 None None None None 80.0
7 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 7 109.8 -62.4 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.975 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 7.0 42.9002 203.817 1 None 81.0
7 20 149 This study 25 None 124.5 -45.3 351 6.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 7.0 None None None None 82.0
7 20 149 This study 25 None 109.8 -62.4 351 6.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 7.0 None None None None 83.0
8 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 8 140.7 -64.8 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.974 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 8.0 63.7776 192.736 1 None 84.0
8 18 150 This study 25 None 140.7 -64.8 679 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 8.0 None None None None 85.0
8 18 150 This study 25 None 144.3 -47 679 5.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 8.0 None None None None 86.0
9 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 9 110 -56.6 None 1.0 None ... Osler Volcanics, Nipigon Strait, Lower Reversed 271.941 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 9.0 39.325 196.488 1 None 87.0
9 20 145 This study 25 None 110 -56.6 321 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 9.0 None None None None 88.0
9 20 145 This study 25 None 121 -39.1 321 3.0 r ... Osler Volcanics, Nipigon Strait, Lower Reversed None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 9.0 None None None None 89.0
blank_site NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

85 rows × 25 columns

Starting from scratch -- making a blank table


In [21]:
reload(nb)

# create an empty MagicDataFrame with column names
cols = ['analyst_names', 'aniso_ftest', 'aniso_ftest12', 'aniso_ftest23', 'aniso_s', 'aniso_s_mean', 'aniso_s_n_measurements', 'aniso_s_sigma', 'aniso_s_unit', 'aniso_tilt_correction', 'aniso_type', 'aniso_v1', 'aniso_v2', 'aniso_v3', 'citations', 'description', 'dir_alpha95', 'dir_comp_name', 'dir_dec', 'dir_inc', 'dir_mad_free', 'dir_n_measurements', 'dir_tilt_correction', 'experiment_names', 'geologic_classes', 'geologic_types', 'hyst_bc', 'hyst_bcr', 'hyst_mr_moment', 'hyst_ms_moment', 'int_abs', 'int_b', 'int_b_beta', 'int_b_sigma', 'int_corr', 'int_dang', 'int_drats', 'int_f', 'int_fvds', 'int_gamma', 'int_mad_free', 'int_md', 'int_n_measurements', 'int_n_ptrm', 'int_q', 'int_rsc', 'int_treat_dc_field', 'lithologies', 'meas_step_max', 'meas_step_min', 'meas_step_unit', 'method_codes', 'sample_name', 'software_packages', 'specimen_name']
dtype = 'specimens'
data_container = nb.MagicDataFrame(dtype=dtype, columns=None)
df = data_container.df

# create fake specimen data
fake_data = {col: 1 for col in cols}
# include a new column name in the data
fake_data['new_one'] = '999'
# add one row of specimen data (any addition column headers in will be added automatically)
data_container.add_row('name', fake_data)
# add another row
fake_data['other'] = 'cheese'
fake_data.pop('aniso_ftest')
data_container.add_row('name2', fake_data)
# now the dataframe has two new columns, 'new_one' and 'other'
df


Out[21]:
int_drats dir_n_measurements int_n_ptrm aniso_ftest12 int_treat_dc_field int_abs hyst_ms_moment meas_step_unit hyst_bc int_b_sigma ... int_mad_free meas_step_max dir_alpha95 aniso_v1 aniso_v3 dir_dec method_codes hyst_mr_moment specimen_name other
specimen
name 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 ... 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 None
name2 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 ... 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 cheese

2 rows × 57 columns

Interactions between two MagicDataFrames


In [22]:
# get location DataFrame

fname = os.path.join('..', '3_0', 'Osler', 'locations.txt')
loc_container = nb.MagicDataFrame(fname)
loc_df = loc_container.df
loc_df.head()


Out[22]:
citations conglomerate_test contact_test continent_ocean country description dir_alpha95 dir_dec dir_inc dir_k ... pole_comp_name pole_lat pole_lon pole_n_sites region result_type reversal_test rock_magnetic_test site_names tectonic_settings
location
Osler Volcanics, Nipigon Strait, Lower Reversed This study None None North America Canada None None None None None ... None None None None Lake Superior None None None None Continental Rift
Osler Volcanics, Nipigon Strait, Upper Normal This study None None North America Canada None None None None None ... None None None None Lake Superior None None None None Continental Rift
Osler Volcanics, Nipigon Strait, Lower Reversed Cox & Doell 1960:Fisher 1953:McElhinny & McFad... ND ND None None All Reversed Flows 4.62448 114.967 -57.573 40.1644 ... Characteristic 33.8557 178.263 5 None a ND ND 6:7:8:9:10:11:12:13:14:15:16:17:18:19:20:21:22... None
Osler Volcanics, Nipigon Strait, Upper Normal Cox & Doell 1960:Fisher 1953:McElhinny & McFad... ND ND None None All Normal Flows 8.47929 296.462 39.4641 82.3857 ... Characteristic 43.6574 196.338 25 None a ND ND 1:2:3:4:5 None

4 rows × 35 columns


In [23]:
# get all sites belonging to a particular location RECORD (i.e., what used to be a result)
# (diferent from getting all sites with the same location name)
name = loc_df.ix[1].name
loc_record = loc_df.ix[name].ix[1]
site_names = loc_record['site_names']
print "All sites belonging to {}:".format(name), loc_record['site_names']
site_names = site_names.split(":")
# fancy indexing
site_container.df.ix[site_names].head()


All sites belonging to Osler Volcanics, Nipigon Strait, Upper Normal: 1:2:3:4:5
Out[23]:
bed_dip bed_dip_direction citations conglomerate_test description dir_dec dir_inc dir_k dir_n_samples dir_polarity ... location lon method_codes result_type site vgp_lat vgp_lon vgp_n_samples new_col num
site
1 22 135 This study 25 None 293.1 34.5 517 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 1.0 None None None None 0.0
1 new_value None new citation None None None None None NaN None ... None None None None NaN None None None new_value 1.0
1 22 None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 1 289.8 43.6 None 1.0 None ... Osler Volcanics, Nipigon Strait, Upper Normal 271.894 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 1.0 31.6516 185.575 1 None 2.0
2 12 140 This study 25 None 290.6 31.9 243 5.0 n ... Osler Volcanics, Nipigon Strait, Upper Normal None DE-K:LP-DC2:FS-FD:FS-H:FS-LOC-GOOGLE:SO-SM None 2.0 None None None None 33.0
2 None None Cox & Doell 1960:McElhinny & McFadden 2000 ND VGP:Site 2 285.7 42 None 1.0 None ... Osler Volcanics, Nipigon Strait, Upper Normal 271.914 DE-DI:FS-LOC-GOOGLE:LP-DC2 i 2.0 28.1101 187.515 1 None 34.0

5 rows × 25 columns

Gotchas

  1. Can't do self.df = self.df.append(blah). Must instead do self.df.loc(blah.name) = blah
  2. Beware chained indexing: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  3. To make a real, independent copy of a DataFrame, use DataFrame.copy()
  4. To update inplace: df.loc[:, 'col_name'] = 'blah'
  5. http://stackoverflow.com/questions/37175007/pandas-dataframe-logic-operations-with-nan

Pandas indexing


In [24]:
# first site
print site_container.df.ix[0][:5]
print '-'
# find site by index value
print site_container.df.ix['new_site'][:5]
print '-'
# return all sites' values for a col
site_container.df['bed_dip'][:5]


bed_dip                      22
bed_dip_direction           135
citations            This study
conglomerate_test            25
description                None
Name: 1, dtype: object
-
bed_dip                                                   other
bed_dip_direction                                          None
citations            Cox & Doell 1960:McElhinny & McFadden 2000
conglomerate_test                                            ND
description                                          VGP:Site 1
Name: new_site, dtype: object
-
Out[24]:
site
1            22
1     new_value
1            22
10           30
10           30
Name: bed_dip, dtype: object

Scratch


In [ ]: