This notebook demonstrates how to use the Python MagicDataFrame object. A MagicDataFrame contains the data from one MagIC-format table and provides functionality for accessing and editing that data.
In [1]:
from pmagpy import new_builder as nb
from pmagpy import ipmag
import os
import json
import numpy as np
import sys
import pandas as pd
from pandas import DataFrame
from pmagpy import pmag
# Relative path of the folder holding the example Osler MagIC tables.
osler_path_parts = ("..", "3_0", "Osler")
working_dir = os.path.join(*osler_path_parts)
In [2]:
# Each MagicDataFrame corresponds to one MagIC table: it wraps a pandas
# DataFrame plus assorted methods for manipulating that DataFrame.
# Construct it with either a magic_file name or a dtype; a list of columns
# is optional and is only used when no magic_file is given.
fname = os.path.join(working_dir, 'sites.txt')
# the MagicDataFrame object:
site_container = nb.MagicDataFrame(magic_file=fname)
# the actual pandas DataFrame:
site_df = site_container.df
# Note on swapping NaN for None in the table: fillna(value=None),
# replace(np.nan, None) and boolean-mask assignment were all tried and failed.
# The form that worked is:
#     site_df.where(site_df.notnull(), None)
# show the first 5 site records
site_df.head()
Out[2]:
In [ ]:
In [3]:
# make an empty MagicDataFrame with 'Age' and 'Metadata' headers
# (no file is read here: the table type is given by dtype instead)
# the MagicDataFrame object:
site_container = nb.MagicDataFrame(dtype='sites', groups=['Age', 'Metadata'])
# the actual pandas DataFrame:
site_df = site_container.df
# show the (empty) dataframe
site_df
Out[3]:
In [4]:
# Re-read the Osler sites table so the following cells work on real data
# (the previous cell replaced site_container with an empty table).
fname = os.path.join(working_dir, 'sites.txt')
# the MagicDataFrame object:
site_container = nb.MagicDataFrame(fname)
# the actual pandas DataFrame:
site_df = site_container.df
In [5]:
# all sites with site_name (index) of '1'
# will return a smaller DataFrame (or a Series if there is only 1 row with that index)
# .loc is the label-based indexer; the deprecated .ix is no longer used
site_container.df.loc['1']
Out[5]:
In [6]:
# positional lookup with an integer always yields a single record (a Series);
# position 1 is the second record
second_record = site_container.df.iloc[1]
second_record
Out[6]:
In [7]:
# keep only the sites whose 'description' column is not null
has_description = site_container.df['description'].notnull()
site_container.df.loc[has_description].head()
Out[7]:
In [8]:
# list every site that shares the first record's 'location' value
name = site_df.iloc[0]['location']
same_location = site_df['location'] == name
site_df.loc[same_location, ['location']]
Out[8]:
In [9]:
# grab out declinations & inclinations
# get di block, providing the index (slicing the dataframe will be done in the function)
# print(...) with a single argument behaves the same on Python 2 and Python 3
print(site_container.get_di_block(do_index=True, item_names=['1', '2'], tilt_corr='100'))
# get di block, providing a slice of the DataFrame
print(site_container.get_di_block(site_container.df.loc[['1', '2']]))
In [10]:
# Sites that carry the 'DE-K' method code
# (the displayed pandas Series pairs each site name with its method codes)
records_with_code = site_container.get_records_for_code('DE-K', incl=True)
records_with_code['method_codes'].head()
Out[10]:
In [11]:
# Sites that do NOT carry the 'DE-K' method code
records_without_code = site_container.get_records_for_code('DE-K', incl=False)
records_without_code['method_codes'].head()
Out[11]:
In [12]:
# give every row indexed '1' a 'bed_dip' of '22'
# (assignment through .loc modifies site_df in place)
site_label = '1'
site_df.loc[site_label, 'bed_dip'] = '22'
site_df.loc[site_label]
Out[12]:
In [13]:
# wherever 'conglomerate_test' is null, put 25; existing values are kept as they are
congl_test = site_container.df['conglomerate_test']
site_container.df['conglomerate_test'] = np.where(congl_test.isnull(), 25, congl_test)
site_container.df.head()
Out[13]:
In [14]:
# update_row (from new_builder) changes one row, addressed by row number;
# here row number 1 receives a new 'bed_dip' and a value for 'new_col'
site_container.update_row(1, {"bed_dip": "new_value", "new_col": "new_value"})
site_df[["bed_dip", "new_col", "site"]].head()
Out[14]:
In [15]:
# first five rows, restricted to three columns of interest
site_df[['site', 'new_col', 'citations']].head()
Out[15]:
In [16]:
# update_record (from new_builder), as described by the original notes:
#   - finds the self.df row based on a condition
#   - updates that row with new_data
#   - deletes any other rows that also meet that condition
site_name = "1"
col_val = "new_value"
# data to add:
new_data = {"citations": "new citation"}
# build the row-selecting condition from two boolean masks
# NOTE(review): str.contains is a substring match, so "1" also matches
# labels such as "10" or "21" -- confirm that is intended
name_matches = site_df.index.str.contains(site_name) == True
col_matches = site_df['new_col'] == col_val
# update record
site_container.update_record(site_name, new_data, name_matches & col_matches)
site_df[["citations", "new_col"]].head()
Out[16]:
In [17]:
# initialize a new site with a name but no values, add it to site table
site_container.add_blank_row('blank_site')
# show the end of the table, where the new row is expected to appear
site_container.df.tail()
Out[17]:
In [18]:
# copy a site from the site DataFrame,
# change a few values,
# then add the new site to the site DataFrame
# take the third row by position; .copy() guarantees the edits below
# never write through to the original row
new_site = site_container.df.iloc[2].copy()
new_site['bed_dip'] = "other"
new_site.name = 'new_site'
# add the row in place under its new index label
# (DataFrame.append is avoided: it builds a new frame and was removed in pandas 2.0)
site_container.df.loc[new_site.name] = new_site
site_container.df.tail()
Out[18]:
In [19]:
# remove a row
site_container.delete_row(3)
# this deletes the 4th row
# display the container's current DataFrame rather than the older site_df alias,
# which no longer refers to it once site_container.df has been reassigned
site_container.df.head()
Out[19]:
In [20]:
# build a view of the table without the rows indexed "1" or "2"
# (drop returns a new DataFrame; site_container.df itself is not modified)
site_container.df.drop(["1", "2"]).head()
Out[20]:
In [21]:
# column names used to build the fake specimen records below
cols = [
    'analyst_names', 'aniso_ftest', 'aniso_ftest12', 'aniso_ftest23', 'aniso_s',
    'aniso_s_mean', 'aniso_s_n_measurements', 'aniso_s_sigma', 'aniso_s_unit',
    'aniso_tilt_correction', 'aniso_type', 'aniso_v1', 'aniso_v2', 'aniso_v3',
    'citations', 'description', 'dir_alpha95', 'dir_comp_name', 'dir_dec', 'dir_inc',
    'dir_mad_free', 'dir_n_measurements', 'dir_tilt_correction', 'experiment_names',
    'geologic_classes', 'geologic_types', 'hyst_bc', 'hyst_bcr', 'hyst_mr_moment',
    'hyst_ms_moment', 'int_abs', 'int_b', 'int_b_beta', 'int_b_sigma', 'int_corr',
    'int_dang', 'int_drats', 'int_f', 'int_fvds', 'int_gamma', 'int_mad_free',
    'int_md', 'int_n_measurements', 'int_n_ptrm', 'int_q', 'int_rsc',
    'int_treat_dc_field', 'lithologies', 'meas_step_max', 'meas_step_min',
    'meas_step_unit', 'method_codes', 'sample_name', 'software_packages',
    'specimen_name',
]
dtype = 'specimens'
# create an empty 'specimens' MagicDataFrame
# NOTE(review): columns=None is passed, so `cols` is not handed to the
# constructor -- confirm this is intended
data_container = nb.MagicDataFrame(dtype=dtype, columns=None)
# create fake specimen data
fake_data = {col: 1 for col in cols}
# include a new column name in the data
fake_data['new_one'] = '999'
# add one row of specimen data (any additional column headers will be added automatically)
data_container.add_row('name', fake_data)
# add another row
fake_data['other'] = 'cheese'
fake_data.pop('aniso_ftest')
data_container.add_row('name2', fake_data)
# now the dataframe has two new columns, 'new_one' and 'other'
# (read it from the container so the display reflects the rows just added)
data_container.df
Out[21]:
In [22]:
# get location DataFrame
# (locations.txt sits in the same Osler folder as the sites table)
fname = os.path.join(working_dir, 'locations.txt')
loc_container = nb.MagicDataFrame(fname)
loc_df = loc_container.df
loc_df.head()
Out[22]:
In [23]:
# get all sites belonging to a particular location RECORD (i.e., what used to be a result)
# (different from getting all sites with the same location name)
# index label of the second location row (positional lookup)
name = loc_df.iloc[1].name
# select by that label, then take the second entry of the selection
loc_record = loc_df.loc[name].iloc[1]
site_names = loc_record['site_names']
# a single formatted string prints identically on Python 2 and Python 3
print("All sites belonging to {}: {}".format(name, site_names))
# site_names is a colon-delimited string; split it into a list of index labels
site_names = site_names.split(":")
# fancy indexing: select all of those sites by label
site_container.df.loc[site_names].head()
Out[23]:
Note: do not use `self.df = self.df.append(blah)`.
Instead, assign the row in place: `self.df.loc[blah.name] = blah`
In [24]:
# first site (by position), first five fields
print(site_container.df.iloc[0].iloc[:5])
print('-')
# find site by index value
print(site_container.df.loc['new_site'].iloc[:5])
print('-')
# return all sites' values for a col (first five shown)
site_container.df['bed_dip'].head()
Out[24]:
In [ ]: