do basic imports and unpack McMurdo data


In [1]:
#from pmagpy import ipmag
#reload(ipmag)
from pmagpy import pmag
from pmagpy import new_builder as nb
from pmagpy import data_model3
reload(data_model3)
import os
import pandas as pd
import numpy as np
from pandas import DataFrame
from pmagpy.new_builder import Contribution
from pmagpy import validate_upload3 as vu3

#import pmagpy.controlled_vocabularies3 as cv

Mess up the Megiddo contribution a bit


In [2]:
dir_path = os.path.join('..', '3_0', 'Megiddo')
con = Contribution(dir_path)

loc_dm = con.tables['locations'].data_model.dm['locations']
loc_df = con.tables['locations'].df
site_dm = con.tables['sites'].data_model.dm['sites']
site_df = con.tables['sites'].df
samp_df = con.tables['samples'].df
samp_dm = con.tables['samples'].data_model.dm['samples']
spec_df = con.tables['specimens'].df
spec_dm = con.tables['specimens'].data_model.dm['specimens']
age_df = con.tables['ages'].df
age_dm = con.tables['ages'].data_model.dm['ages']
meas_df = con.tables['measurements'].df
meas_dm = con.tables['measurements'].data_model.dm['measurements']
cont_df = con.tables['contribution'].df
cont_dm = con.tables['contribution'].data_model.dm['contribution']
crit_df = con.tables['criteria'].df
crit_dm = con.tables['criteria'].data_model.dm['criteria']


current_con = con

# mess up some validations for locations
loc_df.loc['Tel Hazor', 'lat_s'] = 400.
loc_df['dir_inc'] = 5
loc_df.loc['Tel Hazor', 'lat_n'] = 'hello'
loc_df.loc[:, 'lithologies'] = ["Agate:Basalt", "Basalt:random"]
#current_con.tables.pop('sites')

# mess up some validations for sites
site_df.pop('age')
#site_df['dir_tilt_correction'] = 1
site_df.iloc[2, list(site_df.columns).index('dir_tilt_correction')] = 'not a number'
site_df.iloc[0, list(site_df.columns).index('lithologies')] = "Angrite:Basalt"
site_df.iloc[1, list(site_df.columns).index('lithologies')] = "angrite : basalt"

# mess up some validations for ages
age_df.ix[1]['age'] = 'a string'
age_df.ix[1]['site'] = 'fake site'
age_df.ix[1]['age_low'] = 1000000000000.
age_df.pop('citations')

# mess up some validations for samples
samp_df.pop('citations')
samp_df.iloc[0].lon = 600.
samp_df.iloc[0].age = "another string"
samp_df.iloc[0].lat = "stringy"
samp_df.iloc[1].lat = 'hello'
samp_df.iloc[2].specimens = "hz05a2:fake"
samp_df.iloc[3].specimens = "fake : hz05a1"
samp_df.iloc[5].specimens = 'fake_specimen'
samp_df.iloc[7].site = 'fake_site'
samp_df.iloc[0].cooling_rate = 'a string'

# mess up some validations for measurements
meas_df.loc['mgh05a01:LP-PI-TRM1', 'magn_moment'] = 2
meas_df.loc['mgh05a01:LP-PI-TRM1', 'specimen'] = "fake_specimen"
meas_df.pop('experiment')

#current_df.head()
#current_df.head()
#age_df.head()


-I- Getting method codes from earthref.org
-I- Importing controlled vocabularies from https://earthref.org
-I- Importing suggested vocabularies from https://earthref.org
-W- No such file: /Users/nebula/Python/PmagPy/data_files/3_0/Megiddo/images.txt
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:39: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:40: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:41: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[2]:
measurement
mgh05a01:LP-PI-TRM0     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM1     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM2     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM3     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM4     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM5     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM6     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM7     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM8     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM9     mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM10    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM11    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM12    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM13    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM14    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM15    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM16    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM17    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM18    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM19    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM20    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM21    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM22    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM23    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM24    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM25    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM26    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM27    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM28    mgh05a01:LP-PI-TRM
mgh05a01:LP-PI-TRM29    mgh05a01:LP-PI-TRM
                               ...        
mgh03g07:LP-AN-TRM3     mgh03g07:LP-AN-TRM
mgh03g07:LP-AN-TRM4     mgh03g07:LP-AN-TRM
mgh03g07:LP-AN-TRM5     mgh03g07:LP-AN-TRM
mgh03g07:LP-AN-TRM6     mgh03g07:LP-AN-TRM
mgh03g07:LP-AN-TRM7     mgh03g07:LP-AN-TRM
mgh03g07:LP-AN-TRM8     mgh03g07:LP-AN-TRM
mgh03b06:LP-AN-TRM1     mgh03b06:LP-AN-TRM
mgh03b06:LP-AN-TRM2     mgh03b06:LP-AN-TRM
mgh03b06:LP-AN-TRM3     mgh03b06:LP-AN-TRM
mgh03b06:LP-AN-TRM4     mgh03b06:LP-AN-TRM
mgh03b06:LP-AN-TRM5     mgh03b06:LP-AN-TRM
mgh03b06:LP-AN-TRM6     mgh03b06:LP-AN-TRM
mgh03b06:LP-AN-TRM7     mgh03b06:LP-AN-TRM
mgh03b06:LP-AN-TRM8     mgh03b06:LP-AN-TRM
mgh03h07:LP-AN-TRM1     mgh03h07:LP-AN-TRM
mgh03h07:LP-AN-TRM2     mgh03h07:LP-AN-TRM
mgh03h07:LP-AN-TRM3     mgh03h07:LP-AN-TRM
mgh03h07:LP-AN-TRM4     mgh03h07:LP-AN-TRM
mgh03h07:LP-AN-TRM5     mgh03h07:LP-AN-TRM
mgh03h07:LP-AN-TRM6     mgh03h07:LP-AN-TRM
mgh03h07:LP-AN-TRM7     mgh03h07:LP-AN-TRM
mgh03h07:LP-AN-TRM8     mgh03h07:LP-AN-TRM
mgh03h08:LP-AN-TRM1     mgh03h08:LP-AN-TRM
mgh03h08:LP-AN-TRM2     mgh03h08:LP-AN-TRM
mgh03h08:LP-AN-TRM3     mgh03h08:LP-AN-TRM
mgh03h08:LP-AN-TRM4     mgh03h08:LP-AN-TRM
mgh03h08:LP-AN-TRM5     mgh03h08:LP-AN-TRM
mgh03h08:LP-AN-TRM6     mgh03h08:LP-AN-TRM
mgh03h08:LP-AN-TRM7     mgh03h08:LP-AN-TRM
mgh03h08:LP-AN-TRM8     mgh03h08:LP-AN-TRM
Name: experiment, dtype: object

In [3]:
#import pmagpy.controlled_vocabularies3 as cv
#reload(cv)
#vocab = cv.Vocabulary()
#vocabulary, possible_vocabulary = vocab.get_controlled_vocabularies()

In [4]:
## validation functions


# need to add requiredOneInGroup

# check that values pass validation
# validation checks to add:
# sv (suggested vocab)
# requiredOneInGroup
# requiredUnlessSynthetic

Step by step, here is what the validate_table function does


In [5]:
# validate a DataFrame
current_df = vu3.validate_df(loc_df, loc_dm, current_con)
current_df.dropna(how='all', axis=1)


Out[5]:
citations lat_n lat_s location location_type lon_e lon_w dir_inc lithologies presence_pass_age_requiredUnless presence_pass_age_high_requiredUnless presence_pass_age_low_requiredUnless presence_pass_age_unit_required presence_pass_dir_dec_requiredIfGroup presence_pass_dir_tilt_correction_requiredIfGroup presence_pass_geologic_classes_required type_pass_lat_n_Number value_pass_lat_s_checkMax value_pass_lithologies_cv
location
Tel Hazor This study hello 400.000 Tel Hazor Archeological Site 35.568 35.568 5 Agate:Basalt age column is required unless age_low is prese... age_high column is required unless age is pres... age_low column is required unless age is prese... "age_unit" column is required dir_dec column is required if column group Dir... dir_tilt_correction column is required if colu... "geologic_classes" column is required "hello" should be a number 400.0 (lat_s) must be <= 90.0 (90) None
Tel Megiddo This study 32.585 32.585 Tel Megiddo Archeological Site 35.185 35.185 5 Basalt:random age column is required unless age_low is prese... age_high column is required unless age is pres... age_low column is required unless age is prese... "age_unit" column is required dir_dec column is required if column group Dir... dir_tilt_correction column is required if colu... "geologic_classes" column is required None None "random" is not in controlled vocabulary for l...

In [6]:
# get names of all validation column names added to dataframe by validate_df
value_col_names, present_col_names, type_col_names, missing_group_names, validation_col_names = vu3.get_validation_col_names(current_df)
validation_col_names[:10]


Out[6]:
Index([u'presence_pass_age_requiredUnless',
       u'presence_pass_age_high_requiredUnless',
       u'presence_pass_age_low_requiredUnless',
       u'presence_pass_age_unit_required', u'type_pass_citations_List',
       u'presence_pass_citations_requiredUnlessTable',
       u'presence_pass_dir_dec_requiredIfGroup', u'type_pass_dir_inc_Number',
       u'value_pass_dir_inc_checkMin', u'value_pass_dir_inc_checkMax'],
      dtype='object')

In [7]:
# incorrect data type problems
current_df[type_col_names].dropna(how='all', axis=1).head()


Out[7]:
type_pass_lat_n_Number
location
Tel Hazor "hello" should be a number
Tel Megiddo None

In [8]:
# missing column problems
current_df[present_col_names].dropna(how='all', axis=1).head()


Out[8]:
presence_pass_age_requiredUnless presence_pass_age_high_requiredUnless presence_pass_age_low_requiredUnless presence_pass_age_unit_required presence_pass_dir_dec_requiredIfGroup presence_pass_dir_tilt_correction_requiredIfGroup presence_pass_geologic_classes_required
location
Tel Hazor age column is required unless age_low is prese... age_high column is required unless age is pres... age_low column is required unless age is prese... "age_unit" column is required dir_dec column is required if column group Dir... dir_tilt_correction column is required if colu... "geologic_classes" column is required
Tel Megiddo age column is required unless age_low is prese... age_high column is required unless age is pres... age_low column is required unless age is prese... "age_unit" column is required dir_dec column is required if column group Dir... dir_tilt_correction column is required if colu... "geologic_classes" column is required

In [9]:
# value problems:
current_df[value_col_names].dropna(how='all', axis=1).head()


Out[9]:
value_pass_lat_s_checkMax value_pass_lithologies_cv
location
Tel Hazor 400.0 (lat_s) must be <= 90.0 (90) None
Tel Megiddo None "random" is not in controlled vocabulary for l...

In [10]:
# get all failures, row by row (can also print these failures to logfile OR stdout)
failing_items = vu3.get_row_failures(current_df, value_col_names, type_col_names, verbose=False, outfile=None)
failing_items


Out[10]:
num type_pass_lat_n_Number value_pass_lat_s_checkMax value_pass_lithologies_cv issues
location
Tel Hazor 0 "hello" should be a number 400.0 (lat_s) must be <= 90.0 (90) None {u'value_pass_lat_s_checkMax': u'400.0 (lat_s)...
Tel Megiddo 1 None None "random" is not in controlled vocabulary for l... {u'value_pass_lithologies_cv': u'"random" is n...

In [11]:
all_cols = failing_items.columns
cols = failing_items.iloc[0].dropna().index
#print cols.index
all_cols, cols

col_pos = range(len(all_cols))
col_pos = dict(zip(all_cols, col_pos))
[col_pos[col] for col in cols]
#failing_items.iloc[0, failing_items.columns("num")]
list(failing_items.columns).index("num")


Out[11]:
0

In [12]:
# get lists of: all rows with problems, all columns with problems, and all missing columns
bad_rows, bad_cols, missing_cols = vu3.get_bad_rows_and_cols(current_df, validation_col_names, 
                                                        value_col_names, type_col_names, verbose=True)
print "bad rows:", bad_rows
print "bad columns:", bad_cols
print "missing columns:", missing_cols
formatted_rows = ["row: {}, name: {}\n".format(row[0], row[1]) for row in bad_rows]
print "\n".join(formatted_rows)


-W- these rows have problems: row: 0, name: Tel Hazor
row: 1, name: Tel Megiddo
-W- these columns contain bad values: lithologies, lat_n, lat_s
-W- these required columns are missing: age_high, age_low, age, age_unit, dir_dec, dir_tilt_correction, geologic_classes
bad rows: [(0, 'Tel Hazor'), (1, 'Tel Megiddo')]
bad columns: [u'lat_n', u'lat_s', u'lithologies']
missing columns: [u'age_high', u'age_low', u'age', u'age_unit', u'dir_dec', u'dir_tilt_correction', u'geologic_classes']
row: 0, name: Tel Hazor

row: 1, name: Tel Megiddo


In [13]:
# validate table calls ALL of the above functions
vu3.validate_table(con, 'sites', verbose=True)


-I- Validating sites
hz07	2
type	dir_tilt_correction	"not a number" should be a number
mgh12t1	18
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgh12t1	19
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgj06	21
value	age_high	-2800.0 (age_high) must be >= -2600.0 (age_low)
value	age_low	-2600.0 (age_low) must be <= -2800.0 (age_high)
mgk06	22
value	age_high	-1180.0 (age_high) must be >= -1130.0 (age_low)
value	age_low	-1130.0 (age_low) must be <= -1180.0 (age_high)
mgk09t1	26
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgk09t1	27
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgq04t1	30
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgq04t1	31
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgq05t1	34
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgq05t1	35
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgq05t2	37
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
mgq05t2	38
value	criteria	This value: "ACCEPT" is not found in: criteria.criterion
-W- these rows have problems: row: 2, name: hz07
row: 18, name: mgh12t1
row: 19, name: mgh12t1
row: 21, name: mgj06
row: 22, name: mgk06
row: 26, name: mgk09t1
row: 27, name: mgk09t1
row: 30, name: mgq04t1
row: 31, name: mgq04t1
row: 34, name: mgq05t1
row: 35, name: mgq05t1
row: 37, name: mgq05t2
row: 38, name: mgq05t2
-W- these columns contain bad values: age_high, dir_tilt_correction, age_low, criteria
-W- these required columns are missing: result_quality
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/sites_errors.txt
Out[13]:
('sites',
 [(2, 'hz07'),
  (18, 'mgh12t1'),
  (19, 'mgh12t1'),
  (21, 'mgj06'),
  (22, 'mgk06'),
  (26, 'mgk09t1'),
  (27, 'mgk09t1'),
  (30, 'mgq04t1'),
  (31, 'mgq04t1'),
  (34, 'mgq05t1'),
  (35, 'mgq05t1'),
  (37, 'mgq05t2'),
  (38, 'mgq05t2')],
 [u'dir_tilt_correction', u'age_high', u'age_low', u'criteria'],
 [u'result_quality'],
 Index([], dtype='object'),
          num type_pass_dir_tilt_correction_Number  \
 site                                                
 hz07       2    "not a number" should be a number   
 mgh12t1   18                                 None   
 mgh12t1   19                                 None   
 mgj06     21                                 None   
 mgk06     22                                 None   
 mgk09t1   26                                 None   
 mgk09t1   27                                 None   
 mgq04t1   30                                 None   
 mgq04t1   31                                 None   
 mgq05t1   34                                 None   
 mgq05t1   35                                 None   
 mgq05t2   37                                 None   
 mgq05t2   38                                 None   
 
                             value_pass_age_high_checkMin  \
 site                                                       
 hz07                                                None   
 mgh12t1                                             None   
 mgh12t1                                             None   
 mgj06    -2800.0 (age_high) must be >= -2600.0 (age_low)   
 mgk06    -1180.0 (age_high) must be >= -1130.0 (age_low)   
 mgk09t1                                             None   
 mgk09t1                                             None   
 mgq04t1                                             None   
 mgq04t1                                             None   
 mgq05t1                                             None   
 mgq05t1                                             None   
 mgq05t2                                             None   
 mgq05t2                                             None   
 
                              value_pass_age_low_checkMax  \
 site                                                       
 hz07                                                None   
 mgh12t1                                             None   
 mgh12t1                                             None   
 mgj06    -2600.0 (age_low) must be <= -2800.0 (age_high)   
 mgk06    -1130.0 (age_low) must be <= -1180.0 (age_high)   
 mgk09t1                                             None   
 mgk09t1                                             None   
 mgq04t1                                             None   
 mgq04t1                                             None   
 mgq05t1                                             None   
 mgq05t1                                             None   
 mgq05t2                                             None   
 mgq05t2                                             None   
 
                                   value_pass_criteria_isIn  \
 site                                                         
 hz07                                                  None   
 mgh12t1  This value: "ACCEPT" is not found in: criteria...   
 mgh12t1  This value: "ACCEPT" is not found in: criteria...   
 mgj06                                                 None   
 mgk06                                                 None   
 mgk09t1  This value: "ACCEPT" is not found in: criteria...   
 mgk09t1  This value: "ACCEPT" is not found in: criteria...   
 mgq04t1  This value: "ACCEPT" is not found in: criteria...   
 mgq04t1  This value: "ACCEPT" is not found in: criteria...   
 mgq05t1  This value: "ACCEPT" is not found in: criteria...   
 mgq05t1  This value: "ACCEPT" is not found in: criteria...   
 mgq05t2  This value: "ACCEPT" is not found in: criteria...   
 mgq05t2  This value: "ACCEPT" is not found in: criteria...   
 
                                                     issues  
 site                                                        
 hz07     {u'type_pass_dir_tilt_correction_Number': u'"n...  
 mgh12t1  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgh12t1  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgj06    {u'value_pass_age_high_checkMin': u'-2800.0 (a...  
 mgk06    {u'value_pass_age_high_checkMin': u'-1180.0 (a...  
 mgk09t1  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgk09t1  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgq04t1  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgq04t1  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgq05t1  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgq05t1  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgq05t2  {u'value_pass_criteria_isIn': u'This value: "A...  
 mgq05t2  {u'value_pass_criteria_isIn': u'This value: "A...  )

In [14]:
reload(vu3)
from pmagpy import validate_upload3
reload(validate_upload3)
## run through and validate entire contribution (call validate_table on each table)
vu3.validate_contribution(con)


validating measurements
-I- Validating measurements
-W- these rows have problems:
row: 1, name: mgh05a01:LP-PI-TRM1
row: 2, name: mgh05a01:LP-PI-TRM2
row: 3, name: mgh05a01:LP-PI-TRM3
row: 4, name: mgh05a01:LP-PI-TRM4
row: 5, name: mgh05a01:LP-PI-TRM5
row: 6, name: mgh05a01:LP-PI-TRM6
row: 7, name: mgh05a01:LP-PI-TRM7
row: 8, name: mgh05a01:LP-PI-TRM8
row: 9, name: mgh05a01:LP-PI-TRM9
row: 10, name: mgh05a01:LP-PI-TRM10
row: 11, name: mgh05a01:LP-PI-TRM11
row: 12, name: mgh05a01:LP-PI-TRM12
row: 13, name: mgh05a01:LP-PI-TRM13
row: 14, name: mgh05a01:LP-PI-TRM14
row: 15, name: mgh05a01:LP-PI-TRM15
row: 16, name: mgh05a01:LP-PI-TRM16
row: 17, name: mgh05a01:LP-PI-TRM17
row: 18, name: mgh05a01:LP-PI-TRM18
row: 19, name: mgh05a01:LP-PI-TRM19
row: 20, name: mgh05a01:LP-PI-TRM20  ...
(for full error output see error file)
-W- these columns contain bad values: specimen, magn_moment, number
-W- these required columns are missing: experiment, instrument_codes
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/measurements_errors.txt
--
validating ages
-I- Validating ages
-W- these rows have problems: row: 19, name: 19
row: 20, name: 20
-W- these columns contain bad values: age_high, age_low
-W- these required columns are missing: citations
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/ages_errors.txt
--
validating sites
-I- Validating sites
-W- these rows have problems: row: 2, name: hz07
row: 18, name: mgh12t1
row: 19, name: mgh12t1
row: 21, name: mgj06
row: 22, name: mgk06
row: 26, name: mgk09t1
row: 27, name: mgk09t1
row: 30, name: mgq04t1
row: 31, name: mgq04t1
row: 34, name: mgq05t1
row: 35, name: mgq05t1
row: 37, name: mgq05t2
row: 38, name: mgq05t2
-W- these columns contain bad values: age_high, dir_tilt_correction, age_low, criteria
-W- these required columns are missing: result_quality
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/sites_errors.txt
--
validating locations
-I- Validating locations
-W- these rows have problems: row: 0, name: Tel Hazor
row: 1, name: Tel Megiddo
-W- these columns contain bad values: lithologies, lat_n, lat_s
-W- these required columns are missing: age_high, age_low, age, age_unit, dir_dec, dir_tilt_correction, geologic_classes
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/locations_errors.txt
--
validating samples
-I- Validating samples
-W- these rows have problems:
row: 0, name: hz05a
row: 1, name: hz05a
row: 2, name: hz05b
row: 3, name: hz05b
row: 5, name: hz05c
row: 7, name: hz05e
row: 9, name: hz05f
row: 11, name: hz05g
row: 14, name: hz06a
row: 16, name: hz06b
row: 18, name: hz06c
row: 20, name: hz07a
row: 22, name: hz07b
row: 24, name: hz07c
row: 26, name: hz07d
row: 28, name: hz07e
row: 30, name: hz09a
row: 34, name: hz10b
row: 36, name: hz10c
row: 38, name: hz10d  ...
(for full error output see error file)
-W- these columns contain bad values: lon, site, criteria, lat, cooling_rate, specimens
-W- these required columns are missing: citations, orientation_quality, result_quality
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/samples_errors.txt
--
validating criteria
-I- Validating criteria
-I- No row errors found!
--
validating contribution
-I- Validating contribution
-W- these rows have problems: row: 0, name: 0
-W- these columns contain bad values: magic_version
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/contribution_errors.txt
--
validating specimens
-I- Validating specimens
-W- these rows have problems:
row: 0, name: hz05a1
row: 1, name: hz05a1
row: 2, name: hz05a2
row: 3, name: hz05a2
row: 4, name: hz05a3
row: 5, name: hz05a3
row: 6, name: hz05b1
row: 7, name: hz05b1
row: 8, name: hz05b2
row: 9, name: hz05b2
row: 10, name: hz05b3
row: 11, name: hz05b3
row: 12, name: hz05b4
row: 13, name: hz05b4
row: 14, name: hz05b5
row: 15, name: hz05b5
row: 16, name: hz05b6
row: 17, name: hz05b6
row: 18, name: hz05b6
row: 19, name: hz05b7  ...
(for full error output see error file)
-W- these columns contain bad values: aniso_s_n_measurements, experiments
-W- these required columns are missing: aniso_tilt_correction, result_type
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/specimens_errors.txt
--

Upload function, with validations


In [15]:
from pmagpy import ipmag
reload(ipmag)
res = ipmag.upload_magic3(0, dir_path)


-W- No such file: /Users/nebula/Python/PmagPy/data_files/3_0/Megiddo/images.txt
-I- Removing:  ['citation_label', 'compilation', 'calculation_type', 'average_n_lines', 'average_n_planes', 'specimen_grade', 'site_vgp_lat', 'site_vgp_lon', 'direction_type', 'specimen_Z', 'magic_instrument_codes', 'cooling_rate_corr', 'cooling_rate_mcd', 'anisotropy_atrm_alt', 'anisotropy_apar_perc', 'anisotropy_F', 'anisotropy_F_crit', 'specimen_scat', 'specimen_gmax', 'specimen_frac', 'site_vadm', 'site_lon', 'site_vdm', 'site_lat', 'measurement_chi', 'specimen_k_prime', 'specimen_k_prime_sse', 'external_database_names', 'external_database_ids', 'Further Notes', 'Typology', 'Notes (Year/Area/Locus/Level)', 'Site', 'Object Number']
-
-I- locations file successfully read in
-I- Validating locations
-W- these required columns are missing: age_high, age_low, age, age_unit, geologic_classes, lithologies
-I- You are missing some required headers
-I- You are missing these required headers: age_high, age_low, age, age_unit, geologic_classes, lithologies
-I- appending locations data to /Users/nebula/Python/PmagPy/data_files/notebooks/../3_0/Megiddo/upload.txt
-I- locations written to  ../3_0/Megiddo/upload.txt
-
-I- samples file successfully read in
-I- Validating samples
-W- these rows have problems:
row: 1, name: hz05a
row: 3, name: hz05b
row: 5, name: hz05c
row: 7, name: hz05e
row: 9, name: hz05f
row: 11, name: hz05g
row: 14, name: hz06a
row: 16, name: hz06b
row: 18, name: hz06c
row: 20, name: hz07a
row: 22, name: hz07b
row: 24, name: hz07c
row: 26, name: hz07d
row: 28, name: hz07e
row: 30, name: hz09a
row: 34, name: hz10b
row: 36, name: hz10c
row: 38, name: hz10d
row: 40, name: hz11a
row: 42, name: hz11b  ...
(for full error output see error file)
-W- these columns contain bad values: criteria
-W- these required columns are missing: orientation_quality, result_quality
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/samples_errors.txt
-I- appending samples data to /Users/nebula/Python/PmagPy/data_files/notebooks/../3_0/Megiddo/upload.txt
-I- samples written to  ../3_0/Megiddo/upload.txt
-
-I- specimens file successfully read in
-I- Validating specimens
-W- these rows have problems:
row: 0, name: hz05a1
row: 1, name: hz05a1
row: 2, name: hz05a2
row: 3, name: hz05a2
row: 4, name: hz05a3
row: 5, name: hz05a3
row: 6, name: hz05b1
row: 7, name: hz05b1
row: 8, name: hz05b2
row: 9, name: hz05b2
row: 10, name: hz05b3
row: 11, name: hz05b3
row: 12, name: hz05b4
row: 13, name: hz05b4
row: 14, name: hz05b5
row: 15, name: hz05b5
row: 16, name: hz05b6
row: 17, name: hz05b6
row: 18, name: hz05b6
row: 19, name: hz05b7  ...
(for full error output see error file)
-W- these columns contain bad values: aniso_s_n_measurements, experiments
-W- these required columns are missing: aniso_tilt_correction, result_type
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/specimens_errors.txt
-I- appending specimens data to /Users/nebula/Python/PmagPy/data_files/notebooks/../3_0/Megiddo/upload.txt
-I- specimens written to  ../3_0/Megiddo/upload.txt
-
-I- sites file successfully read in
-I- Validating sites
-W- these rows have problems: row: 18, name: mgh12t1
row: 19, name: mgh12t1
row: 21, name: mgj06
row: 22, name: mgk06
row: 26, name: mgk09t1
row: 27, name: mgk09t1
row: 30, name: mgq04t1
row: 31, name: mgq04t1
row: 34, name: mgq05t1
row: 35, name: mgq05t1
row: 37, name: mgq05t2
row: 38, name: mgq05t2
-W- these columns contain bad values: age_high, age_low, criteria
-W- these required columns are missing: result_quality
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/sites_errors.txt
-I- appending sites data to /Users/nebula/Python/PmagPy/data_files/notebooks/../3_0/Megiddo/upload.txt
-I- sites written to  ../3_0/Megiddo/upload.txt
-
-I- ages file successfully read in
-I- Validating ages
-W- these rows have problems: row: 19, name: 19
row: 20, name: 20
-W- these columns contain bad values: age_high, age_low
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/ages_errors.txt
-I- appending ages data to /Users/nebula/Python/PmagPy/data_files/notebooks/../3_0/Megiddo/upload.txt
-I- ages written to  ../3_0/Megiddo/upload.txt
-
-I- measurements file successfully read in
-I- Validating measurements
-W- these rows have problems:
row: 1, name: mgh05a01:LP-PI-TRM1
row: 2, name: mgh05a01:LP-PI-TRM2
row: 3, name: mgh05a01:LP-PI-TRM3
row: 4, name: mgh05a01:LP-PI-TRM4
row: 5, name: mgh05a01:LP-PI-TRM5
row: 6, name: mgh05a01:LP-PI-TRM6
row: 7, name: mgh05a01:LP-PI-TRM7
row: 8, name: mgh05a01:LP-PI-TRM8
row: 9, name: mgh05a01:LP-PI-TRM9
row: 10, name: mgh05a01:LP-PI-TRM10
row: 11, name: mgh05a01:LP-PI-TRM11
row: 12, name: mgh05a01:LP-PI-TRM12
row: 13, name: mgh05a01:LP-PI-TRM13
row: 14, name: mgh05a01:LP-PI-TRM14
row: 15, name: mgh05a01:LP-PI-TRM15
row: 16, name: mgh05a01:LP-PI-TRM16
row: 17, name: mgh05a01:LP-PI-TRM17
row: 18, name: mgh05a01:LP-PI-TRM18
row: 19, name: mgh05a01:LP-PI-TRM19
row: 20, name: mgh05a01:LP-PI-TRM20  ...
(for full error output see error file)
-W- these columns contain bad values: number
-W- these required columns are missing: instrument_codes
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/measurements_errors.txt
-I- appending measurements data to /Users/nebula/Python/PmagPy/data_files/notebooks/../3_0/Megiddo/upload.txt
-I- measurements written to  ../3_0/Megiddo/upload.txt
-
-I- criteria file successfully read in
-I- Validating criteria
-I- No row errors found!
-I- appending criteria data to /Users/nebula/Python/PmagPy/data_files/notebooks/../3_0/Megiddo/upload.txt
-I- criteria written to  ../3_0/Megiddo/upload.txt
-
-I- contribution file successfully read in
-I- Validating contribution
-W- these rows have problems: row: 0, name: 0
-W- these columns contain bad values: magic_version
-I- Complete list of row errors can be found in /Users/nebula/Python/PmagPy/data_files/notebooks/contribution_errors.txt
-I- appending contribution data to /Users/nebula/Python/PmagPy/data_files/notebooks/../3_0/Megiddo/upload.txt
-I- contribution written to  ../3_0/Megiddo/upload.txt
-
-I- No images file found, continuing
Finished preparing upload file: ../3_0/Megiddo/Tel-Hazor_Tel-Megiddo_24.Jan.2017_1.txt 
-W- validation of upload file has failed.
These tables have errors: locations, samples, specimens, sites, ages, measurements, contribution
Please fix above errors and try again.
You may run into problems if you try to upload this file to the MagIC database.

In [16]:
result, error_message, failing_tables, failing_items = res
print result, error_message, failing_tables
print failing_items['sites']['missing_columns']
failing_items['sites']['rows']


False file validation has failed.  You may run into problems if you try to upload this file. ['locations', 'samples', 'specimens', 'sites', 'ages', 'measurements', 'contribution']
[u'result_quality']
Out[16]:
num value_pass_age_high_checkMin value_pass_age_low_checkMax value_pass_criteria_isIn issues
site
mgh12t1 18 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgh12t1 19 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgj06 21 -2800.0 (age_high) must be >= -2600.0 (age_low) -2600.0 (age_low) must be <= -2800.0 (age_high) None {u'value_pass_age_high_checkMin': u'-2800.0 (a...
mgk06 22 -1180.0 (age_high) must be >= -1130.0 (age_low) -1130.0 (age_low) must be <= -1180.0 (age_high) None {u'value_pass_age_high_checkMin': u'-1180.0 (a...
mgk09t1 26 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgk09t1 27 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgq04t1 30 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgq04t1 31 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgq05t1 34 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgq05t1 35 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgq05t2 37 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...
mgq05t2 38 None None This value: "ACCEPT" is not found in: criteria... {u'value_pass_criteria_isIn': u'This value: "A...

Scratch


In [17]:
dir_path = os.path.join("..", "3_0", "McMurdo")
con = Contribution(dir_path)

In [18]:
# trying to work with requiredOneInGroup

reload(vu3)
dtype = "ages"
df = con.tables[dtype].df
dm = con.tables[dtype].data_model.dm[dtype]

df = df.drop(["location", "site"], axis=1)

#current_df = vu3.validate_df(loc_df, loc_dm, current_con)
val_df = vu3.validate_df(df, dm, con)


value_col_names, present_col_names, type_col_names, missing_group_names, validation_col_names = vu3.get_validation_col_names(df)
val_df[validation_col_names].dropna(how="all", axis=1)
#val_df[validation_col_names]

#val_df[missing_group_names]
val_df[missing_group_names].head()

#missing_group_names


Out[18]:
group_pass_Names
0 you must have one column from group Names: loc...
1 you must have one column from group Names: loc...
2 you must have one column from group Names: loc...
3 you must have one column from group Names: loc...
4 you must have one column from group Names: loc...

In [19]:
reload(vu3)
if "location" in con.tables['ages'].df.columns:
    con.tables['ages'].df.drop(["location", "site"], axis=1, inplace=True)
con.tables['ages'].df.columns
vu3.validate_table(con, "ages")
con.tables['locations'].df


-I- Validating ages
-I- You are missing some required headers
-I- You need at least one header from these groups: Names
Out[19]:
age age_sigma age_unit analysts citations continent_ocean criteria description dir_alpha95 dir_dec ... pole_lon pole_n_sites pole_reversed_perc region result_name result_type reversal_test sites software_packages tectonic_settings
location
McMurdo None None None None Tauxe et al. 2004 : This study:Mankinen & Cox ... Antarctica None None None None ... None None None Antarctic Plate None None None None None Continental Volcanics
McMurdo 2.562 2.8348 Ma Kristin Lawrence This study None NPOLE:DE-SITE Mean of normal directions. 3.3 16.3 ... 214.5 75 0 None Normal Pole a None mc03 : mc04 : mc06 : mc07 : mc08 : mc09 : mc10... pmagpy-2.60 None
McMurdo 2.554 1.8641 Ma Kristin Lawrence This study None RPOLE:DE-SITE Mean of reverse directions. 4.1 186.8 ... 2.7 46 100 None Reverse pole a None mc102 : mc103 : mc105 : mc109 : mc110 : mc112 ... pmagpy-2.60 None
McMurdo 2.5589 2.5017 Ma Kristin Lawrence This study None DE-SITE Grand Mean of all directions. 2.5 12.1 ... 197.5 121 38 None Grand Mean pole a + mc02 : mc03 : mc04 : mc06 : mc07 : mc08 : mc09... pmagpy-2.60 None
McMurdo 1.8502 1.5241 Ma Kristin Lawrence This study None IE-SITE Average of all V [A] DMs. None None ... None None None None Average V[A]DM a None mc09 : mc105 : mc109 : mc111 : mc113 : mc115 :... pmagpy-2.60 None

5 rows × 47 columns

Filling in an existing dataframe


In [20]:
# keep all of df1, add in any extra from df2
df1 = pd.DataFrame(np.random.randint(1, 10, (3, 5)), columns=['one', 'two', 'three', 'four', 'five'])
df1.iloc[0, 1] = np.nan
df1.iloc[2, 2] = np.nan
df2 = pd.DataFrame(np.random.randint(1, 10, (3, 5)), columns=['one', 'three', 'five', 'seven', 'nine'])
df1


Out[20]:
one two three four five
0 4 NaN 6.0 6 3
1 8 5.0 7.0 9 1
2 9 5.0 NaN 8 7

In [21]:
df2


Out[21]:
one three five seven nine
0 3 2 3 6 7
1 5 1 3 7 3
2 4 9 9 5 8

In [22]:
unique_df2_cols = df2.columns.difference(df1.columns)
unique_df2 = df2[unique_df2_cols]

# this adds in all the unique columns that weren't in df1
concat_df = pd.concat([df1, unique_df2], axis=1)
# fills in null values in df1 with values from df2
concat_df.fillna(df2)


Out[22]:
one two three four five nine seven
0 4 NaN 6.0 6 3 7 6
1 8 5.0 7.0 9 1 3 7
2 9 5.0 9.0 8 7 8 5

In [23]:
! rm *_errors.txt
! rm ./3_0/McMurdo/McMurdo*.txt
#! rm ./3_0/Megiddo/Tel-Hazor*.txt


rm: ./3_0/McMurdo/McMurdo*.txt: No such file or directory