In [1]:
import itertools
import os
import numpy as np
import pandas as pd



# Get the root_path for this jupyter notebook repo.
repo_path = os.path.dirname(os.path.abspath(os.getcwd()))

path_files_locus_index = os.path.join(
    repo_path, 'files', 'tell-dor', 'tell-dor-area-g-locus-image-index.csv'
)

# Path to the Tell Dor file metadata CSV
path_files = os.path.join(
    repo_path, 'files', 'tell-dor', 'tell-dor-files.csv'
)
# Path to the Tell Dor locus metadata CSV 
path_loci = os.path.join(
    repo_path, 'files', 'tell-dor', 'tell-dor-loci.csv'
)
# Output path for associations between the files and the loci.
path_files_contexts = os.path.join(
    repo_path, 'files', 'tell-dor', 'tell-dor-files-contexts.csv'
)


# Read the file - locus index supplied by the Tell Dor team.
fl_df = pd.read_csv(path_files_locus_index)

# Read the file metadata CSV into dataframe f_df.
f_df = pd.read_csv(path_files)

# Read the locus (and wall) CSV into dataframe l_df.
l_df = pd.read_csv(path_loci)

fl_df['Locus_Wall'] = fl_df['Locus_Wall'].astype(str) 
fl_df['Locus ID'] = np.nan
for i, row in fl_df.iterrows():
    wall_id = 'Wall ' + row['Locus_Wall']
    locus_id = 'Locus ' + row['Locus_Wall']
    print('Look for {} or {}'.format(wall_id, locus_id))
    id_indx = ((l_df['Locus ID']==wall_id)|(l_df['Locus ID']==locus_id))
    if l_df[id_indx].empty:
        continue
    up_indx = (fl_df['Locus_Wall'] == row['Locus_Wall'])
    fl_df.loc[up_indx, 'Locus ID'] = l_df[id_indx]['Locus ID'].iloc[0]
    print('Update {} with {}'.format(row['Locus_Wall'], l_df[id_indx]['Locus ID'].iloc[0]))

fl_df.to_csv(path_files_locus_index, index=False)



# Set up a dict for File and Locus (and Wall) associations.
file_locus_data = {
    'File ID':[], 
    'Locus ID': [],
}

# Set up a dict for File and Area associations.
# NOTE: An "Area" is an aggregation of multiple squares in the locus/wall
# datafile. Eric grouped these to make search / browsing easier. They
# don't really have any purpose or value for interpretation.
file_square_data = {
    'File ID':[], 
    'Area': [],
}


def add_to_file_context_data(
    file_ids, 
    context_ids,  
    data,
    context_id_col='Locus ID'
):
    """Adds records of file and context associations to a data dict"""
    if not isinstance(context_ids, list):
        context_ids = [context_ids]
    # Get the cross product of all the file_ids and the
    # context_ids
    crossprod = list(itertools.product(file_ids, context_ids))
    data['File ID'] += [c[0] for c in crossprod]
    data[context_id_col] += [c[1] for c in crossprod]
    return data


Look for Wall 18839 or Locus 18839
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18308 or Locus 18308
Update 18308 with Locus 18308
Look for Wall 9729 or Locus 9729
Update 9729 with Wall 9729
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 9684 or Locus 9684
Update 9684 with Wall 9684
Look for Wall 9626 or Locus 9626
Update 9626 with Wall 9626
Look for Wall 9278 or Locus 9278
Update 9278 with Wall 9278
Look for Wall 9704 or Locus 9704
Update 9704 with Wall 9704
Look for Wall 9857 or Locus 9857
Update 9857 with Locus 9857
Look for Wall 18047 or Locus 18047
Update 18047 with Locus 18047
Look for Wall 18033 or Locus 18033
Update 18033 with Locus 18033
Look for Wall 18297 or Locus 18297
Update 18297 with Locus 18297
Look for Wall 18298 or Locus 18298
Update 18298 with Locus 18298
Look for Wall 18275 or Locus 18275
Update 18275 with Locus 18275
Look for Wall 18243 or Locus 18243
Update 18243 with Locus 18243
Look for Wall 9762 or Locus 9762
Update 9762 with Locus 9762
Look for Wall 9704 or Locus 9704
Update 9704 with Wall 9704
Look for Wall 9779 or Locus 9779
Update 9779 with Locus 9779
Look for Wall 9766 or Locus 9766
Update 9766 with Locus 9766
Look for Wall 9982 or Locus 9982
Update 9982 with Locus 9982
Look for Wall 18073 or Locus 18073
Update 18073 with Locus 18073
Look for Wall 18058 or Locus 18058
Update 18058 with Locus 18058
Look for Wall 9679 or Locus 9679
Update 9679 with Locus 9679
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9915 or Locus 9915
Update 9915 with Wall 9915
Look for Wall 9909 or Locus 9909
Update 9909 with Wall 9909
Look for Wall 9362 or Locus 9362
Update 9362 with Locus 9362
Look for Wall 9140 or Locus 9140
Look for Wall 9909 or Locus 9909
Update 9909 with Wall 9909
Look for Wall 18242 or Locus 18242
Update 18242 with Locus 18242
Look for Wall 18029 or Locus 18029
Update 18029 with Locus 18029
Look for Wall 18041 or Locus 18041
Update 18041 with Locus 18041
Look for Wall 18042 or Locus 18042
Update 18042 with Locus 18042
Look for Wall 18049 or Locus 18049
Update 18049 with Locus 18049
Look for Wall 18239 or Locus 18239
Update 18239 with Locus 18239
Look for Wall 18025 or Locus 18025
Update 18025 with Locus 18025
Look for Wall 9140 or Locus 9140
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 9626 or Locus 9626
Update 9626 with Wall 9626
Look for Wall 9275 or Locus 9275
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18068 or Locus 18068
Update 18068 with Locus 18068
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 9065 or Locus 9065
Update 9065 with Wall 9065
Look for Wall 9400 or Locus 9400
Update 9400 with Wall 9400
Look for Wall 9168 or Locus 9168
Update 9168 with Locus 9168
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9914 or Locus 9914
Update 9914 with Wall 9914
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9914 or Locus 9914
Update 9914 with Wall 9914
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9140 or Locus 9140
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18286 or Locus 18286
Update 18286 with Locus 18286
Look for Wall 18308 or Locus 18308
Update 18308 with Locus 18308
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18273 or Locus 18273
Update 18273 with Locus 18273
Look for Wall 18320 or Locus 18320
Update 18320 with Locus 18320
Look for Wall 18341 or Locus 18341
Update 18341 with Locus 18341
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18349 or Locus 18349
Update 18349 with Wall 18349
Look for Wall 18313 or Locus 18313
Update 18313 with Locus 18313
Look for Wall 9729 or Locus 9729
Update 9729 with Wall 9729
Look for Wall 9845 or Locus 9845
Update 9845 with Wall 9845
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 18577 or Locus 18577
Update 18577 with Wall 18577
Look for Wall 18552 or Locus 18552
Update 18552 with Locus 18552
Look for Wall 9636 or Locus 9636
Update 9636 with Wall 9636
Look for Wall 9653 or Locus 9653
Update 9653 with Locus 9653
Look for Wall 9636 or Locus 9636
Update 9636 with Wall 9636
Look for Wall 18363 or Locus 18363
Update 18363 with Locus 18363
Look for Wall 18362 or Locus 18362
Update 18362 with Locus 18362
Look for Wall 18205 or Locus 18205
Update 18205 with Locus 18205
Look for Wall 18200 or Locus 18200
Update 18200 with Locus 18200
Look for Wall 9661 or Locus 9661
Update 9661 with Locus 9661
Look for Wall 9850 or Locus 9850
Update 9850 with Locus 9850
Look for Wall 9724 or Locus 9724
Update 9724 with Locus 9724
Look for Wall 9913 or Locus 9913
Update 9913 with Locus 9913
Look for Wall 9895 or Locus 9895
Update 9895 with Locus 9895
Look for Wall 9882 or Locus 9882
Update 9882 with Locus 9882
Look for Wall 18300 or Locus 18300
Update 18300 with Wall 18300
Look for Wall 18002 or Locus 18002
Update 18002 with Locus 18002
Look for Wall 9715 or Locus 9715
Update 9715 with Wall 9715
Look for Wall 18002 or Locus 18002
Update 18002 with Locus 18002
Look for Wall 9175 or Locus 9175
Update 9175 with Locus 9175
Look for Wall 9664 or Locus 9664
Update 9664 with Locus 9664
Look for Wall 9963 or Locus 9963
Update 9963 with Wall 9963
Look for Wall 18047 or Locus 18047
Update 18047 with Locus 18047
Look for Wall 9800 or Locus 9800
Update 9800 with Wall 9800
Look for Wall 18003 or Locus 18003
Update 18003 with Locus 18003
Look for Wall 18033 or Locus 18033
Update 18033 with Locus 18033
Look for Wall 18047 or Locus 18047
Update 18047 with Locus 18047
Look for Wall 18265 or Locus 18265
Update 18265 with Locus 18265
Look for Wall 18033 or Locus 18033
Update 18033 with Locus 18033
Look for Wall 18033 or Locus 18033
Update 18033 with Locus 18033
Look for Wall 18315 or Locus 18315
Update 18315 with Wall 18315
Look for Wall 9800 or Locus 9800
Update 9800 with Wall 9800
Look for Wall 18076 or Locus 18076
Update 18076 with Locus 18076
Look for Wall 18342 or Locus 18342
Update 18342 with Locus 18342
Look for Wall 18330 or Locus 18330
Update 18330 with Locus 18330
Look for Wall 18343 or Locus 18343
Update 18343 with Locus 18343
Look for Wall 18315 or Locus 18315
Update 18315 with Wall 18315
Look for Wall 18353 or Locus 18353
Update 18353 with Locus 18353
Look for Wall 18296 or Locus 18296
Update 18296 with Wall 18296
Look for Wall 18351 or Locus 18351
Update 18351 with Locus 18351
Look for Wall 18414 or Locus 18414
Update 18414 with Locus 18414
Look for Wall 18339 or Locus 18339
Update 18339 with Locus 18339
Look for Wall 18414 or Locus 18414
Update 18414 with Locus 18414
Look for Wall 18339 or Locus 18339
Update 18339 with Locus 18339
Look for Wall 9626 or Locus 9626
Update 9626 with Wall 9626
Look for Wall 9278 or Locus 9278
Update 9278 with Wall 9278
Look for Wall 9704 or Locus 9704
Update 9704 with Wall 9704
Look for Wall 9857 or Locus 9857
Update 9857 with Locus 9857
Look for Wall 9729 or Locus 9729
Update 9729 with Wall 9729
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 9684 or Locus 9684
Update 9684 with Wall 9684
Look for Wall 9660 or Locus 9660
Update 9660 with Locus 9660
Look for Wall 9673 or Locus 9673
Update 9673 with Locus 9673
Look for Wall 9678 or Locus 9678
Update 9678 with Locus 9678
Look for Wall 9690 or Locus 9690
Update 9690 with Locus 9690
Look for Wall 9640 or Locus 9640
Update 9640 with Locus 9640
Look for Wall 9623 or Locus 9623
Update 9623 with Locus 9623
Look for Wall 9625 or Locus 9625
Update 9625 with Locus 9625
Look for Wall 9624 or Locus 9624
Update 9624 with Locus 9624
Look for Wall 9622 or Locus 9622
Update 9622 with Locus 9622
Look for Wall 9626 or Locus 9626
Update 9626 with Wall 9626
Look for Wall 9712 or Locus 9712
Update 9712 with Locus 9712
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9814 or Locus 9814
Update 9814 with Locus 9814
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9814 or Locus 9814
Update 9814 with Locus 9814
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9824 or Locus 9824
Update 9824 with Locus 9824
Look for Wall 9300 or Locus 9300
Update 9300 with Locus 9300
Look for Wall 9704 or Locus 9704
Update 9704 with Wall 9704
Look for Wall 9703 or Locus 9703
Update 9703 with Locus 9703
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9813 or Locus 9813
Update 9813 with Locus 9813
Look for Wall 9887 or Locus 9887
Update 9887 with Wall 9887
Look for Wall 9888 or Locus 9888
Update 9888 with Wall 9888
Look for Wall 9879 or Locus 9879
Update 9879 with Locus 9879
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 18035 or Locus 18035
Update 18035 with Locus 18035
Look for Wall 9140 or Locus 9140
Look for Wall 9953 or Locus 9953
Update 9953 with Locus 9953
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9626 or Locus 9626
Update 9626 with Wall 9626
Look for Wall 9704 or Locus 9704
Update 9704 with Wall 9704
Look for Wall 9834 or Locus 9834
Update 9834 with Locus 9834
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9824 or Locus 9824
Update 9824 with Locus 9824
Look for Wall 9727 or Locus 9727
Update 9727 with Locus 9727
Look for Wall 9736 or Locus 9736
Update 9736 with Locus 9736
Look for Wall 9204 or Locus 9204
Update 9204 with Locus 9204
Look for Wall 18042 or Locus 18042
Update 18042 with Locus 18042
Look for Wall 9727 or Locus 9727
Update 9727 with Locus 9727
Look for Wall 18042 or Locus 18042
Update 18042 with Locus 18042
Look for Wall 18037 or Locus 18037
Update 18037 with Locus 18037
Look for Wall 9727 or Locus 9727
Update 9727 with Locus 9727
Look for Wall 9626 or Locus 9626
Update 9626 with Wall 9626
Look for Wall 9628 or Locus 9628
Update 9628 with Locus 9628
Look for Wall 18042 or Locus 18042
Update 18042 with Locus 18042
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9832 or Locus 9832
Update 9832 with Locus 9832
Look for Wall 9762 or Locus 9762
Update 9762 with Locus 9762
Look for Wall 9704 or Locus 9704
Update 9704 with Wall 9704
Look for Wall 9779 or Locus 9779
Update 9779 with Locus 9779
Look for Wall 9766 or Locus 9766
Update 9766 with Locus 9766
Look for Wall 18061 or Locus 18061
Update 18061 with Locus 18061
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 9140 or Locus 9140
Look for Wall 9795 or Locus 9795
Update 9795 with Locus 9795
Look for Wall 18058 or Locus 18058
Update 18058 with Locus 18058
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 18068 or Locus 18068
Update 18068 with Locus 18068
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 9804 or Locus 9804
Update 9804 with Locus 9804
Look for Wall 9812 or Locus 9812
Update 9812 with Locus 9812
Look for Wall 9981 or Locus 9981
Update 9981 with Locus 9981
Look for Wall 9728 or Locus 9728
Update 9728 with Wall 9728
Look for Wall 9626 or Locus 9626
Update 9626 with Wall 9626
Look for Wall 9795 or Locus 9795
Update 9795 with Locus 9795
Look for Wall 9800 or Locus 9800
Update 9800 with Wall 9800
Look for Wall 9804 or Locus 9804
Update 9804 with Locus 9804
Look for Wall 9795 or Locus 9795
Update 9795 with Locus 9795
Look for Wall 9982 or Locus 9982
Update 9982 with Locus 9982
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18308 or Locus 18308
Update 18308 with Locus 18308
Look for Wall 18289 or Locus 18289
Update 18289 with Locus 18289
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 18355 or Locus 18355
Update 18355 with Locus 18355
Look for Wall 18029 or Locus 18029
Update 18029 with Locus 18029
Look for Wall 18358 or Locus 18358
Update 18358 with Locus 18358
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18350 or Locus 18350
Update 18350 with Locus 18350
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18286 or Locus 18286
Update 18286 with Locus 18286
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 9909 or Locus 9909
Update 9909 with Wall 9909
Look for Wall 9915 or Locus 9915
Update 9915 with Wall 9915
Look for Wall 18228 or Locus 18228
Update 18228 with Locus 18228
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 18221 or Locus 18221
Update 18221 with Locus 18221
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18216 or Locus 18216
Update 18216 with Locus 18216
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18243 or Locus 18243
Update 18243 with Locus 18243
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18244 or Locus 18244
Update 18244 with Locus 18244
Look for Wall 18297 or Locus 18297
Update 18297 with Locus 18297
Look for Wall 18298 or Locus 18298
Update 18298 with Locus 18298
Look for Wall 18275 or Locus 18275
Update 18275 with Locus 18275
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18308 or Locus 18308
Update 18308 with Locus 18308
Look for Wall 18251 or Locus 18251
Update 18251 with Locus 18251
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18374 or Locus 18374
Update 18374 with Locus 18374
Look for Wall 18380 or Locus 18380
Update 18380 with Locus 18380
Look for Wall 18308 or Locus 18308
Update 18308 with Locus 18308
Look for Wall 18323 or Locus 18323
Update 18323 with Locus 18323
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18312 or Locus 18312
Update 18312 with Locus 18312
Look for Wall 18336 or Locus 18336
Update 18336 with Locus 18336
Look for Wall 18333 or Locus 18333
Update 18333 with Locus 18333
Look for Wall 18308 or Locus 18308
Update 18308 with Locus 18308
Look for Wall 18322 or Locus 18322
Update 18322 with Locus 18322
Look for Wall 18324 or Locus 18324
Update 18324 with Locus 18324
Look for Wall 18296 or Locus 18296
Update 18296 with Wall 18296
Look for Wall 18327 or Locus 18327
Update 18327 with Locus 18327
Look for Wall 18297 or Locus 18297
Update 18297 with Locus 18297
Look for Wall 18298 or Locus 18298
Update 18298 with Locus 18298
Look for Wall 18275 or Locus 18275
Update 18275 with Locus 18275
Look for Wall 18320 or Locus 18320
Update 18320 with Locus 18320
Look for Wall 18341 or Locus 18341
Update 18341 with Locus 18341
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18349 or Locus 18349
Update 18349 with Wall 18349
Look for Wall 18313 or Locus 18313
Update 18313 with Locus 18313
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18347 or Locus 18347
Update 18347 with Locus 18347
Look for Wall 18356 or Locus 18356
Update 18356 with Locus 18356
Look for Wall 18337 or Locus 18337
Update 18337 with Locus 18337
Look for Wall 18286 or Locus 18286
Update 18286 with Locus 18286
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18500 or Locus 18500
Update 18500 with Locus 18500
Look for Wall 9179 or Locus 9179
Update 9179 with Locus 9179
Look for Wall 9180 or Locus 9180
Update 9180 with Wall 9180
Look for Wall 9212 or Locus 9212
Update 9212 with Wall 9212
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9408 or Locus 9408
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 18010 or Locus 18010
Look for Wall 9408 or Locus 9408
Look for Wall 9216 or Locus 9216
Update 9216 with Wall 9216
Look for Wall 9548 or Locus 9548
Update 9548 with Locus 9548
Look for Wall 9408 or Locus 9408
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9408 or Locus 9408
Look for Wall 18004 or Locus 18004
Update 18004 with Locus 18004
Look for Wall 18010 or Locus 18010
Look for Wall 9904 or Locus 9904
Update 9904 with Wall 9904
Look for Wall 18065 or Locus 18065
Update 18065 with Locus 18065
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9904 or Locus 9904
Update 9904 with Wall 9904
Look for Wall 18089 or Locus 18089
Update 18089 with Locus 18089
Look for Wall 18054 or Locus 18054
Update 18054 with Locus 18054
Look for Wall 18010 or Locus 18010
Look for Wall 18088 or Locus 18088
Update 18088 with Locus 18088
Look for Wall 9408 or Locus 9408
Look for Wall 18034 or Locus 18034
Update 18034 with Locus 18034
Look for Wall 9408 or Locus 9408
Look for Wall 9974 or Locus 9974
Update 9974 with Locus 9974
Look for Wall 18059 or Locus 18059
Update 18059 with Locus 18059
Look for Wall 9971 or Locus 9971
Update 9971 with Locus 9971
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 9961 or Locus 9961
Update 9961 with Wall 9961
Look for Wall 18481 or Locus 18481
Update 18481 with Wall 18481
Look for Wall 18516 or Locus 18516
Update 18516 with Wall 18516
Look for Wall 18481 or Locus 18481
Update 18481 with Wall 18481
Look for Wall 18503 or Locus 18503
Update 18503 with Wall 18503
Look for Wall 18515 or Locus 18515
Update 18515 with Wall 18515
Look for Wall 18514 or Locus 18514
Update 18514 with Wall 18514
Look for Wall 18509 or Locus 18509
Update 18509 with Locus 18509
Look for Wall 18511 or Locus 18511
Update 18511 with Locus 18511
Look for Wall 9140 or Locus 9140
Look for Wall 18481 or Locus 18481
Update 18481 with Wall 18481
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 18516 or Locus 18516
Update 18516 with Wall 18516
Look for Wall 18515 or Locus 18515
Update 18515 with Wall 18515
Look for Wall 9823 or Locus 9823
Update 9823 with Locus 9823
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 9275 or Locus 9275
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18021 or Locus 18021
Update 18021 with Locus 18021
Look for Wall 9275 or Locus 9275
Look for Wall 9140 or Locus 9140
Look for Wall 18067 or Locus 18067
Update 18067 with Locus 18067
Look for Wall 9275 or Locus 9275
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 18252 or Locus 18252
Update 18252 with Locus 18252
Look for Wall 9262 or Locus 9262
Update 9262 with Wall 9262
Look for Wall 9140 or Locus 9140
Look for Wall 9140 or Locus 9140
Look for Wall 18263 or Locus 18263
Update 18263 with Locus 18263
Look for Wall 18271 or Locus 18271
Update 18271 with Locus 18271
Look for Wall 18511 or Locus 18511
Update 18511 with Locus 18511
Look for Wall 18511 or Locus 18511
Update 18511 with Locus 18511
Look for Wall 18510 or Locus 18510
Update 18510 with Locus 18510
Look for Wall 9412 or Locus 9412
Update 9412 with Wall 9412
Look for Wall 9413 or Locus 9413
Update 9413 with Wall 9413
Look for Wall 9493 or Locus 9493
Update 9493 with Locus 9493
Look for Wall 9400 or Locus 9400
Update 9400 with Wall 9400
Look for Wall 9065 or Locus 9065
Update 9065 with Wall 9065
Look for Wall 9140 or Locus 9140
Look for Wall 9434 or Locus 9434
Update 9434 with Locus 9434
Look for Wall 18048 or Locus 18048
Update 18048 with Wall 18048
Look for Wall 18250 or Locus 18250
Update 18250 with Wall 18250
Look for Wall 18242 or Locus 18242
Update 18242 with Locus 18242
Look for Wall 9909 or Locus 9909
Update 9909 with Wall 9909
Look for Wall 18241 or Locus 18241
Update 18241 with Locus 18241
Look for Wall 18250 or Locus 18250
Update 18250 with Wall 18250
Look for Wall 9065 or Locus 9065
Update 9065 with Wall 9065
Look for Wall 9998 or Locus 9998
Update 9998 with Wall 9998
Look for Wall 18371 or Locus 18371
Update 18371 with Locus 18371
Look for Wall 9909 or Locus 9909
Update 9909 with Wall 9909
Look for Wall 9679 or Locus 9679
Update 9679 with Locus 9679
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9140 or Locus 9140
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9903 or Locus 9903
Update 9903 with Locus 9903
Look for Wall 9408 or Locus 9408
Look for Wall 9436 or Locus 9436
Update 9436 with Locus 9436
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9140 or Locus 9140
Look for Wall 9436 or Locus 9436
Update 9436 with Locus 9436
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9861 or Locus 9861
Update 9861 with Locus 9861
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9140 or Locus 9140
Look for Wall 9902 or Locus 9902
Update 9902 with Locus 9902
Look for Wall 9914 or Locus 9914
Update 9914 with Wall 9914
Look for Wall 9211 or Locus 9211
Update 9211 with Wall 9211
Look for Wall 9140 or Locus 9140
Look for Wall 9859 or Locus 9859
Update 9859 with Locus 9859
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 18015 or Locus 18015
Update 18015 with Locus 18015
Look for Wall 18019 or Locus 18019
Update 18019 with Locus 18019
Look for Wall 9065 or Locus 9065
Update 9065 with Wall 9065
Look for Wall 9066 or Locus 9066
Update 9066 with Wall 9066
Look for Wall 9400 or Locus 9400
Update 9400 with Wall 9400
Look for Wall 9400 or Locus 9400
Update 9400 with Wall 9400
Look for Wall 9140 or Locus 9140
Look for Wall 9915 or Locus 9915
Update 9915 with Wall 9915
Look for Wall 04G0-004 or Locus 04G0-004
Update 04G0-004 with Locus 04G0-004
Look for Wall 04G0-013 or Locus 04G0-013
Update 04G0-013 with Locus 04G0-013
Look for Wall 18384 or Locus 18384
Update 18384 with Locus 18384
Look for Wall 18396 or Locus 18396
Update 18396 with Locus 18396
Look for Wall 9903 or Locus 9903
Update 9903 with Locus 9903
Look for Wall 9795 or Locus 9795
Update 9795 with Locus 9795
Look for Wall 04G0-013 or Locus 04G0-013
Update 04G0-013 with Locus 04G0-013
Look for Wall 18265 or Locus 18265
Update 18265 with Locus 18265
Look for Wall 18242 or Locus 18242
Update 18242 with Locus 18242
Look for Wall 18076 or Locus 18076
Update 18076 with Locus 18076
Look for Wall 9795 or Locus 9795
Update 9795 with Locus 9795
Look for Wall 18058 or Locus 18058
Update 18058 with Locus 18058
Look for Wall 9795 or Locus 9795
Update 9795 with Locus 9795
Look for Wall 18033 or Locus 18033
Update 18033 with Locus 18033
Look for Wall 9816 or Locus 9816
Update 9816 with Locus 9816
Look for Wall 18242 or Locus 18242
Update 18242 with Locus 18242
Look for Wall 18265 or Locus 18265
Update 18265 with Locus 18265
Look for Wall 18033 or Locus 18033
Update 18033 with Locus 18033
Look for Wall 18273 or Locus 18273
Update 18273 with Locus 18273
Look for Wall 18265 or Locus 18265
Update 18265 with Locus 18265
Look for Wall 18033 or Locus 18033
Update 18033 with Locus 18033
Look for Wall 9902 or Locus 9902
Update 9902 with Locus 9902
Look for Wall 18216 or Locus 18216
Update 18216 with Locus 18216
Look for Wall 18570 or Locus 18570
Update 18570 with Locus 18570
Look for Wall 18058 or Locus 18058
Update 18058 with Locus 18058
Look for Wall 18242 or Locus 18242
Update 18242 with Locus 18242
Look for Wall 18395 or Locus 18395
Update 18395 with Locus 18395
Look for Wall 10D2-516 or Locus 10D2-516
Look for Wall 18323 or Locus 18323
Update 18323 with Locus 18323
Look for Wall 9657 or Locus 9657
Update 9657 with Locus 9657
Look for Wall 9814 or Locus 9814
Update 9814 with Locus 9814
Look for Wall 18286 or Locus 18286
Update 18286 with Locus 18286
Look for Wall 9903 or Locus 9903
Update 9903 with Locus 9903
Look for Wall 9903 or Locus 9903
Update 9903 with Locus 9903
Look for Wall 9899 or Locus 9899
Update 9899 with Locus 9899
Look for Wall 9328 or Locus 9328
Update 9328 with Locus 9328
Look for Wall 9752 or Locus 9752
Update 9752 with Locus 9752
Look for Wall 9305 or Locus 9305
Update 9305 with Locus 9305
Look for Wall 9305 or Locus 9305
Update 9305 with Locus 9305
Look for Wall 18293 or Locus 18293
Update 18293 with Locus 18293
Look for Wall 18286 or Locus 18286
Update 18286 with Locus 18286
Look for Wall 18229 or Locus 18229
Update 18229 with Wall 18229
Look for Wall 18313 or Locus 18313
Update 18313 with Locus 18313
Look for Wall 9297 or Locus 9297
Update 9297 with Locus 9297
Look for Wall 9179 or Locus 9179
Update 9179 with Locus 9179
Look for Wall 9759 or Locus 9759
Update 9759 with Locus 9759
Look for Wall 9860 or Locus 9860
Update 9860 with Locus 9860
Look for Wall 9300 or Locus 9300
Update 9300 with Locus 9300
Look for Wall 9537 or Locus 9537
Update 9537 with Locus 9537
Look for Wall 9259 or Locus 9259
Update 9259 with Locus 9259
Look for Wall 9259 or Locus 9259
Update 9259 with Locus 9259
Look for Wall 9678 or Locus 9678
Update 9678 with Locus 9678
Look for Wall 9878 or Locus 9878
Update 9878 with Locus 9878
Look for Wall 9730 or Locus 9730
Update 9730 with Locus 9730
Look for Wall 9814 or Locus 9814
Update 9814 with Locus 9814
Look for Wall 9875 or Locus 9875
Update 9875 with Locus 9875
Look for Wall 9251 or Locus 9251
Update 9251 with Locus 9251
Look for Wall 9249 or Locus 9249
Update 9249 with Locus 9249
Look for Wall 18313 or Locus 18313
Update 18313 with Locus 18313
Look for Wall 9657 or Locus 9657
Update 9657 with Locus 9657
Look for Wall 9605 or Locus 9605
Update 9605 with Locus 9605
Look for Wall 18494 or Locus 18494
Update 18494 with Locus 18494
Look for Wall 18561 or Locus 18561
Update 18561 with Locus 18561
Look for Wall 18392 or Locus 18392
Update 18392 with Locus 18392
Look for Wall 18483 or Locus 18483
Update 18483 with Locus 18483

In [2]:
f_df.head(3)


Out[2]:
File ID Illustration Chp Part Number Caption DB ID FileName FileType Error in print
0 Figure 1.1 Figure 1.0 1 Fig. 1.1. Map of Tel Dor showing Area G in rel... d09Z1-1001 d09Z1-1001.tif tif NaN
1 Figure 1.2 Figure 1.0 2 Fig. 1.2. A reconstruction of the Roman street... d09Z1-1002 d09Z1-1002.tif tif NaN
2 Figure 1.3 Figure 1.0 3 Fig. 1.3. Area G during the first season in 19... p08Z3-1365 p08Z3-1365.tif tif NaN

In [3]:
l_df.head(3)


Out[3]:
Region Site Area Note Locus ID Original Sort Order Locus/Wall Number Square Phase Contextual Integrity (I) Code Contextual Integrity (I) Phasing of Contents (PoC) Comments Context Chapter Status
0 Israel Tel Dor AI–AK:31–32 To facilitate navigation, Open Context editors... Locus 9000 1 Locus 9000 AI–AK/31–32 1a -- -- ⪰1 Topsoil on top of ashlar pavement of Phase 1a ... -- Dor IIIA: 5, 7, 8, 9, 11, 12, 13, 16 Not Final
1 Israel Tel Dor AJ:30–34 To facilitate navigation, Open Context editors... Locus 9001 2 Locus 9001 AJ/32 1a -- -- ⪰1 Topsoil down to fragment of F9000 -- Dor IIIA: 12 Not Final
2 Israel Tel Dor AJ:30–34 To facilitate navigation, Open Context editors... Locus 9002 3 Locus 9002 AJ/33 -- n non-stratified -- Topsoil -- Dor IIIA: 13 Not Final

In [4]:
# Find matching Loci (including Wall Loci) by matching their IDs
# with text in the file metadata 'Caption' column.
for locus_wall_id in l_df['Locus ID'].unique().tolist():
    l_w_id = locus_wall_id.replace('Locus ', 'L').replace('Wall ', 'W')
    
    # l_w_mum_id is for locus or wall IDs that are long unlikely to be
    # a false positive, and lack a "L" or "W" in the caption.
    l_w_num_id = l_w_id.replace('L', ' ').replace('W', ' ')
    if len(l_w_num_id) >= 6:
        # Catch cases where the Locus / Wall ID is long like 
        # '18347'.
        l_w_indx = (
            f_df['Caption'].str.contains(l_w_id)
            | f_df['Caption'].str.contains(l_w_num_id)
        )
    else:
        # The locus / wall id is too short to trust without a 
        # "L" or "W" prefix.
        l_w_indx = f_df['Caption'].str.contains(l_w_id)
    
    if f_df[l_w_indx].empty:
        # We didn't find a match, so continue.
        continue
    print('Found: {} for {} as {}'.format(
            len(f_df[l_w_indx]), 
            locus_wall_id,
            l_w_id,
        )
    )
    file_ids = f_df[l_w_indx]['File ID'].unique().tolist()
    file_locus_data = add_to_file_context_data(
        file_ids, 
        locus_wall_id, 
        file_locus_data
    )

# Now make a dataframe of the file - locus associations
file_locus_df = pd.DataFrame(data=file_locus_data)
print('File and Locus Associations (Found: {})'.format(
    len(file_locus_df.index))
)


Found: 3 for Wall 9003 as W9003
Found: 1 for Wall 9015 as W9015
Found: 1 for Wall 9019 as W9019
Found: 2 for Locus 9025 as L9025
Found: 1 for Wall 9041 as W9041
Found: 1 for Wall 9047 as W9047
Found: 2 for Locus 9048 as L9048
Found: 3 for Wall 9058 as W9058
Found: 5 for Wall 9065 as W9065
Found: 14 for Wall 9066 as W9066
Found: 3 for Wall 9096 as W9096
c:\python-3-7-4\lib\site-packages\pandas\core\strings.py:1843: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.
  return func(self, *args, **kwargs)
Found: 4 for Wall 9147 as W9147
Found: 2 for Wall 9162 as W9162
Found: 3 for Locus 9168 as L9168
Found: 2 for Wall 9180 as W9180
Found: 1 for Locus 9185 as L9185
Found: 1 for Locus 9202 as L9202
Found: 2 for Locus 9204 as L9204
Found: 21 for Wall 9211 as W9211
Found: 2 for Wall 9212 as W9212
Found: 6 for Wall 9216 as W9216
Found: 2 for Wall 9217 as W9217
Found: 1 for Wall 9243 as W9243
Found: 2 for Locus 9251 as L9251
Found: 1 for Wall 9253 as W9253
Found: 30 for Wall 9262 as W9262
Found: 30 for Wall 9266 as W9266
Found: 1 for Wall 9274 as W9274
Found: 1 for Wall 9275a–b as W9275a–b
Found: 7 for Wall 9278 as W9278
Found: 1 for Wall 9279 as W9279
Found: 10 for Wall 9282 as W9282
Found: 1 for Wall 9290 as W9290
Found: 1 for Locus 9298 as L9298
Found: 1 for Wall 9301b as W9301b
Found: 2 for Locus 9326 as L9326
Found: 1 for Wall 9340 as W9340
Found: 1 for Locus 9346 as L9346
Found: 7 for Wall 9400 as W9400
Found: 4 for Wall 9408a as W9408a
Found: 7 for Wall 9408b as W9408b
Found: 1 for Wall 9412 as W9412
Found: 3 for Wall 9413 as W9413
Found: 1 for Locus 9434 as L9434
Found: 1 for Wall 9466 as W9466
Found: 1 for Wall 9490 as W9490
Found: 1 for Wall 9491 as W9491
Found: 4 for Wall 9510 as W9510
Found: 1 for Locus 9550 as L9550
Found: 1 for Locus 9558 as L9558
Found: 1 for Wall 9560 as W9560
Found: 14 for Wall 9626 as W9626
Found: 1 for Locus 9630 as L9630
Found: 6 for Wall 9636 as W9636
Found: 4 for Locus 9658 as L9658
Found: 1 for Locus 9660 as L9660
Found: 1 for Locus 9669 as L9669
Found: 3 for Wall 9675 as W9675
Found: 1 for Locus 9679 as L9679
Found: 1 for Locus 9680 as L9680
Found: 19 for Wall 9684 as W9684
Found: 1 for Locus 9698 as L9698
Found: 3 for Wall 9702 as W9702
Found: 13 for Wall 9704 as W9704
Found: 1 for Locus 9714 as L9714
Found: 6 for Wall 9715 as W9715
Found: 13 for Wall 9728 as W9728
Found: 15 for Wall 9729 as W9729
Found: 6 for Wall 9735 as W9735
Found: 1 for Locus 9736 as L9736
Found: 1 for Locus 9752 as L9752
Found: 1 for Locus 9762 as L9762
Found: 1 for Locus 9766 as L9766
Found: 1 for Locus 9779 as L9779
Found: 4 for Wall 9800 as W9800
Found: 2 for Locus 9805 as L9805
Found: 1 for Locus 9816 as L9816
Found: 4 for Wall 9825 as W9825
Found: 3 for Wall 9841 as W9841
Found: 1 for Wall 9845 as W9845
Found: 1 for Locus 9865 as L9865
Found: 2 for Locus 9871 as L9871
Found: 1 for Locus 9875 as L9875
Found: 7 for Locus 9880 as L9880
Found: 2 for Locus 9882 as L9882
Found: 2 for Wall 9885 as W9885
Found: 5 for Wall 9887 as W9887
Found: 2 for Locus 9890 as L9890
Found: 1 for Locus 9895 as L9895
Found: 1 for Locus 9903 as L9903
Found: 2 for Wall 9904 as W9904
Found: 9 for Wall 9909 as W9909
Found: 8 for Wall 9914 as W9914
Found: 14 for Wall 9915 as W9915
Found: 1 for Wall 9917 as W9917
Found: 5 for Wall 9936 as W9936
Found: 1 for Wall 9942 as W9942
Found: 1 for Wall 9943 as W9943
Found: 2 for Wall 9952 as W9952
Found: 11 for Wall 9957 as W9957
Found: 2 for Wall 9959 as W9959
Found: 3 for Wall 9961 as W9961
Found: 6 for Wall 9963 as W9963
Found: 5 for Wall 9964 as W9964
Found: 1 for Locus 9965 as L9965
Found: 1 for Wall 9970 as W9970
Found: 1 for Locus 9974 as L9974
Found: 2 for Wall 9975 as W9975
Found: 9 for Locus 9982 as L9982
Found: 1 for Locus 9985 as L9985
Found: 4 for Wall 9989 as W9989
Found: 8 for Wall 9990 as W9990
Found: 3 for Wall 9993 as W9993
Found: 1 for Wall 9998 as W9998
Found: 1 for Locus 18000 as L18000
Found: 3 for Wall 18010a as W18010a
Found: 1 for Locus 18030 as L18030
Found: 9 for Locus 18033 as L18033
Found: 1 for Locus 18034 as L18034
Found: 1 for Locus 18035 as L18035
Found: 1 for Locus 18036 as L18036
Found: 1 for Locus 18041 as L18041
Found: 12 for Wall 18045 as W18045
Found: 19 for Wall 18048 as W18048
Found: 1 for Locus 18054 as L18054
Found: 1 for Locus 18059 as L18059
Found: 1 for Locus 18065 as L18065
Found: 2 for Locus 18067 as L18067
Found: 1 for Locus 18075 as L18075
Found: 1 for Locus 18082 as L18082
Found: 1 for Locus 18088 as L18088
Found: 2 for Locus 18089 as L18089
Found: 1 for Locus 18107 as L18107
Found: 1 for Locus 18121 as L18121
Found: 1 for Locus 18137 as L18137
Found: 1 for Locus 18153 as L18153
Found: 1 for Locus 18154 as L18154
Found: 2 for Locus 18182 as L18182
Found: 1 for Locus 18199 as L18199
Found: 1 for Locus 18205 as L18205
Found: 1 for Wall 18206 as W18206
Found: 1 for Locus 18213 as L18213
Found: 1 for Locus 18216 as L18216
Found: 1 for Wall 18218 as W18218
Found: 1 for Locus 18225 as L18225
Found: 22 for Wall 18229 as W18229
Found: 1 for Locus 18239 as L18239
Found: 2 for Locus 18241 as L18241
Found: 1 for Locus 18242 as L18242
Found: 1 for Locus 18243 as L18243
Found: 1 for Wall 18248 as W18248
Found: 9 for Wall 18250 as W18250
Found: 1 for Locus 18255 as L18255
Found: 5 for Locus 18275 as L18275
Found: 3 for Locus 18286 as L18286
Found: 3 for Locus 18293 as L18293
Found: 6 for Wall 18296 as W18296
Found: 3 for Locus 18298 as L18298
Found: 1 for Wall 18300 as W18300
Found: 7 for Locus 18308 as L18308
Found: 1 for Locus 18312 as L18312
Found: 1 for Locus 18313 as L18313
Found: 7 for Wall 18315 as W18315
Found: 1 for Locus 18319 as L18319
Found: 1 for Locus 18324 as L18324
Found: 2 for Locus 18330 as L18330
Found: 9 for Locus 18333 as L18333
Found: 3 for Locus 18347 as L18347
Found: 8 for Wall 18349 as W18349
Found: 1 for Locus 18361 as L18361
Found: 2 for Locus 18363 as L18363
Found: 1 for Locus 18380 as L18380
Found: 2 for Locus 18399 as L18399
Found: 1 for Locus 18400 as L18400
Found: 2 for Wall 18463 as W18463
Found: 2 for Wall 18471 as W18471
Found: 9 for Wall 18481 as W18481
Found: 1 for Locus 18482 as L18482
Found: 2 for Wall 18503 as W18503
Found: 2 for Locus 18509 as L18509
Found: 2 for Wall 18514 as W18514
Found: 5 for Wall 18515 as W18515
Found: 2 for Wall 18516 as W18516
Found: 1 for Wall 18528 as W18528
Found: 1 for Locus 18529 as L18529
Found: 3 for Locus 18570 as L18570
Found: 2 for Wall 18575 as W18575
Found: 2 for Wall 18577 as W18577
Found: 1 for Locus 18584 as L18584
Found: 2 for Wall 18912 as W18912
Found: 1 for Locus 04G0-004 as L04G0-004
Found: 1 for Wall 04G0-008 as W04G0-008
File and Locus Associations (Found: 680)

In [5]:
# Find matching Loci (including Wall Loci) by matching their Squares
# with text in the file metadata 'Caption' column.
l_df_sq = l_df[~l_df['Square'].isnull()]
for square in l_df_sq['Square'].astype(str).unique().tolist():
    sq_indx = f_df['Caption'].str.contains(square)
    if len(square) < 3 or f_df[sq_indx].empty:
        # Not enough characters for secure match.
        continue
    # Get all file_ids that have his square in their captions
    file_ids = f_df[sq_indx]['File ID'].unique().tolist()
    # Get all the locus ids that are associated with this square
    area_ids = l_df[
        l_df['Square']==square
    ]['Area'].unique().tolist()
    print('Found: {} files with square {} and {} areas'.format(
            len(f_df[sq_indx]), 
            square,
            len(area_ids)
        )
    )
    # Now add to the file_locus_data.
    file_square_data = add_to_file_context_data(
        file_ids, 
        area_ids, 
        file_square_data,
        context_id_col='Area'
    )

# Now make a dataframe of the file - area associations
file_area_df = pd.DataFrame(data=file_square_data)
print('File and Area Associations (Found: {})'.format(
    len(file_area_df.index))
)


Found: 22 files with square AJ/32 and 1 areas
Found: 14 files with square AJ/33 and 1 areas
Found: 3 files with square AJ–AK/33 and 1 areas
Found: 22 files with square AI/32 and 1 areas
Found: 1 files with square AJ/31 and 1 areas
Found: 20 files with square AI/31 and 1 areas
Found: 1 files with square AI/31–32 and 1 areas
Found: 1 files with square AI/31–33 and 1 areas
Found: 4 files with square AJ/32–33 and 1 areas
Found: 12 files with square AK/32 and 1 areas
Found: 6 files with square AJ/34 and 1 areas
Found: 26 files with square AI/33 and 1 areas
Found: 9 files with square AJ–AK/32 and 1 areas
Found: 1 files with square AI/34 and 1 areas
Found: 6 files with square AH/33 and 1 areas
Found: 5 files with square AK/33 and 1 areas
Found: 2 files with square AK/34 and 1 areas
Found: 4 files with square AH/34 and 1 areas
Found: 3 files with square AI–AJ/32 and 1 areas
Found: 1 files with square AI–AJ/33 and 1 areas
Found: 8 files with square AH–AI/33 and 1 areas
Found: 1 files with square AI/33–34 and 1 areas
Found: 1 files with square AG/34 and 1 areas
Found: 11 files with square AG/33 and 1 areas
Found: 1 files with square AK/32–33 and 1 areas
Found: 2 files with square AG/33–34 and 1 areas
Found: 1 files with square AI–AJ/32–33 and 1 areas
File and Area Associations (Found: 188)

In [6]:
context_df = pd.merge(file_locus_df, file_area_df, on='File ID', how='outer')
context_linked_files = context_df['File ID'].unique().tolist()
print('Found File and Context Associations for {} unique files (total rows: {})'.format(
    len(context_linked_files),
    len(context_df.index))
)


# Get a list of files that do NOT have context associations
no_context_files = f_df[
    ~f_df['File ID'].isin(context_linked_files)
]['File ID'].unique().tolist()

file_site_data = {
    'File ID':[], 
    'Site Area': [],
}
file_site_data = add_to_file_context_data(
    no_context_files, 
    'Area G', 
    file_site_data,
    context_id_col='Site Area'
)
site_df = pd.DataFrame(data=file_site_data)
context_df = pd.concat([context_df, site_df], sort=False)

# Set the column order for nice aesthetics
context_df = context_df[['File ID', 'Site Area', 'Area', 'Locus ID']]
context_df.sort_values(by=['File ID', 'Locus ID', 'Area'], inplace=True)

context_df.to_csv(path_files_contexts, index=False)
context_df.head(3)


Found File and Context Associations for 426 unique files (total rows: 857)
Out[6]:
File ID Site Area Area Locus ID
0 Figure 1.1 Area G NaN NaN
532 Figure 1.10 NaN AJ:30–34 Wall 9914
733 Figure 1.11 NaN AI–AJ:32–34 NaN