In [1]:
import itertools
import os
import numpy as np
import pandas as pd
# The repo root is taken to be the parent of the current working directory.
repo_path = os.path.dirname(os.path.abspath(os.getcwd()))

# Path of the locus-card index CSV (the output file written by the next cell).
path_locus_cards = os.path.join(
    repo_path, 'files', 'tell-dor', 'tell-dor-locus-cards-index.csv'
)

# Path to the Tell Dor locus metadata CSV.
path_loci = os.path.join(
    repo_path, 'files', 'tell-dor', 'tell-dor-loci.csv'
)

# Read the locus (and wall) CSV into dataframe l_df.
# 'Number' is an identifier that gets compared with card file names, so it is
# read as text: letting pandas infer a numeric dtype would drop leading zeros
# and, if any cell is blank, render the numbers as floats such as '123.0'.
l_df = pd.read_csv(path_loci, dtype={'Number': str})
l_df['Number'] = l_df['Number'].astype(str)

# Directory that holds the locus card files.
# NOTE(review): this is an absolute, machine-specific location; it has to be
# edited by hand when the notebook runs on another machine.
path_cards = os.path.join(
    'C:\\', 'GitHub', 'open-context-py', 'static', 'exports', 'tell-dor-area-g', 'locus-cards'
)

# Keep regular files only (sub-directories are skipped). os.listdir() returns
# names in arbitrary order, so sort them to make the row order of the
# generated index reproducible between runs.
files = sorted(
    f for f in os.listdir(path_cards)
    if os.path.isfile(os.path.join(path_cards, f))
)
print('Files found: {}'.format(len(files)))
In [2]:
# Columns of the locus-card index CSV, in output order.
cols = [
    'Locus ID',
    'Resource Type',
    'file',
    'full',
    'preview',
    'thumbs',
]
data = {col: [] for col in cols}
url_prefix = 'https://artiraq.org/static/opencontext/tel-dor/locus-cards/'
# Card files for which no locus / wall row could be found.
missing = []

for file in files:
    # The file names correspond to locus / wall ids. The first dot-separated
    # segment is the number; when the name has more than two segments, the
    # second segment must also occur in the locus number.
    file_parts = file.split('.')
    id_part = file_parts[0]

    suffix_indx = None
    if len(file_parts) > 2:
        print('Check for {} also'.format(file_parts[1]))
        # regex=False: the segment comes from a file name and must be matched
        # literally; as a pattern, characters such as '(' or '+' would either
        # change the match or raise re.error.
        suffix_indx = l_df['Number'].str.contains(
            file_parts[1], case=False, regex=False
        )

    # First attempt: the locus number equals the leading file-name segment.
    id_indx = (l_df['Number'] == id_part)
    if suffix_indx is not None:
        id_indx = id_indx & suffix_indx

    if not id_indx.any():
        # Can't find an exactly matching locus or wall; fall back to numbers
        # that merely start with the leading segment.
        id_indx = l_df['Number'].str.startswith(id_part)
        if suffix_indx is not None:
            id_indx = id_indx & suffix_indx

    if not id_indx.any():
        missing.append(file)
        continue

    # One output row per distinct Locus ID that matched this file.
    locus_ids = l_df.loc[id_indx, 'Locus ID'].unique().tolist()
    for locus_id in locus_ids:
        data['Locus ID'].append(locus_id)
        data['Resource Type'].append('Locus card')
        data['file'].append(file)
        data['full'].append(url_prefix + file)
        data['preview'].append(url_prefix + file + '#preview')
        data['thumbs'].append('https://opencontext.org/static/oc/images/icons/pdf-noun-89522.png')

card_df = pd.DataFrame(data=data)
card_df = card_df[cols]
card_df.to_csv(path_locus_cards, index=False)
print('Still missing: {}'.format('\n'.join(missing)))