In [1]:
import itertools
import os
import numpy as np
import pandas as pd



# Get the root_path for this jupyter notebook repo.
repo_path = os.path.dirname(os.path.abspath(os.getcwd()))

path_locus_cards = os.path.join(
    repo_path, 'files', 'tell-dor', 'tell-dor-locus-cards-index.csv'
)
# Path to the Tell Dor locus metadata CSV 
path_loci = os.path.join(
    repo_path, 'files', 'tell-dor', 'tell-dor-loci.csv'
)
# Read the locus (and wall) CSV into dataframe l_df.
l_df = pd.read_csv(path_loci)
l_df['Number'] = l_df['Number'].astype(str)

# Get the directory of the locus cards
path_cards = os.path.join(
    'C:\\', 'GitHub', 'open-context-py', 'static', 'exports', 'tell-dor-area-g', 'locus-cards'
)
files = [f for f in os.listdir(path_cards) if os.path.isfile(os.path.join(path_cards, f))]
print('Files found: {}'.format(len(files)))


Files found: 1400

In [2]:
cols = [
    'Locus ID',
    'Resource Type',
    'file',
    'full',
    'preview',
    'thumbs',
]

data = {col:[] for col in cols}

url_prefix = 'https://artiraq.org/static/opencontext/tel-dor/locus-cards/'

missing = []
for file in files:
    # The file names correspond to locus / wall ids. 
    file_parts = file.split('.')  
    id_part = file_parts[0]
    id_indx = (l_df['Number'] == id_part)
    if len(file_parts) > 2:
        print('Check for {} also'.format(file_parts[1]))
        id_indx &= (l_df['Number'].str.contains(file_parts[1], case=False))
    if l_df[id_indx].empty:
        # Can't find a matching locus or wall
        id_indx = (l_df['Number'].str.startswith(id_part))
        if len(file_parts) > 2:
            id_indx &= (l_df['Number'].str.contains(file_parts[1], case=False))
    if l_df[id_indx].empty:
        missing.append(file)
        continue
    locus_ids = l_df[id_indx]['Locus ID'].unique().tolist()
    for locus_id in locus_ids:
        data['Locus ID'].append(locus_id)
        data['Resource Type'].append('Locus card')
        data['file'].append(file)
        data['full'].append((url_prefix + file))
        data['preview'].append((url_prefix + file + '#preview'))
        data['thumbs'].append('https://opencontext.org/static/oc/images/icons/pdf-noun-89522.png')
    
card_df = pd.DataFrame(data=data)
card_df = card_df[cols]
card_df.to_csv(path_locus_cards, index=False)

print('Still missing: {}'.format('\n'.join(missing)))


---------------------------------------------------------------------------
PermissionError                           Traceback (most recent call last)
<ipython-input-2-5347ce993ae3> in <module>
     39 card_df = pd.DataFrame(data=data)
     40 card_df = card_df[cols]
---> 41 card_df.to_csv(path_locus_cards, index=False)
     42 
     43 print('Still missing: {}'.format('\n'.join(missing)))

c:\python-3-7-4\lib\site-packages\pandas\core\generic.py in to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal)
   3244             decimal=decimal,
   3245         )
-> 3246         formatter.save()
   3247 
   3248         if path_or_buf is None:

c:\python-3-7-4\lib\site-packages\pandas\io\formats\csvs.py in save(self)
    181                 self.mode,
    182                 encoding=self.encoding,
--> 183                 compression=self.compression,
    184             )
    185             close = True

c:\python-3-7-4\lib\site-packages\pandas\io\common.py in _get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text)
    395         if encoding:
    396             # Encoding
--> 397             f = open(path_or_buf, mode, encoding=encoding, newline="")
    398         elif is_text:
    399             # No explicit encoding

PermissionError: [Errno 13] Permission denied: 'c:\\GitHub\\open-context-jupyter\\files\\tell-dor\\tell-dor-locus-cards-index.csv'