In [19]:
import os
import shutil
import sh
from pandas import DataFrame, Series
from itertools import islice

import yaml

REPOS_LIST = "/Users/raymondyee/C/src/gitenberg/Second-Folio/list_of_repos.txt"
GITENBERG_DIR = "/Users/raymondyee/C/src/gitenberg/"

repos=open(REPOS_LIST).read().strip().split("\n")

for (i,repo) in enumerate(islice(repos,None)):
    pg_id = repo.split("_")[-1]
    dest = os.path.join(GITENBERG_DIR, repo, "metadata.yaml")
    
    if os.path.exists(dest):
        with open(dest, "r+") as yfile: 
            y = yaml.load(yfile)
            covers = y.get('covers', [])
            #print (i, dest, len(covers))
            
            # check for existence of covers
            covers1 = []
            for cover in covers:
                cover_path = cover.get("image_path")
                if cover_path is not None:
                    if os.path.exists(os.path.join(GITENBERG_DIR, repo, cover_path)):
                        covers1.append(cover)
            
            if len(covers1) < len(covers):
                print (i, dest, len(covers), len(covers1))
                y['covers'] = covers1
                yfile.seek(0)
                yfile.write(yaml.safe_dump(y,default_flow_style=False,allow_unicode=True))
                yfile.truncate()

fixes:

(0, '/Users/raymondyee/C/src/gitenberg/Adventures-of-Huckleberry-Finn_76/metadata.yaml', 6, 1) (16, '/Users/raymondyee/C/src/gitenberg/Dracula_345/metadata.yaml', 2, 1) (18, '/Users/raymondyee/C/src/gitenberg/Household-Stories-by-the-Brothers-Grimm_19068/metadata.yaml', 2, 1) (38, '/Users/raymondyee/C/src/gitenberg/Les-Mis-rables_135/metadata.yaml', 2, 1) (40, '/Users/raymondyee/C/src/gitenberg/The-Count-of-Monte-Cristo_1184/metadata.yaml', 2, 1)


In [20]:
def git_cover_metadata(repo):
    metadata_path = os.path.join(GITENBERG_DIR, repo, "metadata.yaml")
    
    try:
        if os.path.exists(metadata_path):
            sh.cd(os.path.join(GITENBERG_DIR, repo))
            print ("add")
            sh.git("add", "metadata.yaml")
            print ("commit")
            try:
                sh.git("commit", "-m", "remove covers from metadata.yaml with bad paths")
            except:
                pass
            print ("push")
            sh.git.push()
        else:
            return None
    except Exception as e:
        return e

In [21]:
repos_fixed = ['Adventures-of-Huckleberry-Finn_76', 'Dracula_345/metadata.yaml',
              'Household-Stories-by-the-Brothers-Grimm_19068', 'Les-Mis-rables_135', 'The-Count-of-Monte-Cristo_1184']

In [23]:
for (i,repo) in enumerate(islice(repos_fixed,None)):
    print (i, repo)
    print (git_cover_metadata(repo))


(0, 'Adventures-of-Huckleberry-Finn_76')
add
commit
push
None
(1, 'Dracula_345/metadata.yaml')
None
(2, 'Household-Stories-by-the-Brothers-Grimm_19068')
add
commit
push
None
(3, 'Les-Mis-rables_135')
add
commit
push
None
(4, 'The-Count-of-Monte-Cristo_1184')
add
commit
push
None

In [ ]: