Imports and initialization


In [ ]:
from __future__ import print_function

from github_settings import (ry_username, ry_password,
                             username, password,
                             token, 
                             GITENBERG_GITHUB_TOKEN,
                             GITENBERG_TRAVIS_ACCESS_TOKEN,
                             RDHYEE_GITHUB_TOKEN,
                             RDHYEE_TRAVIS_ACCESS_TOKEN, 
                             RDHYEE_TRAVIS_PROFILE_TOKEN)


from itertools import (islice, izip)
import requests


from second_folio import (apply_to_repos, all_repos)

from gitenberg_utils import (GitenbergJob,
                             GitenbergTravisJob,
                             ForkBuildRepo,
                             BuildRepo,
                             BuildRepo2,
                             MetadataWrite,
                             RepoNameFixer,
                             repo_md)


import pandas as pd
from pandas import (DataFrame, Series)

In [ ]:
# loop through all the repos
# NOTE: this binds `repos` to the very same list object as `all_repos` (no copy
# is made), so any in-place mutation of `repos` also changes `all_repos`.

repos = all_repos

status of builds


In [ ]:
# easier way to build travis_repo for given repo?

from travispy import TravisPy
# Travis client created from the GITenberg Travis access token.
travis = TravisPy(GITENBERG_TRAVIS_ACCESS_TOKEN)

# Repo handle looked up by its "owner/name" slug.
travis_repo = travis.repo("GITenberg/United-States-Declaration-of-Independence_1")

In [ ]:
travis_repo.state

In [ ]:
def travis_last_build(travis, repo_owner, repo_name):
    """Return the most recent Travis build of ``repo_owner/repo_name``.

    Parameters
    ----------
    travis : object exposing ``repo(slug)`` (e.g. a TravisPy client)
    repo_owner : owner part of the repo slug
    repo_name : name part of the repo slug

    Returns
    -------
    tuple
        ``(last_build, last_build.id, last_build.state)``
    """
    travis_repo = travis.repo("{}/{}".format(repo_owner, repo_name))
    # Read ``last_build`` exactly once so that the id and state in the tuple
    # are guaranteed to describe the same build object that is returned.
    last_build = travis_repo.last_build
    return (last_build, last_build.id, last_build.state)

Scaling up to build all the repos


In [ ]:
from rdhyee_utils import grouper

In [ ]:
import arrow

now = arrow.now()
now.isoformat()

In [ ]:
import io, csv
from itertools import islice

import arrow

def init_repos(conn, repo_owner='GITenberg', max_repos=None, repo_list_path=None):
    """Seed the ``repos`` table from the GITenberg repo-list TSV file.

    Every line of the TSV that splits into exactly two tab-separated fields
    becomes one row: field 0 goes into the first column, the current
    timestamp into the second, field 1 into the third and ``repo_owner`` into
    the fourth; the remaining ``BASE_COLUMNS`` are filled with ``None``.
    Rows are written with ``INSERT_SQL`` in pages of ``PAGE_SIZE``.

    Parameters
    ----------
    conn : sqlite3 connection
        Used for the inserts. NOTE: it is committed *and closed* before this
        function returns, so the caller cannot reuse it afterwards.
    repo_owner : str
        Value stored in the fourth column of every row.
    max_repos : int or None
        Upper bound on the number of TSV rows loaded (``None`` = all rows).
    repo_list_path : str or None
        Path of the TSV file; ``None`` falls back to the original hard-coded
        local path.
    """
    REPO_LIST_PATH = "/Users/raymondyee/C/src/gitberg/build/lib/gitenberg/data/GITenberg_repo_list.tsv"
    PAGE_SIZE = 50

    if repo_list_path is None:
        repo_list_path = REPO_LIST_PATH

    with io.open(repo_list_path, mode='r', encoding='UTF-8') as f:
        s = f.read()

    # keep only well-formed two-column rows (this also drops blank lines)
    repos = []
    for row in s.split("\n"):
        fields = row.split("\t")
        if len(fields) == 2:
            repos.append(fields)

    c = conn.cursor()

    # number of trailing columns that have no value yet
    n_unset = len(BASE_COLUMNS) - 4

    for page in grouper(islice(repos, max_repos), PAGE_SIZE):

        # NOTE(review): if `grouper` pads the final chunk with None (as the
        # itertools recipe of that name does), skip the padding instead of
        # failing on `None[0]` -- TODO confirm against rdhyee_utils.grouper.
        page_of_repos = [
            [repo[0], arrow.now().isoformat(), repo[1], repo_owner] + n_unset * [None]
            for repo in page
            if repo is not None
        ]

        try:
            c.executemany(INSERT_SQL, page_of_repos)
        except Exception as e:
            print (e)
            # rollback() is a method of the connection, not of the cursor
            conn.rollback()
            break

    conn.commit()
    conn.close()

In [ ]:
# https://pymotw.com/2/sqlite3/
import sqlite3

import os
import sqlite3
import arrow

db_filename = 'gitenberg.db'

# Schema of the ``repos`` table: ordered (column name, SQLite type) pairs.
BASE_COLUMNS = [
    ('gutenberg_id', 'integer primary key'),
    ('updated', 'text'),
    ('repo_name', 'text'),
    ('repo_owner', 'text'),
    ('datebuilt', 'integer'),
    ('version', 'text'),
    ('ebooks_in_release_count', 'integer'),
    ('last_build_id', 'integer'),
    ('last_build_status', 'text'),
]

# Column names only, in schema order.
BASE_COLUMNS_FIELDS = [column[0] for column in BASE_COLUMNS]

# "name type" definitions, one per line, inside CREATE TABLE IF NOT EXISTS.
CREATE_TABLE_SQL = "CREATE TABLE IF NOT EXISTS repos ({0})".format(
    ",\n".join(" ".join(column) for column in BASE_COLUMNS)
)

# Upsert statement with one "?" placeholder per column.
INSERT_SQL = "INSERT OR REPLACE INTO repos ({0}) VALUES ({1})".format(
    ", ".join(BASE_COLUMNS_FIELDS),
    ", ".join("?" for _ in BASE_COLUMNS_FIELDS),
)


# Check for the database file *before* connecting: sqlite3.connect() creates
# the file when it is missing, so testing afterwards always reports "exists".
db_is_new = not os.path.exists(db_filename)

conn = sqlite3.connect(db_filename)

if db_is_new:
    print ('Need to create schema')
else:
    print ('Database exists, assume schema does, too.')

# CREATE TABLE IF NOT EXISTS, so this is safe against an existing database.
c = conn.cursor()
c.execute(CREATE_TABLE_SQL)

print ("table created")

# Load the repo list into the table; init_repos commits and closes `conn`.
init_repos(conn)

# Kept from the original cell; the connection is already closed at this point.
conn.close()

list of repos?

https://github.com/gitenberg-dev/gitberg/blob/master/gitenberg/data/GITenberg_repo_list.tsv

-> local copy: /Users/raymondyee/C/src/gitberg/build/lib/gitenberg/data/GITenberg_repo_list.tsv


In [ ]:
# `s` is only a local variable inside init_repos, so it is undefined here on a
# fresh kernel; read the local copy of the repo-list TSV explicitly instead.
import io

REPO_LIST_PATH = "/Users/raymondyee/C/src/gitberg/build/lib/gitenberg/data/GITenberg_repo_list.tsv"

with io.open(REPO_LIST_PATH, mode='r', encoding='UTF-8') as f:
    s = f.read()

# last field of every well-formed two-column row
repos = [fields[-1]
         for fields in (row.split("\t") for row in s.split("\n"))
         if len(fields) == 2]

Build in Gitenberg

test


In [ ]:
# Single-repo test: pick one repo name (alternatives kept commented out).
# repo = "Anne-of-the-Island_51"
# repo = "Adventures-of-Huckleberry-Finn_76"
repo = "The-Lady-with-the-Dog-and-Other-Stories_13415"


# BuildRepo2 job for that repo under the GITenberg owner, using the GITenberg
# Travis access token; the same message is used for both commit-message options.
bj = BuildRepo2(username=username, password=password, repo_name=repo,
              repo_owner='GITenberg', 
              update_travis_commit_msg='build using travis.build_epub',
              tag_commit_message='build using travis.build_epub',
              access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)

In [ ]:
bj.status()

In [ ]:
bj.travis_repo.last_build.finished_at

In [ ]:
bj.gh.rate_limit()

In [ ]:
bj.run(fix_repo_name=True)

In [ ]:
bj.travis_repo.state, bj.status()

loop over rest


In [ ]:
from collections import OrderedDict
from itertools import islice

# repo name -> (job, run result) or the Exception raised while building it
results = OrderedDict()

# Work on a reversed *copy*: `all_repos.reverse()` would flip the shared,
# imported list in place, so the order would alternate on every re-run of a
# cell that does this.
repos = list(reversed(all_repos))

# Iterate over a snapshot so the build loop can be resumed by calling
# build_repos(repos_iter, n) repeatedly.
repos_iter = iter(repos[:])

In [ ]:
def build_repos(repos, n=None):
    """Run a BuildRepo2 job for up to ``n`` repos and record each outcome.

    Parameters
    ----------
    repos : iterable of repo names; passing an iterator lets successive calls
        continue where the previous one stopped.
    n : int or None
        Maximum number of repos to take from ``repos`` (``None`` = all).

    Side effects
    ------------
    For every repo, stores ``(job, job.run(fix_repo_name=True))`` in the
    module-level ``results`` mapping, or the raised exception if anything
    failed, and prints a one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = BuildRepo2(username=username, password=password, repo_name=repo,
                            repo_owner='GITenberg',
                            update_travis_commit_msg='build using travis.build_epub',
                            tag_commit_message='build using travis.build_epub',
                            access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)
            results[repo] = (bj, bj.run(fix_repo_name=True))
        except Exception as e:
            # keep going: remember the failure instead of aborting the batch
            results[repo] = e

        # "\r" + end="" rewrites the same output line on each iteration
        print ("\r{}: {}".format(i, results[repo]), end="")

In [ ]:
build_repos(repos_iter, None)

In [ ]:
[result[0].status().get('ebooks_in_release_count') for result in results.values()[-5:] 
   if not isinstance(result,Exception)]

In [ ]:
len(results)

In [ ]:
# can I get travis status of pending build?
r = results.values()[-1][0]
r.repo_name, r.travis_repo.last_build.id, r.travis_repo.last_build.finished

In [ ]:
build_result = results.values()[0][0]
build_result.travis_repo.check_state()

Figure out the various tokens

There are at least three types of tokens in use in travis-ci (The Travis CI Blog: Token, Token, Token):

  • GitHub token
  • access token
  • Travis token

In [ ]:
# try using an access token

from travispy import TravisPy
# Alternative client for the rdhyee account, kept for reference:
#travis_client = TravisPy(RDHYEE_TRAVIS_ACCESS_TOKEN)
travis_client = TravisPy(GITENBERG_TRAVIS_ACCESS_TOKEN)

# Repo handle looked up by its "owner/name" slug.
travis_repo = travis_client.repo("GITenberg/Adventures-of-Huckleberry-Finn_76")

In [ ]:
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])

In [ ]:
Point(3,y=4).y

In [ ]:
(travis_repo.last_build_number, 
 travis_repo.last_build_id, 
 travis_repo.last_build.created,
 travis_repo.last_build_started_at, 
 travis_repo.last_build_finished_at,
 travis_repo.last_build_duration)

In [ ]:
travis_repo.enable()

In [ ]:
# using GitHub auth and read off the corresponding access token

from travispy import TravisPy
#t = TravisPy.github_auth(RDHYEE_GITHUB_TOKEN)
t = TravisPy.github_auth(GITENBERG_GITHUB_TOKEN)

In [ ]:
# `_session` is a private attribute of the client returned by github_auth.
session = t._session
# Show the session URI and whether the last whitespace-separated piece of the
# Authorization header equals GITENBERG_TRAVIS_ACCESS_TOKEN.
(session.uri,  
 session.headers.get('Authorization').split()[-1] == GITENBERG_TRAVIS_ACCESS_TOKEN)

everything else


In [ ]:
# Pick one repo name (alternatives kept commented out).
# repo = "Anne-of-the-Island_51"
# repo = "Adventures-of-Huckleberry-Finn_76"
repo = "Chaucer-s-Works-Volume-4--of-7----The-Canterbury-Tales_22120"

# BuildRepo job for that repo under the rdhyee owner, using the rdhyee
# credentials and Travis access token.
bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
              repo_owner='rdhyee', 
              update_travis_commit_msg='try again: encoding problem?',
              tag_commit_message='try again: encoding problem?',
              access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)

In [ ]:
bj.run()

fork and build


In [ ]:
# fork and build repo

# last entry of whatever `repos` currently holds
repo = repos[-1]

# ForkBuildRepo job for that repo under the rdhyee owner, using the rdhyee
# credentials and Travis access token.
bj2 = ForkBuildRepo(username=ry_username, password=ry_password, repo_name=repo,
              repo_owner='rdhyee', 
              update_travis_commit_msg='update travis',
              tag_commit_message='update travis',
              access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)

In [ ]:
bj2.fork_repo()

In [ ]:
from collections import OrderedDict
from itertools import islice

# repo name -> (job, run result) or the Exception raised while building it
results = OrderedDict()

# Work on a reversed *copy*: `all_repos.reverse()` would flip the shared,
# imported list in place, so the order would alternate on every re-run of a
# cell that does this.
repos = list(reversed(all_repos))

# Iterator so that build_repos(repos_iter, n) can be called repeatedly and
# continue where the previous call stopped.
repos_iter = iter(repos)

In [ ]:
def build_repos(repos, n=None):
    """Run a BuildRepo job (rdhyee owner) for up to ``n`` repos.

    Parameters
    ----------
    repos : iterable of repo names; passing an iterator lets successive calls
        continue where the previous one stopped.
    n : int or None
        Maximum number of repos to take from ``repos`` (``None`` = all).

    Side effects
    ------------
    For every repo, stores ``(job, job.run())`` in the module-level
    ``results`` mapping, or the raised exception if anything failed, and
    prints a one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
                           repo_owner='rdhyee',
                           update_travis_commit_msg='build using gitenberg.travis',
                           tag_commit_message='build using gitenberg.travis',
                           access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
            results[repo] = (bj, bj.run())
        except Exception as e:
            # keep going: remember the failure instead of aborting the batch
            results[repo] = e

        # "\r" + end="" rewrites the same output line on each iteration
        print ("\r{}: {}".format(i, results[repo]), end="")

In [ ]:
results.values()[-1][0].repo_name

In [ ]:
build_repos(repos_iter, 32)

In [ ]:
len(results)

In [ ]:
# ebooks_in_release_count for the last five results; entries that are
# Exception objects (failed builds) are skipped, since they cannot be indexed
# like the (job, run result) tuples.
[result[0].status().get('ebooks_in_release_count')
 for result in list(results.values())[-5:]
 if not isinstance(result, Exception)]

In [ ]:
results.values()[-1][0].status()

If I want to delete existing repo tokens, I can first list the current authorizations with:

auths = gh.iter_authorizations()
[(auth.name, auth.created_at) for auth in auths]

try rebuilding errored builds


In [ ]:
from collections import OrderedDict
from itertools import islice

# repo name -> (job, run result) or the Exception raised while rebuilding it
results = OrderedDict()

# Work on a reversed *copy*: `all_repos.reverse()` would flip the shared,
# imported list in place, so the order would alternate on every re-run of a
# cell that does this.
repos = list(reversed(all_repos))

# Iterator so that rebuild_repos(repos_iter, n) can be called repeatedly and
# continue where the previous call stopped.
repos_iter = iter(repos)

def rebuild_repos(repos, n=None):
    """Re-run a BuildRepo job (rdhyee owner) for up to ``n`` repos.

    Parameters
    ----------
    repos : iterable of repo names; passing an iterator lets successive calls
        continue where the previous one stopped.
    n : int or None
        Maximum number of repos to take from ``repos`` (``None`` = all).

    Side effects
    ------------
    For every repo, stores ``(job, job.run(load_repo_token=False))`` in the
    module-level ``results`` mapping, or the raised exception if anything
    failed, and prints a one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
                           repo_owner='rdhyee',
                           update_travis_commit_msg='build using gitenberg.travis (retry)',
                           tag_commit_message='build using gitenberg.travis (retry)',
                           access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
            # reset token: run without loading the stored repo token.
            # (The original line lacked the `bj.run` call and did not parse.)
            results[repo] = (bj, bj.run(load_repo_token=False))
        except Exception as e:
            # keep going: remember the failure instead of aborting the batch
            results[repo] = e

        # "\r" + end="" rewrites the same output line on each iteration
        print ("\r{}: {}".format(i, results[repo]), end="")

In [ ]:
rebuild_repos(repos_iter,None)

In [ ]:
len(results)

In [ ]:
# Names of repos whose run result was 'failed'. Entries that are Exception
# objects are skipped: they are not (job, run result) tuples.
repos_failed = [result[0].repo_name
                for result in results.values()
                if not isinstance(result, Exception) and result[1] == 'failed']
repos_failed

fix repo names


In [ ]:
# Hard-coded list of repo names kept under the "fix repo names" heading.
# NOTE(review): no other visible cell references this constant; presumably it
# is the intended input for fixname_repos -- TODO confirm.
REPOS_FAILED = ['The-Pilgrim-s-Progress-from-this-world-to-that-which-is-to-come--13-Delivered-under-the-similit__131',
'The-Art-of-War_132',
'The-Thousand-and-One-Nights-Vol.-I.Commonly-Called-the-Arabian-Nights-Entertainments_34206',
'On-the-Origin-of-Species-By-Means-of-Natural-Selection--13-Or-the-Preservation-of-Favoured-Rac__1228',
'Daddy-Long-Legs_157',
'Heidi-Gift-Edition-_20781',
'The-Trial_7849',
'Chaucer-s-Works-Volume-4--of-7----The-Canterbury-Tales_22120',
'Persuasion_105',
'The-Works-of-Edgar-Allan-Poe-The-Raven-EditionTable-Of-Contents-And-Index-Of-The-Five-Volumes_25525']

In [ ]:
def fixname_repos(repos, n=None):
    """Run a RepoNameFixer job (rdhyee owner) for up to ``n`` repos.

    Parameters
    ----------
    repos : iterable of repo names; passing an iterator lets successive calls
        continue where the previous one stopped.
    n : int or None
        Maximum number of repos to take from ``repos`` (``None`` = all).

    Side effects
    ------------
    For every repo, stores ``(job, job.run())`` in the module-level
    ``results`` mapping, or the raised exception if anything failed, and
    prints a one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = RepoNameFixer(username=ry_username, password=ry_password, repo_name=repo,
                               repo_owner='rdhyee',
                               update_travis_commit_msg='build using gitenberg.travis (retry)',
                               tag_commit_message='build using gitenberg.travis (retry)',
                               access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
            results[repo] = (bj, bj.run())
        except Exception as e:
            # keep going: remember the failure instead of aborting the batch
            results[repo] = e

        # "\r" + end="" rewrites the same output line on each iteration
        print ("\r{}: {}".format(i, results[repo]), end="")

status of the Second Folio forks (152 repos)?


In [ ]:
from second_folio import (all_repos)

all_repos[:5]

In [ ]:
def status_for_repo(repo_name):
    """Return ``status()`` of a GitenbergTravisJob for ``repo_name`` (GITenberg owner)."""
    job_options = dict(
        username=username,
        password=password,
        repo_name=repo_name,
        repo_owner='GITenberg',
        update_travis_commit_msg='check status',
        tag_commit_message='check status',
        access_token=GITENBERG_TRAVIS_ACCESS_TOKEN,
    )
    return GitenbergTravisJob(**job_options).status()

results_iter = apply_to_repos(status_for_repo, repos=repos)

In [ ]:
status_for_repo('Adventures-of-Huckleberry-Finn_76')

In [ ]:
# Drain results_iter into a list, showing a one-line progress indicator.
results = []

for (i, result) in enumerate(results_iter):
    results.append(result)

    # failed lookups arrive as Exception objects; show their text instead
    if isinstance(result, Exception):
        progress_label = str(result)
    else:
        progress_label = result['repo_name']

    print ("\r{}: {}".format(i, progress_label), end="")

In [ ]:
[(i, result) for (i, result) in enumerate(results) if isinstance(result, Exception)]

In [ ]:
# Repos whose release does not contain exactly 3 ebooks. `results` may hold
# Exception objects (see the collection loop), which have no .get(), so those
# entries are skipped here.
[result.get('repo_name')
 for result in results
 if not isinstance(result, Exception)
 and result.get('ebooks_in_release_count') != 3]

In [ ]:
results[0]

In [ ]:
results[-1]

Forks that still require rebuilding (including a repo name change)


In [ ]:
# Hard-coded list of repo names still to be rebuilt; a later cell iterates
# over TO_REBUILD[4:].
TO_REBUILD = ['Dubliners_2814',
 'Moby-Dick--Or-The-Whale_2701',
 'The-Brothers-Karamazov_28054',
 'Frankenstein_84',
 'The-Works-of-Edgar-Allan-Poe-The-Raven-EditionTable-Of-Contents-And-Index-Of-The-Five-Volumes_25525',
 'A-Little-Princess--13-Being-the-whole-story-of-Sara-Crewe-now-told-for-the-first-time_146',
 'The-Invisible-Man--A-Grotesque-Romance_5230',
 'Persuasion_105',
 'Tales-of-the-Jazz-Age_6695',
 'The-Last-of-the-Mohicans--A-Narrative-of-1757_27681',
 'Around-the-World-in-80-Days_103',
 'The-Trial_7849',
 'The-Posthumous-Papers-of-the-Pickwick-Club-v-1of-2_47534',
 'Cyrano-De-Bergerac_1254',
 'Daddy-Long-Legs_157',
 'Aesop-s-Fables--a-new-translation_11339',
 'The-Art-of-War_132']

In [ ]:
# Pick one repo name (alternatives kept commented out).
repo = "The-Posthumous-Papers-of-the-Pickwick-Club-v-1of-2_47534"
# repo = "Adventures-of-Huckleberry-Finn_76"
# repo = "Chaucer-s-Works-Volume-4--of-7----The-Canterbury-Tales_22120"


# BuildRepo job for that repo under the rdhyee owner, using the rdhyee
# credentials and Travis access token.
bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
              repo_owner='rdhyee', 
              update_travis_commit_msg='try again after fixing _version',
              tag_commit_message='try again after fixing _version',
              access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)

In [ ]:
bj.fix_repo_name()

In [ ]:
bj.delete_repo_token()

In [ ]:
bj.run()

In [ ]:
bj.status()

In [ ]:
repos_iter = iter(TO_REBUILD[4:])

In [ ]:
from collections import OrderedDict
results = OrderedDict()

def build_repos(repos, n=None):
    """Delete the repo token, then run a BuildRepo job, for up to ``n`` repos.

    Parameters
    ----------
    repos : iterable of repo names; passing an iterator lets successive calls
        continue where the previous one stopped.
    n : int or None
        Maximum number of repos to take from ``repos`` (``None`` = all).

    Side effects
    ------------
    For every repo, stores the 3-tuple
    ``(job, job.run(load_repo_token=False), delete_repo_token() result)`` in
    the module-level ``results`` mapping, or the raised exception if anything
    failed, and prints a one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
                           repo_owner='rdhyee',
                           update_travis_commit_msg='build repo',
                           tag_commit_message='build repo',
                           access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
            # delete the token first, then build without loading a stored one
            token_delete_result = bj.delete_repo_token()
            results[repo] = (bj, bj.run(load_repo_token=False), token_delete_result)
        except Exception as e:
            # keep going: remember the failure instead of aborting the batch
            results[repo] = e

        # "\r" + end="" rewrites the same output line on each iteration
        print ("\r{}: {}".format(i, results[repo]), end="")

In [ ]:
build_repos(repos_iter,None)

In [ ]:
# Repo names whose run() returned something other than None. Entries that are
# Exception objects are skipped: they are not (job, run result, ...) tuples.
[result[0].repo_name
 for result in results.values()
 if not isinstance(result, Exception) and result[1] is not None]

In [ ]:
results

buildability


In [ ]:
# https://github.com/GITenberg/United-States-Declaration-of-Independence_1

# Only one repo name is active; the alternatives stay commented out (the
# second one used to be assigned and then immediately overwritten).
# repo_name = "United-States-Declaration-of-Independence_1"
# repo_name = "United-States-Bill-of-Rights_2"
repo_name = "On-Liberty_34901"

# BuildRepo2 job for that repo under the GITenberg owner, using the GITenberg
# Travis access token.
bj = BuildRepo2(username=username, password=password, repo_name=repo_name,
        repo_owner='GITenberg',
        update_travis_commit_msg='build using travis.build_epub',
        tag_commit_message='build using travis.build_epub',
        access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)

In [ ]:
bj.status()

In [ ]:
bj.buildable()

In [ ]:
bj.gh_repo

In [ ]:
import traceback
import sys
from pprint import pprint

# Access last_build and, if it raises KeyError, print the formatted traceback
# before letting the error propagate.
try:
    bj.travis_repo.last_build
except KeyError:

    (exc_type, exc_value, exc_tb) = sys.exc_info()
    stack_trace = " ".join(traceback.format_exception(exc_type, exc_value, exc_tb))

    print (stack_trace)
    #print (" ".join(traceback.format_stack()))

    # Bare `raise` re-raises the active exception with its original traceback;
    # `raise e` under Python 2 would restart the traceback at this line.
    raise

In [ ]:
# check for existence of metadata.yaml

bj.gh_repo.contents("metadata.yaml", ref="master") is not None

In [ ]:
source_book(bj)

writing metadata files

Loop through every repo_name whose has_metadata value is null.