In [ ]:
from __future__ import print_function
from github_settings import (ry_username, ry_password,
username, password,
token,
GITENBERG_GITHUB_TOKEN,
GITENBERG_TRAVIS_ACCESS_TOKEN,
RDHYEE_GITHUB_TOKEN,
RDHYEE_TRAVIS_ACCESS_TOKEN,
RDHYEE_TRAVIS_PROFILE_TOKEN)
from itertools import (islice, izip)
import requests
from second_folio import (apply_to_repos, all_repos)
from gitenberg_utils import (GitenbergJob,
GitenbergTravisJob,
ForkBuildRepo,
BuildRepo,
BuildRepo2,
MetadataWrite,
RepoNameFixer,
repo_md)
import pandas as pd
from pandas import (DataFrame, Series)
In [ ]:
# loop through all the repos
repos = all_repos
In [ ]:
# easier way to build travis_repo for given repo?
from travispy import TravisPy
# Create a Travis CI client authenticated with the GITenberg access token.
travis = TravisPy(GITENBERG_TRAVIS_ACCESS_TOKEN)
# Look up one known repository by its "owner/name" slug.
travis_repo = travis.repo("GITenberg/United-States-Declaration-of-Independence_1")
In [ ]:
travis_repo.state
In [ ]:
def travis_last_build(travis, repo_owner, repo_name):
    """Return the last Travis build of ``repo_owner/repo_name``.

    Args:
        travis: client object exposing ``repo(slug)``.
        repo_owner: owner part of the repository slug.
        repo_name: name part of the repository slug.

    Returns:
        A 3-tuple ``(last_build, last_build.id, last_build.state)``.
    """
    travis_repo = travis.repo("{}/{}".format(repo_owner, repo_name))
    # Read the property once so the build, its id and its state all come
    # from the same object instead of three separate lookups.
    last_build = travis_repo.last_build
    return (last_build, last_build.id, last_build.state)
In [ ]:
from rdhyee_utils import grouper
In [ ]:
import arrow
now = arrow.now()
now.isoformat()
In [ ]:
import io, csv
from itertools import islice
import arrow
def init_repos(conn, repo_owner='GITenberg', max_repos=None):
    """Seed the ``repos`` table from the local GITenberg repo list.

    Reads the tab-separated repo list, keeps rows with exactly two fields,
    and inserts them in pages of ``PAGE_SIZE`` using ``INSERT_SQL``.  Only
    the first four columns are populated; the remaining ``BASE_COLUMNS``
    are written as ``None``.

    Args:
        conn: open sqlite3 connection.  NOTE: it is closed before returning.
        repo_owner: value stored in the ``repo_owner`` column of every row.
        max_repos: upper bound on the number of rows loaded (None = all).

    A page that fails to insert is printed, rolled back, and stops the
    load; pages inserted before it stay committed.
    """
    REPO_LIST_PATH = "/Users/raymondyee/C/src/gitberg/build/lib/gitenberg/data/GITenberg_repo_list.tsv"
    PAGE_SIZE = 50

    with io.open(REPO_LIST_PATH, mode='r', encoding='UTF-8') as f:
        s = f.read()

    # Split each line once and keep only well-formed two-column rows.
    split_rows = (row.split("\t") for row in s.split("\n"))
    repos = [fields for fields in split_rows if len(fields) == 2]

    c = conn.cursor()
    # Columns beyond the first four are unknown at seeding time.
    padding = (len(BASE_COLUMNS) - 4) * [None]

    for page in grouper(islice(repos, max_repos), PAGE_SIZE):
        page_of_repos = []
        for repo in page:
            # Guard against a padded final page -- presumably grouper fills
            # with None like the itertools recipe; harmless if it does not.
            if repo is None:
                continue
            values = [repo[0], arrow.now().isoformat(), repo[1], repo_owner] + padding
            page_of_repos.append(values)
        try:
            c.executemany(INSERT_SQL, page_of_repos)
        except Exception as e:
            print(e)
            # rollback() is a method of the connection; cursors have none.
            conn.rollback()
            break
        else:
            # Commit per page so a later failure cannot discard earlier pages.
            conn.commit()

    conn.close()
In [ ]:
# https://pymotw.com/2/sqlite3/
import sqlite3
import os
import sqlite3
import arrow
# Set up the sqlite database that tracks the build state of each repo.
db_filename = 'gitenberg.db'

# (column name, sqlite column definition) pairs for the `repos` table.
BASE_COLUMNS = [
    ('gutenberg_id', 'integer primary key'),
    ('updated', 'text'),
    ('repo_name', 'text'),
    ('repo_owner', 'text'),
    ('datebuilt', 'integer'),
    ('version', 'text'),
    ('ebooks_in_release_count', 'integer'),
    ('last_build_id', 'integer'),
    ('last_build_status', 'text'),
]
BASE_COLUMNS_FIELDS = [(k[0]) for k in BASE_COLUMNS]

CREATE_TABLE_SQL = "CREATE TABLE IF NOT EXISTS repos ({0})".format(
    ",\n".join(["%s %s" % (k[0], k[1]) for k in BASE_COLUMNS])
)

# One "?" placeholder per column, so rows are always passed as parameters.
INSERT_SQL = "INSERT OR REPLACE INTO repos ({0}) VALUES ({1})".format(
    ", ".join(BASE_COLUMNS_FIELDS),
    ", ".join(["?"] * len(BASE_COLUMNS_FIELDS)))

# Must be checked BEFORE connecting: sqlite3.connect() creates the file,
# so testing afterwards would always report an existing database.
db_is_new = not os.path.exists(db_filename)

conn = sqlite3.connect(db_filename)
c = conn.cursor()
c.execute(CREATE_TABLE_SQL)
print ("table created")

if db_is_new:
    print ('Need to create schema')
else:
    print ('Database exists, assume schema does, too.')

# NOTE(review): this runs on every execution, and INSERT OR REPLACE rewrites
# each row with None in the build columns -- confirm that re-seeding an
# already populated database is intended.
init_repos(conn)
# init_repos() closes the connection itself; this second close is a no-op.
conn.close()
Where is the list of repos?
On GitHub: https://github.com/gitenberg-dev/gitberg/blob/master/gitenberg/data/GITenberg_repo_list.tsv
Local copy: /Users/raymondyee/C/src/gitberg/build/lib/gitenberg/data/GITenberg_repo_list.tsv
In [ ]:
# Keep the last tab-separated field of every well-formed two-column row.
# NOTE(review): in the visible source `s` is assigned only inside
# init_repos(), so this cell presumably relies on an `s` left over from an
# earlier interactive run holding the repo-list text -- TODO confirm.
repos = [row.split("\t")[-1] for row in s.split("\n") if len(row.split("\t")) == 2]
In [ ]:
# Build job for a single GITenberg repo; swap in one of the commented-out
# names below to target a different book.
# repo = "Anne-of-the-Island_51"
# repo = "Adventures-of-Huckleberry-Finn_76"
repo = "The-Lady-with-the-Dog-and-Other-Stories_13415"
# The same message is used for the travis-update commit and for the tag commit.
bj = BuildRepo2(username=username, password=password, repo_name=repo,
repo_owner='GITenberg',
update_travis_commit_msg='build using travis.build_epub',
tag_commit_message='build using travis.build_epub',
access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)
In [ ]:
bj.status()
In [ ]:
bj.travis_repo.last_build.finished_at
In [ ]:
bj.gh.rate_limit()
In [ ]:
bj.run(fix_repo_name=True)
In [ ]:
bj.travis_repo.state, bj.status()
In [ ]:
from collections import OrderedDict
from itertools import islice
# Outcome per repo name, in processing order.
results = OrderedDict()
# Work on a reversed COPY: calling all_repos.reverse() would flip the shared
# imported list in place, so every re-run of a cell like this one would
# silently change the processing order.
repos = list(reversed(all_repos))
# Iterate over a snapshot so later changes to `repos` do not affect the run.
repos_iter = iter(repos[:])
In [ ]:
def build_repos(repos, n=None):
    """Run a BuildRepo2 job for up to ``n`` GITenberg repos.

    Args:
        repos: iterable of repo names.  When an iterator is passed, islice
            consumes it, so a later call continues where this one stopped.
        n: maximum number of repos to process (None = all).

    Side effects:
        Stores ``(job, run_result)`` in the module-level ``results`` for
        each repo, or the raised exception if the job failed, and prints a
        one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = BuildRepo2(username=username, password=password, repo_name=repo,
                            repo_owner='GITenberg',
                            update_travis_commit_msg='build using travis.build_epub',
                            tag_commit_message='build using travis.build_epub',
                            access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)
            results[repo] = (bj, bj.run(fix_repo_name=True))
        except Exception as e:
            # Best effort by design: record the failure and keep going.
            results[repo] = e
        print ("\r{}: {}".format(i, results[repo]), end="")
In [ ]:
build_repos(repos_iter, None)
In [ ]:
[result[0].status().get('ebooks_in_release_count') for result in results.values()[-5:]
if not isinstance(result,Exception)]
In [ ]:
len(results)
In [ ]:
# can I get travis status of pending build?
r = results.values()[-1][0]
r.repo_name, r.travis_repo.last_build.id, r.travis_repo.last_build.finished
In [ ]:
build_result = results.values()[0][0]
build_result.travis_repo.check_state()
There are at least three types of tokens in use in Travis CI
(see the Travis CI Blog post "Token, Token, Token"): the GitHub token,
the Travis access token, and the Travis profile token.
In [ ]:
# try using an access token
from travispy import TravisPy
#travis_client = TravisPy(RDHYEE_TRAVIS_ACCESS_TOKEN)
travis_client = TravisPy(GITENBERG_TRAVIS_ACCESS_TOKEN)
travis_repo = travis_client.repo("GITenberg/Adventures-of-Huckleberry-Finn_76")
In [ ]:
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
In [ ]:
Point(3,y=4).y
In [ ]:
(travis_repo.last_build_number,
travis_repo.last_build_id,
travis_repo.last_build.created,
travis_repo.last_build_started_at,
travis_repo.last_build_finished_at,
travis_repo.last_build_duration)
In [ ]:
travis_repo.enable()
In [ ]:
# using GitHub auth and read off the corresponding access token
from travispy import TravisPy
#t = TravisPy.github_auth(RDHYEE_GITHUB_TOKEN)
t = TravisPy.github_auth(GITENBERG_GITHUB_TOKEN)
In [ ]:
session = t._session
(session.uri,
session.headers.get('Authorization').split()[-1] == GITENBERG_TRAVIS_ACCESS_TOKEN)
In [ ]:
# repo = "Anne-of-the-Island_51"
# repo = "Adventures-of-Huckleberry-Finn_76"
repo = "Chaucer-s-Works-Volume-4--of-7----The-Canterbury-Tales_22120"
bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
repo_owner='rdhyee',
update_travis_commit_msg='try again: encoding problem?',
tag_commit_message='try again: encoding problem?',
access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
In [ ]:
bj.run()
In [ ]:
# fork and build repo
repo = repos[-1]
bj2 = ForkBuildRepo(username=ry_username, password=ry_password, repo_name=repo,
repo_owner='rdhyee',
update_travis_commit_msg='update travis',
tag_commit_message='update travis',
access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
In [ ]:
bj2.fork_repo()
In [ ]:
from collections import OrderedDict
from itertools import islice
# Outcome per repo name, in processing order.
results = OrderedDict()
# Work on a reversed COPY: calling all_repos.reverse() would flip the shared
# imported list in place, so every re-run of a cell like this one would
# silently change the processing order.
repos = list(reversed(all_repos))
repos_iter = iter(repos)
In [ ]:
def build_repos(repos, n=None):
    """Run a BuildRepo job for up to ``n`` repos owned by ``rdhyee``.

    Args:
        repos: iterable of repo names.  When an iterator is passed, islice
            consumes it, so a later call continues where this one stopped.
        n: maximum number of repos to process (None = all).

    Side effects:
        Stores ``(job, run_result)`` in the module-level ``results`` for
        each repo, or the raised exception if the job failed, and prints a
        one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
                           repo_owner='rdhyee',
                           update_travis_commit_msg='build using gitenberg.travis',
                           tag_commit_message='build using gitenberg.travis',
                           access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
            results[repo] = (bj, bj.run())
        except Exception as e:
            # Best effort by design: record the failure and keep going.
            results[repo] = e
        print ("\r{}: {}".format(i, results[repo]), end="")
In [ ]:
results.values()[-1][0].repo_name
In [ ]:
build_repos(repos_iter, 32)
In [ ]:
len(results)
In [ ]:
# Release counts for the five most recent results.  Failed repos are stored
# as Exception objects rather than (job, result) tuples, so skip them.
[result[0].status().get('ebooks_in_release_count') for result in list(results.values())[-5:]
 if not isinstance(result, Exception)]
In [ ]:
results.values()[-1][0].status()
In [ ]:
from collections import OrderedDict
from itertools import islice
# Outcome per repo name, in processing order.
results = OrderedDict()
# Work on a reversed COPY: calling all_repos.reverse() would flip the shared
# imported list in place, so every re-run of a cell like this one would
# silently change the processing order.
repos = list(reversed(all_repos))
repos_iter = iter(repos)
def rebuild_repos(repos, n=None):
    """Re-run the BuildRepo job for up to ``n`` repos owned by ``rdhyee``.

    Args:
        repos: iterable of repo names.  When an iterator is passed, islice
            consumes it, so a later call continues where this one stopped.
        n: maximum number of repos to process (None = all).

    Side effects:
        Stores ``(job, run_result)`` in the module-level ``results`` for
        each repo, or the raised exception if the job failed, and prints a
        one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
                           repo_owner='rdhyee',
                           update_travis_commit_msg='build using gitenberg.travis (retry)',
                           tag_commit_message='build using gitenberg.travis (retry)',
                           access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
            # reset token: run without loading the stored repo token.
            # The original line had lost the `bj.run` call and did not parse.
            results[repo] = (bj, bj.run(load_repo_token=False))
        except Exception as e:
            # Best effort by design: record the failure and keep going.
            results[repo] = e
        print ("\r{}: {}".format(i, results[repo]), end="")
In [ ]:
rebuild_repos(repos_iter,None)
In [ ]:
len(results)
In [ ]:
# Names of repos whose run reported 'failed'.  Entries stored as Exception
# objects (jobs that raised) are not (job, result) tuples, so skip them.
repos_failed = [result[0].repo_name for result in results.values()
                if not isinstance(result, Exception) and result[1] == 'failed']
repos_failed
In [ ]:
REPOS_FAILED = ['The-Pilgrim-s-Progress-from-this-world-to-that-which-is-to-come--13-Delivered-under-the-similit__131',
'The-Art-of-War_132',
'The-Thousand-and-One-Nights-Vol.-I.Commonly-Called-the-Arabian-Nights-Entertainments_34206',
'On-the-Origin-of-Species-By-Means-of-Natural-Selection--13-Or-the-Preservation-of-Favoured-Rac__1228',
'Daddy-Long-Legs_157',
'Heidi-Gift-Edition-_20781',
'The-Trial_7849',
'Chaucer-s-Works-Volume-4--of-7----The-Canterbury-Tales_22120',
'Persuasion_105',
'The-Works-of-Edgar-Allan-Poe-The-Raven-EditionTable-Of-Contents-And-Index-Of-The-Five-Volumes_25525']
In [ ]:
def fixname_repos(repos, n=None):
    """Run a RepoNameFixer job for up to ``n`` repos owned by ``rdhyee``.

    Args:
        repos: iterable of repo names.  When an iterator is passed, islice
            consumes it, so a later call continues where this one stopped.
        n: maximum number of repos to process (None = all).

    Side effects:
        Stores ``(job, run_result)`` in the module-level ``results`` for
        each repo, or the raised exception if the job failed, and prints a
        one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = RepoNameFixer(username=ry_username, password=ry_password, repo_name=repo,
                               repo_owner='rdhyee',
                               update_travis_commit_msg='build using gitenberg.travis (retry)',
                               tag_commit_message='build using gitenberg.travis (retry)',
                               access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
            results[repo] = (bj, bj.run())
        except Exception as e:
            # Best effort by design: record the failure and keep going.
            results[repo] = e
        print ("\r{}: {}".format(i, results[repo]), end="")
In [ ]:
from second_folio import (all_repos)
all_repos[:5]
In [ ]:
def status_for_repo(repo_name):
    """Return ``status()`` of a GitenbergTravisJob for the GITenberg repo ``repo_name``."""
    job_settings = dict(
        username=username,
        password=password,
        repo_name=repo_name,
        repo_owner='GITenberg',
        update_travis_commit_msg='check status',
        tag_commit_message='check status',
        access_token=GITENBERG_TRAVIS_ACCESS_TOKEN,
    )
    return GitenbergTravisJob(**job_settings).status()
results_iter = apply_to_repos(status_for_repo, repos=repos)
In [ ]:
status_for_repo('Adventures-of-Huckleberry-Finn_76')
In [ ]:
# Drain results_iter into a list, showing a one-line progress indicator:
# the repo name for a successful status, the error text for a failure.
results = []
for (i, result) in enumerate(results_iter):
    results.append(result)
    print ("\r{}: {}".format(
        i,
        str(result) if isinstance(result, Exception) else result['repo_name']),
        end="")
In [ ]:
[(i, result) for (i, result) in enumerate(results) if isinstance(result, Exception)]
In [ ]:
# Repos whose release does not contain exactly 3 ebooks.  `results` can hold
# Exception objects for repos whose status lookup failed; those have no
# .get(), so they are skipped here.
[result.get('repo_name') for result in results
 if not isinstance(result, Exception) and result.get('ebooks_in_release_count') != 3]
In [ ]:
results[0]
In [ ]:
results[-1]
In [ ]:
TO_REBUILD = ['Dubliners_2814',
'Moby-Dick--Or-The-Whale_2701',
'The-Brothers-Karamazov_28054',
'Frankenstein_84',
'The-Works-of-Edgar-Allan-Poe-The-Raven-EditionTable-Of-Contents-And-Index-Of-The-Five-Volumes_25525',
'A-Little-Princess--13-Being-the-whole-story-of-Sara-Crewe-now-told-for-the-first-time_146',
'The-Invisible-Man--A-Grotesque-Romance_5230',
'Persuasion_105',
'Tales-of-the-Jazz-Age_6695',
'The-Last-of-the-Mohicans--A-Narrative-of-1757_27681',
'Around-the-World-in-80-Days_103',
'The-Trial_7849',
'The-Posthumous-Papers-of-the-Pickwick-Club-v-1of-2_47534',
'Cyrano-De-Bergerac_1254',
'Daddy-Long-Legs_157',
'Aesop-s-Fables--a-new-translation_11339',
'The-Art-of-War_132']
In [ ]:
repo = "The-Posthumous-Papers-of-the-Pickwick-Club-v-1of-2_47534"
# repo = "Adventures-of-Huckleberry-Finn_76"
# repo = "Chaucer-s-Works-Volume-4--of-7----The-Canterbury-Tales_22120"
bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
repo_owner='rdhyee',
update_travis_commit_msg='try again after fixing _version',
tag_commit_message='try again after fixing _version',
access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
In [ ]:
bj.fix_repo_name()
In [ ]:
bj.delete_repo_token()
In [ ]:
bj.run()
In [ ]:
bj.status()
In [ ]:
repos_iter = iter(TO_REBUILD[4:])
In [ ]:
from collections import OrderedDict
results = OrderedDict()
def build_repos(repos, n=None):
    """Delete the repo token, then run a BuildRepo job, for up to ``n`` repos.

    Args:
        repos: iterable of repo names.  When an iterator is passed, islice
            consumes it, so a later call continues where this one stopped.
        n: maximum number of repos to process (None = all).

    Side effects:
        Stores ``(job, run_result, token_delete_result)`` in the
        module-level ``results`` for each repo, or the raised exception if
        the job failed, and prints a one-line progress indicator.
    """
    for (i, repo) in enumerate(islice(repos, n)):
        try:
            bj = BuildRepo(username=ry_username, password=ry_password, repo_name=repo,
                           repo_owner='rdhyee',
                           update_travis_commit_msg='build repo',
                           tag_commit_message='build repo',
                           access_token=RDHYEE_TRAVIS_ACCESS_TOKEN)
            # Delete first, then run without loading the stored token.
            token_delete_result = bj.delete_repo_token()
            results[repo] = (bj, bj.run(load_repo_token=False), token_delete_result)
        except Exception as e:
            # Best effort by design: record the failure and keep going.
            results[repo] = e
        print ("\r{}: {}".format(i, results[repo]), end="")
In [ ]:
build_repos(repos_iter,None)
In [ ]:
# Repos whose run returned a non-None result.  Entries stored as Exception
# objects (jobs that raised) are not tuples, so skip them.
[result[0].repo_name for result in results.values()
 if not isinstance(result, Exception) and result[1] is not None]
In [ ]:
results
In [ ]:
# https://github.com/GITenberg/United-States-Declaration-of-Independence_1
#repo_name = "United-States-Declaration-of-Independence_1"
repo_name = "United-States-Bill-of-Rights_2"
repo_name = "On-Liberty_34901"
bj = BuildRepo2(username=username, password=password, repo_name=repo_name,
repo_owner='GITenberg',
update_travis_commit_msg='build using travis.build_epub',
tag_commit_message='build using travis.build_epub',
access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)
In [ ]:
bj.status()
In [ ]:
bj.buildable()
In [ ]:
bj.gh_repo
In [ ]:
import traceback
import sys
from pprint import pprint
# Probe last_build and, on a KeyError, print the full traceback before
# letting the error propagate.
try:
    bj.travis_repo.last_build
except KeyError:
    (exc_type, exc_value, exc_tb) = sys.exc_info()
    stack_trace = " ".join(traceback.format_exception(exc_type, exc_value, exc_tb))
    print (stack_trace)
    #print (" ".join(traceback.format_stack()))
    # Bare `raise` re-raises with the original traceback; `raise e` would
    # restart the traceback from this line under Python 2.
    raise
In [ ]:
# check for existence of metadata.yaml
bj.gh_repo.contents("metadata.yaml", ref="master") is not None
In [ ]:
# NOTE(review): source_book is neither defined nor imported in the visible
# source -- presumably it comes from another module or an earlier
# interactive session; TODO confirm before running this cell.
source_book(bj)
TODO: loop through every repo_name where has_metadata is null.