In [ ]:
import requests
In [1]:
from __future__ import print_function
from itertools import (islice, izip)
import arrow
import github3
import requests
from sqlalchemy import or_
from github_settings import (ry_username, ry_password,
username, password,
# token,
GITENBERG_GITHUB_TOKEN,
GITENBERG_TRAVIS_ACCESS_TOKEN,
RDHYEE_GITHUB_TOKEN,
RDHYEE_TRAVIS_ACCESS_TOKEN,
RDHYEE_TRAVIS_PROFILE_TOKEN)
from second_folio import (apply_to_repos, all_repos)
from gitenberg_utils import (GitenbergJob,
GitenbergTravisJob,
ForkBuildRepo,
BuildRepo,
BuildRepo2,
MetadataWrite,
RepoNameFixer,
repo_md,
GitenbergJobRunner,
MetadataWriterRunner,
RepoJobRunner,
StatusUpdateRunner)
from gitenberg_db import Repo, create_session
In [ ]:
import logging
# Display the root logger's current effective level (a numeric value).
logging.getLogger().getEffectiveLevel()
In [ ]:
# Set the root logger threshold to WARNING (numeric value 30) and echo the
# effective level back.
l = logging.getLogger()
l.setLevel(logging.WARNING)
print(l.getEffectiveLevel())
In [ ]:
# http://www.datacarpentry.org/python-ecology-lesson/08-working-with-sql
import sqlite3
from contextlib import closing
from itertools import islice

# Print the first three rows of the `repos` table.
# closing() releases the connection even when the query raises (for example
# when the table is missing); sqlite3's own context manager only handles
# transactions and does not close the connection.
with closing(sqlite3.connect("gitenberg.db")) as con:
    cur = con.cursor()
    # the result of a "cursor.execute" can be iterated over by row
    for row in islice(cur.execute('SELECT * FROM repos;'), 3):
        print(row)
In [ ]:
import sqlite3

import pandas as pd
from pandas import DataFrame, Series

# Load the whole `repos` table into a DataFrame, parsing the two timestamp
# columns as dates, and preview the first rows.
con = sqlite3.connect("gitenberg.db")
df = pd.read_sql(
    'SELECT * FROM repos;',
    con,
    parse_dates=('updated', 'metadata_written'),
)
df.head()
In [ ]:
# Inspect the per-column dtypes of the DataFrame `df`.
df.dtypes
In [ ]:
# let's pull out a list of repos that have been built
In [ ]:
class MetadataWriterRunner2(MetadataWriterRunner):
    """MetadataWriterRunner restricted to an explicit collection of repo names."""

    def __init__(self, dbfname, gh_username, gh_password, access_token=None,
                 max_exceptions=None, repos_list=None):
        """Pass the standard runner arguments to the parent class and keep
        ``repos_list`` (repo names to process, or None) on the instance."""
        super(MetadataWriterRunner2, self).__init__(
            dbfname, gh_username, gh_password, access_token, max_exceptions)
        self.repos_list = repos_list

    def repos(self, n=None):
        """Return up to ``n`` Repo rows whose ``repo_name`` is in ``repos_list``.

        When no ``repos_list`` was supplied an empty list is returned.
        """
        if self.repos_list is None:
            return []
        matching = self.session().query(Repo).filter(
            Repo.repo_name.in_(self.repos_list))
        return islice(matching, n)
class RepoJobRunner2(RepoJobRunner):
    """RepoJobRunner restricted to an explicit collection of repo names."""

    def __init__(self, dbfname, gh_username, gh_password, access_token=None,
                 max_exceptions=None, repos_list=None):
        """Pass the standard runner arguments to the parent class and keep
        ``repos_list`` (repo names to process, or None) on the instance."""
        super(RepoJobRunner2, self).__init__(
            dbfname, gh_username, gh_password, access_token, max_exceptions)
        self.repos_list = repos_list

    def repos(self, n=None):
        """Return up to ``n`` Repo rows whose ``repo_name`` is in ``repos_list``.

        When no ``repos_list`` was supplied an empty list is returned.
        """
        if self.repos_list is None:
            return []
        name_filter = Repo.repo_name.in_(self.repos_list)
        selected = self.session().query(Repo).filter(name_filter)
        return islice(selected, n)
In [ ]:
# Metadata writer restricted to a single named repo.
mwr2 = MetadataWriterRunner2("gitenberg.db", username, password,
repos_list=('At-the-Sign-of-the-Eagle_6218',))
In [ ]:
# presumably processes at most one repo -- confirm against the runner's run()
mwr2.run(1)
In [ ]:
# Repo job runner for the same single repo, given the GITenberg Travis token
# and max_exceptions=20.
rjr2 = RepoJobRunner2("gitenberg.db", username, password, GITENBERG_TRAVIS_ACCESS_TOKEN, max_exceptions=20,
repos_list=('At-the-Sign-of-the-Eagle_6218',
))
In [ ]:
# None is passed as the count; presumably "no limit" -- TODO confirm
rjr2.run(None)
In [ ]:
# Unrestricted metadata writer over the same database.
mwr = MetadataWriterRunner("gitenberg.db", username, password)
In [ ]:
mwr.run(1)
In [ ]:
# Inspect the exceptions the runner reports.
mwr.exceptions()
In [ ]:
# BuildRepo2 job for the GITenberg owner; note repo_name is left empty here.
job = BuildRepo2(username=username,
password=password,
repo_name='',
repo_owner='GITenberg',
update_travis_commit_msg='build using gitenberg.travis',
tag_commit_message='build using gitenberg.travis',
access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)
In [ ]:
# Session on the local database file, used by the ad-hoc queries in this notebook.
session = create_session("gitenberg.db")
In [ ]:
# Count repos where buildable is NULL or true, datebuilt is NULL, and
# metadata_written is set. (`== None` / `!= None` are intentional: these are
# SQLAlchemy column expressions, not Python identity checks.)
(session.query(Repo)
.filter(or_(Repo.buildable == None, Repo.buildable == True))
.filter(Repo.datebuilt == None)
.filter(Repo.metadata_written != None)
).count()
In [ ]:
# Repo job runner over the whole database, with max_exceptions=20.
rjr = RepoJobRunner("gitenberg.db", username, password, GITENBERG_TRAVIS_ACCESS_TOKEN, max_exceptions=20)
In [ ]:
# presumably processes up to 50 repos -- confirm against the runner's run()
rjr.run(50)
In [ ]:
# Materialize what repo_names(1) yields.
list(rjr.repo_names(1))
In [ ]:
def delete_repo_token(repo_name):
    """Build a GitenbergTravisJob for ``repo_name`` (with 'GITenberg' as the
    fourth positional argument) and call its ``delete_repo_token`` method."""
    commit_msg = 'build using gitenberg.travis'
    travis_job = GitenbergTravisJob(
        username, password, repo_name, 'GITenberg',
        update_travis_commit_msg=commit_msg,
        tag_commit_message=commit_msg,
        access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)
    travis_job.delete_repo_token()
In [ ]:
# Inspect the exceptions the repo job runner reports.
rjr.exceptions()
In [ ]:
# Remaining GitHub API calls according to the runner's GitHub client.
rjr.gh.ratelimit_remaining
In [ ]:
# Count down until the GitHub rate-limit reset time.
# timedelta.seconds is only the seconds *component*: when the reset time has
# already passed the delta is negative and .seconds wraps to almost a full
# day. Use total_seconds() and clamp at zero instead.
dt = arrow.get(rjr.gh.rate_limit()['rate']['reset']) - arrow.now()
rjr.countdown(max(0, int(dt.total_seconds())))
In [ ]:
class StatusUpdateRunner2(StatusUpdateRunner):
    """StatusUpdateRunner restricted to an explicit collection of repo names."""

    def __init__(self, dbfname, gh_username, gh_password, access_token=None,
                 max_exceptions=None, repos_list=None):
        """Pass the standard runner arguments to the parent class and keep
        ``repos_list`` (repo names to process, or None) on the instance."""
        super(StatusUpdateRunner2, self).__init__(
            dbfname, gh_username, gh_password, access_token, max_exceptions)
        self.repos_list = repos_list

    def repos(self, n=None):
        """Return up to ``n`` Repo rows whose ``repo_name`` is in ``repos_list``.

        When no ``repos_list`` was supplied an empty list is returned.
        """
        wanted = self.repos_list
        if wanted is None:
            return []
        query = self.session().query(Repo)
        return islice(query.filter(Repo.repo_name.in_(wanted)), n)
In [ ]:
# Count repos that have a build date but no recorded last_build_id.
(session.query(Repo)
.filter(Repo.datebuilt != None)
.filter(Repo.last_build_id == None)
).count()
In [ ]:
# Status update runner over the whole database.
sur = StatusUpdateRunner("gitenberg.db", username, password, GITENBERG_TRAVIS_ACCESS_TOKEN)
In [ ]:
# None is passed as the count; presumably "no limit" -- TODO confirm
sur.run(None)
In [ ]:
# Remaining GitHub API calls according to the runner's GitHub client.
sur.gh.ratelimit_remaining
In [ ]:
# Count down until the GitHub rate-limit reset time.
# timedelta.seconds is only the seconds *component*: when the reset time has
# already passed the delta is negative and .seconds wraps to almost a full
# day. Use total_seconds() and clamp at zero instead.
dt = arrow.get(sur.gh.rate_limit()['rate']['reset']) - arrow.now()
sur.countdown(max(0, int(dt.total_seconds())))
In [ ]:
# Inspect the exceptions the status update runner reports.
sur.exceptions()
In [ ]:
# Count repos whose ebooks_in_release_count equals 3.
(session.query(Repo)
.filter(Repo.ebooks_in_release_count == 3)
).count()
In [ ]:
# List the distinct values stored in ebooks_in_release_count.
session.query(Repo.ebooks_in_release_count).distinct().all()
SELECT ebooks_in_release_count, count (ebooks_in_release_count)
FROM Repos
GROUP BY ebooks_in_release_count
In [ ]:
# how many built
(session.query(Repo)
.filter(Repo.datebuilt != None).count())
In [ ]:
# how many for which we know lastbuilt status
(session.query(Repo)
.filter(Repo.last_build_state != None).count())
In [ ]:
# http://stackoverflow.com/a/4086229/7782
# Group repos by ebooks_in_release_count and count each group.
from sqlalchemy import func
(session.query(Repo.ebooks_in_release_count, func.count(Repo.ebooks_in_release_count))
.group_by(Repo.ebooks_in_release_count).all())
In [ ]:
# Same grouping by last_build_state; kept as a list of (state, count) pairs.
from sqlalchemy import func
build_states = (session.query(Repo.last_build_state, func.count(Repo.last_build_state))
.group_by(Repo.last_build_state).all())
build_states
In [ ]:
# Total of the per-state counts. The builtin sum is referenced explicitly,
# presumably because `sum` is shadowed in this session -- TODO confirm.
__builtin__.sum([v for (k,v) in build_states])
In [ ]:
# NOTE(review): distinct(<column>) may not produce a per-column DISTINCT on
# SQLite, so this count may equal the total row count -- verify.
session.query(Repo).distinct(Repo.ebooks_in_release_count).count()
In [ ]:
# Remaining GitHub API calls according to the runner's GitHub client.
sur.gh.ratelimit_remaining
In [ ]:
# Count down until the GitHub rate-limit reset time.
# timedelta.seconds is only the seconds *component*: when the reset time has
# already passed the delta is negative and .seconds wraps to almost a full
# day. Use total_seconds() and clamp at zero instead.
dt = arrow.get(sur.gh.rate_limit()['rate']['reset']) - arrow.now()
sur.countdown(max(0, int(dt.total_seconds())))
In [ ]:
import json
import unicodecsv as csv
from StringIO import StringIO
# http://stackoverflow.com/a/11884806
def as_dict(repo):
    """Map each column name of ``repo``'s table to the value stored on ``repo``."""
    column_names = [column.name for column in repo.__table__.columns]
    return dict((name, getattr(repo, name)) for name in column_names)
# Write every Repo with a known build state to a tab-separated file,
# one column per table column.
results = session.query(Repo).filter(Repo.last_build_state != None)

# repos_file = StringIO()
# unicodecsv emits encoded bytes, hence the binary file mode.
with open("built_repos.tsv", "wb") as repos_file:
    headers = [c.name for c in Repo.__table__.columns]
    repo_csv = csv.DictWriter(repos_file, headers,
                              encoding='utf-8', delimiter='\t')
    repo_csv.writeheader()
    for result in results:
        repo_csv.writerow(as_dict(result))
In [ ]:
!wc built_repos.tsv
In [ ]:
# Query for repos whose last recorded build state is 'failed', and count them.
failed_builds = (session.query(Repo)
.filter(Repo.last_build_state == 'failed'))
failed_builds.count()
In [ ]:
# Print the Travis build page URL for every repo in failed_builds.
build_url_template = "https://travis-ci.org/GITenberg/{repo_name}/builds/{last_build_id}"
for (i, repo) in enumerate(failed_builds):
    url = build_url_template.format(repo_name=repo.repo_name,
                                    last_build_id=repo.last_build_id)
    print(url)
let's look at https://travis-ci.org/GITenberg/American-Hand-Book-of-the-Daguerreotype_167/builds/150209405
cannot read from /home/travis/build/GITenberg/American-Hand-Book-of-the-Daguerreotype_167/book.epub
The letter case of the image file names doesn't match -- a case-sensitivity problem.
ebook-convert 371.txt book.epub --title "Literary Blunders: A Chapter in the "History of Human Error"" --authors "" ' returned non-zero exit status 1
A problem with how quotes are handled in invocation of ebook-convert
What is the relationship among a Travis build, its jobs, and their logs?
https://travis-ci.org/GITenberg/American-Hand-Book-of-the-Daguerreotype_167/builds/150209405
In [ ]:
# Build a GitenbergTravisJob for one repo and look at its travis_repo attribute.
repo_name = "American-Hand-Book-of-the-Daguerreotype_167"
gtj = GitenbergTravisJob(username, password, repo_name, 'GITenberg',
update_travis_commit_msg='build using gitenberg.travis',
tag_commit_message='build using gitenberg.travis',
access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)
gtj.travis_repo
How do we read log files from Travis? Revisit menegazzo/travispy (Travis CI API for Python).
In [ ]:
# How to read log files from travis
# Fetch the repo's last build, take the final entry of its jobs list, and
# show that job's id.
b = gtj.travis.build(gtj.travis_repo.last_build_id)
j = b.jobs[-1]
j.id
In [ ]:
# First 100 characters of the job's log body.
j.log.body[:100]
In [ ]:
# Count repos whose last recorded build state is 'started'.
(session.query(Repo)
.filter(Repo.last_build_state == 'started')
).count()
In [ ]:
class StatusUpdateRunnerForStartedJobs(StatusUpdateRunner):
    """StatusUpdateRunner that only visits repos recorded as 'started'."""

    def repos(self, n):
        """Return up to ``n`` Repo rows whose last_build_state is 'started'."""
        started = self.session().query(Repo).filter(
            Repo.last_build_state == 'started')
        return islice(started, n)
In [ ]:
# Status update runner limited to repos whose build state is 'started'.
sur2 = StatusUpdateRunnerForStartedJobs("gitenberg.db", username, password, GITENBERG_TRAVIS_ACCESS_TOKEN)
In [ ]:
# None is passed as the count; presumably "no limit" -- TODO confirm
sur2.run(None)
In [ ]:
class ErroredRepoJobRunner(RepoJobRunner):
    """RepoJobRunner that only visits repos recorded as 'errored'."""

    def repos(self, n):
        """Return up to ``n`` Repo rows whose last_build_state is 'errored'."""
        errored = self.session().query(Repo).filter(
            Repo.last_build_state == 'errored')
        return islice(errored, n)
In [ ]:
# Repo job runner limited to repos whose build state is 'errored'.
erjr = ErroredRepoJobRunner("gitenberg.db", username, password, GITENBERG_TRAVIS_ACCESS_TOKEN, max_exceptions=20)
In [ ]:
# presumably processes up to 10 repos -- confirm against the runner's run()
erjr.run(10)
In [ ]:
# Remaining GitHub API calls according to the runner's GitHub client.
erjr.gh.ratelimit_remaining
In [ ]:
# Count down until the GitHub rate-limit reset time reported by erjr's client.
# The countdown runs on erjr itself, the runner whose limit was just read.
# timedelta.seconds is only the seconds *component*: when the reset time has
# already passed the delta is negative and .seconds wraps to almost a full
# day. Use total_seconds() and clamp at zero instead.
dt = arrow.get(erjr.gh.rate_limit()['rate']['reset']) - arrow.now()
erjr.countdown(max(0, int(dt.total_seconds())))
In [ ]:
# For every repo whose ebooks_in_release_count is 3, set the has_metadata,
# has_source and buildable flags, stamp `updated`, then persist the changes.
three_ebook_repos = session.query(Repo).filter_by(ebooks_in_release_count=3)
for repo in three_ebook_repos:
    repo.has_metadata = True
    repo.has_source = True
    repo.buildable = True
    repo.updated = arrow.now().isoformat()
session.commit()
In [ ]:
# Parse metadata for gitenberg.Book(1) and display the resulting metadata.
import gitenberg
b = gitenberg.Book(1)
b.parse_book_metadata()
b.meta.metadata
In [ ]:
# Print the result of repo_md(1) as block-style YAML, keeping unicode as-is.
import yaml
md = repo_md(1)
print (yaml.safe_dump(md,default_flow_style=False,
allow_unicode=True))
In [ ]:
# Deliberately raises ZeroDivisionError -- presumably a tripwire that stops a
# "run all cells" pass at this point; TODO confirm.
1/0
In [ ]:
def status_for_repo(repo_name):
    """Return ``status()`` of a GitenbergTravisJob created for ``repo_name``."""
    job_kwargs = dict(
        username=username,
        password=password,
        repo_name=repo_name,
        repo_owner='GITenberg',
        update_travis_commit_msg='check status',
        tag_commit_message='check status',
        access_token=GITENBERG_TRAVIS_ACCESS_TOKEN,
    )
    return GitenbergTravisJob(**job_kwargs).status()


# Iterator of status results from applying status_for_repo over all_repos.
results_iter = apply_to_repos(status_for_repo, repos=all_repos)
In [ ]:
# Drain results_iter into a list, overwriting a one-line progress display.
results = []
for (i, result) in enumerate(results_iter):
    results.append(result)
    # Exceptions are shown via str(); other results by their repo name.
    label = str(result) if isinstance(result, Exception) else result['repo_name']
    print("\r{}: {}".format(i, label), end="")
In [ ]:
# Positions and values of the entries in `results` that are exceptions.
[(i, result) for (i, result) in enumerate(results) if isinstance(result, Exception)]
In [ ]:
# Names of repos whose ebooks_in_release_count is not 3.
# `results` can contain Exception objects (failed lookups); those have no
# .get(), so they are filtered out before the dict access.
[result.get('repo_name') for result in results
 if not isinstance(result, Exception)
 and result.get('ebooks_in_release_count') != 3]
In [ ]:
# update the database based on result
# Look at the first collected result to see which fields it carries.
result = results[0]
result
In [ ]:
# Copy each collected status into its Repo row, then commit once at the end.
for result in results:
    # `results` can contain Exception objects for repos whose status lookup
    # failed; they carry no fields to store, so skip them.
    if isinstance(result, Exception):
        continue
    repo = session.query(Repo).filter_by(repo_name=result['repo_name']).first()
    if repo is None:
        # No row with this repo name in the database; nothing to update.
        continue
    repo.updated = arrow.now().isoformat()
    repo.datebuilt = result['last_build_started_at']
    repo.version = result['version']
    repo.ebooks_in_release_count = result['ebooks_in_release_count']
    repo.last_build_id = result['last_build_id']
    repo.last_build_state = result['last_build_state']
session.commit()
In [ ]:
# building the rest
# Count repos that already have a build date.
session.query(Repo).filter(Repo.datebuilt != None).count()
In [ ]:
# Names of the first five repos without a build date, in ascending gutenberg_id order.
repo_names = [repo.repo_name for repo in
islice(session.query(Repo).filter(Repo.datebuilt == None).order_by(Repo.gutenberg_id.asc()),5)]
In [ ]:
from collections import OrderedDict
from itertools import islice
# Insertion-ordered mapping; note this rebinds the name `results`.
results = OrderedDict()
In [ ]:
# One shared iterator, so successive consumers continue where the last stopped.
repos_iter = iter(repo_names)
In [ ]:
def build_repos(repo_names, n=None):
    """Run a BuildRepo2 job for up to ``n`` names taken from ``repo_names``.

    The outcome for each name is stored in the module-level ``results``
    mapping: ``(job, job.run())`` on success, or the raised exception on
    failure. After a successful run the matching Repo row, if one exists,
    gets ``updated`` and ``datebuilt`` set to the current time. The session
    is not committed here.

    :param repo_names: iterable of repo names, consumed lazily via islice
    :param n: maximum number of names to process; None means no limit
    """
    for (i, repo_name) in enumerate(islice(repo_names, n)):
        try:
            bj = BuildRepo2(username=username, password=password, repo_name=repo_name,
                            repo_owner='GITenberg',
                            update_travis_commit_msg='build using gitenberg.travis',
                            tag_commit_message='build using gitenberg.travis',
                            access_token=GITENBERG_TRAVIS_ACCESS_TOKEN)
            results[repo_name] = (bj, bj.run())
            # just mark as started
            # Look the row up by the name being built in this iteration, not
            # by whatever a global `result` variable happens to hold.
            repo = session.query(Repo).filter_by(repo_name=repo_name).first()
            if repo is not None:
                # A missing row must not turn a successful build into an error.
                now = arrow.now().isoformat()
                repo.updated = now
                repo.datebuilt = now
        except Exception as e:
            # Best effort: record the failure and carry on with the next repo.
            results[repo_name] = e
        print("\r{}: {}".format(i, results[repo_name]), end="")
In [ ]:
# Build one repo, advancing the shared repos_iter by a single name.
build_repos(repos_iter, 1)
Wondering whether, instead of add_all, there is an add-or-update function. See: python - SQLAlchemy insert or update example - Stack Overflow
In [ ]:
# NOTE(review): `repo1` is not defined in any cell visible here -- presumably
# left over from an earlier session; confirm before running.
repo1.version = '0.0.5'
In [ ]:
# Inspect the session's `dirty` collection.
session.dirty
In [ ]:
# Inspect the session's `new` collection.
session.new
In [ ]:
# Fetch the first Repo row named 'Repo1' (None if there is no such row).
our_repo = session.query(Repo).filter_by(repo_name='Repo1').first() # doctest:+NORMALIZE_WHITESPACE
our_repo
In [ ]:
# Log in to GitHub with the ry_username / ry_password credentials.
gh = github3.login(ry_username, password=ry_password)
In [ ]:
from itertools import islice
# Collect name and timestamps of every authorization on the account
# (islice with None applies no limit).
auths = [{'name': auth.name, 'created_at':auth.created_at, 'updated_at':auth.updated_at}
for auth in islice(gh.iter_authorizations(),None)]
In [ ]:
# Show the authorizations ordered by creation time, oldest first.
sorted(auths, key=lambda r: r['created_at'])
In [ ]: