In [2]:
import pprint
from sqlalchemy import func, desc
from sqlalchemy_schema import *
from create_package_database import *
# Open the database handles used by the queries in this notebook.
# session_setup is not defined here; it presumably comes from one of the
# star imports above (create_package_database?) -- TODO confirm.
# NOTE(review): only `session` is used in the cells visible here.
session, engine = session_setup()
In [3]:
## Total fraction of conflicts
# Rows whose is_conflict is neither 1 nor 0 (e.g. NULL) are excluded from
# both counts and therefore from the denominator.
n_conf = (
    session.query(Package_Function)
    .filter(Package_Function.is_conflict == 1)
    .count()
)
n_no_conf = (
    session.query(Package_Function)
    .filter(Package_Function.is_conflict == 0)
    .count()
)
n_total = n_conf + n_no_conf
# float() forces true division even on a Python 2 kernel; an empty table
# yields NaN instead of a ZeroDivisionError that would abort "Run All".
frac_conf = float(n_conf) / n_total if n_total else float("nan")
print(n_conf)
print(n_no_conf)
print(frac_conf)
In [4]:
# Packages ranked by number of conflicting functions (most conflicts first).
# Result: pkg_tuples is a list of (package_name, package_id, conflict_count).
pkg_conf_count = func.count(Package_Function.is_conflict)
top_pkg_conf = (
    session.query(Package_Function, pkg_conf_count)
    .filter(Package_Function.is_conflict == 1)
    .group_by(Package_Function.package_id)
    .order_by(desc(pkg_conf_count))
)
pkg_conf_tuples = top_pkg_conf.all()
pkg_counts = [row[1] for row in pkg_conf_tuples]
# Only package_id (the grouped column) is read from the returned entity.
pkg_ids = [row[0].package_id for row in pkg_conf_tuples]

# Resolve every package name with ONE joined query instead of issuing a
# separate SELECT per package id.
pkg_name_query = (
    session.query(Package.package_id, Package.package_name)
    .join(Package_Function, Package_Function.package_id == Package.package_id)
    .filter(Package_Function.is_conflict == 1)
    .distinct()
)
pkg_name_by_id = {pkg_id: pkg_name for pkg_id, pkg_name in pkg_name_query}
# .get() leaves None for an id that has no Package row rather than raising
# AttributeError on a missing record.
pkg_names = [pkg_name_by_id.get(pkg_id) for pkg_id in pkg_ids]

pkg_tuples = list(zip(pkg_names, pkg_ids, pkg_counts))
pprint.pprint(pkg_tuples[0:20])
In [5]:
# Functions ranked by number of conflict rows (most conflicts first).
# Result: fn_tuples is a list of (function_name, function_id, conflict_count).
fn_conf_count = func.count(Package_Function.is_conflict)
top_fn_conf = (
    session.query(Package_Function, fn_conf_count)
    .filter(Package_Function.is_conflict == 1)
    .group_by(Package_Function.function_id)
    .order_by(desc(fn_conf_count))
)
fn_conf_tuples = top_fn_conf.all()
fn_counts = [row[1] for row in fn_conf_tuples]
# Only function_id (the grouped column) is read from the returned entity.
fn_ids = [row[0].function_id for row in fn_conf_tuples]

# Resolve every function name with ONE joined query instead of issuing a
# separate SELECT per function id.
fn_name_query = (
    session.query(Function.function_id, Function.function_name)
    .join(Package_Function, Package_Function.function_id == Function.function_id)
    .filter(Package_Function.is_conflict == 1)
    .distinct()
)
fn_name_by_id = {fn_id: fn_name for fn_id, fn_name in fn_name_query}
# .get() leaves None for an id that has no Function row rather than raising
# AttributeError on a missing record.
fn_names = [fn_name_by_id.get(fn_id) for fn_id in fn_ids]

fn_tuples = list(zip(fn_names, fn_ids, fn_counts))
pprint.pprint(fn_tuples[0:20])
In [11]:
# Names of the functions in the 'base' package that are flagged as conflicts.
base_pkg = session.query(Package).filter(Package.package_name == 'base').first()
if base_pkg is None:
    # Fail with a readable message instead of "'NoneType' has no attribute ...".
    raise LookupError("no package named 'base' found in the database")
base_id = base_pkg.package_id

base_conf = (
    session.query(Package_Function)
    .filter(Package_Function.package_id == base_id)
    .filter(Package_Function.is_conflict == 1)
)
print(base_conf.count())
base_fns = [pkg_fn.function_id for pkg_fn in base_conf.all()]

# Fetch all matching function names in a single joined query rather than one
# SELECT per function id.
base_fn_name_query = (
    session.query(Function.function_id, Function.function_name)
    .join(Package_Function, Package_Function.function_id == Function.function_id)
    .filter(Package_Function.package_id == base_id)
    .filter(Package_Function.is_conflict == 1)
)
base_fn_name_by_id = {fn_id: fn_name for fn_id, fn_name in base_fn_name_query}
# An id without a Function row maps to None instead of raising.
base_fn_names = [base_fn_name_by_id.get(fn_id) for fn_id in base_fns]
pprint.pprint(base_fn_names)
In [12]:
## I remember as.list being a conflict for one specific package; list every
## package that provides as.list so that case can be looked up.
list_fn = session.query(Function).filter(Function.function_name == "as.list").first()
if list_fn is None:
    # Fail with a readable message instead of "'NoneType' has no attribute ...".
    raise LookupError("no function named 'as.list' found in the database")
list_id = list_fn.function_id

list_pkgs = (
    session.query(Package_Function)
    .filter(Package_Function.function_id == list_id)
    .all()
)
list_pkg_ids = [pkg_fn.package_id for pkg_fn in list_pkgs]

# Fetch all package names in a single joined query rather than one SELECT
# per package id.
list_pkg_name_query = (
    session.query(Package.package_id, Package.package_name)
    .join(Package_Function, Package_Function.package_id == Package.package_id)
    .filter(Package_Function.function_id == list_id)
)
list_pkg_name_by_id = {pkg_id: pkg_name for pkg_id, pkg_name in list_pkg_name_query}
# An id without a Package row maps to None instead of raising.
list_pkg_names = [list_pkg_name_by_id.get(pkg_id) for pkg_id in list_pkg_ids]
pprint.pprint(list_pkg_names)