In [2]:
import pprint
from sqlalchemy import func, desc
from sqlalchemy_schema import *
from create_package_database import *
# session_setup() is not defined in this notebook; it is presumably supplied by
# one of the star imports above (TODO confirm which). Its two return values are
# bound to `session` (used for every query below) and `engine`.
session, engine = session_setup()

In [3]:
## total fraction of conflicts
# Count the Package_Function rows on each side of the is_conflict flag.
n_conf = (
    session.query(Package_Function)
    .filter(Package_Function.is_conflict == 1)
    .count()
)
n_no_conf = (
    session.query(Package_Function)
    .filter(Package_Function.is_conflict == 0)
    .count()
)
# Share of flagged rows among the rows whose flag is 0 or 1.
frac_conf = n_conf / (n_conf + n_no_conf)
print(n_conf, n_no_conf, frac_conf, sep="\n")


15897
146263
0.09803280710409472

In [4]:
# package with most conflicts
# A single grouped JOIN yields (package_name, package_id, n_conflicts) rows.
# This replaces one name-lookup query per package and avoids selecting a whole
# Package_Function entity alongside GROUP BY, which only SQLite-style lenient
# "bare column" handling accepts.
# NOTE(review): assumes Package.package_id is unique; the inner join also drops
# Package_Function rows whose package_id has no Package row -- confirm.
n_pkg_conflicts = func.count(Package_Function.is_conflict)
top_pkg_conf = (
    session.query(Package.package_name,
                  Package_Function.package_id,
                  n_pkg_conflicts)
    .select_from(Package_Function)
    .join(Package, Package.package_id == Package_Function.package_id)
    .filter(Package_Function.is_conflict == 1)
    .group_by(Package_Function.package_id, Package.package_name)
    # Break ties on the count by package_id so the ranking is reproducible.
    .order_by(desc(n_pkg_conflicts), Package_Function.package_id)
)
pkg_conf_tuples = top_pkg_conf.all()
# Plain tuples keep the pprint output independent of the ORM row type.
pkg_tuples = [tuple(row) for row in pkg_conf_tuples]
pkg_names = [row[0] for row in pkg_tuples]
pkg_ids = [row[1] for row in pkg_tuples]
pkg_counts = [row[2] for row in pkg_tuples]
# Show the 20 packages with the most conflicting functions.
pprint.pprint(pkg_tuples[0:20])


[('adehabitat', 60, 135),
 ('pracma', 4447, 133),
 ('Sleuth2', 5740, 127),
 ('Sleuth3', 5741, 127),
 ('VGAM', 6589, 119),
 ('igraph', 2666, 74),
 ('QCAGUI', 4587, 72),
 ('GeneralizedHyperbolic', 2146, 71),
 ('HyperbolicDist', 2597, 71),
 ('spatstat', 5910, 71),
 ('Rcmdr', 4800, 68),
 ('mosaic', 3586, 67),
 ('base', 6782, 63),
 ('adehabitatLT', 63, 59),
 ('lava', 2969, 58),
 ('psych', 4545, 58),
 ('stats', 6805, 55),
 ('spTDyn', 5976, 53),
 ('raster', 4750, 52),
 ('ROptEstOld', 5221, 52)]

In [5]:
# function with most conflicts
# A single grouped JOIN yields (function_name, function_id, n_conflicts) rows.
# This replaces one name-lookup query per function and avoids selecting a whole
# Package_Function entity alongside GROUP BY, which only SQLite-style lenient
# "bare column" handling accepts.
# NOTE(review): assumes Function.function_id is unique; the inner join also
# drops Package_Function rows whose function_id has no Function row -- confirm.
n_fn_conflicts = func.count(Package_Function.is_conflict)
top_fn_conf = (
    session.query(Function.function_name,
                  Package_Function.function_id,
                  n_fn_conflicts)
    .select_from(Package_Function)
    .join(Function, Function.function_id == Package_Function.function_id)
    .filter(Package_Function.is_conflict == 1)
    .group_by(Package_Function.function_id, Function.function_name)
    # Break ties on the count by function_id so the ranking is reproducible.
    .order_by(desc(n_fn_conflicts), Package_Function.function_id)
)
fn_conf_tuples = top_fn_conf.all()
# Plain tuples keep the pprint output independent of the ORM row type.
fn_tuples = [tuple(row) for row in fn_conf_tuples]
fn_names = [row[0] for row in fn_tuples]
fn_ids = [row[1] for row in fn_tuples]
fn_counts = [row[2] for row in fn_tuples]
# Show the 20 function names flagged as conflicts in the most packages.
pprint.pprint(fn_tuples[0:20])


[('logit', 4371, 35),
 ('normalize', 3283, 34),
 ('residuals', 3391, 33),
 ('fitted', 3374, 31),
 ('compare', 3153, 29),
 ('as.matrix', 805, 26),
 ('distance', 3159, 25),
 ('kurtosis', 1895, 24),
 ('bootstrap', 3147, 24),
 ('size', 325, 23),
 ('skewness', 1919, 23),
 ('trim', 5567, 22),
 ('sim', 4385, 21),
 ('entropy', 99, 20),
 ('resample', 5554, 20),
 ('tr', 34757, 20),
 ('combine', 679, 19),
 ('AICc', 2594, 19),
 ('name', 714, 18),
 ('map', 2735, 18)]

In [11]:
# Names of the functions in the 'base' package that are flagged as conflicts.
base_pkg = session.query(Package).filter(Package.package_name == 'base').first()
if base_pkg is None:
    # .first() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None.package_id.
    raise LookupError("no package named 'base' in the Package table")
base_id = base_pkg.package_id

base_conf = (
    session.query(Package_Function)
    .filter(Package_Function.package_id == base_id)
    .filter(Package_Function.is_conflict == 1)
)
base_fns = [pkg_fn.function_id for pkg_fn in base_conf.all()]
# The rows are already fetched, so len() replaces a separate COUNT query.
print(len(base_fns))

# Resolve every function name with one joined query instead of one query per id.
base_fn_name_by_id = dict(
    session.query(Function.function_id, Function.function_name)
    .select_from(Function)
    .join(Package_Function, Package_Function.function_id == Function.function_id)
    .filter(Package_Function.package_id == base_id)
    .filter(Package_Function.is_conflict == 1)
    .all()
)
# Look names up by id so the printed order follows base_fns (the row order of
# base_conf). A function_id with no Function row raises KeyError here.
base_fn_names = [base_fn_name_by_id[fn_id] for fn_id in base_fns]
pprint.pprint(base_fn_names)


63
['as.matrix',
 'labels',
 'beta',
 'ncol',
 'norm',
 'max',
 'min',
 'prod',
 'range',
 'sum',
 'scale.default',
 'table',
 'rbind',
 'F',
 'is.primitive',
 'print.default',
 'mode',
 'merge.data.frame',
 'message',
 'format.pval',
 'round.POSIXt',
 'trunc.POSIXt',
 'debug',
 'dir.exists',
 'as.difftime',
 'gregexpr',
 'trimws',
 'Recall',
 'single',
 'print.function',
 'rbind.data.frame',
 'atan2',
 'exp',
 'log',
 'sqrt',
 'isNamespaceLoaded',
 'acosh',
 'atan',
 'atanh',
 'cos',
 'cosh',
 'gamma',
 'log1p',
 'log2',
 'logb',
 'round',
 'sin',
 'sinh',
 'tan',
 'tanh',
 'trunc',
 'cat',
 'readline',
 'scan',
 'arrayInd',
 'I',
 'identity',
 'list.dirs',
 'anyDuplicated.matrix',
 'as.Date.numeric',
 'det',
 'duplicated.matrix',
 'unique.matrix']

In [12]:
## I remember as.list being a conflict for one specific package. I want to look for this issue.
# Lists every package that has a Package_Function row for "as.list". The query
# is not filtered on is_conflict, so flagged and unflagged packages both appear.
as_list_fn = session.query(Function).filter(Function.function_name == "as.list").first()
if as_list_fn is None:
    # .first() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None.function_id.
    raise LookupError("no function named 'as.list' in the Function table")
list_id = as_list_fn.function_id

list_pkgs = session.query(Package_Function).filter(Package_Function.function_id == list_id).all()
list_pkg_ids = [pkg_fn.package_id for pkg_fn in list_pkgs]

# Resolve every package name with one joined query instead of one query per id.
list_pkg_name_by_id = dict(
    session.query(Package.package_id, Package.package_name)
    .select_from(Package)
    .join(Package_Function, Package_Function.package_id == Package.package_id)
    .filter(Package_Function.function_id == list_id)
    .all()
)
# Look names up by id so the printed order follows list_pkg_ids (the row order
# of list_pkgs). A package_id with no Package row raises KeyError here.
list_pkg_names = [list_pkg_name_by_id[pkg_id] for pkg_id in list_pkg_ids]
pprint.pprint(list_pkg_names)


['haplotypes',
 'multivator',
 'neuroim',
 'objectProperties',
 'penalized',
 'PivotalR',
 'RadOnc',
 'rbamtools',
 'simecol',
 'SOUP',
 'timeSeries',
 'ycinterextra',
 'base']