In [1]:
# This cell contains default parameter values for execution by `papermill`.
# `filename` is the path later handed to helpers.load_file; this default is a
# gzipped sample file located relative to the parent directory ('../').
filename = '../sample_data/postgap.20180817.asthma.txt.gz'
In [2]:
# Parameters
# NOTE(review): this looks like the cell papermill injects at run time — confirm.
# The assignment replaces the default `filename` set in the previous cell; note
# that this path is relative to the current directory ('./'), not the parent.
filename = "./sample_data/postgap.20180817.asthma.txt.gz"
In [3]:
from reports import helpers
In [4]:
# NOTE(review): presumably produces a string identifying this report run —
# confirm against reports.helpers. It is the cell's only expression, so any
# value it returns is what the cell displays.
helpers.calc_run_str()
In [5]:
# Load the input file into `pg`, the object every later cell passes to helpers.
# Direct pandas read kept for reference:
#   pg = pd.read_csv(filename, sep='\t', na_values=['None'])
# NOTE(review): helpers.load_file presumably performs an equivalent read
# (tab-separated, 'None' treated as missing) — confirm against reports.helpers.
pg = helpers.load_file(filename)
Q: How many rows and columns?
In [6]:
# Prints pg.shape; assumes pg is a pandas DataFrame so this is
# (n_rows, n_columns) — TODO confirm load_file's return type.
print(pg.shape)
Q: How many unique target-disease associations?
In [7]:
# 'g2d' presumably stands for gene-to-disease — confirm. Only `pg` is passed,
# so the columns that define an association are chosen inside the helper.
helpers.calc_g2d_pair_counts(pg)
Q: What is the distribution of unique diseases per gene? And vice versa?
In [8]:
# Positional args after `pg`: two field names ('gene_id', 'disease_efo_id')
# followed by two labels ('Gene', 'Disease'). Presumably the columns to pair
# and their display names — confirm against the helper's signature.
helpers.calc_pairwise_degree_dist(pg, 'gene_id', 'disease_efo_id', 'Gene', 'Disease')
Q: How many unique values appear for each identifier?
In [9]:
# Only `pg` is passed: which fields count as identifiers is decided inside
# helpers.calc_id_field_counts, not in this notebook.
helpers.calc_id_field_counts(pg)
Q: What is the maximum number of rows for a given fixed identifier?
In [10]:
# Only `pg` is passed: the identifier fields examined are chosen inside
# helpers.calc_id_field_max_rows.
helpers.calc_id_field_max_rows(pg)
Q: How many unique identifier pairs appear?
In [11]:
# Only `pg` is passed: the identifier pairs considered are chosen inside
# helpers.calc_id_field_pair_counts.
helpers.calc_id_field_pair_counts(pg)
Q: What is the distribution of each association subscore (`VEP`, `GTEx`, etc.)?
In [12]:
# 'g2v' presumably stands for gene-to-variant — confirm. The subscore fields
# to plot are selected inside the helper; only `pg` is passed.
helpers.calc_g2v_field_hists(pg)
Q: What is the distribution of unique LD SNPs per gene? And vice versa?
In [13]:
# Same helper as the gene/disease cell, here with fields 'gene_id' and
# 'ld_snp_rsID' and labels 'Gene' / 'LD SNP' (argument roles presumed:
# two column names, then two display names — confirm).
helpers.calc_pairwise_degree_dist(pg, 'gene_id', 'ld_snp_rsID', 'Gene', 'LD SNP')
Q: What is the overlap between presence of association subscores?
In [14]:
# Only `pg` is passed: the subscore fields compared, and how "presence" is
# defined (e.g. non-null vs. non-zero), are decided inside the helper —
# NOTE(review): confirm the presence criterion.
helpers.calc_g2v_field_overlap(pg)
Q: What is the joint distribution between association subscore pairs (i.e., how correlated are they)?
In [15]:
# Only `pg` is passed: the subscore pairs to compare are chosen inside
# helpers.calc_g2v_field_cross_dists.
helpers.calc_g2v_field_cross_dists(pg)
Q: What is the distribution of `r2`?
In [16]:
# NOTE(review): presumably reads an `r2` column of `pg` (LD r-squared between
# GWAS SNP and LD SNP?) — confirm column name and meaning in reports.helpers.
helpers.calc_dist_r2(pg)
Q: What is the distribution of unique GWAS SNPs per LD SNP? And vice versa?
In [17]:
# Same helper as the gene/disease cell, here with fields 'ld_snp_rsID' and
# 'gwas_snp' and labels 'LD SNP' / 'GWAS SNP' (argument roles presumed:
# two column names, then two display names — confirm).
helpers.calc_pairwise_degree_dist(pg, 'ld_snp_rsID', 'gwas_snp', 'LD SNP', 'GWAS SNP')
Q: What are the distributions of (`gwas_pvalue`, `gwas_beta`, `gwas_odds_ratio`)?
In [18]:
# 'v2d' presumably stands for variant-to-disease — confirm. The GWAS fields
# to plot are selected inside the helper; only `pg` is passed.
helpers.calc_v2d_field_hists(pg)
Q: What is the distribution of unique diseases per GWAS SNP? And vice versa?
In [19]:
# Same helper as the gene/disease cell, here with fields 'gwas_snp' and
# 'disease_efo_id' and labels 'GWAS SNP' / 'Disease' (argument roles presumed:
# two column names, then two display names — confirm).
helpers.calc_pairwise_degree_dist(pg, 'gwas_snp', 'disease_efo_id', 'GWAS SNP', 'Disease')