notebook.community

Edit and run



In [2]:

    
%run ~/relmapping/annot/notebooks/annot__init__.ipynb
annot_ = 'annot_ce10_mapq0'
def mp(fp, annot_=annot_): return os.path.join(annot_, 'metrics_atac', fp)









    



/mnt/home3/jj374/anaconda36/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools






    



os.getcwd(): /mnt/beegfs/scratch_copy/ahringer/jj374/lab/relmapping



In [4]:

    
# atac_wt_pe
step = 'tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm_blacklist.macs2_pe_lt200'
suffix = '_treat_pileup.bw'
prefix = 'atac814'

!scripts/yapc/yapc annot_ce10_mapq0/metrics_atac/atac_wt_pe \
    wt_emb {pf('atac814_wt_emb_rep1', step, suffix, prefix)} {pf('atac814_wt_emb_rep2', step, suffix, prefix)} \
    wt_l1 {pf('atac814_wt_l1_rep1', step, suffix, prefix)} {pf('atac814_wt_l1_rep2', step, suffix, prefix)} \
    wt_l2 {pf('atac814_wt_l2_rep1', step, suffix, prefix)} {pf('atac814_wt_l2_rep2', step, suffix, prefix)} \
    wt_l3 {pf('atac814_wt_l3_rep1', step, suffix, prefix)} {pf('atac814_wt_l3_rep2', step, suffix, prefix)} \
    wt_l4 {pf('atac814_wt_l4_rep1', step, suffix, prefix)} {pf('atac814_wt_l4_rep2', step, suffix, prefix)} \
    wt_ya {pf('atac814_wt_ya_rep1', step, suffix, prefix)} {pf('atac814_wt_ya_rep2', step, suffix, prefix)} \
    --smoothing-window-width 75 --fixed-peak-halfwidth 75









    



DEBUG:root:numpy version: 1.12.1
DEBUG:root:pandas version: 0.20.3
DEBUG:root:pyBigWig.numpy flag: 1
DEBUG:root:IDR version: 2.0.3
Input samples:
condition       sample                                                 fp  is_bw
  wt_emb  wt_emb_rep1  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
  wt_emb  wt_emb_rep2  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_l1   wt_l1_rep1  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_l1   wt_l1_rep2  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_l2   wt_l2_rep1  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_l2   wt_l2_rep2  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_l3   wt_l3_rep1  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_l3   wt_l3_rep2  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_l4   wt_l4_rep1  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_l4   wt_l4_rep2  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_ya   wt_ya_rep1  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True
   wt_ya   wt_ya_rep2  atac814/tg_pe.bwa_pe.rm_unmapped_pe.rm_chrM.rm...   True

Output prefix: annot_ce10_mapq0/metrics_atac/atac_wt_pe
calculating mean of all input signal:
chrI 15072423
chrII 15279345
chrIII 13783700
chrIV 17493793
chrV 20924149
chrX 17718866
Calculating d2smooth: annot_ce10_mapq0/metrics_atac/atac_wt_pe_d2smooth.bw
chrI 15072423
chrII 15279345
chrIII 13783700
chrIV 17493793
chrV 20924149
chrX 17718866
scripts/yapc/yapc:150: RuntimeWarning: invalid value encountered in less
  s = np.where(np.diff((d2y < tol).astype(int))==1)[0] + 1
scripts/yapc/yapc:151: RuntimeWarning: invalid value encountered in less
  e = np.where(np.diff((d2y < tol).astype(int))==-1)[0] + 1
62095 raw concave regions on chrom chrI
63702 raw concave regions on chrom chrII
56066 raw concave regions on chrom chrIII
72842 raw concave regions on chrom chrIV
88620 raw concave regions on chrom chrV
72160 raw concave regions on chrom chrX
415485 peaks total
scoring peaks for sample: wt_emb_rep1
scoring peaks for sample: wt_emb_rep2
scoring peaks for sample: wt_l1_rep1
scoring peaks for sample: wt_l1_rep2
scoring peaks for sample: wt_l2_rep1
scoring peaks for sample: wt_l2_rep2
scoring peaks for sample: wt_l3_rep1
scoring peaks for sample: wt_l3_rep2
scoring peaks for sample: wt_l4_rep1
scoring peaks for sample: wt_l4_rep2
scoring peaks for sample: wt_ya_rep1
scoring peaks for sample: wt_ya_rep2
415485 raw peaks
280137 peaks after discarding narrow concave regions (min_concave_region_width=75)
100000 peaks after filtering based on best rank across all conditions (truncate_idr_input=100000)

curvature index-scores:
wt_emb_rep1_score  wt_emb_rep2_score  wt_l1_rep1_score  wt_l1_rep2_score  wt_l2_rep1_score  wt_l2_rep2_score  wt_l3_rep1_score  wt_l3_rep2_score  wt_l4_rep1_score  wt_l4_rep2_score  wt_ya_rep1_score  wt_ya_rep2_score
        23.179125          47.881652         43.087507         60.848246         11.882826          4.454711         16.524516         71.184423         42.216108         36.297419         59.784829         71.042769
        95.185154         165.429316         85.231072         52.028599          4.997833        102.336662         44.093487        135.963702        103.391118        186.602926         53.414951         81.732920
         7.355130         -39.653997         69.855422        155.982102         72.615578         39.547007         78.179839        119.538686         94.510783        112.025355         76.779989         70.313357
       231.216097         339.964144        284.406610        251.095894        359.976839        322.940534        240.568899        231.038458        152.689580        322.523114        326.858808        147.486186
         6.754050          26.737773         52.057487        -22.894792         18.403817         22.249553         46.166572         22.418947         -1.545550         13.229067         33.621673         15.320175

Running idr on condition wt_emb:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_emb_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_emb_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_emb.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.10 1.94 0.98 0.35]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 24279/100000 (24.3%)

Running idr on condition wt_l1:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l1_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l1_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l1.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.33 1.91 0.97 0.24]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 17399/100000 (17.4%)

Running idr on condition wt_l2:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l2_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l2_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l2.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [1.89 1.71 0.97 0.37]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 24762/100000 (24.8%)

Running idr on condition wt_l3:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l3_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l3_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l3.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [1.94 1.62 0.96 0.39]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 27282/100000 (27.3%)

Running idr on condition wt_l4:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l4_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l4_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_l4.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.09 1.67 0.96 0.43]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 32432/100000 (32.4%)

Running idr on condition wt_ya:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_ya_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_ya_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_pe_peaksidr_wt_ya.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.08 1.75 0.97 0.38]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 26937/100000 (26.9%)


globalIDR:
wt_emb_globalIDR  wt_l1_globalIDR  wt_l2_globalIDR  wt_l3_globalIDR  wt_l4_globalIDR  wt_ya_globalIDR
            0.72             0.34             0.31             0.39             1.95             0.74
            0.15             0.36             0.54             1.17             1.97             1.27
            4.25             3.73             4.43             3.42             2.76             2.02
            2.02             0.33             1.15             0.65             0.83             3.01
            0.70             0.45             0.77             0.87             0.61             1.48

Wrote all 100000 IDR-scored peaks to annot_ce10_mapq0/metrics_atac/atac_wt_pe.tsv
Wrote 28668 peaks at IDR=0.001 to annot_ce10_mapq0/metrics_atac/atac_wt_pe_0.001.bed
Wrote 34563 peaks at IDR=0.005 to annot_ce10_mapq0/metrics_atac/atac_wt_pe_0.005.bed
Wrote 37888 peaks at IDR=0.010 to annot_ce10_mapq0/metrics_atac/atac_wt_pe_0.01.bed
Wrote 49015 peaks at IDR=0.050 to annot_ce10_mapq0/metrics_atac/atac_wt_pe_0.05.bed



In [5]:

    
# atac_wt_se
step = 'tg_se.bwa_se.rm_unmapped.rm_chrM.rm_blacklist.macs2_se_extsize150_shiftm75_keepdup_all'
suffix = '_treat_pileup.bw'
prefix = 'atac814'

!scripts/yapc/yapc annot_ce10_mapq0/metrics_atac/atac_wt_se \
    wt_emb {pf('atac814_wt_emb_rep1', step, suffix, prefix)} {pf('atac814_wt_emb_rep2', step, suffix, prefix)} \
    wt_l1 {pf('atac814_wt_l1_rep1', step, suffix, prefix)} {pf('atac814_wt_l1_rep2', step, suffix, prefix)} \
    wt_l2 {pf('atac814_wt_l2_rep1', step, suffix, prefix)} {pf('atac814_wt_l2_rep2', step, suffix, prefix)} \
    wt_l3 {pf('atac814_wt_l3_rep1', step, suffix, prefix)} {pf('atac814_wt_l3_rep2', step, suffix, prefix)} \
    wt_l4 {pf('atac814_wt_l4_rep1', step, suffix, prefix)} {pf('atac814_wt_l4_rep2', step, suffix, prefix)} \
    wt_ya {pf('atac814_wt_ya_rep1', step, suffix, prefix)} {pf('atac814_wt_ya_rep2', step, suffix, prefix)} \
    --smoothing-window-width 150 --fixed-peak-halfwidth 75









    



DEBUG:root:numpy version: 1.12.1
DEBUG:root:pandas version: 0.20.3
DEBUG:root:pyBigWig.numpy flag: 1
DEBUG:root:IDR version: 2.0.3
Input samples:
condition       sample                                                 fp  is_bw
  wt_emb  wt_emb_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
  wt_emb  wt_emb_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_l1   wt_l1_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_l1   wt_l1_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_l2   wt_l2_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_l2   wt_l2_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_l3   wt_l3_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_l3   wt_l3_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_l4   wt_l4_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_l4   wt_l4_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_ya   wt_ya_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
   wt_ya   wt_ya_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True

Output prefix: annot_ce10_mapq0/metrics_atac/atac_wt_se
calculating mean of all input signal:
chrI 15072423
chrII 15279345
chrIII 13783700
chrIV 17493793
chrV 20924149
chrX 17718866
Calculating d2smooth: annot_ce10_mapq0/metrics_atac/atac_wt_se_d2smooth.bw
chrI 15072423
chrII 15279345
chrIII 13783700
chrIV 17493793
chrV 20924149
chrX 17718866
scripts/yapc/yapc:150: RuntimeWarning: invalid value encountered in less
  s = np.where(np.diff((d2y < tol).astype(int))==1)[0] + 1
scripts/yapc/yapc:151: RuntimeWarning: invalid value encountered in less
  e = np.where(np.diff((d2y < tol).astype(int))==-1)[0] + 1
36868 raw concave regions on chrom chrI
37766 raw concave regions on chrom chrII
33914 raw concave regions on chrom chrIII
43222 raw concave regions on chrom chrIV
52315 raw concave regions on chrom chrV
42269 raw concave regions on chrom chrX
246354 peaks total
scoring peaks for sample: wt_emb_rep1
scoring peaks for sample: wt_emb_rep2
scoring peaks for sample: wt_l1_rep1
scoring peaks for sample: wt_l1_rep2
scoring peaks for sample: wt_l2_rep1
scoring peaks for sample: wt_l2_rep2
scoring peaks for sample: wt_l3_rep1
scoring peaks for sample: wt_l3_rep2
scoring peaks for sample: wt_l4_rep1
scoring peaks for sample: wt_l4_rep2
scoring peaks for sample: wt_ya_rep1
scoring peaks for sample: wt_ya_rep2
246354 raw peaks
219609 peaks after discarding narrow concave regions (min_concave_region_width=75)
100000 peaks after filtering based on best rank across all conditions (truncate_idr_input=100000)

curvature index-scores:
wt_emb_rep1_score  wt_emb_rep2_score  wt_l1_rep1_score  wt_l1_rep2_score  wt_l2_rep1_score  wt_l2_rep2_score  wt_l3_rep1_score  wt_l3_rep2_score  wt_l4_rep1_score  wt_l4_rep2_score  wt_ya_rep1_score  wt_ya_rep2_score
      5236.223944        5305.680957       3602.605307       2769.303884       2194.707429       2189.078924       7316.995205       9778.876606       5062.252001       3933.295974       5374.749328       7078.171909
       274.599539         265.154768        298.287029        194.782983        301.164105        323.029852        250.571822        213.723325        164.961504        259.862751        283.546289        128.242972
         6.922036           1.665854         17.177893          8.362803          4.167191         18.834424          4.136402         18.258443         -3.965916         10.453757         18.200792         -1.675975
        47.889051          21.050557         22.593690         22.257664         33.310063         22.623162         46.910557         13.292604         13.041123         19.187929         58.101267         71.141099
        -7.677432          -7.329703          8.426715        -13.388738          0.860489          3.232003          0.356052          0.784016         -8.258612          1.129879         35.181945         27.392671

Running idr on condition wt_emb:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_emb_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_emb_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_emb.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [1.69 1.98 0.98 0.45]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 28596/100000 (28.6%)

Running idr on condition wt_l1:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l1_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l1_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l1.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [1.70 1.83 0.97 0.40]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 24757/100000 (24.8%)

Running idr on condition wt_l2:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l2_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l2_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l2.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [1.65 1.81 0.97 0.44]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 27892/100000 (27.9%)

Running idr on condition wt_l3:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l3_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l3_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l3.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [1.77 1.68 0.97 0.44]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 30236/100000 (30.2%)

Running idr on condition wt_l4:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l4_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l4_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_l4.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [1.83 1.72 0.97 0.51]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 35638/100000 (35.6%)

Running idr on condition wt_ya:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_ya_rep1.bed annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_ya_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_wt_se_peaksidr_wt_ya.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [1.74 1.75 0.97 0.45]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 29580/100000 (29.6%)


globalIDR:
wt_emb_globalIDR  wt_l1_globalIDR  wt_l2_globalIDR  wt_l3_globalIDR  wt_l4_globalIDR  wt_ya_globalIDR
            5.00             5.00             5.00             5.00             5.00             5.00
            5.00             5.00             5.00             5.00             5.00             4.50
            0.43             0.67             0.87             0.40             0.69             2.86
            0.19             0.17             0.19             0.21             0.25             1.28
            0.88             0.54             0.19             0.65             0.64             1.60

Wrote all 100000 IDR-scored peaks to annot_ce10_mapq0/metrics_atac/atac_wt_se.tsv
Wrote 30340 peaks at IDR=0.001 to annot_ce10_mapq0/metrics_atac/atac_wt_se_0.001.bed
Wrote 37309 peaks at IDR=0.005 to annot_ce10_mapq0/metrics_atac/atac_wt_se_0.005.bed
Wrote 41363 peaks at IDR=0.010 to annot_ce10_mapq0/metrics_atac/atac_wt_se_0.01.bed
Wrote 54846 peaks at IDR=0.050 to annot_ce10_mapq0/metrics_atac/atac_wt_se_0.05.bed



In [6]:

    
# atac_glp1_se
step = 'tg_se.bwa_se.rm_unmapped.rm_chrM.rm_blacklist.macs2_se_extsize150_shiftm75_keepdup_all'
suffix = '_treat_pileup.bw'
prefix = 'atac814'

!scripts/yapc/yapc annot_ce10_mapq0/metrics_atac/atac_glp1_se \
    glp1_d1 {pf('atac814_glp1_d1_rep1', step, suffix, prefix)} {pf('atac814_glp1_d1_rep2', step, suffix, prefix)} \
    glp1_d2 {pf('atac814_glp1_d2_rep1', step, suffix, prefix)} {pf('atac814_glp1_d2_rep2', step, suffix, prefix)} \
    glp1_d6 {pf('atac814_glp1_d6_rep1', step, suffix, prefix)} {pf('atac814_glp1_d6_rep2', step, suffix, prefix)} \
    glp1_d9 {pf('atac814_glp1_d9_rep1', step, suffix, prefix)} {pf('atac814_glp1_d9_rep2', step, suffix, prefix)} \
    glp1_d13 {pf('atac814_glp1_d13_rep1', step, suffix, prefix)} {pf('atac814_glp1_d13_rep2', step, suffix, prefix)} \
    --smoothing-window-width 150 --fixed-peak-halfwidth 75









    



DEBUG:root:numpy version: 1.12.1
DEBUG:root:pandas version: 0.20.3
DEBUG:root:pyBigWig.numpy flag: 1
DEBUG:root:IDR version: 2.0.3
Input samples:
condition         sample                                                 fp  is_bw
 glp1_d1   glp1_d1_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
 glp1_d1   glp1_d1_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
 glp1_d2   glp1_d2_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
 glp1_d2   glp1_d2_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
 glp1_d6   glp1_d6_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
 glp1_d6   glp1_d6_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
 glp1_d9   glp1_d9_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
 glp1_d9   glp1_d9_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
glp1_d13  glp1_d13_rep1  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True
glp1_d13  glp1_d13_rep2  atac814/tg_se.bwa_se.rm_unmapped.rm_chrM.rm_bl...   True

Output prefix: annot_ce10_mapq0/metrics_atac/atac_glp1_se
calculating mean of all input signal:
chrI 15072423
chrII 15279345
chrIII 13783700
chrIV 17493793
chrV 20924149
chrX 17718866
Calculating d2smooth: annot_ce10_mapq0/metrics_atac/atac_glp1_se_d2smooth.bw
chrI 15072423
chrII 15279345
chrIII 13783700
chrIV 17493793
chrV 20924149
chrX 17718866
scripts/yapc/yapc:150: RuntimeWarning: invalid value encountered in less
  s = np.where(np.diff((d2y < tol).astype(int))==1)[0] + 1
scripts/yapc/yapc:151: RuntimeWarning: invalid value encountered in less
  e = np.where(np.diff((d2y < tol).astype(int))==-1)[0] + 1
35853 raw concave regions on chrom chrI
36595 raw concave regions on chrom chrII
32796 raw concave regions on chrom chrIII
41989 raw concave regions on chrom chrIV
50603 raw concave regions on chrom chrV
41046 raw concave regions on chrom chrX
238882 peaks total
scoring peaks for sample: glp1_d1_rep1
scoring peaks for sample: glp1_d1_rep2
scoring peaks for sample: glp1_d2_rep1
scoring peaks for sample: glp1_d2_rep2
scoring peaks for sample: glp1_d6_rep1
scoring peaks for sample: glp1_d6_rep2
scoring peaks for sample: glp1_d9_rep1
scoring peaks for sample: glp1_d9_rep2
scoring peaks for sample: glp1_d13_rep1
scoring peaks for sample: glp1_d13_rep2
238882 raw peaks
211804 peaks after discarding narrow concave regions (min_concave_region_width=75)
100000 peaks after filtering based on best rank across all conditions (truncate_idr_input=100000)

curvature index-scores:
glp1_d1_rep1_score  glp1_d1_rep2_score  glp1_d2_rep1_score  glp1_d2_rep2_score  glp1_d6_rep1_score  glp1_d6_rep2_score  glp1_d9_rep1_score  glp1_d9_rep2_score  glp1_d13_rep1_score  glp1_d13_rep2_score
       2882.992027         5035.424085         1536.987245         2452.700552         1384.016028          993.030874         1148.636040          918.418053           787.214204           947.444414
        117.800550           67.484779           92.142471          153.455198          115.478974           94.701837          100.362512          117.407411           138.300533           132.287563
          3.845345            6.238199            5.307431            7.236023            9.836610           -9.366718            5.631988            1.189719             2.902702             0.978943
         18.625506           24.496058           22.919817           10.059070           19.375796           22.022161           27.995000           17.831321            14.567176            41.598045
          7.405162          -10.268487          -13.055448          -11.888747           22.988129           -2.307184            2.916955           25.238356            19.513247            18.982721

Running idr on condition glp1_d1:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d1_rep1.bed annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d1_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d1.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.42 1.88 0.98 0.47]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 37774/100000 (37.8%)

Running idr on condition glp1_d2:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d2_rep1.bed annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d2_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d2.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.52 1.86 0.98 0.41]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 33489/100000 (33.5%)

Running idr on condition glp1_d6:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d6_rep1.bed annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d6_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d6.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.63 1.78 0.97 0.38]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 31965/100000 (32.0%)

Running idr on condition glp1_d9:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d9_rep1.bed annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d9_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d9.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.48 1.73 0.97 0.42]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 34678/100000 (34.7%)

Running idr on condition glp1_d13:
idr --input-file-type bed --rank score --peak-merge-method max --plot --samples annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d13_rep1.bed annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d13_rep2.bed --output-file annot_ce10_mapq0/metrics_atac/atac_glp1_se_peaksidr_glp1_d13.bed
Initial parameter values: [0.10 1.00 0.20 0.50]
Final parameter values: [2.54 1.89 0.97 0.41]
Number of reported peaks - 100000/100000 (100.0%)

Number of peaks passing IDR cutoff of 0.05 - 33580/100000 (33.6%)


globalIDR:
glp1_d1_globalIDR  glp1_d2_globalIDR  glp1_d6_globalIDR  glp1_d9_globalIDR  glp1_d13_globalIDR
             5.00               5.00               5.00               5.00                5.00
             3.97               4.67               4.41               4.65                5.00
             1.08               0.44               0.73               1.16                0.43
             0.24               0.20               0.19               0.38                0.95
             0.24               0.20               0.19               0.21                0.20

Wrote all 100000 IDR-scored peaks to annot_ce10_mapq0/metrics_atac/atac_glp1_se.tsv
Wrote 30650 peaks at IDR=0.001 to annot_ce10_mapq0/metrics_atac/atac_glp1_se_0.001.bed
Wrote 35927 peaks at IDR=0.005 to annot_ce10_mapq0/metrics_atac/atac_glp1_se_0.005.bed
Wrote 38894 peaks at IDR=0.010 to annot_ce10_mapq0/metrics_atac/atac_glp1_se_0.01.bed
Wrote 49206 peaks at IDR=0.050 to annot_ce10_mapq0/metrics_atac/atac_glp1_se_0.05.bed



In [21]:

    
df_wt_either.columns









    Out[21]:





Index(['chrom', 'start', 'end', 'score', 'strand', 'thickStart', 'thickEnd',
       'itemRgb', 'Name', 'wt_emb_globalIDR', 'wt_l1_globalIDR',
       'wt_l2_globalIDR', 'wt_l3_globalIDR', 'wt_l4_globalIDR',
       'wt_ya_globalIDR', 'atac_source'],
      dtype='object')



In [25]:

    
# atac wt_pe, wt_se, glp1_se: merge across tissues
df_wt_pe = read_gffbed('annot_ce10_mapq0/metrics_atac/atac_wt_pe_0.001.bed')
df_wt_se = read_gffbed('annot_ce10_mapq0/metrics_atac/atac_wt_se_0.001.bed')
df_glp1_se = read_gffbed('annot_ce10_mapq0/metrics_atac/atac_glp1_se_0.001.bed')

df_wt_pe['atac_source'] = 'atac_wt_pe_mapq0'
df_wt_se['atac_source'] = 'atac_wt_se_mapq0'
df_glp1_se['atac_source'] = 'atac_glp1_se_mapq0'

print('%d peaks in wt_pe' % (len(df_wt_pe),))
print('%d peaks in wt_se' % (len(df_wt_se),))
print('%d peaks in glp1_se' % (len(df_glp1_se),))

cols_ = yp.NAMES_BED3 + ['atac_source']
print('merging mapq0 tracks:')
df_wt_either, df_wt_se_only = add_unique(df_wt_pe, df_wt_se)
df_wt_glp1_mapq0, df_glp1_se_only = add_unique(df_wt_either[cols_], df_glp1_se[cols_])

print('adding map10 to q10:')
df_wt_glp1_q10 = pd.read_csv('annot_ce10_eLife_full/accessible_sites.tsv', sep='\t')
df_wt_glp1_all, df_wt_glp1_mapq0_only = add_unique(df_wt_glp1_q10[cols_], df_wt_glp1_mapq0[cols_])









    



28668 peaks in wt_pe
30340 peaks in wt_se
30650 peaks in glp1_se
merging mapq0 tracks:
add_unique: 33765 peaks in merged set -- 28668 peaks from A plus 5097 of 30340 non-overlapping peaks from B
add_unique: 41992 peaks in merged set -- 33765 peaks from A plus 8227 of 30650 non-overlapping peaks from B
adding map10 to q10:
add_unique: 45381 peaks in merged set -- 42245 peaks from A plus 3136 of 41992 non-overlapping peaks from B



In [26]:

    
df_wt_glp1_mapq0[cols_].to_csv('annot_ce10_mapq0/atac_wt_glp1_mapq0.bed', sep='\t', header=False, index=False)
df_wt_glp1_mapq0_only[cols_].to_csv('annot_ce10_mapq0/atac_wt_glp1_mapq0_only.bed', sep='\t', header=False, index=False)
df_wt_glp1_all[cols_].to_csv('annot_ce10_mapq0/atac_wt_glp1_all.bed', sep='\t', header=False, index=False)



In [ ]: