BSMAP was run on 8 samples (on different machines)


In [4]:
ls analyses/2016-10-11


2016-10-11/

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [42]:
for i in ("M2","M3"):
    !python {bsmaploc}methratio.py \
-d ../data/Crassostrea_gigas.GCAz_000297895.1.dna_sm.toplevel.fa \
-u -z -g \
-o methratio_out_{i}.txt \
-s {bsmaploc}samtools \
bsmap_out_{i}.sam \


@ Tue Oct 11 08:10:48 2016: reading reference ../data/Crassostrea_gigas.GCAz_000297895.1.dna_sm.toplevel.fa ...
@ Tue Oct 11 08:11:11 2016: reading bsmap_out_M2.sam ...
[samopen] SAM header is present: 7658 sequences.
@ Tue Oct 11 08:13:23 2016: combining CpG methylation from both strands ...
@ Tue Oct 11 08:13:44 2016: writing methratio_out_M2.txt ...
@ Tue Oct 11 08:16:58 2016: done.
total 5453853 valid mappings, 21735799 covered cytosines, average coverage: 1.86 fold.
@ Tue Oct 11 08:16:59 2016: reading reference ../data/Crassostrea_gigas.GCAz_000297895.1.dna_sm.toplevel.fa ...
@ Tue Oct 11 08:17:21 2016: reading bsmap_out_M3.sam ...
[samopen] SAM header is present: 7658 sequences.
@ Tue Oct 11 08:19:41 2016: combining CpG methylation from both strands ...
@ Tue Oct 11 08:20:00 2016: writing methratio_out_M3.txt ...
@ Tue Oct 11 08:23:17 2016: done.
total 5974521 valid mappings, 23104419 covered cytosines, average coverage: 1.89 fold.

In [43]:
!curl https://raw.githubusercontent.com/che625/olson-ms-nb/master/scripts/mr3x.awk \
> /Users/sr320/git-repos/sr320.github.io/jupyter/scripts/mr3x.awk

!curl https://raw.githubusercontent.com/che625/olson-ms-nb/master/scripts/mr_gg.awk.sh \
> /Users/sr320/git-repos/sr320.github.io/jupyter/scripts/mr_gg.awk.sh


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100    83  100    83    0     0     88      0 --:--:-- --:--:-- --:--:--    88
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   286  100   286    0     0   1665      0 --:--:-- --:--:-- --:--:--  1672

In [45]:
#first methratio files are converted to filter for CG context, 3x coverage (mr3x.awk), and reformatting (mr_gg.awk.sh).
#due to issue passing variable to awk, simple scripts were used (included in repository)
for i in ("M2","M3"):
    !echo {i}
    !grep "[A-Z][A-Z]CG[A-Z]" <methratio_out_{i}.txt> methratio_out_{i}CG.txt
    !awk -f /Users/sr320/git-repos/sr320.github.io/jupyter/scripts/mr3x.awk methratio_out_{i}CG.txt \
    > mr3x.{i}.txt
    !awk -f /Users/sr320/git-repos/sr320.github.io/jupyter/scripts/mr_gg.awk.sh \
    mr3x.{i}.txt > mkfmt_{i}.txt


M2
M3

In [47]:
#first methratio files are converted to filter for CG context, 3x coverage (mr3x.awk), and reformatting (mr_gg.awk.sh).
#due to issue passing variable to awk, simple scripts were used (included in repository)
for i in ("M2","M3"):
    !echo {i}
    !grep -i "[A-Z][A-Z]CG[A-Z]" <methratio_out_{i}.txt> methratio_out_{i}CGi.txt
    !awk -f /Users/sr320/git-repos/sr320.github.io/jupyter/scripts/mr3x.awk methratio_out_{i}CGi.txt \
    > mr3xi.{i}.txt
    !awk -f /Users/sr320/git-repos/sr320.github.io/jupyter/scripts/mr_gg.awk.sh \
    mr3xi.{i}.txt > mkfmti_{i}.txt


M2
M3

In [46]:
# Maybe the grep needs to ignore case (-i)? Tried in the In [47] cell, which writes the *i-suffixed files.

In [51]:
# Checksum the M2 result of the case-sensitive run (mkfmt) and of the grep -i run (mkfmti);
# identical digests mean the two files have the same content.
!md5 mkfmt_M2.txt mkfmti_M2.txt | head


MD5 (mkfmt_M2.txt) = df67fde9e87ec165618d384374074057
MD5 (mkfmti_M2.txt) = df67fde9e87ec165618d384374074057

In [52]:
# Nope: the MD5 sums are identical, so ignoring case makes no difference to the output.

In [54]:
!head -100 mkfmt_M2.txt


chr.Base	chr	base	strand	coverage	freqC	freqT
scaffold1.14274	scaffold1	14274	F	4	0.00	100.00
scaffold1.14305	scaffold1	14305	F	4	0.00	100.00
scaffold1.15309	scaffold1	15309	F	4	0.00	100.00
scaffold1.15315	scaffold1	15315	F	4	0.00	100.00
scaffold1.15336	scaffold1	15336	F	5	0.00	100.00
scaffold1.16016	scaffold1	16016	F	3	0.00	100.00
scaffold1.17209	scaffold1	17209	F	3	0.00	100.00
scaffold1.19038	scaffold1	19038	F	4	0.00	100.00
scaffold1.19069	scaffold1	19069	F	4	0.00	100.00
scaffold1.19077	scaffold1	19077	F	3	0.00	100.00
scaffold1.19147	scaffold1	19147	F	7	0.00	100.00
scaffold1.19211	scaffold1	19211	F	3	0.00	100.00
scaffold1.19222	scaffold1	19222	F	3	0.00	100.00
scaffold1.19427	scaffold1	19427	F	5	0.00	100.00
scaffold1.19454	scaffold1	19454	F	3	0.00	100.00
scaffold1.19456	scaffold1	19456	F	3	0.00	100.00
scaffold1.19864	scaffold1	19864	F	3	0.00	100.00
scaffold1.19935	scaffold1	19935	F	5	0.00	100.00
scaffold1.19963	scaffold1	19963	F	4	0.00	100.00
scaffold1.20056	scaffold1	20056	F	7	0.00	100.00
scaffold1.20067	scaffold1	20067	F	7	0.00	100.00
scaffold1.20087	scaffold1	20087	F	5	0.00	100.00
scaffold1.20116	scaffold1	20116	F	3	0.00	100.00
scaffold1.20119	scaffold1	20119	F	3	0.00	100.00
scaffold1.20180	scaffold1	20180	F	4	0.00	100.00
scaffold1.20215	scaffold1	20215	F	5	0.00	100.00
scaffold1.20253	scaffold1	20253	F	6	0.00	100.00
scaffold1.20275	scaffold1	20275	F	5	0.00	100.00
scaffold1.20278	scaffold1	20278	F	5	0.00	100.00
scaffold1.20560	scaffold1	20560	F	3	0.00	100.00
scaffold1.20564	scaffold1	20564	F	3	0.00	100.00
scaffold1.20756	scaffold1	20756	F	4	25.00	75.00
scaffold1.21325	scaffold1	21325	F	3	33.33	66.67
scaffold1.21633	scaffold1	21633	F	4	25.00	75.00
scaffold1.22985	scaffold1	22985	F	3	100.00	0.00
scaffold1.23091	scaffold1	23091	F	3	33.33	66.67
scaffold1.23095	scaffold1	23095	F	5	40.00	60.00
scaffold1.23099	scaffold1	23099	F	3	66.67	33.33
scaffold1.23469	scaffold1	23469	F	5	0.00	100.00
scaffold1.23979	scaffold1	23979	F	5	80.00	20.00
scaffold1.24040	scaffold1	24040	F	3	66.67	33.33
scaffold1.24627	scaffold1	24627	F	3	0.00	100.00
scaffold1.25289	scaffold1	25289	F	12	83.33	16.67
scaffold1.25324	scaffold1	25324	F	8	50.00	50.00
scaffold1.25489	scaffold1	25489	F	6	16.67	83.33
scaffold1.25503	scaffold1	25503	F	3	100.00	0.00
scaffold1.25517	scaffold1	25517	F	3	0.00	100.00
scaffold1.30312	scaffold1	30312	F	3	0.00	100.00
scaffold1.30364	scaffold1	30364	F	5	0.00	100.00
scaffold1.34376	scaffold1	34376	F	3	0.00	100.00
scaffold1.34387	scaffold1	34387	F	8	12.50	87.50
scaffold1.34409	scaffold1	34409	F	7	0.00	100.00
scaffold1.37888	scaffold1	37888	F	3	0.00	100.00
scaffold1.38052	scaffold1	38052	F	3	0.00	100.00
scaffold1.38200	scaffold1	38200	F	3	0.00	100.00
scaffold1.38307	scaffold1	38307	F	3	0.00	100.00
scaffold1.38396	scaffold1	38396	F	3	0.00	100.00
scaffold1.38420	scaffold1	38420	F	8	0.00	100.00
scaffold1.38451	scaffold1	38451	F	6	0.00	100.00
scaffold1.38454	scaffold1	38454	F	6	0.00	100.00
scaffold1.38457	scaffold1	38457	F	7	0.00	100.00
scaffold1.38473	scaffold1	38473	F	7	0.00	100.00
scaffold1.38477	scaffold1	38477	F	7	14.29	85.71
scaffold1.38502	scaffold1	38502	F	8	0.00	100.00
scaffold1.38528	scaffold1	38528	F	4	0.00	100.00
scaffold1.41897	scaffold1	41897	F	3	0.00	100.00
scaffold1.42178	scaffold1	42178	F	6	0.00	100.00
scaffold1.42189	scaffold1	42189	F	4	0.00	100.00
scaffold1.42210	scaffold1	42210	F	6	0.00	100.00
scaffold1.43637	scaffold1	43637	F	4	0.00	100.00
scaffold1.43648	scaffold1	43648	F	4	0.00	100.00
scaffold1.43788	scaffold1	43788	F	3	0.00	100.00
scaffold1.43818	scaffold1	43818	F	3	0.00	100.00
scaffold1.44516	scaffold1	44516	F	3	0.00	100.00
scaffold1.44713	scaffold1	44713	F	5	0.00	100.00
scaffold1.44719	scaffold1	44719	F	6	0.00	100.00
scaffold1.44860	scaffold1	44860	F	3	0.00	100.00
scaffold1.44862	scaffold1	44862	F	3	0.00	100.00
scaffold1.45677	scaffold1	45677	F	3	0.00	100.00
scaffold1.45683	scaffold1	45683	F	3	0.00	100.00
scaffold1.45736	scaffold1	45736	F	5	0.00	100.00
scaffold1.47244	scaffold1	47244	F	8	0.00	100.00
scaffold1.47265	scaffold1	47265	F	5	20.00	80.00
scaffold1.49801	scaffold1	49801	F	4	0.00	100.00
scaffold1.50021	scaffold1	50021	F	5	0.00	100.00
scaffold1.52362	scaffold1	52362	F	5	0.00	100.00
scaffold1.52371	scaffold1	52371	F	5	0.00	100.00
scaffold1.52400	scaffold1	52400	F	3	0.00	100.00
scaffold1.52406	scaffold1	52406	F	3	0.00	100.00
scaffold1.52408	scaffold1	52408	F	4	0.00	100.00
scaffold1.52418	scaffold1	52418	F	5	0.00	100.00
scaffold1.52422	scaffold1	52422	F	4	0.00	100.00
scaffold1.52454	scaffold1	52454	F	4	0.00	100.00
scaffold1.52467	scaffold1	52467	F	3	0.00	100.00
scaffold1.52472	scaffold1	52472	F	3	0.00	100.00
scaffold1.52512	scaffold1	52512	F	4	0.00	100.00
scaffold1.52528	scaffold1	52528	F	4	0.00	100.00
scaffold1.53044	scaffold1	53044	F	5	0.00	100.00
scaffold1.53050	scaffold1	53050	F	5	0.00	100.00

Products


In [56]:
cd /Users/sr320/git-repos/sr320.github.io/jupyter


/Users/sr320/git-repos/sr320.github.io/jupyter

In [58]:
mkdir analyses

In [59]:
mkdir analyses/$(date +%F)

In [61]:
for i in ("M2","M3"):
    !cp /Volumes/caviar/wd/2016-10-11/mkfmt_{i}.txt analyses/$(date +%F)/mkfmt_{i}.txt

In [63]:
# Preview the first lines of every file in the analyses folder named after the current date.
!head analyses/$(date +%F)/*


==> analyses/2016-10-11/mkfmt_M2.txt <==
chr.Base	chr	base	strand	coverage	freqC	freqT
scaffold1.14274	scaffold1	14274	F	4	0.00	100.00
scaffold1.14305	scaffold1	14305	F	4	0.00	100.00
scaffold1.15309	scaffold1	15309	F	4	0.00	100.00
scaffold1.15315	scaffold1	15315	F	4	0.00	100.00
scaffold1.15336	scaffold1	15336	F	5	0.00	100.00
scaffold1.16016	scaffold1	16016	F	3	0.00	100.00
scaffold1.17209	scaffold1	17209	F	3	0.00	100.00
scaffold1.19038	scaffold1	19038	F	4	0.00	100.00
scaffold1.19069	scaffold1	19069	F	4	0.00	100.00

==> analyses/2016-10-11/mkfmt_M3.txt <==
chr.Base	chr	base	strand	coverage	freqC	freqT
scaffold1.259	scaffold1	259	F	4	100.00	0.00
scaffold1.263	scaffold1	263	F	4	100.00	0.00
scaffold1.267	scaffold1	267	F	4	100.00	0.00
scaffold1.271	scaffold1	271	F	4	100.00	0.00
scaffold1.299	scaffold1	299	F	4	50.00	50.00
scaffold1.10033	scaffold1	10033	F	3	0.00	100.00
scaffold1.10343	scaffold1	10343	F	4	0.00	100.00
scaffold1.10530	scaffold1	10530	F	4	0.00	100.00
scaffold1.10569	scaffold1	10569	F	5	0.00	100.00

urls

https://raw.githubusercontent.com/sr320/sr320.github.io/master/jupyter/analyses/2016-10-11/mkfmt_M2.txt

https://raw.githubusercontent.com/sr320/sr320.github.io/master/jupyter/analyses/2016-10-11/mkfmt_M3.txt

In [ ]: