In [167]:
# Jupyter-specific imports.
# `IPython.display` is the public home of these helpers; importing `display`
# from `IPython.core.display` is deprecated since IPython 7.14.
from IPython.display import display, HTML, Markdown
# Standard-library import (used to delete downloaded test files)
from os import remove
# Import all the functions from pycltools (jhelp, jprint, head, bash, ...)
from pycltools.pycltools import *
In [168]:
help(jhelp)
In [169]:
jhelp(jhelp, full=True)
In [170]:
jhelp(jprint, full=True)
In [45]:
txt="Lorem ipsum condimentum elementum sapien nam eleifend quisque sapien curae"
jprint(txt,font="sans", color="purple", size=200, bold=True)
In [46]:
txt="Lorem ipsum\n\tcondimentum elementum\n\t\tsapien nam eleifend quisque\n\t\t\tsapien curae"
jprint(txt,font="sans", color="powderblue", size=200, bold=True, line_height=50)
In [47]:
jprint("Lorem","ipsum","condimentum","elementum", 1, True, bold=False, italic=False, highlight=False, underlined=True, striked=False, subscripted=False, superscripted=False, font="calibri", color="grey", size=250, align="center")
In [171]:
jhelp(toogle_code, full=True)
In [49]:
#toogle_code()
In [172]:
jhelp(larger_display, full=True)
In [173]:
larger_display(100)
In [174]:
jhelp(is_readable_file, full=True)
In [53]:
# Path that presumably does not exist: any OSError raised by
# is_readable_file is caught so that only its message is printed.
try:
    is_readable_file("./data/KJHYTGYUJ")
    print("OK")
except OSError as E:
    print(E)
In [54]:
# Same check on a file from the ./data folder: "OK" is printed when
# is_readable_file does not raise, otherwise the OSError message is shown.
try:
    is_readable_file("./data/RADAR_Secondary.txt")
    print("OK")
except OSError as E:
    print(E)
In [175]:
jhelp(is_gziped, full=True)
In [56]:
is_gziped("./data/RADAR_Secondary.txt")
Out[56]:
In [57]:
is_gziped("./data/RADAR_Secondary.txt.gz")
Out[57]:
In [176]:
jhelp(has_extension, full=True)
In [59]:
has_extension("./data/test/RADAR_Secondary.txt.gz", "gz")
Out[59]:
In [60]:
has_extension("./data/test/RADAR_Secondary.txt.gz", "fa")
Out[60]:
In [61]:
has_extension("./data/test/RADAR_Secondary.txt.gz", "txt", -2)
Out[61]:
In [177]:
jhelp(file_basename, full=True)
In [63]:
file_basename("./data/RADAR_Secondary.txt.gz")
Out[63]:
In [178]:
jhelp(extensions, full=True)
In [65]:
print(extensions("./data/RADAR_Secondary.txt.gz"))
print(extensions("./data/RADAR_Secondary.txt"))
print(extensions("./data/RADAR_Secondary"))
In [66]:
jhelp(extensions_list, full=True)
In [67]:
print(extensions_list("./data/RADAR_Secondary.txt.gz"))
print(extensions_list("./data/RADAR_Secondary.txt"))
print(extensions_list("./data/RADAR_Secondary"))
In [179]:
jhelp(file_name, full=True)
In [69]:
file_name("./data/test/RADAR_Secondary.txt.gz")
Out[69]:
In [180]:
jhelp(dir_name, full=True)
In [71]:
print(dir_name("./data/test/RADAR_Secondary.txt.gz"))
print(dir_name("./__init__.py"))
print(dir_name("/bin/bash"))
In [72]:
jhelp(dir_path, full=True)
In [73]:
print(dir_path("./data/test/RADAR_Secondary.txt.gz"))
print(dir_path("./__init__.py"))
print(dir_path("/bin/bash"))
In [181]:
jhelp(supersplit, full=True)
In [75]:
a = "chr7\t74138\t774138\tA>I|LOC100129917|LUNG:LYMPHOBLASTOID_CELL_LINE|15342557:15258596:22327324\t0"
print(supersplit(a, ["\t","|"]))
print(supersplit(a))
print(supersplit(a, "|"))
In [182]:
jhelp(rm_blank, full=True)
In [77]:
a = "chr\t\t17|LU NG:LYMPHOBLAST OID_CELL_LINE|15342557:152585 96:22327324\t0"
print(rm_blank(a))
print(rm_blank(a, replace="*"))
In [183]:
jhelp(copyFile, full=True)
In [79]:
copyFile(src="./data/RADAR_Secondary.txt", dest="./data/")
In [80]:
copyFile(src="./data/RADAR_Secondary.txt", dest="./data/RADAR_Secondary_copy.txt")
In [184]:
jhelp(gzip_file, full=True)
In [82]:
gzip_file("./data/RADAR_Secondary.txt")
Out[82]:
In [185]:
jhelp(gunzip_file, full=True)
In [84]:
gunzip_file("./data/RADAR_Secondary.txt.gz")
Out[84]:
In [186]:
jhelp(linerange, full=True)
In [86]:
# Call with default options only
file = "./data/RADAR_Secondary.txt"
linerange(file)
In [87]:
# Explicit list of line ranges, plus a max_char_line limit
file = "./data/gencode_sample.gff3"
linerange(file, [[2, 5], [10, 12], [98, 100]], max_char_line=100)
In [88]:
# .gz input, with line numbering switched off
file = "./data/RADAR_Secondary.txt.gz"
linerange(file, line_numbering=False)
In [187]:
jhelp(cat, full=True)
In [90]:
# .gz input, limited with max_lines
file = "./data/RADAR_Secondary.txt.gz"
cat(file, max_lines=10)
In [91]:
# Line numbering enabled, with limits on both line count and line width
file = "./data/gencode_sample.gff3"
cat(file, max_lines=20, line_numbering=True, max_char_line=100)
In [188]:
jhelp(tail, full=True)
In [93]:
# Plain text input, n=4
file = "./data/RADAR_clean.txt"
tail(file, n=4)
In [94]:
# .gz input, n=4, with line numbering
file = "./data/RADAR_Secondary.txt.gz"
tail(file, n=4, line_numbering=True)
In [95]:
# GFF3 input, n=5, with a max_char_line limit
file = "./data/gencode_sample.gff3"
tail(file, n=5, max_char_line=100)
In [189]:
jhelp(head, full=True)
In [97]:
head("./data/RADAR_Main.txt", n= 3)
In [98]:
head("./data/RADAR_Main.txt", ignore_comment_line=True,n= 3)
In [99]:
head("./data/RADAR_Main.txt", n=5, max_char_line=110)
In [100]:
head("./data/RADAR_Secondary.txt.gz", n=6, ignore_comment_line=True)
In [101]:
head("./data/sample.sam", n=6, ignore_comment_line=True)
In [102]:
head ("./data/sample_100.bam", n=6)
In [103]:
jhelp(linesample, full=True)
In [104]:
linesample("./data/RADAR_clean.txt", n_lines=10, line_numbering=True)
In [105]:
linesample("./data/RADAR_Secondary.txt.gz", n_lines=10, line_numbering=True)
In [106]:
jhelp(count_uniq, full=True)
In [107]:
count_uniq("./data/Small_editing_Peng_hg38.bed", colnum=17, sep=['\t',"|"])
Out[107]:
In [108]:
count_uniq("./data/gencode_sample.gff3", colnum=17, sep=["\t","=", ";"], select_values={2:["transcript", "exon"], 6:"+"})
Out[108]:
In [190]:
jhelp(colsum, full=True)
In [110]:
display(Markdown(colsum("./data/RADAR_Main.txt", header=True, colrange=[0,2,6], max_items=15)))
In [111]:
colsum("./data/RADAR_Main.txt", header=True, ret_type="dict", colrange=[0,3])
Out[111]:
In [112]:
# Same kind of summary as above, this time requested as a "report" and printed
print(
    colsum(
        "./data/RADAR_clean.txt",
        header=True,
        ignore_hashtag_line=True,
        ret_type="report",
        separator=["\t", "|"],
        max_items=5,
    )
)
In [191]:
jhelp(fastcount, full=True)
In [114]:
fastcount("./data/RADAR_Secondary.txt")
Out[114]:
In [115]:
fastcount("./data/RADAR_Secondary.txt.gz")
Out[115]:
In [192]:
jhelp(simplecount, full=True)
In [117]:
simplecount("./data/Small_m5C_Squires_hg38.bed", ignore_hashtag_line=True)
Out[117]:
In [118]:
simplecount("./data/RADAR_Secondary.txt.gz")
Out[118]:
In [193]:
jhelp(mkdir, full=True)
In [120]:
mkdir("./data/test_dir")
In [121]:
mkdir ("./test/test/test")
!rm -rf ./test
In [194]:
jhelp(make_cmd_str, full=True)
In [123]:
make_cmd_str("bwa", {"-b":None, "-t":6, "-i":"../idx/seq.fa"}, ["../read1", "../read2"])
Out[123]:
In [195]:
jhelp(bash_basic, full=True)
In [125]:
print(bash_basic("ls -l"))
print(bash_basic("echo TTTT"))
print(bash_basic("grep ./data/RADAR_Secondary.txt"))
In [196]:
jhelp(bash, full=True)
In [127]:
bash("ls", print_stdout=True, ret_stdout=True,)
Out[127]:
In [128]:
bash("for i in 1 2 3 4; do echo $i && sleep 1 && ls error ;done", live="stderr", print_stdout=True, ret_stdout=True, print_stderr=True)
In [129]:
bash("ls", print_stdout=False, ret_stdout=False, log_stdout="./data/stdout.txt")
head("./data/stdout.txt")
In [197]:
jhelp(bash_update, full=True)
In [131]:
#bash_update("htop")
In [198]:
jhelp(dict_to_md, full=True)
In [133]:
d = {"a":12,"b":14,"c":8,"d":56,"e":76}
display(Markdown(dict_to_md(d, "Letter", "Number", sort_by_val=True)))
display(Markdown(dict_to_md(d, "Letter", "Number", transpose=True, max_items=3)))
In [199]:
jhelp(dict_to_report, full=True)
In [135]:
d = {"a":12,"b":14,"c":{"c1":12,"c2":{"c2.1":33221,"c2.2":765},"c3":32,"c4":443},"d":56,"e":76}
print(dict_to_report(d, tab=" | "))
d = {"a":12,"b":14,"c":{"c1":12,"c2":{"c2.1":33221,"c2.2":765, "c2.3":7533,"c2.4":76433,"c2.5":876543,"c2.6":89765,"c2.7":8654},"c3":32,"c4":443},"d":56,"e":76}
print(dict_to_report(d, tab="--", max_items=4, sort_dict=True))
In [200]:
jhelp(reformat_table, full = True)
In [137]:
# With numeric index
reformat_table(
    input_file="./data/Small_m5C_Squires_hg38.bed",
    output_file="./data/Small_m5C_Squires_hg38_reformat.bed",
    init_template=[0, "\t", 1, "\t", 2, "\t", 3, "|", 4, "\t", 5, "\t", 6],
    final_template=[0, "\t", 1, "\t", 2, "\tm5C|*|HeLa|22344696\t-\t", 6],
    replace_internal_space="_",
    replace_null_val="*",
    keep_original_header=False,
    header="# New header\n",
)
# Show the input file, then the reformatted output, for comparison
linerange("./data/Small_m5C_Squires_hg38.bed")
linerange("./data/Small_m5C_Squires_hg38_reformat.bed")
In [138]:
# With str index
reformat_table(
    input_file="./data/Small_m5C_Squires_hg38.bed",
    output_file="./data/Small_m5C_Squires_hg38_reformat.bed",
    init_template=[
        "{chrom}", "\t", "{start}", "\t", "{end}", "|",
        "{name}", "\t", "{score}", "\t", "{strand}",
    ],
    final_template=["{start}", "\t", "{end}", "\tadditional_informations\t", "{name}"],
    replace_internal_space="_",
    replace_null_val="*",
    keep_original_header=False,
    header="# New header\n",
    verbose=True,
)
# Show the input file, then the reformatted output, for comparison
linerange("./data/Small_m5C_Squires_hg38.bed")
linerange("./data/Small_m5C_Squires_hg38_reformat.bed")
In [139]:
# Dictionaries passed through the subst_dict / filter_dict options below
subst_dict = {0: {"chr1": "1", "chr2": "2"}, 3: {"Peng": "22344696"}}
filter_dict = {18: ["intron"]}
input_file = "./data/Small_editing_Peng_hg38.bed"
output_file = "./data/Small_editing_Peng_hg38_reformat.bed"
reformat_table(
    input_file,
    output_file,
    init_template=[
        0, "\t", 1, "\t", 2, "\t",
        3, "|", 4, "|", 5, "|", 6, "|", 7, "|", 8, "|",
        9, "->", 10, "|", 11, "%|",
        12, "|", 13, "|", 14, "|", 15, "|", 16, "|", 17, "|", 18, "|",
        19, "\t", 20, "\t", 21,
    ],
    final_template=[
        0, "\t", 1, "\t", 2, "\t",
        9, ">", 10, "|", 3, "|HeLa|", 19, "\t", 11, "\t", 21,
    ],
    replace_internal_space="_",
    replace_null_val="*",
    subst_dict=subst_dict,
    filter_dict=filter_dict,
    verbose=True,
)
linerange(input_file)
linerange(output_file)
In [140]:
# No output file here: return_df=True is set and the result is kept in `df`
input_file = "./data/Small_editing_Peng_hg38.bed"
df = reformat_table(
    input_file,
    return_df=True,
    init_template=[
        0, "\t", 1, "\t", 2, "\t",
        3, "|", 4, "|", 5, "|", 6, "|", 7, "|", 8, "|",
        9, "->", 10, "|", 11, "%|",
        12, "|", 13, "|", 14, "|", 15, "|", 16, "|", 17, "|", 18, "|",
        19, "\t", 20, "\t", 21,
    ],
    replace_internal_space="_",
    replace_null_val="*",
    verbose=True,
)
print(head(input_file, 11))
df.head()
Out[140]:
In [141]:
# Uses a named standard_template instead of explicit init/final templates
input_file = "./data/gencode_sample.gff3"
df = reformat_table(
    input_file,
    return_df=True,
    standard_template="gff3_ens_transcript",
    keep_original_header=False,
    header_from_final_template=True,
    verbose=True,
)
print(head(input_file, 11))
df.head()
Out[141]:
In [201]:
jhelp(url_exist, full=True)
In [143]:
url_exist("http://www.google.com") # If this one is ever False, it is probably the end of the world
Out[143]:
In [144]:
url_exist("http://www.JUYGKUYHGJHFJ.com")
Out[144]:
In [202]:
jhelp(wget, full=True)
In [146]:
# NOTE(review): the URL passed to wget is an empty string here — confirm
# whether the original address was lost and restore it if so.
outfile = wget("")
# Only report and delete the file when wget returned a truthy value
if outfile:
    print(outfile)
    remove(outfile)
In [147]:
# Call wget with a URL, a destination file name and a third positional value
outfile = wget(
    "https://www.encodeproject.org/files/ENCFF000HJC/@@download/ENCFF000HJC.bigWig",
    "test.bigWig",
    50000000,
)
# Only report and delete the file when wget returned a truthy value
if outfile:
    print(outfile)
    remove(outfile)
In [203]:
jhelp(print_arg, full=True)
In [149]:
# Toy function mixing positional, default, *args and **kwarg parameters;
# its only job is to call print_arg() from inside a function call.
def test(A, B, C=7, *args, **kwarg):
    print_arg()

# One extra positional value (5) and two keyword values (z, x)
test(1, 2, 3, 5, z=65, x=100)
In [204]:
jhelp(scp, full=True)
In [151]:
#scp(hostname="ebi-cli-001.ebi.ac.uk", local_file="../README.md", remote_dir="~/test", username="aleg", rsa_private_key="/home/aleg/.ssh/ebi_rsa")
In [152]:
#scp(hostname="ebi", local_file="../README.md", remote_dir="~/test")
In [153]:
jhelp(get_package_file, full=True)
In [154]:
get_package_file("pyCL", "pyCL/")
In [155]:
jhelp(bam_sample, full=True)
In [156]:
bam_sample("./data/sample.sam", fp_out="./data/sample_100.sam", n_reads=100, verbose=True)
linesample("./data/sample_100.sam", n_lines=10, max_char_line=100)
In [157]:
bam_sample("./data/sample.sam", fp_out="./data/sample_100.bam", n_reads=100, verbose=True)
!samtools view "./data/sample_100.bam" | head
In [158]:
bam_sample("./data/sample.txt", fp_out="./data/sample_100.bam", n_reads=100, verbose=True)
In [159]:
bam_sample("./data/sample.sam", fp_out="./data/sample_100.txt", n_reads=100, verbose=True)
In [205]:
jhelp(base_generator, full = True)
In [161]:
# Generator created with default arguments; print the first 10 items
bg = base_generator()
for i in range(10):
    print(next(bg))
In [162]:
# Generator created with an explicit alphabet and one weight per base;
# print the first 10 items
bg = base_generator(
    bases=["A", "T", "C", "G", "N"],
    weights=[0.8, 0.8, 0.2, 0.2, 0.1],
)
for i in range(10):
    print(next(bg))
In [206]:
jhelp(make_sequence, full=True)
In [164]:
make_sequence()
Out[164]:
In [165]:
make_sequence(bases=['A', 'T', 'C', 'G', 'N'], weights=[], length=100)
Out[165]:
In [166]:
make_sequence(bases=['A', 'T', 'C', 'G', 'N'], weights=[0.8, 0.8, 0.2, 0.2, 0.1], length=100)
Out[166]: