notebook.community

Edit and run



In [ ]:

    
from environment import *

# Load the project configuration.
# The file handle is deliberately not called `io`: that name would shadow the
# standard-library `io` module if `from environment import *` put it in this
# namespace. The encoding is pinned so that parsing does not depend on the
# locale's default encoding.
with open("../project.json", encoding="utf-8") as project_json_file:

    PROJECT_JSON = json.load(project_json_file)

# Build the path lookup from the configuration; later cells index PATH with
# file and directory names.
# NOTE(review): `json` and `make_path_dict` are presumably supplied by the star
# import above — confirm against the environment module.
PATH = make_path_dict(PROJECT_JSON)



In [ ]:

    
# Every table read in this cell is tab-separated and uses its first column as
# the row index, so the reader options are spelled out once and shared.
tsv_options = {"sep": "\t", "index_col": 0}

gene_x_sample = pd.read_csv(PATH["gene_x_sample.processed.tsv"], **tsv_options)

target_x_sample = pd.read_csv(PATH["target_x_sample.tsv"], **tsv_options)

# Read each configured GMT file with kraft and concatenate the results.
gene_sets = pd.concat(
    [
        kraft.read_gmt(gmt_file_path)
        for gmt_file_path in PROJECT_JSON["gene_set_file_paths"]
    ]
)

gene_set_x_information = pd.read_csv(
    PATH["gene_set_x_information.tsv"], **tsv_options
)



In [ ]:

    
# Settings that are identical for every target are looked up once, before the
# loop, instead of being re-read from PROJECT_JSON on every iteration.
gsea_match_function_name = PROJECT_JSON["gsea_match_function_name"]

gsea_statistic = PROJECT_JSON["gsea_statistic"]

gene_sets_to_peek = PROJECT_JSON["gene_sets_to_peek"]

# The selection of "Good" gene sets does not depend on the target either, so
# it is built once rather than once per target.
# NOTE(review): astype(bool) maps NaN and every non-empty string to True —
# confirm the "Good" column holds real booleans or 0/1.
good_gene_sets = gene_sets.loc[
    gene_set_x_information.index[gene_set_x_information["Good"].astype(bool)]
]

for target_name in target_x_sample.index:

    # One output directory per target under the "gsea/" path.
    output_directory_path = os.path.join(PATH["gsea/"], target_name)

    kraft.establish_path(output_directory_path, "directory")

    # Per-target table found under "find_differentially_expressed_gene/";
    # only its "Score" column is used below.
    score_moe_p_value_fdr = pd.read_csv(
        os.path.join(
            PATH["find_differentially_expressed_gene/"],
            target_name,
            f"all.{gsea_match_function_name}.tsv",
        ),
        sep="\t",
        index_col=0,
    )

    # rename() returns a new, renamed Series, so the column object that belongs
    # to score_moe_p_value_fdr is not mutated.
    # NOTE: gene_score stays bound after the loop (holding the last target's
    # scores) and is read again by a later cell.
    gene_score = score_moe_p_value_fdr["Score"].rename("Gene Score")

    # Score every "Good" gene set against this target's gene scores and sort
    # the result in ascending order.
    # NOTE(review): squeeze() collapses a 1x1 result to a scalar, on which
    # sort_values() would fail — confirm more than one gene set is always
    # selected, or squeeze along the known axis.
    gene_set_score = (
        kraft.run_single_sample_gseas(
            gene_score.to_frame(),
            good_gene_sets,
            statistic=gsea_statistic,
            n_job=PROJECT_JSON["n_job"],
        )
        .squeeze()
        .sort_values()
    )

    gene_set_score.name = f"GSEA Score ({gsea_statistic})"

    # Persist the sorted gene-set scores as a tab-separated file.
    gene_set_score.to_csv(
        os.path.join(output_directory_path, f"{gsea_match_function_name}.tsv"),
        sep="\t",
    )

    # Plot the scores and annotate the gene sets listed under
    # "gene_sets_to_peek"; the plot is written to an HTML file.
    kraft.plot_scatter_and_annotate(
        None,
        gene_set_score,
        "y",
        annotation=(("Peek", gene_sets_to_peek, 8, "#20d9ba"),),
        title_text=target_name,
        html_file_path=os.path.join(
            output_directory_path,
            f"{gsea_match_function_name}.plot_point_and_annotate.html",
        ),
    )

    # From here on output_directory_path points at the per-target ".mountain"
    # sub-directory that receives one HTML file per peeked gene set.
    output_directory_path = os.path.join(
        output_directory_path, f"{gsea_match_function_name}.mountain"
    )

    kraft.establish_path(output_directory_path, "directory")

    for gene_set_name in gene_sets_to_peek:

        kraft.run_single_sample_gsea(
            gene_score,
            gene_sets.loc[gene_set_name],
            statistic=gsea_statistic,
            title_text=f"{target_name}<br>{gene_set_name}",
            html_file_path=os.path.join(output_directory_path, f"{gene_set_name}.html"),
        )



In [ ]:

    
import julia
from julia.api import Julia

Julia(compiled_modules=False)


julia.install()

%load_ext julia.magic



In [ ]:

    
%%julia

# Make the local Kraft.jl sources loadable with `using Kraft`.
# The location can be overridden through the KRAFT_JL_SRC environment
# variable; when it is unset the original checkout path is used.
# The membership test keeps LOAD_PATH from growing by one duplicate entry each
# time this cell is re-run. LOAD_PATH is returned so the cell output is the
# same value that a bare push! would produce.
let kraft_jl_src = get(ENV, "KRAFT_JL_SRC", "/home/kwat/github/Kraft.jl/src")
    kraft_jl_src in LOAD_PATH || push!(LOAD_PATH, kraft_jl_src)
    LOAD_PATH
end



In [ ]:

    
# Bind the Python-side project configuration to the name PROJECT_JSON in
# Julia's Main module; the next %%julia cell reads it from there.
# NOTE(review): the Python -> Julia conversion of the dict is done by PyJulia;
# the resulting Julia type is not visible from this notebook.
julia.Main.PROJECT_JSON = PROJECT_JSON



In [ ]:

    
%%julia

using Kraft: read_gmt

# Hand the configured GMT file paths to Kraft's reader; the result stays in
# Julia's Main as gene_set_name_genes for the enrichment cell.
gene_set_name_genes = PROJECT_JSON["gene_set_file_paths"] |> read_gmt



In [ ]:

    
# Hand the gene scores to Julia as two parallel objects: the score values and
# the matching gene names (the Series index) as a plain list.
# NOTE: gene_score is only assigned inside the per-target loop in an earlier
# cell, so this uses whatever the last iteration left bound (the final entry of
# target_x_sample.index) and fails with NameError if that loop never ran.
julia.Main.gene_values = gene_score.values

julia.Main.genes = gene_score.index.tolist()



In [ ]:

    
%%julia

using Kraft: compute_gene_set_enrichment

# gene_values and genes were assigned into Main from Python, and
# gene_set_name_genes comes from the read_gmt cell; all three are passed on
# positionally in that order.
gene_set_name_enrichment = compute_gene_set_enrichment(gene_values, genes, gene_set_name_genes)



In [ ]:

    
# gene_set_name_enrichment is assigned inside a %%julia cell, so it lives in
# Julia's Main module and is never bound as a Python name. Read it through
# julia.Main, the same way the other cells exchange values with Julia, rather
# than through a bare Python name that would raise NameError.
julia.Main.gene_set_name_enrichment