In [ ]:
from environment import *
# Load the project configuration; later cells read their settings from
# PROJECT_JSON. The encoding is passed explicitly because open() otherwise
# falls back to the platform's locale encoding, while JSON text is UTF-8.
# The handle is not called `io` so it cannot shadow the standard-library
# module of that name in the notebook namespace.
with open("../project.json", encoding="utf-8") as project_json_file:
    PROJECT_JSON = json.load(project_json_file)

# Lookup built from the configuration by make_path_dict; later cells index it
# by file name (e.g. "target_x_sample.tsv") or directory name (e.g. "gsea/").
PATH = make_path_dict(PROJECT_JSON)
In [ ]:
def _read_tsv(file_path):
    """Read a tab-separated table, using its first column as the index."""
    return pd.read_csv(file_path, sep="\t", index_col=0)


# Tables produced earlier in the project, located through PATH.
gene_x_sample = _read_tsv(PATH["gene_x_sample.processed.tsv"])
target_x_sample = _read_tsv(PATH["target_x_sample.tsv"])

# Every configured .gmt file is read with kraft.read_gmt and the results are
# stacked into one table of gene sets.
gene_sets = pd.concat(
    [
        kraft.read_gmt(gmt_file_path)
        for gmt_file_path in PROJECT_JSON["gene_set_file_paths"]
    ]
)

# Per-gene-set annotations; its "Good" column is used later to choose which
# gene sets are scored.
gene_set_x_information = _read_tsv(PATH["gene_set_x_information.tsv"])
In [ ]:
# Settings that are the same for every target are looked up once.
match_function_name = PROJECT_JSON["gsea_match_function_name"]
gsea_statistic = PROJECT_JSON["gsea_statistic"]
gene_sets_to_peek = PROJECT_JSON["gene_sets_to_peek"]

# Gene sets whose "Good" flag is truthy. This selection does not depend on the
# target, so it is computed once here instead of once per loop iteration.
# NOTE(review): astype(bool) treats any non-empty string and NaN as True;
# confirm that the "Good" column really holds 0/1 or booleans.
good_gene_sets = gene_sets.loc[
    gene_set_x_information.index[gene_set_x_information["Good"].astype(bool)]
]

# For each target: score the good gene sets against that target's gene scores,
# save the scores as a table and a plot, then draw one plot per gene set listed
# in "gene_sets_to_peek".
for target_name in target_x_sample.index:

    output_directory_path = os.path.join(PATH["gsea/"], target_name)
    kraft.establish_path(output_directory_path, "directory")

    # Per-gene statistics written by the differential-expression step.
    score_moe_p_value_fdr = pd.read_csv(
        os.path.join(
            PATH["find_differentially_expressed_gene/"],
            target_name,
            f"all.{match_function_name}.tsv",
        ),
        sep="\t",
        index_col=0,
    )

    # rename() returns a new Series, so the column held by the DataFrame is not
    # relabelled in place.
    # NOTE: `gene_score` is read again by a later cell after this loop ends, so
    # that cell only ever sees the value from the last target.
    gene_score = score_moe_p_value_fdr["Score"].rename("Gene Score")

    # NOTE(review): squeeze() is presumably collapsing a one-column result to a
    # Series; with exactly one good gene set it would yield a scalar and
    # sort_values() would fail. Confirm the shape kraft returns.
    gene_set_score = (
        kraft.run_single_sample_gseas(
            gene_score.to_frame(),
            good_gene_sets,
            statistic=gsea_statistic,
            n_job=PROJECT_JSON["n_job"],
        )
        .squeeze()
        .sort_values()
        .rename(f"GSEA Score ({gsea_statistic})")
    )

    gene_set_score.to_csv(
        os.path.join(output_directory_path, f"{match_function_name}.tsv"),
        sep="\t",
    )

    kraft.plot_scatter_and_annotate(
        None,
        gene_set_score,
        "y",
        annotation=(("Peek", gene_sets_to_peek, 8, "#20d9ba"),),
        title_text=target_name,
        html_file_path=os.path.join(
            output_directory_path,
            f"{match_function_name}.plot_point_and_annotate.html",
        ),
    )

    # The per-gene-set plots go into their own sub-directory; a separate name
    # keeps the target directory path intact.
    mountain_directory_path = os.path.join(
        output_directory_path, f"{match_function_name}.mountain"
    )
    kraft.establish_path(mountain_directory_path, "directory")

    for gene_set_name in gene_sets_to_peek:

        kraft.run_single_sample_gsea(
            gene_score,
            gene_sets.loc[gene_set_name],
            statistic=gsea_statistic,
            title_text=f"{target_name}<br>{gene_set_name}",
            html_file_path=os.path.join(
                mountain_directory_path, f"{gene_set_name}.html"
            ),
        )
In [ ]:
import julia
from julia.api import Julia
Julia(compiled_modules=False)
julia.install()
%load_ext julia.magic
In [ ]:
%%julia
# Make the local Kraft.jl sources loadable with `using Kraft`.
# The entry is added only when absent, so re-running this cell does not pile
# up duplicate LOAD_PATH entries.
# NOTE(review): this is an absolute, machine-specific path; consider moving it
# into the project configuration.
KRAFT_SRC_PATH = "/home/kwat/github/Kraft.jl/src"
KRAFT_SRC_PATH in LOAD_PATH || push!(LOAD_PATH, KRAFT_SRC_PATH)
LOAD_PATH
In [ ]:
# `Main` is imported explicitly: `julia.Main` only exists as an attribute of
# the `julia` package once that submodule has been imported somewhere.
from julia import Main

# Expose the project configuration to the Julia session as the global
# `PROJECT_JSON`, which the following %%julia cell reads.
Main.PROJECT_JSON = PROJECT_JSON
In [ ]:
%%julia
# Read every configured .gmt file with Kraft's read_gmt. PROJECT_JSON is the
# Julia global assigned from Python in the preceding cell.
using Kraft: read_gmt

gmt_file_paths = PROJECT_JSON["gene_set_file_paths"]
gene_set_name_genes = read_gmt(gmt_file_paths)
In [ ]:
# `Main` is imported explicitly: `julia.Main` only exists as an attribute of
# the `julia` package once that submodule has been imported somewhere.
from julia import Main

# Hand the gene scores to Julia as two parallel globals: values and gene names.
# NOTE(review): `gene_score` is whatever the last iteration of the per-target
# loop left behind, so only the final target is analysed here. Confirm that
# this is intended.
Main.gene_values = gene_score.values
Main.genes = gene_score.index.tolist()
In [ ]:
%%julia
# Compute the enrichment of each gene set from the gene values and names that
# the previous Python cell assigned into Main.
using Kraft: compute_gene_set_enrichment

gene_set_name_enrichment = compute_gene_set_enrichment(gene_values, genes, gene_set_name_genes)
In [ ]:
# The result was bound inside the Julia session, not in the Python namespace,
# so it has to be fetched through Julia's Main module; the bare name would
# raise NameError here.
from julia import Main

Main.gene_set_name_enrichment