In [ ]:
from environment import *
# Read the project configuration from the parent of the working directory.
with open("../project.json") as io:
    PROJECT_JSON = json.loads(io.read())

# Build the path lookup used by the cells below from that configuration.
PATH = make_path_dict(PROJECT_JSON)
In [ ]:
# Tab-separated table whose rows are iterated as targets further down; the
# first column of the file becomes the row index.
target_x_sample = pd.read_csv(
    PATH["target_x_sample.tsv"],
    index_col=0,
    sep="\t",
)
In [ ]:
# Annotation groups handed to kraft.plot_scatter_and_annotate. Only one group
# is defined; each group is a 4-tuple.
annotation = (
    (
        "Peek",  # group label
        PROJECT_JSON["gene_sets_to_peek"],  # gene sets to highlight
        12,  # presumably a marker/font size -- TODO confirm against kraft
        "#20d9ba",  # presumably the highlight color -- TODO confirm
    ),
)
In [ ]:
# For every target, plot gene-set score against significance (1 - P-Value):
# once for all gene sets, then once per GMT file listed in the project
# configuration, restricted to the gene sets that file defines.

# Index of each GMT file, filled lazily so a file is parsed at most once
# instead of once per target.
gene_set_index_by_file_path = {}

for target_name, target_values in target_x_sample.iterrows():
    # Drop entries equal to -1 (presumably a "no label" marker -- TODO confirm).
    target_values = target_values[target_values != -1]

    # Skip targets where some remaining value occurs fewer than two times.
    if target_values.value_counts().min() < 2:
        continue

    score_moe_p_value_fdr = pd.read_csv(
        os.path.join(
            PATH["find_differentially_expressed_gene_set/"], target_name, "all.tsv"
        ),
        sep="\t",
        index_col=0,
    )

    # Nothing to plot when no P-Value was computed for this target.
    if score_moe_p_value_fdr["P-Value"].isna().all():
        continue

    output_directory_path = os.path.join(
        PATH["compare_differentially_expressed_gene_set/"], target_name
    )
    kraft.establish_path(output_directory_path, "directory")

    x = score_moe_p_value_fdr["Score"]
    if x.isna().all():
        continue

    # Build the y-axis once per target; every plot below reuses it.
    y_name = "Significance"
    y = pd.Series(1 - score_moe_p_value_fdr["P-Value"], name=y_name)

    kraft.plot_scatter_and_annotate(
        x,
        y,
        "x",
        annotation=annotation,
        title_text="All",
        html_file_path=os.path.join(output_directory_path, "all.html"),
    )

    for gene_set_file_path in PROJECT_JSON["gene_set_file_paths"]:
        gene_sets_name = os.path.split(gene_set_file_path)[1]

        if gene_set_file_path not in gene_set_index_by_file_path:
            gene_set_index_by_file_path[gene_set_file_path] = kraft.read_gmt(
                gene_set_file_path
            ).index

        # Explicit set intersection: the `&` operator between Index objects is
        # deprecated as a set operation and no longer intersects in pandas 2.
        gene_sets = score_moe_p_value_fdr.index.intersection(
            gene_set_index_by_file_path[gene_set_file_path]
        )

        # Subset into a fresh name. Rebinding `x` here would make every later
        # GMT file select from the previous file's subset instead of from the
        # full score column.
        x_subset = x[gene_sets]
        if x_subset.isna().all():
            continue

        kraft.plot_scatter_and_annotate(
            x_subset,
            y[gene_sets],
            "x",
            annotation=annotation,
            title_text=gene_sets_name,
            html_file_path=os.path.join(
                output_directory_path, f"{gene_sets_name}.html"
            ),
        )