In [ ]:
from environment import *

# Load the project configuration and build the PATH lookup that later cells
# index by file/directory name.
# The handle is deliberately not called `io`, so this cell does not rebind the
# notebook-level name of the standard-library `io` module. UTF-8 is explicit
# so decoding does not depend on the platform's locale encoding.
with open("../project.json", encoding="utf-8") as project_json_file:

    PROJECT_JSON = json.load(project_json_file)

PATH = make_path_dict(PROJECT_JSON)

In [ ]:
# Tab-separated table whose first column becomes the row index.
# Presumably rows are targets and columns are samples (the rows are iterated
# as targets further down) — TODO confirm against the file that produces it.
target_x_sample = pd.read_csv(
    PATH["target_x_sample.tsv"],
    sep="\t",
    index_col=0,
)

In [ ]:
# Annotation groups handed unchanged to kraft.plot_scatter_and_annotate via
# its `annotation` keyword. There is a single group here.
annotation = (
    (
        "Peek",
        PROJECT_JSON["gene_sets_to_peek"],
        12,  # presumably a marker/font size — TODO confirm against kraft
        "#20d9ba",  # hex color string; presumably the highlight color — TODO confirm
    ),
)

In [ ]:
# For every target row, plot gene-set Score against significance (1 - P-Value):
# one "All" scatter over every row of the result table, then one scatter per
# gene-set collection file restricted to the gene sets found in that file.
for target_name, target_values in target_x_sample.iterrows():

    # -1 entries are dropped before counting; presumably -1 marks samples
    # without a label for this target — TODO confirm.
    target_values = target_values[target_values != -1]

    # Skip targets whose rarest remaining value occurs fewer than 2 times.
    # NOTE(review): if every entry was -1 the count table is empty, min() is
    # NaN and this comparison is False, so such a target is NOT skipped here.
    if target_values.value_counts().min() < 2:

        continue

    score_moe_p_value_fdr = pd.read_csv(
        os.path.join(
            PATH["find_differentially_expressed_gene_set/"], target_name, "all.tsv"
        ),
        sep="\t",
        index_col=0,
    )

    # Nothing to plot when no P-Value is available.
    if score_moe_p_value_fdr["P-Value"].isna().all():

        continue

    output_directory_path = os.path.join(
        PATH["compare_differentially_expressed_gene_set/"], target_name
    )

    kraft.establish_path(output_directory_path, "directory")

    x = score_moe_p_value_fdr["Score"]

    if x.isna().all():

        continue

    y_name = "Significance"

    # Built once per target and reused for the "All" plot and every
    # per-collection plot below.
    y = pd.Series(1 - score_moe_p_value_fdr["P-Value"], name=y_name)

    kraft.plot_scatter_and_annotate(
        x,
        y,
        "x",
        annotation=annotation,
        title_text="All",
        html_file_path=os.path.join(output_directory_path, "all.html"),
    )

    for gene_set_file_path in PROJECT_JSON["gene_set_file_paths"]:

        gene_sets_name = os.path.split(gene_set_file_path)[1]

        # Explicit set intersection of the two indexes. The `&` operator on
        # Index objects is deprecated as a set operation in pandas 1.x and is
        # no longer one in pandas 2.0.
        gene_sets = score_moe_p_value_fdr.index.intersection(
            kraft.read_gmt(gene_set_file_path).index
        )

        # Subset into a separate name. Rebinding `x` here would leave only the
        # previous collection's gene sets in `x`, so the next collection would
        # be looked up in an already-filtered series instead of the full one.
        gene_sets_x = x[gene_sets]

        if gene_sets_x.isna().all():

            continue

        kraft.plot_scatter_and_annotate(
            gene_sets_x,
            y[gene_sets],
            "x",
            annotation=annotation,
            title_text=gene_sets_name,
            html_file_path=os.path.join(
                output_directory_path, f"{gene_sets_name}.html"
            ),
        )