In [ ]:
from environment import *

# Load the project configuration; the later cells read their settings from it.
# The handle is deliberately not named `io` (that would shadow the standard-library
# module name for the rest of the notebook), and the encoding is explicit so the
# JSON is decoded the same way regardless of the platform's default locale.
with open("../project.json", encoding="utf-8") as project_json_file:

    PROJECT_JSON = json.load(project_json_file)

# Lookup built from the configuration; later cells index it by file or directory
# name to get the corresponding path.
PATH = make_path_dict(PROJECT_JSON)

In [ ]:
# Both tables are tab-separated and use their first column as the row index.
target_x_sample, gene_set_x_sample = (
    pd.read_csv(PATH[table_file_name], sep="\t", index_col=0)
    for table_file_name in ("target_x_sample.tsv", "gene_set_x_sample.tsv")
)

In [ ]:
# Keyword arguments shared by every kraft.make_match_panel call below.
# The first three settings are copied unchanged from the project configuration.
make_match_panel_keyword_arguments = {
    **{
        setting_name: PROJECT_JSON[setting_name]
        for setting_name in ("n_job", "n_sampling", "n_permutation")
    },
    "target_data_type": "binary",
    "plot_std": PROJECT_JSON["plot_std"],
}

# The match function is the same for every target, so it is resolved once here
# instead of on every iteration.
match_function = kraft.compute_information_correlation_between_2_vectors

# Read each gene-set collection once, as (file name, gene-set index) pairs, rather
# than re-reading the same GMT files for every target.
gene_set_collections = [
    (os.path.split(gene_set_file_path)[1], kraft.read_gmt(gene_set_file_path).index)
    for gene_set_file_path in PROJECT_JSON["gene_set_file_paths"]
]

for target_name, target_values in target_x_sample.iterrows():

    # Leave out the samples whose value for this target is -1.
    target_values = target_values[target_values != -1]

    class_sizes = target_values.value_counts()

    # A binary target needs two classes with at least 2 samples each. Checking only
    # the smallest class is not enough: a target left with a single class passes
    # that check, and so does a target with no samples at all (min() of an empty
    # Series is NaN, and NaN < 2 is False).
    if len(class_sizes) < 2 or class_sizes.min() < 2:

        continue

    output_directory_path = os.path.join(
        PATH["find_differentially_expressed_gene_set/"], target_name
    )

    kraft.establish_path(output_directory_path, "directory")

    file_path_prefix = os.path.join(output_directory_path, "all")

    # Panel 1: match every gene set against the target.
    kraft.make_match_panel(
        target_values,
        gene_set_x_sample,
        match_function=match_function,
        n_extreme=PROJECT_JSON["n_extreme"],
        title_text="All",
        file_path_prefix=file_path_prefix,
        **make_match_panel_keyword_arguments,
    )

    # Read the score table back from f"{file_path_prefix}.tsv" so the remaining
    # panels reuse it.
    # NOTE(review): this relies on the call above having written that file, and on
    # make_match_panel skipping recomputation when score_moe_p_value_fdr is passed
    # -- confirm against kraft.
    score_moe_p_value_fdr = pd.read_csv(
        f"{file_path_prefix}.tsv", sep="\t", index_col=0
    )

    # Panel 2..n: one panel per gene-set collection, restricted to its gene sets.
    for gene_set_name, gene_set_index in gene_set_collections:

        kraft.make_match_panel(
            target_values,
            gene_set_x_sample.reindex(index=gene_set_index),
            score_moe_p_value_fdr=score_moe_p_value_fdr,
            n_extreme=PROJECT_JSON["n_extreme"],
            title_text=gene_set_name,
            file_path_prefix=os.path.join(output_directory_path, gene_set_name),
            **make_match_panel_keyword_arguments,
        )

    # "Peek" panel: only the configured gene sets, with n_extreme disabled. No
    # file_path_prefix is passed for this panel.
    kraft.make_match_panel(
        target_values,
        gene_set_x_sample.reindex(index=PROJECT_JSON["gene_sets_to_peek"]),
        score_moe_p_value_fdr=score_moe_p_value_fdr,
        n_extreme=None,
        title_text="Peek",
        **make_match_panel_keyword_arguments,
    )

    # Plot the sorted scores with the peeked gene sets annotated, saved as HTML in
    # this target's output directory.
    kraft.plot_scatter_and_annotate(
        None,
        score_moe_p_value_fdr["Score"].sort_values(),
        "y",
        annotation=(("Peek", PROJECT_JSON["gene_sets_to_peek"], 8, "#20d9ba"),),
        title_text=target_name,
        html_file_path=os.path.join(
            output_directory_path,
            f"{match_function.__name__}.plot_point_and_annotate.html",
        ),
    )