In [ ]:
from environment import *

# Load the project-wide settings. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's default encoding. The handle gets a
# descriptive name so it does not leave a global called `io` (the name of a
# standard-library module) behind in the notebook namespace.
with open("../project.json", encoding="utf-8") as project_json_file:

    PROJECT_JSON = json.load(project_json_file)

# Lookup of file / directory names to paths, derived from the project settings.
# NOTE(review): make_path_dict is presumably supplied by the star import from
# `environment` — confirm.
PATH = make_path_dict(PROJECT_JSON)

In [ ]:
# Both tables are tab-separated with their row labels in the first column, so
# they are read with the same settings, in the same order as before.
gene_x_sample, target_x_sample = (
    pd.read_csv(PATH[table_file_name], sep="\t", index_col=0)
    for table_file_name in ("gene_x_sample.processed.tsv", "target_x_sample.tsv")
)

In [ ]:
def compute_log_ratio(logged_values, target):
    """Return mean(values where target == 1) minus mean(values where target == 0).

    Both arguments must support element-wise comparison and boolean-mask
    indexing (e.g. NumPy arrays or aligned pandas Series). Entries whose target
    is neither 0 nor 1 are ignored.

    The name suggests the values are expected to be log-transformed already, in
    which case the difference of the two means is a log ratio between the
    groups — the function itself does not take any logarithm.
    """

    is_group_1 = target == 1

    group_1_mean = logged_values[is_group_1].mean()

    is_group_0 = target == 0

    group_0_mean = logged_values[is_group_0].mean()

    return group_1_mean - group_0_mean

In [ ]:
# Keyword arguments shared by every kraft.make_match_panel call in this
# notebook. Everything comes from the project settings except the target data
# type, which is fixed to "binary".
make_match_panel_keyword_arguments = dict(
    n_job=PROJECT_JSON["n_job"],
    n_sampling=PROJECT_JSON["n_sampling"],
    n_permutation=PROJECT_JSON["n_permutation"],
    target_data_type="binary",
    plot_std=PROJECT_JSON["plot_std"],
)

# For every target (one row of target_x_sample): score all genes against the
# target with each match function, draw the "All" and "Peek" match panels, and
# save a scatter plot of the sorted scores.
for target_name, target_row in target_x_sample.iterrows():

    # Drop samples whose target value is -1.
    # NOTE(review): -1 is presumably the "not labelled" sentinel — confirm.
    target_values = target_row[target_row != -1]

    output_directory_path = os.path.join(
        PATH["find_differentially_expressed_gene/"], target_name
    )

    kraft.establish_path(output_directory_path, "directory")

    # Size of the smaller of the two classes (0 and 1). Counting each class
    # explicitly makes an absent class count as 0; value_counts().min() only
    # sees classes that are present, so a single-class (or empty) target used
    # to slip past the guard below.
    target_value_min_n = min((target_values == 0).sum(), (target_values == 1).sum())

    # Each match function is paired with the minimum number of samples it needs
    # in every class.
    for match_function, min_n in (
        (kraft.compute_information_correlation_between_2_vectors, 2),
        (compute_log_ratio, 1),
    ):

        if target_value_min_n < min_n:

            continue

        file_path_prefix = os.path.join(
            output_directory_path, f"all.{match_function.__name__}"
        )

        # Score every gene. The scores are read back from
        # f"{file_path_prefix}.tsv" right below, so this call is expected to
        # write that file.
        kraft.make_match_panel(
            target_values,
            gene_x_sample,
            match_function=match_function,
            n_extreme=PROJECT_JSON["n_extreme"],
            title_text=f"All ({match_function.__name__})",
            file_path_prefix=file_path_prefix,
            **make_match_panel_keyword_arguments,
        )

        score_moe_p_value_fdr = pd.read_csv(
            f"{file_path_prefix}.tsv", sep="\t", index_col=0
        )

        # Panel restricted to the genes of interest, passing the scores loaded
        # above instead of a match function.
        kraft.make_match_panel(
            target_values,
            gene_x_sample.reindex(PROJECT_JSON["genes_to_peek"]),
            score_moe_p_value_fdr=score_moe_p_value_fdr,
            n_extreme=None,
            title_text=f"Peek ({match_function.__name__})",
            **make_match_panel_keyword_arguments,
        )

        # sort_values returns a new Series, so naming it does not modify the
        # loaded table.
        sorted_scores = score_moe_p_value_fdr["Score"].sort_values()

        sorted_scores.name = f"Score ({match_function.__name__})"

        kraft.plot_scatter_and_annotate(
            None,
            sorted_scores,
            "y",
            annotation=(("Peek", PROJECT_JSON["genes_to_peek"], 8, "#20d9ba"),),
            title_text=target_name,
            html_file_path=os.path.join(
                output_directory_path,
                f"{match_function.__name__}.plot_point_and_annotate.html",
            ),
        )