In [ ]:
from environment import *
# Load the project configuration and build PATH from it with make_path_dict.
# The file handle is deliberately not named `io`: that name would stay bound
# at module level and shadow the standard-library `io` module for the rest of
# the notebook. The encoding is explicit so that reading the JSON does not
# depend on the platform's default text encoding.
with open("../project.json", encoding="utf-8") as project_json_file:
    PROJECT_JSON = json.load(project_json_file)

PATH = make_path_dict(PROJECT_JSON)
In [ ]:
# Read the two tab-separated tables, using the first column of each as the
# index. The files are read in the order listed, so gene_x_sample comes from
# the first file and target_x_sample from the second.
gene_x_sample, target_x_sample = (
    pd.read_csv(PATH[file_name], sep="\t", index_col=0)
    for file_name in ("gene_x_sample.processed.tsv", "target_x_sample.tsv")
)
In [ ]:
def compute_log_ratio(logged_values, target):
    """
    Return mean(logged_values where target == 1) minus
    mean(logged_values where target == 0).

    Entries whose target is neither 1 nor 0 do not contribute to either mean.
    NOTE(review): the parameter name suggests the values are already
    log-transformed, in which case this difference of means is a log ratio;
    the function itself applies no logarithm.
    """

    is_1 = target == 1

    is_0 = target == 0

    mean_of_1 = logged_values[is_1].mean()

    mean_of_0 = logged_values[is_0].mean()

    return mean_of_1 - mean_of_0
In [ ]:
# For every target (row of target_x_sample), match the genes against the
# target with each match function, then make a panel of all genes, a panel of
# the genes to peek, and a plot of the sorted scores. Outputs go under
# PATH["find_differentially_expressed_gene/"]/<target name>.

# Keyword arguments shared by every kraft.make_match_panel call below.
make_match_panel_keyword_arguments = {
    "n_job": PROJECT_JSON["n_job"],
    "n_sampling": PROJECT_JSON["n_sampling"],
    "n_permutation": PROJECT_JSON["n_permutation"],
    "target_data_type": "binary",
    "plot_std": PROJECT_JSON["plot_std"],
}

genes_to_peek = PROJECT_JSON["genes_to_peek"]

# Each match function is paired with the minimum number of samples that both
# target classes (0 and 1) must have for that function to be run.
match_function_and_min_n = (
    (kraft.compute_information_correlation_between_2_vectors, 2),
    (compute_log_ratio, 1),
)

for target_name, target_values in target_x_sample.iterrows():

    # Drop the samples whose target value is -1.
    target_values = target_values[target_values != -1]

    output_directory_path = os.path.join(
        PATH["find_differentially_expressed_gene/"], target_name
    )

    kraft.establish_path(output_directory_path, "directory")

    # Count classes 0 and 1 explicitly. value_counts() lists only the values
    # that occur, so its minimum would ignore a class with no samples (and
    # would be NaN when no samples remain), letting the check below pass and
    # the match functions run on an empty class.
    target_value_min_n = min(
        (target_values == target_value).sum() for target_value in (0, 1)
    )

    for match_function, min_n in match_function_and_min_n:

        if target_value_min_n < min_n:

            continue

        file_path_prefix = os.path.join(
            output_directory_path, f"all.{match_function.__name__}"
        )

        # All genes. To reuse scores from an earlier run instead of
        # recomputing them, read f"{file_path_prefix}.tsv" (as is done right
        # after this call) and pass it here as score_moe_p_value_fdr=...
        kraft.make_match_panel(
            target_values,
            gene_x_sample,
            match_function=match_function,
            n_extreme=PROJECT_JSON["n_extreme"],
            title_text=f"All ({match_function.__name__})",
            file_path_prefix=file_path_prefix,
            **make_match_panel_keyword_arguments,
        )

        # NOTE(review): this relies on the call above having written
        # f"{file_path_prefix}.tsv" -- confirm against kraft.make_match_panel.
        score_moe_p_value_fdr = pd.read_csv(
            f"{file_path_prefix}.tsv", sep="\t", index_col=0
        )

        # Genes to peek only, passing the scores read above and no
        # n_extreme cut.
        kraft.make_match_panel(
            target_values,
            gene_x_sample.reindex(genes_to_peek),
            score_moe_p_value_fdr=score_moe_p_value_fdr,
            n_extreme=None,
            title_text=f"Peek ({match_function.__name__})",
            **make_match_panel_keyword_arguments,
        )

        # Sorted scores of all genes, with the genes to peek annotated.
        y = score_moe_p_value_fdr["Score"].sort_values()

        y.name = f"Score ({match_function.__name__})"

        kraft.plot_scatter_and_annotate(
            None,
            y,
            "y",
            annotation=(("Peek", genes_to_peek, 8, "#20d9ba"),),
            title_text=target_name,
            html_file_path=os.path.join(
                output_directory_path,
                f"{match_function.__name__}.plot_point_and_annotate.html",
            ),
        )