In [ ]:
from environment import *
# Load the project configuration from ../project.json (path is relative to the
# working directory) and derive PATH from it with make_path_dict.
# The encoding is pinned to UTF-8 so the read does not depend on the platform's
# default locale encoding. The handle is deliberately not named `io`, so it
# cannot shadow the standard-library module of that name if the star import
# above provides it.
with open("../project.json", encoding="utf-8") as project_json_file:
    PROJECT_JSON = json.load(project_json_file)

PATH = make_path_dict(PROJECT_JSON)
In [ ]:
# Read the tab-separated table stored at PATH["gene_x_sample.tsv"], using its
# first column as the row index, and label the column axis "Sample".
gene_x_sample = pd.read_csv(
    PATH["gene_x_sample.tsv"],
    sep="\t",
    index_col=0,
).rename_axis("Sample", axis="columns")
In [ ]:
# Run kraft.process_feature_x_sample on the loaded table. Every processing
# option is read from PROJECT_JSON under a key identical to the keyword name,
# so the keywords are built from the list of names (order preserved).
gene_x_sample__processed = kraft.process_feature_x_sample(
    gene_x_sample,
    **{
        option: PROJECT_JSON[option]
        for option in (
            "features_to_drop",
            "samples_to_drop",
            "nanize",
            "drop_axis",
            "max_na",
            "min_n_not_na_value",
            "min_n_not_na_unique_value",
            "shift_as_necessary_to_achieve_min_before_logging",
            "log_base",
            "normalization_axis",
            "normalization_method",
            "clip_min",
            "clip_max",
            "plot_heat_map_max_size",
            "plot_histogram_max_size",
            "plot_rug_max_size",
        )
    },
)

# Write the processed result as a tab-separated file.
gene_x_sample__processed.to_csv(
    PATH["gene_x_sample.processed.tsv"],
    sep="\t",
)

# Bare trailing expression: the notebook displays it as the cell output.
gene_x_sample__processed
In [ ]:
# Plot one histogram trace per column of the processed table; each trace is
# named after its column label and uses that column's values as x.
# The x-axis title uses the {"text": ...} object form, matching the figure
# title, instead of the deprecated bare-string shorthand.
kraft.plot_plotly_figure(
    {
        "data": [
            {"type": "histogram", "name": sample_name, "x": sample_values}
            for sample_name, sample_values in gene_x_sample__processed.items()
        ],
        "layout": {
            "title": {"text": "Distribution of Sample Values"},
            "xaxis": {"title": {"text": "Value"}},
        },
    }
)