In [ ]:
from environment import *

# Load the project configuration. The handle is deliberately not named `io`,
# so the standard-library module of that name is not shadowed in the kernel
# namespace, and the encoding is explicit because open() otherwise falls back
# to the platform's locale encoding.
with open("../project.json", encoding="utf-8") as project_json_file:
    PROJECT_JSON = json.load(project_json_file)

# Path lookup table built from the configuration; later cells index it by
# file name (e.g. PATH["gene_x_sample.tsv"]).
PATH = make_path_dict(PROJECT_JSON)

In [ ]:
# Read the tab-separated gene-by-sample table, using the first column as the
# row index, and label the column axis "Sample".
gene_x_sample = pd.read_csv(
    PATH["gene_x_sample.tsv"], sep="\t", index_col=0
).rename_axis(columns="Sample")

In [ ]:
# Process the table with kraft. Each option is read from the PROJECT_JSON key
# of the same name and forwarded as a keyword argument, in the listed order.
gene_x_sample__processed = kraft.process_feature_x_sample(
    gene_x_sample,
    **{
        option: PROJECT_JSON[option]
        for option in (
            "features_to_drop",
            "samples_to_drop",
            "nanize",
            "drop_axis",
            "max_na",
            "min_n_not_na_value",
            "min_n_not_na_unique_value",
            "shift_as_necessary_to_achieve_min_before_logging",
            "log_base",
            "normalization_axis",
            "normalization_method",
            "clip_min",
            "clip_max",
            "plot_heat_map_max_size",
            "plot_histogram_max_size",
            "plot_rug_max_size",
        )
    },
)

# Write the processed table out as tab-separated text.
gene_x_sample__processed.to_csv(PATH["gene_x_sample.processed.tsv"], sep="\t")

# Last expression of the cell: displays the processed table.
gene_x_sample__processed

In [ ]:
# Plot one histogram trace per column of the processed table, with each trace
# named after its column.
kraft.plot_plotly_figure(
    dict(
        data=[
            dict(type="histogram", name=sample_name, x=sample_values)
            for sample_name, sample_values in gene_x_sample__processed.items()
        ],
        layout=dict(
            title=dict(text="Distribution of Sample Values"),
            xaxis=dict(title="Value"),
        ),
    )
)