In [ ]:
from environment import *

# Load the project configuration; later cells read "sample_names" and
# "enst_gene_name_file_path" from it.
#
# The handle is deliberately not called `io`: a notebook-level variable with
# that name would shadow the standard-library `io` module for every later cell.
# The encoding is pinned to UTF-8 so that parsing does not depend on the
# locale's default encoding.
with open("../project.json", encoding="utf-8") as project_json_file:

    PROJECT_JSON = json.load(project_json_file)

# PATH is indexed by string keys in the cells below (e.g. "count_transcript/",
# "enst_x_sample.tsv"). NOTE(review): make_path_dict presumably comes from
# `environment` and derives these paths from the configuration -- confirm.
PATH = make_path_dict(PROJECT_JSON)

In [ ]:
# Build a transcript-by-sample table: for every configured sample, read the
# "tpm" column of its abundance.tsv (indexed by the file's first column) and
# label that column with the sample name.
# NOTE(review): abundance.tsv looks like transcript-quantifier output -- confirm
# which tool produces it.
enst_tpms = [
    pd.read_csv(
        os.path.join(PATH["count_transcript/"], sample_name, "abundance.tsv"),
        sep="\t",
        index_col=0,
    )["tpm"].rename(sample_name)
    for sample_name in PROJECT_JSON["sample_names"]
]

# Align the per-sample columns side by side on the transcript index.
enst_x_sample = pd.concat(enst_tpms, axis=1)

enst_x_sample.index.name = "ENST"

# Persist the table as a tab-separated file.
enst_x_sample.to_csv(PATH["enst_x_sample.tsv"], sep="\t")

# Per sample, the number of entries that are not equal to zero.
print(enst_x_sample.ne(0).sum())

enst_x_sample

In [ ]:
# Collapse the transcript-by-sample table to a gene-by-sample table.

# Lookup table with (at least) the columns "Transcript stable ID version" and
# "Gene name".
enst_gene_name = pd.read_csv(PROJECT_JSON["enst_gene_name_file_path"], sep="\t")

# Transcript ID -> upper-cased gene name.
enst_to_gene = dict(
    zip(
        enst_gene_name["Transcript stable ID version"],
        enst_gene_name["Gene name"].str.upper(),
    )
)

# Work on a copy so that enst_x_sample itself keeps its transcript index.
gene_x_sample = enst_x_sample.copy()

gene_x_sample.index = gene_x_sample.index.map(enst_to_gene)

# Keep only the rows whose transcript mapped to a gene name.
has_gene = ~gene_x_sample.index.isna()

gene_x_sample = gene_x_sample.loc[has_gene]

# Zeros become NaN before aggregating, so each gene's value is the median over
# its non-zero transcripts only; a gene with no non-zero transcript in a sample
# ends up as NaN there.
# NOTE(review): median of non-zero transcripts (rather than e.g. a sum) is what
# the code does -- confirm this is the intended gene-level summary.
gene_x_sample = gene_x_sample.replace(0, np.nan)

gene_x_sample = gene_x_sample.groupby(level=0).median()

gene_x_sample.index.name = "Gene"

# Persist the table as a tab-separated file.
gene_x_sample.to_csv(PATH["gene_x_sample.tsv"], sep="\t")

gene_x_sample