In [ ]:
from environment import *
# Load the project configuration. JSON text is UTF-8, so decode it explicitly
# rather than relying on the platform's default encoding. The handle is
# deliberately not called `io`, so the standard-library module of that name
# is never shadowed in the notebook namespace.
# NOTE(review): `json` and `make_path_dict` are presumably provided by
# `from environment import *` -- confirm.
with open("../project.json", encoding="utf-8") as project_json_file:
    PROJECT_JSON = json.load(project_json_file)

# Path lookup derived from the configuration; later cells index it by name.
PATH = make_path_dict(PROJECT_JSON)
In [ ]:
# For every sample, read the "tpm" column of its abundance.tsv (first column
# used as the index) and label the resulting Series with the sample name.
enst_tpms = [
    pd.read_csv(
        os.path.join(PATH["count_transcript/"], sample_name, "abundance.tsv"),
        sep="\t",
        index_col=0,
    )["tpm"].rename(sample_name)
    for sample_name in PROJECT_JSON["sample_names"]
]

# Place the per-sample Series side by side: one column per sample.
enst_x_sample = pd.concat(enst_tpms, axis=1)
enst_x_sample.index.name = "ENST"
enst_x_sample.to_csv(PATH["enst_x_sample.tsv"], sep="\t")

# Per-sample count of rows whose value is non-zero.
print(enst_x_sample.ne(0).sum())

# Bare expression so the notebook renders the table as the cell output.
enst_x_sample
In [ ]:
# Annotation table providing the "Transcript stable ID version" and
# "Gene name" columns used below.
enst_gene_name = pd.read_csv(PROJECT_JSON["enst_gene_name_file_path"], sep="\t")

# Transcript ID -> upper-cased gene name. Built as a plain dict, so when a
# transcript ID occurs more than once the last occurrence wins.
enst_to_gene = dict(
    zip(
        enst_gene_name["Transcript stable ID version"],
        enst_gene_name["Gene name"].str.upper(),
    )
)

# Relabel a copy of the transcript table by gene name; index labels missing
# from the mapping become NaN.
gene_x_sample = enst_x_sample.copy()
gene_x_sample.index = gene_x_sample.index.map(enst_to_gene)

# Keep only rows that received a gene name.
has_gene_name = gene_x_sample.index.notna()
gene_x_sample = gene_x_sample.loc[has_gene_name]

# Zeros are turned into NaN so they are skipped by the median; a gene whose
# rows are all zero in a sample therefore ends up as NaN for that sample.
gene_x_sample = gene_x_sample.replace(0, np.nan)
gene_x_sample = gene_x_sample.groupby(level=0).median()

gene_x_sample.index.name = "Gene"
gene_x_sample.to_csv(PATH["gene_x_sample.tsv"], sep="\t")

# Bare expression so the notebook renders the table as the cell output.
gene_x_sample