In [ ]:
import pandas as pd
from os import path
In [ ]:
# Load the SRA run table (pd.read_table reads tab-delimited text by default).
sra = pd.read_table("SraRunTable.txt")
# Fix sample names to use '_', not '.'.
# regex=False makes the '.' an explicit literal: as a regular expression it
# would match any character, and the default value of `regex` differs
# between pandas versions.
sra["Sample_Name_s"] = sra["Sample_Name_s"].str.replace('.', '_', regex=False)
In [ ]:
# Map each sample name to its SRA run accession. If a sample name occurs
# more than once in the table, the last row's run is the one kept.
name2run = dict(zip(sra.Sample_Name_s, sra.Run_s))
In [ ]:
def make_srr_list(name_set):
    """Write the SRA run IDs for the sample names listed in a file.

    ``name_set`` is the path of a text file holding one sample name per
    line; blank lines are skipped and trailing whitespace is removed.
    The run ID of each name (looked up in the module-level ``name2run``
    mapping) is written, one per line and in input order, to a file
    whose name is ``name_set`` with its extension replaced by
    ``.srr.txt``.

    Raises:
        KeyError: if any listed name is missing from ``name2run``. All
            names are checked before the output file is opened, so a
            failed call never leaves a partially written list behind.
    """
    outfile = path.splitext(name_set)[0] + '.srr.txt'

    with open(name_set) as infp:
        names = [name for name in (line.rstrip() for line in infp) if name]

    # Validate up front so the error can report every unknown sample at
    # once, together with the list file it came from.
    missing = [name for name in names if name not in name2run]
    if missing:
        raise KeyError(
            "{}: no SRA run found for sample name(s): {}".format(
                name_set, ", ".join(missing)
            )
        )

    with open(outfile, 'w') as outfp:
        for name in names:
            print(name2run[name], file=outfp)
In [ ]:
# Generate an .srr.txt run list for each sample-name list file.
for sample_list in ("field-expt.txt", "greenhouse.txt", "timecourse_ghdavis.txt"):
    make_srr_list(sample_list)
In [ ]:
# Dump the run -> sample-name table as tab-separated text with a header row.
with open("sra2name.tab", 'w') as fh:
    rows = [("runid", "name")]
    rows.extend((run, name) for name, run in name2run.items())
    for row in rows:
        print(*row, sep='\t', file=fh)