In [2]:
import pandas as pd
import os
import re
In [3]:
# Where is the shortstack directory.
# The location can be overridden with the SHORTSTACK_DIR environment variable so the
# notebook is not tied to one machine; the original absolute path remains the default.
shortstack_dir = os.environ.get(
    "SHORTSTACK_DIR",
    "/Users/mgalland/SURFdrive/trichome_team/06.results_from_xp/sRNA-Seq/20170117_srnadesc/shortstack/",
)
# The sample listing further down is built from os.getcwd(), so the working
# directory itself has to point at the ShortStack output folder.
os.chdir(shortstack_dir)
In [4]:
# Get all ShortStack directories (sample list).
# A sample directory name contains upper-case letters followed by digits.
# (The previous character class "[A-Z+]" also accepted a literal "+" sign.)
pattern = re.compile("[A-Z]+[0-9]+")
# Store the path of each sample's Results.txt in a list.
# - os.listdir() returns entries in arbitrary order, so they are sorted to make the
#   positions in this list stable between runs.
# - Only directories are kept: a plain file whose name happens to match the pattern
#   has no Results.txt underneath it.
files = [
    os.path.join(os.getcwd(), f, "Results.txt")
    for f in sorted(os.listdir("./"))
    if pattern.search(f) and os.path.isdir(f)
]
# print first elements
files[0:2]
Out[4]:
In [5]:
# Load every Results.txt listed in `files` into its own DataFrame;
# the resulting list keeps the same order as `files`.
shortstack_res = list(map(pd.read_table, files))
# Preview the top rows of the first sample's table
shortstack_res[0].head()
Out[5]:
In [6]:
# Reduce each sample table to a one-column DataFrame holding only "MajorRNA";
# the list name is reused, so the full tables are no longer reachable through it.
shortstack_res = [sample.loc[:, ["MajorRNA"]] for sample in shortstack_res]
# Preview the reduced first sample
shortstack_res[0].head()
Out[6]:
In [11]:
# Outer-join the first two samples on their "MajorRNA" column.
# Only positions 0 and 1 of shortstack_res are combined in this cell.
# NOTE(review): if a MajorRNA value occurs more than once within a sample, an outer
# merge repeats rows for every pairing of the duplicates - confirm this is intended.
df = shortstack_res[0].merge(shortstack_res[1], on="MajorRNA", how="outer")
# Banner followed by a plain-text preview of the merged table
for banner_line in ("\n", "#############################", "First merge"):
    print(banner_line)
print(df.head())
# Report the row count of the merged table
rows, columns = df.shape
print("It contains {0} number of lines".format(rows))
print("##############################")
In [21]:
# Distinct MajorRNA values of the merged table, in order of first appearance.
# drop_duplicates() replaces list(set(...)): the iteration order of a set of strings
# changes between interpreter runs, which made the slice below pick different entries
# every time the kernel was restarted.
# NOTE(review): a missing value (NaN) is kept once by drop_duplicates(); confirm
# whether it should be removed altogether.
l = df["MajorRNA"].drop_duplicates().tolist()
# Keep the entries at positions 1 through 9 (position 0 is skipped, as before).
l = l[1:10]
In [20]:
In [ ]: