In [12]:
import errno
import os
import re
import shutil
import sys
import tempfile

import pandas
In [2]:
def pushToSynapse(syn, value, parentId, fileName = None):
    """
    Write a string to a temporary text file and store that file in Synapse.

    Args:
        syn: client object whose ``store`` method is used for the upload.
        value: text written into the uploaded file.
        parentId: forwarded as ``parentId`` to ``synapseclient.File``.
        fileName: name given to the stored entity; defaults to the basename
            of the generated temporary file.

    Returns:
        The ``id`` attribute of the object returned by ``syn.store``.
    """
    # delete=False: the file has to survive close() so it can be uploaded by path.
    temp_file = tempfile.NamedTemporaryFile(mode='w+t', prefix='command_line_used_',
                                            suffix='.txt', delete=False)
    try:
        # The context manager closes (and flushes) the handle even if write() fails.
        with temp_file:
            temp_file.write(value)
        if fileName is None:
            fileName = os.path.basename(temp_file.name)
        stored_entity = syn.store(
            synapseclient.File(temp_file.name, parentId=parentId, name=fileName))
        return stored_entity.id
    finally:
        # The local scratch copy is not needed once store() has returned or failed;
        # removing it keeps temp files from piling up on every call.
        os.remove(temp_file.name)
In [5]:
def create_dir(directory):
    """
    Make sure ``directory`` exists, creating intermediate directories as needed.

    Args:
        directory: path of the directory to create.

    Returns:
        ``directory``, unchanged, so the call can be used inline.

    Raises:
        OSError: if creation fails for any reason other than the path
            already existing.
    """
    try:
        # Attempt the creation directly instead of checking first: another
        # process may create the directory between an exists() check and
        # makedirs().
        os.makedirs(directory)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise
    return directory
In [6]:
def moveFile(orig_file, new_fileName, moveDir=None):
    """
    Move a file to ``new_fileName``, either beside the original or in ``moveDir``.

    Args:
        orig_file: path of the file to move.
        new_fileName: file name (not a full path) to give the moved file.
        moveDir: destination directory; when None the file stays in the
            directory of ``orig_file``.

    Returns:
        The destination path that was passed to ``shutil.move``.
    """
    # Previously moveDir was ignored and left the destination directory undefined.
    target_dir = os.path.dirname(orig_file) if moveDir is None else moveDir
    # os.path.join copes with an empty dirname (bare relative file name);
    # manual '/' concatenation would have pointed at the filesystem root.
    abs_new_fileName = os.path.join(target_dir, new_fileName)
    shutil.move(orig_file, abs_new_fileName)
    return abs_new_fileName
In [8]:
import random, string
def randomword(length):
    """
    Return a random string of ``length`` lowercase ASCII letters.

    Args:
        length: number of characters to generate; 0 yields an empty string.

    Returns:
        A string drawn with ``random.choice`` (not suitable for secrets).
    """
    # ascii_lowercase is locale-independent and available on Python 2 and 3,
    # unlike string.lowercase.
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(length))
In [ ]:
def pushToSynapse(value, parentId, fileName = None):
    """
    Write a string to a temporary text file and store that file in Synapse.

    This definition reuses the name of the earlier
    ``pushToSynapse(syn, value, parentId, fileName)`` in this notebook, so
    once this cell has run the earlier signature is no longer available.
    It relies on a module-level ``syn`` client being defined before the call.

    Args:
        value: text written into the uploaded file.
        parentId: forwarded as ``parentId`` to ``synapseclient.File``.
        fileName: name given to the stored entity; defaults to the basename
            of the generated temporary file.

    Returns:
        The ``id`` attribute of the object returned by ``syn.store``.
    """
    # delete=False: the file has to survive close() so it can be uploaded by path.
    temp_file = tempfile.NamedTemporaryFile(mode='w+t', prefix='command_line_used_',
                                            suffix='.txt', delete=False)
    try:
        # The context manager closes (and flushes) the handle even if write() fails.
        with temp_file:
            temp_file.write(value)
        if fileName is None:
            fileName = os.path.basename(temp_file.name)
        stored_entity = syn.store(
            synapseclient.File(temp_file.name, parentId=parentId, name=fileName))
        return stored_entity.id
    finally:
        # Remove the local scratch copy so repeated calls do not leak temp files.
        os.remove(temp_file.name)
In [9]:
def merge_htseq_counts(counts_files):
    """
    Read several tab-separated count files and join them column-wise by gene.

    Each file is read without a header as two columns: the first becomes the
    ``gene`` index, the second is named after the file path. The last five
    lines of every file are skipped (presumably the htseq-count summary
    rows; confirm against the htseq version that produced the files).

    Args:
        counts_files: iterable of file paths.

    Returns:
        A DataFrame indexed by gene with one column per input file.
        ``pandas.concat`` raises ``ValueError`` when ``counts_files`` is empty.
    """
    per_file_frames = [
        pandas.read_csv(
            count_file,
            sep='\t',
            header=None,
            # `skip_footer` was deprecated and later removed; `skipfooter` is the
            # supported spelling and is only implemented by the python engine.
            skipfooter=5,
            engine='python',
            names=['gene', count_file],
            index_col=['gene'],
        )
        for count_file in counts_files
    ]
    final_counts = pandas.concat(per_file_frames, axis=1)
    return final_counts
In [10]:
def get_FilesList(path,pattern=None):
    """
    Walk ``path`` recursively and collect the full paths of matching files.

    Args:
        path: directory to traverse with ``os.walk``.
        pattern: regular expression searched against each file's basename
            (not its full path). When None, every file is returned and a
            warning is written to stderr.

    Returns:
        A list of file paths (root joined with basename, surrounding
        whitespace stripped). The number of hits is also printed to stdout.
    """
    SUB = 'get_FilesList'
    if pattern is None:
        # Trailing newline keeps the warning from running into later stderr output.
        sys.stderr.write('[%s]: No pattern supplied..will return all the files\n' % (SUB))
        matcher = None
    else:
        # Compile once instead of re-resolving the pattern for every file.
        matcher = re.compile(pattern)

    found_files = []
    for root, _dirs, files in os.walk(path):
        for basename in files:
            if matcher is None or matcher.search(basename):
                found_files.append(str(os.path.join(root, basename)).strip())

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('[%s]: Found %d files at %s' % (SUB, len(found_files), path))
    return found_files
In [1]:
def get_markdown_for_pandas_DF(df, wikiEntity=None, subPageId=None, syn=None, prefixText=None, suffixText=None):
    """
    Render a DataFrame as a pipe-delimited markdown table and optionally
    store it as the markdown of a wiki.

    The index is turned into a regular column via ``reset_index`` and
    underscores in column labels are replaced with hyphens in the header row.

    Args:
        df: DataFrame to render.
        wikiEntity: owner passed to ``syn.getWiki``; must be given together
            with ``syn``.
        subPageId: forwarded to ``syn.getWiki`` as ``subpageId``.
        syn: client exposing ``getWiki`` and ``store``; must be given
            together with ``wikiEntity``.
        prefixText: optional text placed before the table, followed by a
            blank line.
        suffixText: optional text placed after the table.

    Returns:
        The generated markdown text. When both ``wikiEntity`` and ``syn``
        are supplied, the text is also written to the wiki's ``markdown``
        field and stored.

    Raises:
        ValueError: if only one of ``wikiEntity`` / ``syn`` is supplied.
    """
    # A half-specified target used to fall through to syn.getWiki and fail
    # obscurely; reject it up front, before any work is done.
    if (wikiEntity is None) != (syn is None):
        raise ValueError('wikiEntity and syn must be supplied together to update a wiki')

    df = df.reset_index()

    chunks = []
    if prefixText:
        chunks.append("%s\n\n" % prefixText)

    # '%s' formatting tolerates non-string column labels (e.g. integers).
    header_cells = [('%s' % (label,)).replace('_', '-') for label in df.columns.values]
    chunks.append("|%s|\n" % ('|'.join(header_cells)))
    chunks.append("|%s|\n" % ('|'.join(['--'] * len(header_cells))))

    for _, row in df.iterrows():
        chunks.append("|%s|\n" % ('|'.join(map(str, row.values))))

    if suffixText:
        chunks.append("%s\n" % suffixText)

    wikiText = ''.join(chunks)

    # No wiki target: just return the text.
    if wikiEntity is None:
        return wikiText

    wiki = syn.getWiki(wikiEntity, subpageId=subPageId)
    wiki['markdown'] = wikiText
    syn.store(wiki)
    return wikiText
In [9]:
def merge_htseq_counts(counts_files, sample_names=('adrenal', 'brain')):
    """
    Read several tab-separated count files, join them by gene, drop genes
    whose count is zero in every file, and rename the columns.

    This reuses the name of the earlier path-based ``merge_htseq_counts`` in
    this notebook and replaces it once this cell runs; plain path strings
    are therefore still accepted alongside objects exposing ``.path``.

    Each file is read without a header as two columns (gene, count) and its
    last five lines are skipped (presumably the htseq-count summary rows;
    confirm against the htseq version that produced the files).

    Args:
        counts_files: iterable of objects with a ``path`` attribute, or of
            plain file paths.
        sample_names: column names for the result, one per input file, in
            input order. Defaults to ``('adrenal', 'brain')``, the names
            that were previously hard-coded. Pass None to keep the file
            paths as column names.

    Returns:
        A DataFrame indexed by gene containing only rows that are non-zero
        in at least one file.

    Raises:
        ValueError: from pandas if ``counts_files`` is empty or the number
            of ``sample_names`` does not match the number of files.
    """
    count_paths = [getattr(entry, 'path', entry) for entry in counts_files]
    per_file_frames = [
        pandas.read_csv(
            count_path,
            sep='\t',
            header=None,
            # `skip_footer` was deprecated and later removed; `skipfooter` is the
            # supported spelling and is only implemented by the python engine.
            skipfooter=5,
            engine='python',
            names=['gene', count_path],
            index_col=['gene'],
        )
        for count_path in count_paths
    ]
    counts = pandas.concat(per_file_frames, axis=1)
    # Keep genes with a non-zero count in at least one file; copy so the
    # column rename below acts on an independent frame.
    final_counts = counts[~(counts == 0).all(axis=1)].copy()
    if sample_names is not None:
        final_counts.columns = list(sample_names)
    return final_counts
In [4]:
def temp_create_wiki(syn, counts, plot, rna_seq_proj, counts_syn):
    """
    Store a plot file under the project and build a project wiki that shows
    the counts table, the plot and a provenance widget.

    Args:
        syn: client whose ``store`` method is used for the plot and the wiki.
        counts: DataFrame rendered via ``get_markdown_for_pandas_DF``.
        plot: path handed to ``synapseclient.File`` for upload.
        rna_seq_proj: project object; its ``id`` is the plot's parent and
            the object itself is the wiki owner.
        counts_syn: object whose ``id`` is recorded as ``used`` for the plot
            and referenced by the provenance widget.

    Returns:
        The object returned by ``syn.store`` for the wiki.
    """
    # Upload the plot, recording the counts entity as its provenance.
    plot_entity = syn.store(
        synapseclient.File(plot, parentId=rna_seq_proj.id),
        used=counts_syn.id,
    )

    project_wiki = synapseclient.Wiki(title='My first RNA-Seq project', owner=rna_seq_proj)

    # Page template; the placeholders are filled in below.
    template = """
###Raw read counts:
{counts}
###Genes with difference in counts:
{plot}
###Complete Analysis Provenance
{provenance}
"""
    sections = {
        'counts': get_markdown_for_pandas_DF(counts),
        'plot': '${image?synapseId=%s&align=None&scale=100}' % plot_entity.id,
        'provenance': '${provenance?entityList=%s&depth=4&showExpand=false&displayHeightPx=1000}' % counts_syn.id,
    }
    project_wiki['markdown'] = template.format(**sections)
    return syn.store(project_wiki)
In [ ]: