Utility functions mainly used for Synapse demo


In [12]:
import os
import re
import shutil
import sys
import tempfile

import pandas

In [2]:
def pushToSynapse(syn, value, parentId, fileName=None):
    """
    Write a string to a temporary text file and store that file in Synapse.

    Args:
        syn: object whose ``store`` method uploads the entity (presumably a
            logged-in ``synapseclient.Synapse`` client -- confirm with callers).
        value: text written to the temporary file.
        parentId: forwarded as ``parentId`` to ``synapseclient.File``.
        fileName: name given to the stored entity; when None, the base name
            of the temporary file is used.

    Returns:
        The ``id`` attribute of the object returned by ``syn.store``.

    Note:
        ``synapseclient`` must be available in this module's namespace.
    """
    # delete=False keeps the file on disk after the handle is closed so it can
    # still be uploaded below; the ``with`` block guarantees the handle is
    # closed (and the text flushed) even if the write fails.
    # The temporary file is not removed by this function.
    with tempfile.NamedTemporaryFile(mode='w+t', prefix='command_line_used_',
                                     suffix='.txt', delete=False) as temp_file:
        temp_file.write(value)

    if fileName is None:
        fileName = os.path.basename(temp_file.name)

    stored_entity = syn.store(
        synapseclient.File(temp_file.name, parentId=parentId, name=fileName))
    return stored_entity.id

In [5]:
def create_dir(directory):
    """
    Make sure ``directory`` exists, creating missing parents as needed.

    Args:
        directory: path of the directory to create.

    Returns:
        ``directory``, unchanged, so the call can be used inline.

    Raises:
        OSError: if the directory could not be created and nothing exists
            at that path afterwards.
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Creating first and checking afterwards avoids the race where another
        # process creates the path between an existence check and makedirs.
        # An already existing path is not an error (same as before).
        if not os.path.exists(directory):
            raise
    return directory

In [6]:
def moveFile(orig_file, new_fileName, moveDir=None):
    """
    Move (rename) a file and return its new path.

    Args:
        orig_file: path of the file to move.
        new_fileName: file name (without directory) the file should get.
        moveDir: directory to move the file into; when None the file stays
            in the directory of ``orig_file``.

    Returns:
        The path the file was moved to.
    """
    # Previously the target directory was only set when moveDir was None, so
    # passing moveDir failed on an unbound local.
    target_dir = os.path.dirname(orig_file) if moveDir is None else moveDir

    # os.path.join copes with an empty directory part (bare file name) and
    # with a trailing separator, which plain '/' concatenation did not.
    abs_new_fileName = os.path.join(target_dir, new_fileName)
    shutil.move(orig_file, abs_new_fileName)
    return abs_new_fileName

In [8]:
import random, string

def randomword(length):
    """
    Return a random string of ``length`` lowercase ASCII letters.

    Uses the ``random`` module, so the result is not suitable for anything
    security sensitive.
    """
    # string.ascii_lowercase exists on Python 2 and 3 and does not depend on
    # the locale, unlike the Python 2 only string.lowercase.
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(length))

In [ ]:
def pushToSynapse(value, parentId, fileName=None):
    """
    Write a string to a temporary text file and store that file in Synapse.

    This variant takes no client argument: it uses a module-level name
    ``syn`` for the upload, and ``synapseclient`` must also be available in
    this module's namespace.

    NOTE(review): an earlier definition in this file has the same name but
    takes the client as its first argument; whichever is defined last wins.

    Args:
        value: text written to the temporary file.
        parentId: forwarded as ``parentId`` to ``synapseclient.File``.
        fileName: name given to the stored entity; when None, the base name
            of the temporary file is used.

    Returns:
        The ``id`` attribute of the object returned by ``syn.store``.
    """
    # delete=False keeps the file on disk after the handle is closed so it can
    # still be uploaded below; the ``with`` block guarantees the handle is
    # closed (and the text flushed) even if the write fails.
    # The temporary file is not removed by this function.
    with tempfile.NamedTemporaryFile(mode='w+t', prefix='command_line_used_',
                                     suffix='.txt', delete=False) as temp_file:
        temp_file.write(value)

    if fileName is None:
        fileName = os.path.basename(temp_file.name)

    stored_entity = syn.store(
        synapseclient.File(temp_file.name, parentId=parentId, name=fileName))
    return stored_entity.id

In [9]:
def merge_htseq_counts(counts_files):
    """
    Read several tab-separated count files and join them column-wise.

    Each file is read without a header as two columns: the first becomes the
    ``gene`` index, the second is named after the file path. The last five
    lines of every file are skipped (presumably the summary rows that
    htseq-count appends -- confirm against the input files).

    Args:
        counts_files: iterable of file paths.

    Returns:
        A DataFrame indexed by gene with one column per input file.

    Raises:
        ValueError: from ``pandas.concat`` when ``counts_files`` is empty.
    """
    # ``skipfooter`` replaces the removed ``skip_footer`` keyword; it is only
    # supported by the python parser engine, so that engine is requested
    # explicitly instead of relying on a fallback warning.
    dfs = [
        pandas.read_csv(count_file, sep='\t', header=None,
                        names=['gene', count_file], index_col=['gene'],
                        skipfooter=5, engine='python')
        for count_file in counts_files
    ]
    return pandas.concat(dfs, axis=1)

In [10]:
def get_FilesList(path,pattern=None):
    """
    Walk ``path`` recursively and collect the files whose base name matches.

    Args:
        path: directory to traverse with ``os.walk``.
        pattern: regular expression (string or compiled) searched for in each
            file's base name; when None every file is returned and a notice
            is written to stderr.

    Returns:
        List of paths (``os.path.join(root, basename)``) of matching files,
        in the order ``os.walk`` yields them. A summary line is printed to
        stdout.
    """
    SUB = 'get_FilesList'
    if pattern is None:
        sys.stderr.write('[%s]: No pattern supplied..will return all the files\n' % (SUB))
        matcher = None
    else:
        # Compile once instead of handing the pattern to re.search per file.
        matcher = re.compile(pattern)

    found_files = []
    for root, _dirs, files in os.walk(path):
        for basename in files:
            if matcher is None or matcher.search(basename):
                # Paths are kept exactly as joined: stripping whitespace would
                # corrupt names that really end in whitespace.
                found_files.append(os.path.join(root, basename))

    # Single-argument call form works as a statement on Python 2 and as a
    # function call on Python 3.
    print('[%s]: Found %d files at %s' % (SUB, len(found_files), path))
    return found_files

In [1]:
def get_markdown_for_pandas_DF(df, wikiEntity=None, subPageId=None, syn=None, prefixText=None, suffixText=None):
    """
    Render a DataFrame as a pipe-delimited markdown table.

    The index is turned into a regular column first (``reset_index``), and
    underscores in column labels are replaced by dashes in the header row.

    Args:
        df: DataFrame to render.
        wikiEntity: passed to ``syn.getWiki`` to locate the wiki to update.
        subPageId: passed to ``syn.getWiki`` as ``subpageId``.
        syn: object providing ``getWiki`` and ``store``; when given together
            with ``wikiEntity`` the wiki's markdown is replaced by the table.
        prefixText: optional text placed before the table, followed by a
            blank line.
        suffixText: optional text placed after the table.

    Returns:
        The generated markdown text (also when the wiki was updated).

    Raises:
        ValueError: if only one of ``wikiEntity`` and ``syn`` is supplied.
    """
    # Updating a wiki needs both the client and the entity; fail early with a
    # clear message instead of an attribute error on None further down.
    if (wikiEntity is None) != (syn is None):
        raise ValueError('wikiEntity and syn must be supplied together')

    df = df.reset_index()
    parts = []
    if prefixText:
        parts.append("%s\n\n" % prefixText)

    # str() lets non-string column labels (e.g. integers) be rendered too.
    header = [str(name).replace('_', '-') for name in df.columns.values]
    parts.append("|%s|\n" % ('|'.join(header)))
    parts.append("|%s|\n" % ('|'.join(['--'] * df.shape[1])))

    for _, row in df.iterrows():
        parts.append("|%s|\n" % ('|'.join(map(str, row.values))))
    if suffixText:
        parts.append("%s\n" % suffixText)
    wikiText = ''.join(parts)

    if syn is not None:
        wiki = syn.getWiki(wikiEntity, subpageId=subPageId)
        wiki['markdown'] = wikiText
        syn.store(wiki)
    return wikiText

In [9]:
def merge_htseq_counts(counts_files, sample_names=('adrenal', 'brain')):
    """
    Merge count files into one table, dropping genes that are zero everywhere.

    Each element of ``counts_files`` must expose the file location as a
    ``path`` attribute. Every file is read as tab-separated, header-less,
    two-column data (gene, count) with its last five lines skipped
    (presumably the summary rows that htseq-count appends -- confirm against
    the input files).

    NOTE(review): an earlier definition in this file has the same name but
    takes plain file paths; whichever is defined last wins.

    Args:
        counts_files: iterable of objects with a ``path`` attribute.
        sample_names: column labels for the result, one per input file, in
            input order. Defaults to the two labels that were previously
            hard-coded. Pass None to keep the file paths as column labels.

    Returns:
        A DataFrame indexed by gene with one column per input file,
        containing only rows where at least one count is non-zero.

    Raises:
        ValueError: from pandas when ``counts_files`` is empty or when the
            number of ``sample_names`` differs from the number of files.
    """
    paths = [entity.path for entity in counts_files]

    # ``skipfooter`` replaces the removed ``skip_footer`` keyword; it is only
    # supported by the python parser engine.
    dfs = [
        pandas.read_csv(count_file, sep='\t', header=None,
                        names=['gene', count_file], index_col=['gene'],
                        skipfooter=5, engine='python')
        for count_file in paths
    ]
    counts = pandas.concat(dfs, axis=1)

    # Keep only genes with a non-zero count in at least one file.
    final_counts = counts[~(counts == 0).all(axis=1)]
    if sample_names is not None:
        final_counts.columns = list(sample_names)
    return final_counts

In [4]:
def temp_create_wiki(syn, counts, plot, rna_seq_proj, counts_syn ):
    """
    Store a plot and build a project wiki that shows counts, plot and provenance.

    Args:
        syn: object whose ``store`` method is used for the plot and the wiki.
        counts: passed to ``get_markdown_for_pandas_DF`` to render the table.
        plot: passed to ``synapseclient.File`` as the file to upload.
        rna_seq_proj: its ``id`` is the parent of the plot; the object itself
            is the wiki owner.
        counts_syn: its ``id`` is recorded as ``used`` for the plot and is
            referenced by the provenance widget.

    Returns:
        Whatever ``syn.store`` returns for the wiki.
    """
    # Upload the plot, recording the counts entity as what it was derived from.
    plot_file = synapseclient.File(plot, parentId=rna_seq_proj.id)
    stored_plot = syn.store(plot_file, used=counts_syn.id)

    # New wiki object owned by the project.
    wiki = synapseclient.Wiki(title='My first RNA-Seq project', owner=rna_seq_proj)

    # Markdown table for the counts.
    counts_table = get_markdown_for_pandas_DF(counts)

    # Page template, spelled out piece by piece so the whitespace is explicit.
    template = (
        "\n"
        "    ###Raw read counts:\n"
        "    {counts}\n"
        "                \n"
        "    ###Genes with difference in counts:\n"
        "    {plot}\n"
        "\n"
        "    ###Complete Analysis Provenance\n"
        "    {provenance}\n"
        "    "
    )

    # Widget snippets embedding the stored plot and the provenance graph.
    widgets = {
        'plot': '${image?synapseId=%s&align=None&scale=100}' % stored_plot.id,
        'provenance': '${provenance?entityList=%s&depth=4&showExpand=false&displayHeightPx=1000}' % counts_syn.id,
    }

    wiki['markdown'] = template.format(counts=counts_table, **widgets)
    return syn.store(wiki)

In [ ]: