Note: The goal of this demo to show how to keep track of a typical steps in a RNA-Seq NGS counting workflow and not necessarily a recommended RNA-Seq or differential expression analysis method
- If you would like to see the full RNA-Seq analysis tracking in synapse please refer to _2.RNA-Seq_completeworkflow notebook
For detailed help on Synapse please refer to Synapse Getting Started Guide
In [1]:
import os
import sys
import synapseclient
from synapseclient import File, Project, Wiki, Folder
import tempfile
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
%run 'synapse_demo_utils_functions.ipynb'
In [ ]:
syn = synapseclient.login()
We are going to start from pre-mapped subsampled RNA-Seq dataset for adrenal and brain tissue generated by the Illumina Body Map project
The dataset is stored in a public synapse project. The following are the unique synapse id's for the mapped files which would be the starting point for our demo analysis
In [3]:
adrenal_mapped_file = 'syn2468600'
brain_mapped_file = 'syn2468604'
adrenal_mapped_file = syn.get(adrenal_mapped_file)
brain_mapped_file = syn.get(brain_mapped_file)
Key things needed
In [4]:
#temp local analysis dir
local_analysis_dir = tempfile.mkdtemp(prefix='temp_Demo_RNA-Seq_workflow')
#download the pre-prepared small annotation
annotation_file = 'syn2468559' #pre-stored in synaspe
annotation_file = syn.get(annotation_file)
In [5]:
adrenal_counts_file = adrenal_mapped_file.path + '.htseq.counts.txt'
adrenal_reads_count_command = "htseq-count -f sam -s no -q %s %s > %s" % (adrenal_mapped_file.path,
annotation_file.path, adrenal_counts_file)
! $adrenal_reads_count_command
###################
###################
brain_counts_file = brain_mapped_file.path + '.htseq.counts.txt'
brain_reads_count_command = "htseq-count -f sam -s no -q %s %s > %s" % (brain_mapped_file.path,
annotation_file.path, brain_counts_file)
! $brain_reads_count_command
In [6]:
#create a project
project_name = 'RNA_Seq_Project-%s' % randomword(5) ##mainly to create a random unique project name
myProj = Project(project_name)
myProj = syn.store(myProj)
#lets create a folder to keep the counts
counts_folder = syn.store(Folder('counts', parentId=myProj.id))
In [ ]:
adrenal_counts_file = File(adrenal_counts_file, parentId=counts_folder.id)
#additional two variables for creating provenance
used = [adrenal_mapped_file.id]
executed = [pushToSynapse(adrenal_reads_count_command, counts_folder.id)]
adrenal_counts_file = syn.store(adrenal_counts_file, used = used, executed = executed)
In [ ]:
brain_counts_file = File(brain_counts_file, parentId=counts_folder.id)
#additional two variables for creating provenance
used = [brain_mapped_file]
executed = [pushToSynapse(brain_reads_count_command, counts_folder.id)]
brain_counts_file = syn.store(brain_counts_file, used = used, executed = executed)
In [9]:
cleaned_counts = merge_htseq_counts([adrenal_counts_file, brain_counts_file ])
cleaned_counts
Out[9]:
In [10]:
%matplotlib inline
cleaned_counts.apply(lambda x: np.log2(x), axis=1).plot(rot=90, title="raw counts(log)", kind="bar")
plt.savefig('raw_read_counts.png')
In [ ]:
outFile = local_analysis_dir + '/merged_counts.tsv'
cleaned_counts.to_csv(outFile, sep="\t", header=True, index=True )
cleaned_counts_syn = syn.store(File(outFile,parentId = counts_folder.id), used=[brain_counts_file.id,
adrenal_counts_file.id])
In [ ]:
wiki = temp_create_wiki(syn, cleaned_counts, 'raw_read_counts.png', myProj, cleaned_counts_syn)
In [13]:
syn.onweb(myProj)
In [14]:
syn.delete(myProj)
shutil.rmtree(local_analysis_dir)
In [ ]: