In [1]:
import os
# Project-wide data and figure roots, resolved under the user's home directory
HOME = os.path.expanduser('~')
DATA_FOLDER = os.path.abspath(
    os.path.join(HOME, 'projects', 'cshl-singlecell-2017', 'data'))
FIGURE_FOLDER = os.path.abspath(
    os.path.join(HOME, 'projects', 'cshl-singlecell-2017', 'figures'))

# Per-notebook output folders, named after this notebook
notebook_name = '50_Example_workflow_reanalyzing_macosko2015'
data_folder = os.path.join(DATA_FOLDER, notebook_name)
figure_folder = os.path.join(FIGURE_FOLDER, notebook_name)

# Input location inside the data root
# NOTE(review): presumably written by a '91_filter_genes' notebook - confirm
input_folder = os.path.join(DATA_FOLDER, '91_filter_genes')

# Create the output folders from Python instead of `! mkdir -p $var`: the
# unquoted shell interpolation breaks on paths that contain spaces and is not
# portable across platforms. exist_ok=True keeps this cell safe to re-run.
os.makedirs(data_folder, exist_ok=True)
os.makedirs(figure_folder, exist_ok=True)
In [3]:
from sklearn.decomposition import PCA, FastICA, NMF
from sklearn.manifold import TSNE, MDS
In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import phenograph
import macosko2015
%matplotlib inline
In [5]:
# Load the three tables returned by the macosko2015 package
counts, cells, genes = macosko2015.load_differential_clusters()

# Report the dimensions of each table, one "<name>.shape <shape>" line apiece
print('\n'.join(
    '{} {}'.format(tag, table.shape)
    for tag, table in (
        ('counts.shape', counts),
        ('cells.shape', cells),
        ('genes.shape', genes),
    )
))
In [6]:
# Preview the first rows of `counts` (presumably a pandas DataFrame - confirm)
counts.head()
Out[6]:
In [8]:
# Preview the first rows of `genes` (presumably a pandas DataFrame - confirm)
genes.head()
Out[8]:
In [9]:
# Preview the first rows of `cells` (presumably a pandas DataFrame - confirm)
cells.head()
Out[9]:
In [10]:
# PCA model that keeps 15 components.
# random_state is pinned so the projection is reproducible across kernel
# restarts when scikit-learn selects its randomized SVD solver.
pcaer = PCA(n_components=15, random_state=0)
In [11]:
# Fit the PCA model on `counts` and project `counts` onto the fitted components
pcad = pcaer.fit_transform(counts)
# Echo the projected values as the cell output
pcad
Out[11]:
In [12]:
# Check the dimensions of the PCA output
pcad.shape
Out[12]:
In [13]:
# Wrap the PCA output in a DataFrame whose rows carry the same labels as
# `counts`; columns keep pandas' default integer labels.
pcad_df = pd.DataFrame(data=pcad, index=counts.index)

# Show the frame's dimensions, then its first rows as the cell output
print(pcad_df.shape)
pcad_df.head()
Out[13]:
In [16]:
%%time
smusher = TSNE()
tsned = smusher.fit_transform(pcad_df)
print(tsned.shape)
tsned
In [ ]: