author: lukethompson@gmail.com, nbokulich@gmail.com
date: 28 Feb 2017
language: Python 3.5
conda environment: emp-py3
license: unlicensed

sourcetracker_mapping_file_and_execution.ipynb


In [1]:
import pandas as pd

In [2]:
# Directory the QC-filtered EMP mapping file is read from.
path_map = '~/emp/metadata-refine'
# Directory the SourceTracker-ready mapping file is written to.
path_out = '~/emp/analyses-sourcetracker'

In [3]:
# Load the QC-filtered EMP mapping file (tab-separated), using its first
# column as the row index. All columns are requested as dtype 'object', so
# flag-like values are later compared as strings (e.g. 'True').
mapping_path = '%s/emp_qiime_mapping_qc_filtered.tsv' % path_map
df = pd.read_csv(
    mapping_path,
    sep='\t',
    dtype='object',
    index_col=0,
)

In [6]:
# 'env' is a straight copy of the 'empo_3' column; it is the column name the
# SourceTracker2 run is pointed at via --source_category_column.
df['env'] = df['empo_3']

# Samples whose 'subset_2k' value is the string 'True' become sources; every
# other sample (including any missing value) becomes a sink.
in_subset = df['subset_2k'].eq('True')
df['SourceSink'] = in_subset.map({True: 'source', False: 'sink'})

In [9]:
# Sanity check: tally how many samples were labelled 'source' versus 'sink'.
df['SourceSink'].value_counts()


Out[9]:
sink      22910
source     2000
Name: SourceSink, dtype: int64

In [10]:
# Save the mapping table, now carrying the 'env' and 'SourceSink' columns,
# as a tab-separated file in the SourceTracker analysis directory.
st2_mapping_path = '%s/ST2_emp_qiime_mapping_qc_filtered.tsv' % path_out
df.to_csv(st2_mapping_path, sep='\t')

Run SourceTracker2 on the cluster

# Activate the conda environment used for the SourceTracker2 run.
source activate qiime2

# Input/output locations on the cluster. The trailing slashes are kept as in
# the original, so the joined paths below contain a harmless "//".
project_dir="$HOME/emp/"
maps="$project_dir/maps/"
otus="$project_dir/otus/"

# Run the Gibbs sampler through the scheduler (srun) with 8 jobs, rarefying
# both sources and sinks to 100 sequences per sample. Sources are grouped by
# the 'env' column of the mapping file.
# NOTE(review): the notebook cell above writes
# ST2_emp_qiime_mapping_qc_filtered.tsv under ~/emp/analyses-sourcetracker,
# while this command reads emp_qiime_mapping_qc_filtered_sourcetracker.tsv
# from $maps -- confirm the file was copied/renamed before running.
# NOTE(review): no source/sink column option is passed; presumably the
# 'SourceSink' column is picked up by sourcetracker2's defaults -- confirm.
# Path expansions are quoted so a space or glob character in $HOME cannot
# split or expand the arguments.
srun sourcetracker2 gibbs \
    -i "$otus/emp_deblur_100bp.qc_filtered.rare_5000.biom" \
    -m "$maps/emp_qiime_mapping_qc_filtered_sourcetracker.tsv" \
    -o "$project_dir/sourcetracker_rar100/" \
    --source_category_column env \
    --source_rarefaction_depth 100 \
    --sink_rarefaction_depth 100 \
    --jobs 8

In [ ]: