(You do not need to run the download cell below if you have already run it in the Interfacing_R notebook.)
In [1]:
# Remove any existing local copy first (stderr is discarded, so a missing
# file produces no message), then download the 1000 Genomes sequence index
# and save it as sequence.index in the current directory.
!rm sequence.index 2>/dev/null
!wget -nd ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/historical_data/former_toplevel/sequence.index -O sequence.index
In [1]:
import rpy2.robjects as robjects
import rpy2.robjects.lib.ggplot2 as ggplot2
%load_ext rpy2.ipython
In [17]:
# Have R parse sequence.index with read.delim (first row is the header,
# strings are kept as character rather than factors) and bind the value
# returned by the %R line magic to the Python name seq_data.
seq_data = %R read.delim('sequence.index', header=TRUE, stringsAsFactors=FALSE)
In [18]:
#This code was changed against the book as rpy2 changed
as_integer = robjects.r('as.integer')
match = robjects.r.match
seq_data['READ_COUNT'] = as_integer(seq_data['READ_COUNT'])
seq_data['BASE_COUNT'] = as_integer(seq_data['BASE_COUNT'])
%R -i seq_data
%R print(colnames(seq_data))
%R seq_data$CENTER_NAME <- toupper(seq_data$CENTER_NAME)
Out[18]:
In [19]:
%%R
# Keep only rows whose WITHDRAWN flag equals 0 and, in the same indexing
# step, restrict the data frame to the columns used in the rest of the
# notebook.
seq_data <- seq_data[
  seq_data$WITHDRAWN == 0,
  c(
    "STUDY_ID", "STUDY_NAME", "CENTER_NAME",
    "SAMPLE_ID", "SAMPLE_NAME", "POPULATION",
    "INSTRUMENT_PLATFORM", "LIBRARY_LAYOUT", "PAIRED_FASTQ",
    "READ_COUNT", "BASE_COUNT", "ANALYSIS_GROUP"
  )
]
In [20]:
%%R
# Bar chart with one bar per CENTER_NAME value (geom_bar counts rows);
# the x-axis labels are rotated 90 degrees.
bar <- ggplot(seq_data, aes(factor(CENTER_NAME))) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(bar)
In [21]:
%%R
seq_data$POPULATION <- as.factor(seq_data$POPULATION)
# Select YRI/CEU rows below the base-count and read-count thresholds.
# The mask is wrapped in which() because a logical row index containing NA
# (e.g. a count that could not be represented as an integer) would otherwise
# insert all-NA rows into the result; which() keeps only the TRUE positions.
yri_ceu <- seq_data[which(
  seq_data$POPULATION %in% c("YRI", "CEU") &
    seq_data$BASE_COUNT < 2E9 &
    seq_data$READ_COUNT < 3E7
), ]
In [22]:
%%R
# Scatter plot of READ_COUNT against BASE_COUNT for the yri_ceu subset;
# point colour follows ANALYSIS_GROUP and point shape follows POPULATION.
scatter <- ggplot(yri_ceu) +
  aes(
    x = BASE_COUNT,
    y = READ_COUNT,
    colour = factor(ANALYSIS_GROUP),
    shape = POPULATION
  ) +
  geom_point()
print(scatter)
In [28]:
%%R
library(grid)
library(gridExtra)
# Stack the bar chart above the scatter plot in a single column.
# grid.arrange() draws the layout immediately; its return value is kept in g
# and then shown as the cell's last expression.
g <- grid.arrange(bar, scatter, ncol = 1)
g
In [30]:
%%R
# Write the combined figure to fig.png.
# g must be rendered explicitly: evaluating a bare `g` between png() and
# dev.off() does not draw anything onto the device, so the file would not
# contain the figure. grid.draw() performs the actual drawing; it is called
# with its namespace so this cell does not depend on library(grid) having
# been attached earlier.
png('fig.png')
grid::grid.draw(g)
dev.off()
In [ ]: