In [1]:
source("loadData.R")
# load and clean the data
raw <- loadData()
"Raw data:"
format(head(raw,3), digits=3)
clean <- cleanData(raw)
"Cleaned data:"
format(head(clean,3), digits=3)
dataPrime <- data.frame()
wells <- unique(clean$Well.Name)
for (well_i in wells) {
data_i <- clean[clean$Well.Name == well_i,]
data_i$GR <- (data_i$GR - mean(data_i$GR, na.rm=T)) / sd(data_i$GR, na.rm=T)
data_i$ILD_log10 <- (data_i$ILD_log10 - mean(data_i$ILD_log10, na.rm=T)) / sd(data_i$ILD_log10, na.rm=T)
data_i$DeltaPHI <- (data_i$DeltaPHI - mean(data_i$DeltaPHI, na.rm=T)) / sd(data_i$DeltaPHI, na.rm=T)
data_i$PHIND <- (data_i$PHIND - mean(data_i$PHIND, na.rm=T)) / sd(data_i$PHIND, na.rm=T)
data_i$PE <- (data_i$PE - mean(data_i$PE, na.rm=T)) / sd(data_i$PE, na.rm=T)
dataPrime <- rbind(dataPrime, data_i)
}
cs <- dataPrime
rm(dataPrime)
"Centered and scaled data:"
format(head(cs,3), digits=3)
In [2]:
df <- cs
df$FmTemp <- as.factor(apply(df, MARGIN = 1, FUN = function(x) {strsplit(as.character(x["Formation"]), split=" ")[[1]][1]}))
df$FmType <- as.factor(apply(df, MARGIN = 1, FUN = function(x) {strsplit(as.character(x["Formation"]), split=" ")[[1]][2]}))
format(head(cs, 3), digits=3)
format(head(df, 3), digits=3)
df$Sequence <- NA
df$FmName <- NA
for (i in 1:nrow(df)) {
if (nchar(as.character(df$FmTemp[i])) > 1) {
temp <- df$FmTemp[i]
df$Sequence[i] <- as.character(substr(temp, 2, 2))
df$FmName[i] <- as.character(substr(temp, 1, 1))
} else {
temp <- df$FmTemp[i]
df$Sequence[i] <- "1"
df$FmName[i] <- as.character(temp)
}
}
df$Sequence <- as.factor(df$Sequence)
df$FmName <- as.factor(df$FmName)
df <- subset(df, select=-c(FmTemp))
format(head(df, 3), digits=3)
In [3]:
library(ggplot2)
library(ggthemes)
options(repr.plot.width=10, repr.plot.height=3, warn=-1)
"By formation:"
table(df$Formation, df$Facies)
"By formation name:"
table(df$FmName, df$Facies)
"By formation sequence:"
table(df$Sequence, df$Facies)
"By formation type:"
table(df$FmType, df$Facies)
"By isMarine:"
table(df$isMarine, df$Facies)
facies_colors <- c('#F4D03F', '#F5B041', '#DC7633', '#6E2C00', '#1B4F72', '#2E86C1', '#AED6F1', '#A569BD', '#196F3D')
facies_labels <- c('SS', 'CSiS', 'FSiS', 'SiSh', 'MS', 'WS', 'D', 'PS', 'BS')
ggplot(df, aes(GR, fill=Facies)) + geom_density(alpha=.5) + theme_economist_white(gray_bg=T) +
scale_fill_manual(values=facies_colors, drop=F, labels=facies_labels) +
labs(x="GR", y="Density", title="Gamma Ray PDF by Facies") +
xlim(-2,2) +
theme(panel.grid.major.x = element_line(colour="gray", size=0.25), legend.position="right",
axis.text=element_text(size=6), axis.title=element_text(size=8,face="bold"), plot.title=element_text(size=8),
legend.title=element_text(size=8), legend.text=element_text(size=8))
ggplot(df, aes(ILD_log10, fill=Facies)) + geom_density(alpha=.5) + theme_economist_white(gray_bg=T) +
scale_fill_manual(values=facies_colors, drop=F, labels=facies_labels) +
labs(x="ILD_log10", y="Density", title="Deep Resistivity PDF by Facies") +
theme(panel.grid.major.x = element_line(colour="gray", size=0.25), legend.position="right",
axis.text=element_text(size=6), axis.title=element_text(size=8,face="bold"), plot.title=element_text(size=8),
legend.title=element_text(size=8), legend.text=element_text(size=8))
ggplot(df, aes(DeltaPHI, fill=Facies)) + geom_density(alpha=.5) + theme_economist_white(gray_bg=T) +
scale_fill_manual(values=facies_colors, drop=F, labels=facies_labels) +
labs(x="DeltaPHI", y="Density", title="Delta PHI PDF by Facies") +
xlim(-2.5,2.5) +
theme(panel.grid.major.x = element_line(colour="gray", size=0.25), legend.position="right",
axis.text=element_text(size=6), axis.title=element_text(size=8,face="bold"), plot.title=element_text(size=8),
legend.title=element_text(size=8), legend.text=element_text(size=8))
ggplot(df, aes(PHIND, fill=Facies)) + geom_density(alpha=.5) + theme_economist_white(gray_bg=T) +
scale_fill_manual(values=facies_colors, drop=F, labels=facies_labels) +
labs(x="PHIND", y="Density", title="Average Porosity PDF by Facies") +
xlim(-2.5,2.5) +
theme(panel.grid.major.x = element_line(colour="gray", size=0.25), legend.position="right",
axis.text=element_text(size=6), axis.title=element_text(size=8,face="bold"), plot.title=element_text(size=8),
legend.title=element_text(size=8), legend.text=element_text(size=8))
ggplot(df, aes(PE, fill=Facies)) + geom_density(alpha=.5) + theme_economist_white(gray_bg=T) +
scale_fill_manual(values=facies_colors, drop=F, labels=facies_labels) +
labs(x="PE", y="Density", title="Photoelectric Effect PDF by Facies") +
xlim(-2.5,2.5) +
theme(panel.grid.major.x = element_line(colour="gray", size=0.25), legend.position="right",
axis.text=element_text(size=6), axis.title=element_text(size=8,face="bold"), plot.title=element_text(size=8),
legend.title=element_text(size=8), legend.text=element_text(size=8))
In [4]:
# formation depth
wells <- unique(df$Well.Name)
df$FmThickness <- NA
df$FmRelDepth <- NA
df$isMarineThickness <- 0
for (i in wells) {
df_i <- df[df$Well.Name == i,]
formations <- unique(df_i$Formation)
for (j in formations) {
df_ij <- df_i[df_i$Formation == j,]
# formation thickness & formation depth
maxdepth <- max(df_ij$Depth)
mindepth <- min(df_ij$Depth)
df[df$Well.Name == i & df$Formation == j, "FmThickness"] <- maxdepth - mindepth
df[df$Well.Name == i & df$Formation == j, "FmRelDepth"] <- (df_ij$Depth - mindepth) / (maxdepth - mindepth)
# basic statistics on features
#features <- c("Depth", "GR", "ILD_log10", "DeltaPHI", "PHIND", "PE", "RELPOS")
#for (f in features) {
# df[df$Well.Name == i & df$Formation == j, paste(f, "_mean")] <- mean(df_ij[,f], na.rm=T)
# df[df$Well.Name == i & df$Formation == j, paste(f, "_sd")] <- sd(df_ij[,f], na.rm=T)
# df[df$Well.Name == i & df$Formation == j, paste(f, "_min")] <- min(df_ij[,f], na.rm=T)
# df[df$Well.Name == i & df$Formation == j, paste(f, "_max")] <- max(df_ij[,f], na.rm=T)
#}
}
# thickness and relative depth for isMarine unit
isMarine_prev <- df_i$isMarine[1]
isMarine_cur <- NULL
iStart <- NULL
for (z in 1:nrow(df_i)) {
isMarine_cur <- df_i$isMarine[z]
if (z == 1) {
iStart <- df_i$Depth[z]
isMarine_cur <- df_i$isMarine[z]
} else if (isMarine_cur != isMarine_prev | z == nrow(df_i)) {
df[df$Well.Name == i & df$Depth >= iStart &
df$Depth < df$Depth[z], "isMarineThickness"] <- df_i$Depth[z] - iStart
df[df$Well.Name == i & df$Depth >= iStart &
df$Depth < df$Depth[z], "isMarineRelDepth"] <- (df[df$Well.Name == i &
df$Depth >= iStart &
df$Depth < df$Depth[z],]$Depth -
iStart) /
(df_i$Depth[z] - iStart)
iStart <- df_i$Depth[z]
}
isMarine_prev <- isMarine_cur
}
}
format(head(df, 5), digits=3)
In [5]:
options(repr.plot.width=30, repr.plot.height=24)
library(rpart)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
source("accuracyMetrics.R")
wells <- unique(df$Well.Name)
wells <- wells[!wells %in% c("Recruit F9")]
results <- data.frame()
for (i in 1:(length(wells)-1)) {
for (j in (i+1):length(wells)) {
train <- df[df$Well.Name != wells[i] & df$Well.Name != wells[j],]
test <- df[df$Well.Name == wells[i] | df$Well.Name == wells[j],]
fit <- rpart(Facies ~ ., data=subset(train, select=-c(Well.Name, Depth)), method="class")
my_predictions <- predict(fit, newdata=test, type="class")
if(wells[i] == "SHRIMPLIN" & wells[j] == "CHURCHMAN BIBLE") {
fancyRpartPlot(fit)
}
f1 <- myF1Metric(my_predictions, test$Facies)
results <- rbind(results, data.frame(testWell1=wells[i], testWell2=wells[j], f1=f1))
}
}
format(head(results,10), digits=3)
"We can expect an F1-score on average to be:"
print(mean(results$f1))