In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
In [2]:
filenames = [
"7_Well239.xlsx","8_Well425.xlsx","9_Well445.xlsx","10_Well451.xlsx","11_Well496.xlsx",
"12_Well510.xlsx"
];
train_files = filenames[:5]
In [3]:
COLUMNS = ["DEPTH","RHOB","NPHI","GR","DT"]
FEATURES = ["RHOB","NPHI","GR"]
In [4]:
df = pd.DataFrame()
In [5]:
for item in train_files:
cur_df = pd.read_excel('MTP/'+item, 'Sheet1', index_col=None)
df = df.append(cur_df[COLUMNS],ignore_index=True)
In [6]:
print df.shape
df = df[(df.DT!=-999.25) & (df.RHOB!=-999.25) & (df.NPHI!=-999.25) & (df.GR!=-999.25)]
print df.shape
In [7]:
df.head(5)
Out[7]:
In [8]:
df.head(5)
Out[8]:
In [9]:
df = df.sort_values(by="DEPTH")
In [10]:
df.head(10)
Out[10]:
In [11]:
base = df.iloc[0,0]
end = float(df.tail(1)["DEPTH"])
In [12]:
depth_coeff = []
In [13]:
from sklearn import linear_model
start = base
last_end = 0.0
while start < end:
cur_df = df[(df.DEPTH<=(start+10)) & (df.DEPTH>(last_end))]
x_train = cur_df[FEATURES]
y_train = cur_df["DT"]
clf = linear_model.LinearRegression(normalize=True)
clf.fit(x_train,y_train)
depth_coeff.append(clf)
last_end = start
start = start+10
In [14]:
def get_index(depth):
index=-1
start=base
while depth>start:
index = index+1
start += 10
return index
In [15]:
test_file = filenames[5]
In [16]:
test_file
Out[16]:
In [17]:
test_df = pd.read_excel('MTP/'+test_file, 'Sheet1', index_col=None)
test_df = test_df[COLUMNS]
test_df = test_df[(test_df.DT!=-999.25) & (test_df.RHOB!=-999.25) & (test_df.NPHI!=-999.25) & (test_df.GR!=-999.25)]
In [18]:
y_true = []
y_pred = []
for index,row in test_df.iterrows():
cur_depth = row["DEPTH"]
x_test = row[FEATURES]
y_actual = row["DT"]
y_true.append(y_actual)
y_pred.append(clf.predict(x_test)[0])
index = get_index(cur_depth)
clf = depth_coeff[index]
# print clf.predict(x_test),y_actual
In [19]:
from sklearn.metrics import r2_score
r2_score(y_true, y_pred)
Out[19]:
In [20]:
rows = []
for clf in depth_coeff:
row = []
for item in clf.coef_:
row.append(item)
row.append(clf.intercept_)
rows.append(row)
In [21]:
df = pd.DataFrame()
df = df.append(rows)
df.to_csv('coefficients.csv')
In [ ]: