In [1]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score
In [2]:
# Every well CSV known to this notebook. Kept as a real list (instead of a
# disabled string literal) so it stays syntax-checked and does not get echoed
# as the cell's output.
allCsvNames = [
    'Well1B3mths.csv', 'Well1C3mths.csv', 'Well1D3mths.csv', 'Well1E3mths.csv',
    'Well1F3mths.csv', 'Well1G3mths.csv', 'Well1H3mths.csv', 'Well1I3mths.csv',
    'Well1J3mths.csv', 'Well2A3mths.csv', 'Well2B3mths.csv', 'Well2C3mths.csv',
    'Well2D3mths.csv', 'Well2E3mths.csv', 'Well3A3mths.csv', 'Well3B3mths.csv',
    'Well3C3mths.csv', 'Well3D3mths.csv', 'Well3E3mths.csv', 'Well3F3mths.csv',
    'Well3G3mths.csv', 'Well3H3mths.csv', 'Well3I3mths.csv', 'Well4A3mths.csv',
    'Well4B3mths.csv',
]

# Files actually loaded below: currently only the first three wells.
# Assign `allCsvNames` here to run on the full set.
csvNames = allCsvNames[:3]
Out[2]:
In [3]:
# Columns used as model inputs, and the single column the model should predict.
featuresColNames = [
    'Casing Pressure',
    'Gas Flow (Volume)',
    'Motor Speed',
    'Motor Torque',
    'Pump Speed Actual',
    'Tubing Flow Meter',
    'Tubing Pressure',
    'Water Flow Mag from Separator',
]
targetsName = ['Downhole Gauge Pressure']

# Collect one features/target frame per CSV, then stack them once at the end.
# Separate list names keep `allFeatures` / `allTargets` from changing type
# (list -> DataFrame) halfway through the cell.
featureFrames = []
targetFrames = []
for well in csvNames:
    # pd.DataFrame.from_csv was deprecated and later removed from pandas;
    # read_csv with index_col=0 and parse_dates=True reproduces its defaults.
    df = pd.read_csv(well, index_col=0, parse_dates=True)
    features = df[featuresColNames]
    target = df[targetsName]
    featureFrames.append(features)
    targetFrames.append(target)

# Row-wise concatenation; the per-file indexes are kept as-is, so index labels
# may repeat across wells.
allFeatures = pd.concat(featureFrames)
allTargets = pd.concat(targetFrames)
In [6]:
# Split on the concatenated frames rather than on `features` / `target`, which
# are loop leftovers that only hold the last CSV that was read.
# .iloc makes the slicing explicitly positional, which matters because the
# concatenated index can contain repeated labels.
# NOTE(review): rows 100-199 are skipped by both ranges, as in the original
# bounds -- confirm that gap is intentional.
trainingX = allFeatures.iloc[:100]
trainingY = allTargets.iloc[:100]
testX = allFeatures.iloc[200:300]
testY = allTargets.iloc[200:300]
In [26]:
# Support-vector regressor with scikit-learn's default hyper-parameters.
# NOTE(review): the inputs are passed unscaled; SVR is sensitive to feature
# scale, so consider standardising the features first.
clf = svm.SVR()

# `trainingY` is a one-column DataFrame; fit() expects a 1-D target and emits a
# DataConversionWarning for column vectors, so flatten it explicitly.
clf.fit(trainingX, np.ravel(trainingY))

# One predicted target value per row of `testX`.
predictions = clf.predict(testX)
In [25]:
def calculateError(prediction, actual):
    """Return the signed relative error of a prediction.

    Parameters:
        prediction: the predicted value.
        actual: the reference value the prediction is compared against.

    Returns:
        (prediction - actual) / actual. Positive when the prediction is above
        the reference, negative when it is below.
    """
    # The original body referenced an undefined name (`predicted`), which
    # raised NameError on every call; use the actual parameter.
    return (prediction - actual) / actual
# Flatten the one-column target frame to a 1-D array so that each prediction is
# paired with its actual value by position. Indexing the frame as `testY[i][0]`
# looks up a *column* labelled `i` and raises KeyError.
actualValues = np.ravel(testY)

# Signed relative error of every prediction, in row order.
percentageErrors = [
    (prediction - actual) / actual
    for prediction, actual in zip(predictions, actualValues)
]

# Mean of the signed errors.
# NOTE(review): positive and negative errors cancel in this average, so it
# measures bias rather than typical error size -- confirm that is intended.
overallError = np.sum(percentageErrors) / len(percentageErrors)

# Parenthesised single-argument form prints identically on Python 2 and 3.
print("Overall error was: " + str(overallError*100) + "%")