In [1]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.metrics import accuracy_score

In [2]:
csvNames = ['Well1B3mths.csv','Well1C3mths.csv','Well1D3mths.csv']
"""
csvNames = ['Well1B3mths.csv','Well1C3mths.csv','Well1D3mths.csv','Well1E3mths.csv',
           'Well1F3mths.csv','Well1G3mths.csv','Well1H3mths.csv','Well1I3mths.csv',
           'Well1J3mths.csv','Well2A3mths.csv','Well2B3mths.csv','Well2C3mths.csv',
            'Well2D3mths.csv','Well2E3mths.csv','Well3A3mths.csv','Well3B3mths.csv',
            'Well3C3mths.csv','Well3D3mths.csv','Well3E3mths.csv','Well3F3mths.csv',
            'Well3G3mths.csv','Well3H3mths.csv','Well3I3mths.csv','Well4A3mths.csv',
            'Well4B3mths.csv']
"""


Out[2]:
"\ncsvNames = ['Well1B3mths.csv','Well1C3mths.csv','Well1D3mths.csv','Well1E3mths.csv',\n           'Well1F3mths.csv','Well1G3mths.csv','Well1H3mths.csv','Well1I3mths.csv',\n           'Well1J3mths.csv','Well2A3mths.csv','Well2B3mths.csv','Well2C3mths.csv',\n            'Well2D3mths.csv','Well2E3mths.csv','Well3A3mths.csv','Well3B3mths.csv',\n            'Well3C3mths.csv','Well3D3mths.csv','Well3E3mths.csv','Well3F3mths.csv',\n            'Well3G3mths.csv','Well3H3mths.csv','Well3I3mths.csv','Well4A3mths.csv',\n            'Well4B3mths.csv']\n"

In [3]:
featuresColNames = ['Casing Pressure',
                    'Gas Flow (Volume)',
                    'Motor Speed',
                    'Motor Torque',
                    'Pump Speed Actual',
                    'Tubing Flow Meter',
                    'Tubing Pressure',
                    'Water Flow Mag from Separator']
targetsName = ['Downhole Gauge Pressure']

allFeatures = []
allTargets = []

for well in csvNames:
    df = pd.DataFrame.from_csv(well)
    
    features = df[featuresColNames]
    target = df[targetsName]
    
    allFeatures.append(features)
    allTargets.append(target)

allFeatures = pd.concat(allFeatures)
allTargets = pd.concat(allTargets)

Split the Features and Targets into Training and Test Sets


In [6]:
trainingX = features[:100]
trainingY = target[:100]
testX = features[200:300]
testY = target[200:300]

🤖 Machine Learning (with sklearn)


In [26]:
clf = svm.SVR()
clf.fit(trainingX, trainingY) 
predictions = clf.predict(testX)

Calculate the Percentage of Error

Go through each example and calculate the percentage of error. Add this percentage of error to an array. Then use this array to calculate the overall percentage of error within the predicted test set.


In [25]:
def calculateError(prediction, actual):
    return (predicted-actual)/actual

percentageErrors = []
for i, prediction in enumerate(predictions):
    actual = testY[i][0]
    percentageError = (prediction-actual)/actual
    percentageErrors.append(percentageError)

overallError = np.sum(percentageErrors) / len(percentageErrors)

print "Overall error was: " + str(overallError*100) + "%"


Overall error was: 0.0201162442605%