In [12]:
import os
import pdb
import logging
import json
from sklearn.externals import joblib
from pipeline import Pipeline
import logging
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ast
# Scikit
from sklearn.ensemble import ExtraTreesRegressor, IsolationForest
from sklearn.model_selection import train_test_split, KFold
from sklearn.externals import joblib
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from a_detection import AnomalyDetection
from helper import generate_matrix, ape, mape, mdape, gen_subplots, plot, train_statistics
# Notebook-wide NumPy RandomState seeded with 42 so random draws are repeatable
# from a fresh kernel. The object is stateful: every consumer advances it.
RNG = np.random.RandomState(42)
from pipeline import Pipeline
from train_pipeline import TrainPipeline
# Route INFO-and-above log records to 'analysis.log' (relative path), each line
# formatted as "<timestamp>: <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s: %(levelname)s - %(message)s', filename='analysis.log')
In [2]:
class MyPipeline(Pipeline):
    """Pipeline subclass wired to the ``price_brutto`` column and ``settings.json``."""

    def __init__(self):
        """Load ``settings.json`` and hand it to the base ``Pipeline`` constructor.

        The settings file is looked up in the parent directory of ``./scikit``
        (resolved against the current working directory).
        """
        DIRECTORY = os.path.dirname(os.path.abspath("./scikit/"))
        # Use a context manager so the settings file handle is closed right
        # after parsing instead of lingering until garbage collection.
        with open('{}/settings.json'.format(DIRECTORY)) as settings_file:
            settings = json.load(settings_file)
        # NOTE(review): the meaning of the three positional arguments is defined
        # by pipeline.Pipeline, which is not visible here — presumably
        # (target column, settings dict, base directory); confirm.
        super().__init__("price_brutto", settings, DIRECTORY)


# Single pipeline instance used by the cells below.
p = MyPipeline()
In [3]:
# Restore the persisted estimator from the pipeline's model folder.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model_path = '{}/extraTree.pkl'.format(p.model_folder)
model = joblib.load(model_path)

# load_df returns a callable; invoking it with None yields the ads frame.
load_ads = p.load_df("ads_transformed.pkl")
ads = load_ads(None)

# Replace price_brutto with its natural logarithm (in place on the loaded frame).
ads['price_brutto'] = np.log(ads['price_brutto'])
In [4]:
# Build the feature matrix and target from the ads frame.
# NOTE(review): y is presumably the log-transformed price_brutto column set in
# the previous cell — confirm against helper.generate_matrix.
X, y = generate_matrix(ads, 'price_brutto')

# Seed the split with an integer instead of the shared RNG instance. A
# RandomState object is advanced by every call, so re-running this cell with
# random_state=RNG produced a different train/test partition each time. An int
# seed gives the same partition a freshly created RandomState(42) yields on its
# first use, and keeps it stable across re-runs.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SPLIT_SEED)
In [9]:
# Predict targets for the held-out test features with the loaded model.
# NOTE(review): predictions are presumably on the log scale, matching the
# log-transformed price_brutto target — confirm how the model was trained.
y_pred = model.predict(X_test)
In [14]:
# Undo the log transform (exp) so the first report is computed on
# exponentiated targets and predictions.
y_test_exp = np.exp(y_test)
y_pred_exp = np.exp(y_pred)
train_statistics(y_test_exp, y_pred_exp, title="ExtraTree_train_100")

# Second report on the values exactly as split / predicted.
# NOTE(review): both calls pass the same title; if train_statistics uses the
# title to label or store its output, the two reports may be indistinguishable
# or overwrite each other — confirm against helper.train_statistics.
train_statistics(y_test, y_pred, title="ExtraTree_train_100")
In [ ]:
In [ ]: