In [12]:
import os
import pdb
import logging
import json
from sklearn.externals import joblib


from pipeline import Pipeline
import logging
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ast
# Scikit
from sklearn.ensemble import ExtraTreesRegressor, IsolationForest
from sklearn.model_selection import train_test_split, KFold
from sklearn.externals import joblib
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor

from a_detection import AnomalyDetection
from helper import generate_matrix, ape, mape, mdape, gen_subplots, plot, train_statistics

# Notebook-wide random generator seeded with 42. This is a stateful object:
# every consumer that draws from it advances its state, so results obtained
# through it depend on how many draws happened before.
RNG = np.random.RandomState(42)
from pipeline import Pipeline
from train_pipeline import TrainPipeline

# Send INFO-and-above log records to analysis.log (relative to the working
# directory) using a "timestamp: LEVEL - message" layout.
logging.basicConfig(level=logging.INFO, format='%(asctime)s: %(levelname)s - %(message)s', filename='analysis.log')

In [2]:
class MyPipeline(Pipeline):
    """Pipeline subclass that configures itself from ``settings.json``.

    The constructor takes no arguments: it resolves a base directory, loads
    ``settings.json`` from it, and passes ``"price_brutto"``, the parsed
    settings and the directory on to ``Pipeline.__init__``.
    """

    def __init__(self):
        # abspath() drops the trailing slash, so dirname() returns the parent
        # of "./scikit" -- i.e. the current working directory of the kernel.
        DIRECTORY = os.path.dirname(os.path.abspath("./scikit/"))
        # Use a context manager so the settings file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open('{}/settings.json'.format(DIRECTORY)) as settings_file:
            settings = json.load(settings_file)
        super().__init__("price_brutto", settings, DIRECTORY)

p = MyPipeline()

In [3]:
# Restore the persisted estimator from the pipeline's model folder.
# NOTE(review): joblib.load unpickles the file, so only load trusted artifacts.
model_path = '{}/extraTree.pkl'.format(p.model_folder)
model = joblib.load(model_path)

# load_df hands back a callable; invoking it with None yields the data frame.
load_ads = p.load_df("ads_transformed.pkl")
ads = load_ads(None)

# Replace the price column with its natural logarithm.
ads['price_brutto'] = np.log(ads['price_brutto'])

In [4]:
# Build the feature matrix and target from the (log-price) ads frame.
X, y = generate_matrix(ads, 'price_brutto')

# Use an integer seed rather than the shared RandomState instance: an instance
# is advanced by every draw, so re-running this cell would silently produce a
# different test set each time. The integer 42 yields the same split as the
# first draw from a fresh RandomState(42), and does so on every execution.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
# Predict targets for the held-out split with the estimator loaded from disk.
# NOTE(review): the values are presumably on the log-price scale, since the
# price column was log-transformed before the matrix was built -- confirm.
y_pred = model.predict(X_test)

In [14]:
# Both reports share one title.
# NOTE(review): if train_statistics persists output keyed by title, the second
# call would overwrite the first -- confirm against helper.train_statistics.
REPORT_TITLE = "ExtraTree_train_100"

# First report: exponentiated targets and predictions.
train_statistics(np.exp(y_test), np.exp(y_pred), title=REPORT_TITLE)
# Second report: the values exactly as used for the split and the prediction.
train_statistics(y_test, y_pred, title=REPORT_TITLE)

In [ ]:


In [ ]: