In [3]:
import pandas as pd
import pdb
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt
from sklearn.externals import joblib
import seaborn as sns
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_selection import chi2, f_regression
from sklearn.linear_model import LassoLarsCV, Ridge, RidgeCV, LassoCV, Lasso, LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn import metrics
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import export_graphviz
import logging
import scipy
import gc
from multiprocessing import Pool
from collections import defaultdict
import os
from scikit.helper import generate_matrix, ape, mape, mdape, gen_subplots, plot, train_statistics
import json
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, load_only, Load
from models import Advertisement, Municipality, ObjectType
from scikit.combined_ensemble import CombinedEnsemble
from scikit import combined_ensemble as combined_ensemble
import sys
# Register the already-imported scikit.combined_ensemble module under the bare
# name "combined_ensemble" too, so both names resolve to the same module object.
# NOTE(review): presumably needed so pickles that reference the old top-level
# module name can still be loaded -- confirm.
sys.modules["combined_ensemble"] = sys.modules["scikit.combined_ensemble"]

# Configure logging: level INFO, timestamped message format, written to
# the file jupyter.log.
logging.basicConfig(level=logging.INFO, 
                    format='%(asctime)s: %(levelname)s - %(message)s', 
                    filename='jupyter.log')

from scikit.train_pipeline import TrainPipeline

def dpathes_to_hash(dpathes):
    """Return one hash per row, derived from the row's non-zero column indices.

    Every item yielded by iterating ``dpathes`` must provide ``nonzero()``
    returning a tuple whose second entry holds the column indices. Rows with
    the same set of indices produce the same hash, whatever the index order.

    NOTE(review): ``dpathes`` is presumably a sparse decision-path matrix --
    confirm against the callers.
    """
    hashes = []
    for row in dpathes:
        nonzero_columns = row.nonzero()[1]
        hashes.append(hash(frozenset(nonzero_columns)))
    return hashes

In [4]:
# Load the previously saved objects from disk.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load files from a trusted source.
# NOTE(review): the recorded run of this cell failed while loading
# ads_transformed.pkl with "No module named 'pandas.indexes'"; presumably the
# pickle was written with a different pandas version -- confirm and regenerate.
model = joblib.load('scikit/models/extraTree.pkl')
ads_transformed = joblib.load('scikit/ads_transformed.pkl')

# Read the advertisements CSV, using its first column as the index.
ads = pd.read_csv('scikit/advertisements.csv', index_col=0, engine='c')


---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-4-64bdafa0451c> in <module>()
      1 model = joblib.load('scikit/models/extraTree.pkl')
----> 2 ads_transformed = joblib.load('scikit/ads_transformed.pkl')
      3 
      4 ads = pd.read_csv('scikit/advertisements.csv', index_col=0, engine='c')

/home/piero/projects/Immo/immo/crawler/venv/lib/python3.6/site-packages/sklearn/externals/joblib/numpy_pickle.py in load(filename, mmap_mode)
    573                     return load_compatibility(fobj)
    574 
--> 575                 obj = _unpickle(fobj, filename, mmap_mode)
    576 
    577     return obj

/home/piero/projects/Immo/immo/crawler/venv/lib/python3.6/site-packages/sklearn/externals/joblib/numpy_pickle.py in _unpickle(fobj, filename, mmap_mode)
    505     obj = None
    506     try:
--> 507         obj = unpickler.load()
    508         if unpickler.compat_mode:
    509             warnings.warn("The file '%s' has been generated with a "

/usr/lib64/python3.6/pickle.py in load(self)
   1048                     raise EOFError
   1049                 assert isinstance(key, bytes_types)
-> 1050                 dispatch[key[0]](self)
   1051         except _Stop as stopinst:
   1052             return stopinst.value

/usr/lib64/python3.6/pickle.py in load_global(self)
   1336         module = self.readline()[:-1].decode("utf-8")
   1337         name = self.readline()[:-1].decode("utf-8")
-> 1338         klass = self.find_class(module, name)
   1339         self.append(klass)
   1340     dispatch[GLOBAL[0]] = load_global

/usr/lib64/python3.6/pickle.py in find_class(self, module, name)
   1386             elif module in _compat_pickle.IMPORT_MAPPING:
   1387                 module = _compat_pickle.IMPORT_MAPPING[module]
-> 1388         __import__(module, level=0)
   1389         if self.proto >= 4:
   1390             return _getattribute(sys.modules[module], name)[0]

ModuleNotFoundError: No module named 'pandas.indexes'

In [ ]:
# Absolute directory that contains scikit/main.py, resolved relative to the
# current working directory.
DIRECTORY = os.path.dirname(os.path.abspath("./scikit/main.py"))

# Read the settings through a context manager so the file handle is closed
# deterministically instead of being left open until garbage collection.
with open('{}/settings.json'.format(DIRECTORY)) as settings_file:
    settings = json.load(settings_file)

# Pipeline object built for the "price" target with the loaded settings.
p = TrainPipeline("price", settings, DIRECTORY)

In [4]:
# Restrict the run to the first 1000 advertisements.
ads = ads[:1000]

# Feed the frame through every preparation step, in pipeline order; each step
# receives the output of the previous one.
for transformation in p.preparation_pipeline:
    logging.info("Apply transformation: {}".format(transformation.__name__))
    ads = transformation(ads)

if not len(ads):
    print("Error: Input did not meet our standards.")

# Outlier detection runs regardless of the outcome of the check above.
ads = p.outlier_detection(ads)
if not len(ads):
    print("Error: Input data is an outlier!")

In [5]:
# Column names of the freshly prepared ads and of the stored transformed data.
col_new = set(ads)
col_exist = set(ads_transformed)

# Report every input column that the stored data does not contain.
unknown_cols = col_new - col_exist
if unknown_cols:
    print("Error: there are input columns which are not in the trained model: {}".format(unknown_cols))

In [6]:
# Snapshot of the column names currently present in ads.
col_plain = set(ads)

In [7]:
# Columns the stored data has but the new ads lack.
missing_cols = list(col_exist - col_new)

# Append those columns to the ads and fill them with 0.
empty_missing = pd.DataFrame(columns=missing_cols)
filled_ads = ads.join(empty_missing)
filled_ads[missing_cols] = 0

In [44]:



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-44-441f7785e1d5> in <module>()
----> 1 TrainPipeline.__package__

AttributeError: type object 'TrainPipeline' has no attribute '__package__'

In [8]:
# Municipality attributes loaded for a row; the single source for both the
# query's load_only() list and the column rename map below.
_MUNICIPALITY_FIELDS = (
    "name",
    "canton_id",
    "district_id",
    "mountain_region_id",
    "language_region_id",
    "job_market_region_id",
    "agglomeration_id",
    "metropole_region_id",
    "tourism_region_id",
    "is_town",
    "noise_level",
    "urban_character_id",
    "steuerfuss_gde",
    "steuerfuss_kanton",
    "degurba_id",
    "planning_region_id",
    "ase",
    "greater_region_id",
    "ms_region_id",
    "municipal_size_class_id",
    "agglomeration_size_class_id",
    "municipal_type22_id",
    "municipal_type9_id",
)

# Object type attributes loaded for a row.
_OBJECT_TYPE_FIELDS = ("name", "grouping")

# Output column names that differ from the plain attribute name.
_MUNICIPALITY_RENAMES = {"name": "municipality", "noise_level": "m_noise_level"}
_OBJECT_TYPE_RENAMES = {"name": "otype", "grouping": "ogroup"}


def load_additional(municipality_id, otype_id):
    """Load municipality and object type attributes as one joined DataFrame.

    Connects to the database named by the ``DATABASE_URL`` environment
    variable, selects the municipality with id ``municipality_id`` and the
    object type with id ``otype_id``, and joins both results on their index.

    Parameters:
        municipality_id: value matched against ``Municipality.id``.
        otype_id: value matched against ``ObjectType.id``.

    Returns:
        A DataFrame without the two id columns, in which the labelled
        ``municipalities_*`` columns carry their plain attribute name (except
        ``name`` -> ``municipality`` and ``noise_level`` -> ``m_noise_level``)
        and the object type columns are called ``otype`` and ``ogroup``.
    """
    engine = create_engine(os.environ.get('DATABASE_URL', None))
    session = sessionmaker(bind=engine)()
    try:
        m_stmt = session.query(Municipality).filter_by(id=municipality_id).options(
            Load(Municipality).load_only(*_MUNICIPALITY_FIELDS)
        ).with_labels().statement

        o_stmt = session.query(ObjectType).filter_by(id=otype_id).options(
            Load(ObjectType).load_only(*_OBJECT_TYPE_FIELDS)
        ).with_labels().statement

        municipality = pd.read_sql_query(m_stmt, engine)
        object_type = pd.read_sql_query(o_stmt, engine)
    finally:
        # A fresh engine is created on every call, so release the session and
        # the engine's pooled connections instead of leaking them.
        session.close()
        engine.dispose()

    # with_labels() prefixes every column with its table name; map the
    # labelled names back to the names used by the rest of the notebook.
    column_names = {
        'municipalities_' + field: _MUNICIPALITY_RENAMES.get(field, field)
        for field in _MUNICIPALITY_FIELDS
    }
    column_names.update(
        ('object_types_' + field, _OBJECT_TYPE_RENAMES[field])
        for field in _OBJECT_TYPE_FIELDS
    )

    return municipality \
        .join(object_type) \
        .drop(['municipalities_id', 'object_types_id'], axis=1) \
        .rename(columns=column_names)

# Example lookup: municipality id 1 combined with object type id 30.
row = load_additional(1, 30)

unwanted_cols = [
    'cubature', 'room_height', 'effective_area',
    'plot_area', 'longitude', 'latitude',
    'floor', 'num_floors', 'crawler',
]

# Columns in col_plain that are neither columns of the lookup row nor listed
# as unwanted.
col_plain.difference(row, unwanted_cols)


Out[8]:
{'agglomeration_id_0',
 'agglomeration_id_1061',
 'agglomeration_id_121',
 'agglomeration_id_1344',
 'agglomeration_id_1711',
 'agglomeration_id_2125',
 'agglomeration_id_2196',
 'agglomeration_id_230',
 'agglomeration_id_2581',
 'agglomeration_id_2601',
 'agglomeration_id_261',
 'agglomeration_id_2701',
 'agglomeration_id_2939',
 'agglomeration_id_3203',
 'agglomeration_id_3231',
 'agglomeration_id_3271',
 'agglomeration_id_3336',
 'agglomeration_id_3425',
 'agglomeration_id_351',
 'agglomeration_id_371',
 'agglomeration_id_3787',
 'agglomeration_id_3901',
 'agglomeration_id_4001',
 'agglomeration_id_4021',
 'agglomeration_id_4082',
 'agglomeration_id_4201',
 'agglomeration_id_4401',
 'agglomeration_id_4436',
 'agglomeration_id_4566',
 'agglomeration_id_4671',
 'agglomeration_id_5002',
 'agglomeration_id_5113',
 'agglomeration_id_5192',
 'agglomeration_id_5250',
 'agglomeration_id_5586',
 'agglomeration_id_581',
 'agglomeration_id_5890',
 'agglomeration_id_6002',
 'agglomeration_id_6248',
 'agglomeration_id_6266',
 'agglomeration_id_6421',
 'agglomeration_id_6458',
 'agglomeration_id_6621',
 'agglomeration_id_6711',
 'agglomeration_id_9001',
 'agglomeration_id_9002',
 'agglomeration_id_9003',
 'agglomeration_id_9004',
 'agglomeration_id_9005',
 'agglomeration_id_942',
 'agglomeration_size_class_id_0',
 'agglomeration_size_class_id_1',
 'agglomeration_size_class_id_2',
 'agglomeration_size_class_id_3',
 'agglomeration_size_class_id_4',
 'agglomeration_size_class_id_5',
 'ase_0',
 'ase_1',
 'avg_room_area',
 'bath',
 'build_year',
 'canton_id_1',
 'canton_id_10',
 'canton_id_11',
 'canton_id_12',
 'canton_id_13',
 'canton_id_14',
 'canton_id_15',
 'canton_id_16',
 'canton_id_17',
 'canton_id_18',
 'canton_id_19',
 'canton_id_2',
 'canton_id_20',
 'canton_id_21',
 'canton_id_22',
 'canton_id_23',
 'canton_id_24',
 'canton_id_25',
 'canton_id_26',
 'canton_id_3',
 'canton_id_4',
 'canton_id_5',
 'canton_id_6',
 'canton_id_7',
 'canton_id_8',
 'canton_id_9',
 'degurba_id_1',
 'degurba_id_2',
 'degurba_id_3',
 'district_id_1001',
 'district_id_1002',
 'district_id_1003',
 'district_id_1004',
 'district_id_1006',
 'district_id_1007',
 'district_id_101',
 'district_id_102',
 'district_id_103',
 'district_id_104',
 'district_id_105',
 'district_id_106',
 'district_id_107',
 'district_id_108',
 'district_id_109',
 'district_id_110',
 'district_id_1101',
 'district_id_1102',
 'district_id_1103',
 'district_id_1105',
 'district_id_1106',
 'district_id_1107',
 'district_id_1108',
 'district_id_111',
 'district_id_1110',
 'district_id_112',
 'district_id_1200',
 'district_id_1301',
 'district_id_1303',
 'district_id_1304',
 'district_id_1305',
 'district_id_1402',
 'district_id_1403',
 'district_id_1404',
 'district_id_1405',
 'district_id_1501',
 'district_id_1600',
 'district_id_1721',
 'district_id_1722',
 'district_id_1723',
 'district_id_1724',
 'district_id_1725',
 'district_id_1726',
 'district_id_1727',
 'district_id_1728',
 'district_id_1821',
 'district_id_1824',
 'district_id_1825',
 'district_id_1826',
 'district_id_1827',
 'district_id_1829',
 'district_id_1830',
 'district_id_1831',
 'district_id_1901',
 'district_id_1902',
 'district_id_1903',
 'district_id_1904',
 'district_id_1905',
 'district_id_1906',
 'district_id_1907',
 'district_id_1908',
 'district_id_1909',
 'district_id_1910',
 'district_id_1911',
 'district_id_2011',
 'district_id_2012',
 'district_id_2013',
 'district_id_2014',
 'district_id_2015',
 'district_id_2101',
 'district_id_2103',
 'district_id_2104',
 'district_id_2105',
 'district_id_2106',
 'district_id_2221',
 'district_id_2222',
 'district_id_2224',
 'district_id_2225',
 'district_id_2226',
 'district_id_2227',
 'district_id_2228',
 'district_id_2229',
 'district_id_2230',
 'district_id_2301',
 'district_id_2302',
 'district_id_2303',
 'district_id_2304',
 'district_id_2305',
 'district_id_2306',
 'district_id_2307',
 'district_id_2309',
 'district_id_2311',
 'district_id_2312',
 'district_id_2401',
 'district_id_2403',
 'district_id_2404',
 'district_id_2405',
 'district_id_241',
 'district_id_242',
 'district_id_243',
 'district_id_244',
 'district_id_245',
 'district_id_246',
 'district_id_247',
 'district_id_249',
 'district_id_250',
 'district_id_2500',
 'district_id_2601',
 'district_id_311',
 'district_id_312',
 'district_id_313',
 'district_id_314',
 'district_id_315',
 'district_id_316',
 'district_id_400',
 'district_id_501',
 'district_id_502',
 'district_id_503',
 'district_id_504',
 'district_id_505',
 'district_id_600',
 'district_id_700',
 'district_id_800',
 'district_id_900',
 'exterior',
 'greater_region_id_1',
 'greater_region_id_2',
 'greater_region_id_3',
 'greater_region_id_4',
 'greater_region_id_5',
 'greater_region_id_6',
 'greater_region_id_7',
 'interior',
 'is_town_0',
 'is_town_1',
 'job_market_region_id_1',
 'job_market_region_id_10',
 'job_market_region_id_11',
 'job_market_region_id_12',
 'job_market_region_id_13',
 'job_market_region_id_14',
 'job_market_region_id_15',
 'job_market_region_id_16',
 'job_market_region_id_2',
 'job_market_region_id_3',
 'job_market_region_id_4',
 'job_market_region_id_5',
 'job_market_region_id_6',
 'job_market_region_id_7',
 'job_market_region_id_8',
 'job_market_region_id_9',
 'language_region_id_1',
 'language_region_id_2',
 'language_region_id_3',
 'language_region_id_4',
 'last_construction',
 'living_area',
 'metropole_region_id_0',
 'metropole_region_id_1',
 'metropole_region_id_2',
 'metropole_region_id_3',
 'metropole_region_id_4',
 'metropole_region_id_5',
 'metropole_region_id_9',
 'mountain_region_id_0',
 'mountain_region_id_1',
 'mountain_region_id_2',
 'mountain_region_id_3',
 'mountain_region_id_4',
 'ms_region_id_1',
 'ms_region_id_10',
 'ms_region_id_100',
 'ms_region_id_102',
 'ms_region_id_103',
 'ms_region_id_105',
 'ms_region_id_106',
 'ms_region_id_11',
 'ms_region_id_12',
 'ms_region_id_13',
 'ms_region_id_14',
 'ms_region_id_15',
 'ms_region_id_16',
 'ms_region_id_18',
 'ms_region_id_19',
 'ms_region_id_2',
 'ms_region_id_20',
 'ms_region_id_22',
 'ms_region_id_23',
 'ms_region_id_25',
 'ms_region_id_26',
 'ms_region_id_27',
 'ms_region_id_28',
 'ms_region_id_29',
 'ms_region_id_3',
 'ms_region_id_30',
 'ms_region_id_31',
 'ms_region_id_32',
 'ms_region_id_33',
 'ms_region_id_34',
 'ms_region_id_35',
 'ms_region_id_36',
 'ms_region_id_37',
 'ms_region_id_38',
 'ms_region_id_39',
 'ms_region_id_4',
 'ms_region_id_40',
 'ms_region_id_41',
 'ms_region_id_42',
 'ms_region_id_43',
 'ms_region_id_44',
 'ms_region_id_45',
 'ms_region_id_46',
 'ms_region_id_47',
 'ms_region_id_48',
 'ms_region_id_49',
 'ms_region_id_5',
 'ms_region_id_50',
 'ms_region_id_51',
 'ms_region_id_52',
 'ms_region_id_53',
 'ms_region_id_54',
 'ms_region_id_55',
 'ms_region_id_56',
 'ms_region_id_57',
 'ms_region_id_58',
 'ms_region_id_59',
 'ms_region_id_6',
 'ms_region_id_60',
 'ms_region_id_61',
 'ms_region_id_62',
 'ms_region_id_64',
 'ms_region_id_66',
 'ms_region_id_67',
 'ms_region_id_68',
 'ms_region_id_7',
 'ms_region_id_70',
 'ms_region_id_71',
 'ms_region_id_72',
 'ms_region_id_73',
 'ms_region_id_74',
 'ms_region_id_75',
 'ms_region_id_76',
 'ms_region_id_77',
 'ms_region_id_78',
 'ms_region_id_79',
 'ms_region_id_8',
 'ms_region_id_80',
 'ms_region_id_81',
 'ms_region_id_82',
 'ms_region_id_83',
 'ms_region_id_84',
 'ms_region_id_85',
 'ms_region_id_86',
 'ms_region_id_87',
 'ms_region_id_88',
 'ms_region_id_9',
 'ms_region_id_91',
 'ms_region_id_93',
 'ms_region_id_94',
 'ms_region_id_95',
 'ms_region_id_97',
 'ms_region_id_98',
 'ms_region_id_99',
 'municipal_type22_id_1',
 'municipal_type22_id_10',
 'municipal_type22_id_11',
 'municipal_type22_id_12',
 'municipal_type22_id_13',
 'municipal_type22_id_14',
 'municipal_type22_id_15',
 'municipal_type22_id_16',
 'municipal_type22_id_17',
 'municipal_type22_id_18',
 'municipal_type22_id_19',
 'municipal_type22_id_2',
 'municipal_type22_id_20',
 'municipal_type22_id_21',
 'municipal_type22_id_3',
 'municipal_type22_id_4',
 'municipal_type22_id_5',
 'municipal_type22_id_6',
 'municipal_type22_id_7',
 'municipal_type22_id_8',
 'municipal_type22_id_9',
 'municipal_type9_id_1',
 'municipal_type9_id_2',
 'municipal_type9_id_3',
 'municipal_type9_id_4',
 'municipal_type9_id_5',
 'municipal_type9_id_6',
 'municipal_type9_id_7',
 'municipal_type9_id_8',
 'municipal_type9_id_9',
 'municipality_Aarau',
 'municipality_Aarburg',
 'municipality_Aarwangen',
 'municipality_Abtwil',
 'municipality_Adelboden',
 'municipality_Adligenswil',
 'municipality_Adliswil',
 'municipality_Affoltern am Albis',
 'municipality_Agno',
 'municipality_Allschwil',
 'municipality_Altdorf (UR)',
 'municipality_Altendorf',
 'municipality_Altstätten',
 'municipality_Andelfingen',
 'municipality_Anières',
 'municipality_Anniviers',
 'municipality_Appenzell',
 'municipality_Arbaz',
 'municipality_Arbon',
 'municipality_Arconciel',
 'municipality_Arlesheim',
 'municipality_Ascona',
 'municipality_Attiswil',
 'municipality_Auboranges',
 'municipality_Ayent',
 'municipality_Baar',
 'municipality_Bad Ragaz',
 'municipality_Bad Zurzach',
 'municipality_Baden',
 'municipality_Bagnes',
 'municipality_Balgach',
 'municipality_Basel',
 'municipality_Bassersdorf',
 'municipality_Beinwil am See',
 'municipality_Bellinzona',
 'municipality_Bellmund',
 'municipality_Bellwald',
 'municipality_Belmont-Broye',
 'municipality_Belmont-sur-Lausanne',
 'municipality_Belp',
 'municipality_Bergdietikon',
 'municipality_Bergün/Bravuogn',
 'municipality_Berikon',
 'municipality_Beringen',
 'municipality_Bern',
 'municipality_Bernex',
 'municipality_Beromünster',
 'municipality_Bettwil',
 'municipality_Bex',
 'municipality_Biberstein',
 'municipality_Biel/Bienne',
 'municipality_Binningen',
 'municipality_Birmensdorf (ZH)',
 'municipality_Birmenstorf (AG)',
 'municipality_Birr',
 'municipality_Birrwil',
 'municipality_Bleienbach',
 'municipality_Blonay',
 'municipality_Blumenstein',
 'municipality_Bodio',
 'municipality_Bolligen',
 'municipality_Boniswil',
 'municipality_Bottighofen',
 'municipality_Bottmingen',
 'municipality_Breil/Brigels',
 'municipality_Breitenbach',
 'municipality_Bremgarten (AG)',
 'municipality_Brissago',
 'municipality_Buchs (AG)',
 'municipality_Buchs (SG)',
 'municipality_Bulle',
 'municipality_Bätterkinden',
 'municipality_Bösingen',
 'municipality_Böttstein',
 'municipality_Bözberg',
 'municipality_Bülach',
 'municipality_Bürglen (UR)',
 'municipality_Büron',
 'municipality_Caslano',
 'municipality_Chalais',
 'municipality_Charrat',
 'municipality_Chermignon',
 'municipality_Chiasso',
 'municipality_Chippis',
 'municipality_Choulex',
 'municipality_Chur',
 'municipality_Châtel-Saint-Denis',
 'municipality_Chéserex',
 'municipality_Chêne-Bougeries',
 'municipality_Coldrerio',
 "municipality_Collina d'Oro",
 'municipality_Conthey',
 'municipality_Corsier (GE)',
 'municipality_Crissier',
 'municipality_Cudrefin',
 'municipality_Cugy (FR)',
 'municipality_Cureglia',
 'municipality_Dachsen',
 'municipality_Dallenwil',
 'municipality_Davos',
 'municipality_Degersheim',
 'municipality_Delémont',
 'municipality_Derendingen',
 'municipality_Dietikon',
 'municipality_Dintikon',
 'municipality_Disentis/Mustér',
 'municipality_Domat/Ems',
 'municipality_Dulliken',
 'municipality_Dällikon',
 'municipality_Döttingen',
 'municipality_Ebikon',
 'municipality_Egerkingen',
 'municipality_Eggenwil',
 'municipality_Egliswil',
 'municipality_Eiken',
 'municipality_Einsiedeln',
 'municipality_Embrach',
 'municipality_Engelberg',
 'municipality_Erlinsbach (AG)',
 'municipality_Ermatingen',
 'municipality_Eschenz',
 'municipality_Fahrwangen',
 'municipality_Felben-Wellhausen',
 'municipality_Fieschertal',
 'municipality_Flims',
 'municipality_Flums',
 'municipality_Flüelen',
 'municipality_Flühli',
 'municipality_Forel (Lavaux)',
 'municipality_Frauenkappelen',
 'municipality_Freienbach',
 'municipality_Freienwil',
 'municipality_Frick',
 'municipality_Frutigen',
 'municipality_Fully',
 'municipality_Fällanden',
 'municipality_Füllinsdorf',
 'municipality_Gambarogno',
 'municipality_Genève',
 'municipality_Gersau',
 'municipality_Gibloux',
 'municipality_Giebenach',
 'municipality_Gipf-Oberfrick',
 'municipality_Gland',
 'municipality_Glarus',
 'municipality_Glarus Nord',
 'municipality_Glarus Süd',
 'municipality_Gletterens',
 'municipality_Golaten',
 'municipality_Goldach',
 'municipality_Gossau (SG)',
 'municipality_Gossau (ZH)',
 'municipality_Greppen',
 'municipality_Grindelwald',
 'municipality_Grossaffoltern',
 'municipality_Grosshöchstetten',
 'municipality_Gryon',
 'municipality_Gränichen',
 'municipality_Grüningen',
 'municipality_Hallwil',
 'municipality_Hasle bei Burgdorf',
 'municipality_Hausen (AG)',
 'municipality_Haute-Sorne',
 'municipality_Heimberg',
 'municipality_Hendschiken',
 'municipality_Herdern',
 'municipality_Hergiswil bei Willisau',
 'municipality_Herisau',
 'municipality_Hermance',
 'municipality_Hettlingen',
 'municipality_Hittnau',
 'municipality_Hochdorf',
 'municipality_Hombrechtikon',
 'municipality_Horgen',
 'municipality_Hornussen',
 'municipality_Horw',
 'municipality_Huttwil',
 'municipality_Hägendorf',
 'municipality_Hölstein',
 'municipality_Icogne',
 'municipality_Ilanz/Glion',
 'municipality_Interlaken',
 'municipality_Jegenstorf',
 'municipality_Jongny',
 'municipality_Kemmental',
 'municipality_Kerns',
 'municipality_Kilchberg (ZH)',
 'municipality_Klingnau',
 'municipality_Klosters-Serneus',
 'municipality_Kloten',
 'municipality_Koppigen',
 'municipality_Kradolf-Schönenberg',
 'municipality_Krattigen',
 'municipality_Kriens',
 'municipality_Küssnacht (SZ)',
 'municipality_Küttigen',
 'municipality_La Tour-de-Peilz',
 'municipality_Laax',
 'municipality_Lachen',
 'municipality_Landquart',
 'municipality_Langendorf',
 'municipality_Langenthal',
 'municipality_Lantsch/Lenz',
 'municipality_Lausanne',
 'municipality_Lax',
 'municipality_Le Locle',
 'municipality_Le Mouret',
 'municipality_Leibstadt',
 'municipality_Leissigen',
 'municipality_Lenzburg',
 'municipality_Les Brenets',
 'municipality_Leukerbad',
 'municipality_Lommis',
 'municipality_Losone',
 'municipality_Lugano',
 'municipality_Luzern',
 'municipality_Lyss',
 'municipality_Magden',
 'municipality_Malans',
 'municipality_Maracon',
 'municipality_Martigny',
 'municipality_Matzingen',
 'municipality_Maur',
 'municipality_Meggen',
 'municipality_Meisterschwanden',
 'municipality_Melchnau',
 'municipality_Mellingen',
 'municipality_Menziken',
 'municipality_Menzingen',
 'municipality_Menznau',
 'municipality_Messen',
 'municipality_Minusio',
 'municipality_Montagny (FR)',
 'municipality_Monteceneri',
 'municipality_Montreux',
 'municipality_Muhen',
 'municipality_Muralto',
 'municipality_Muri (AG)',
 'municipality_Märstetten',
 'municipality_Möhlin',
 'municipality_Mörel-Filet',
 'municipality_Möriken-Wildegg',
 'municipality_Münchenstein',
 'municipality_Münchwilen (AG)',
 'municipality_Münsingen',
 'municipality_Naters',
 'municipality_Nendaz',
 'municipality_Neuchâtel',
 'municipality_Neuendorf',
 'municipality_Niederbipp',
 'municipality_Niederglatt',
 'municipality_Niedergösgen',
 'municipality_Niederhünigen',
 'municipality_Nürensdorf',
 'municipality_Oberbuchsiten',
 'municipality_Oberbüren',
 'municipality_Oberdorf (SO)',
 'municipality_Oberengstringen',
 'municipality_Oberentfelden',
 'municipality_Obergoms',
 'municipality_Oberhofen am Thunersee',
 'municipality_Oberlunkhofen',
 'municipality_Oberrohrdorf',
 'municipality_Oberrüti',
 'municipality_Oberwil (BL)',
 'municipality_Oberwil-Lieli',
 'municipality_Oftringen',
 'municipality_Ollon',
 'municipality_Olsberg',
 'municipality_Olten',
 'municipality_Opfikon',
 'municipality_Paradiso',
 'municipality_Payerne',
 'municipality_Pfaffnau',
 'municipality_Pfäffikon',
 'municipality_Pieterlen',
 'municipality_Plateau de Diesse',
 'municipality_Pontresina',
 'municipality_Pratteln',
 'municipality_Prilly',
 'municipality_Puidoux',
 'municipality_Péry-La Heutte',
 'municipality_Quarten',
 'municipality_Rafz',
 'municipality_Rapperswil-Jona',
 'municipality_Rebstein',
 'municipality_Regensdorf',
 'municipality_Reigoldswil',
 'municipality_Reinach (AG)',
 'municipality_Rheineck',
 'municipality_Rheinfelden',
 'municipality_Riaz',
 'municipality_Rickenbach (SO)',
 'municipality_Rickenbach (ZH)',
 'municipality_Riddes',
 'municipality_Riehen',
 'municipality_Riggisberg',
 'municipality_Rohrbachgraben',
 'municipality_Romanshorn',
 'municipality_Rorschacherberg',
 'municipality_Rovio',
 'municipality_Rubigen',
 'municipality_Rupperswil',
 'municipality_Rüte',
 'municipality_Rüthi (SG)',
 'municipality_Safenwil',
 'municipality_Saillon',
 'municipality_Saint-Aubin-Sauges',
 'municipality_Saint-Prex',
 'municipality_Savièse',
 'municipality_Schaffhausen',
 'municipality_Schattdorf',
 'municipality_Schleitheim',
 'municipality_Schwarzenberg',
 'municipality_Schöfflisdorf',
 'municipality_Schönenbuch',
 'municipality_Schüpfen',
 'municipality_Scuol',
 'municipality_Seelisberg',
 'municipality_Selzach',
 'municipality_Sennwald',
 'municipality_Seon',
 'municipality_Sevelen',
 'municipality_Sion',
 'municipality_Sorens',
 'municipality_St. Antoni',
 'municipality_St. Gallen',
 'municipality_St. Margrethen',
 'municipality_St. Moritz',
 'municipality_Staffelbach',
 'municipality_Starrkirch-Wil',
 'municipality_Stein am Rhein',
 'municipality_Stettlen',
 'municipality_Strengelbach',
 'municipality_Stäfa',
 'municipality_Suhr',
 'municipality_Sumiswald',
 'municipality_Surpierre',
 'municipality_Sursee',
 'municipality_Tannay',
 'municipality_Tegerfelden',
 'municipality_Terre di Pedemonte',
 'municipality_Thalwil',
 'municipality_Thayngen',
 'municipality_Thierachern',
 'municipality_Thun',
 'municipality_Toffen',
 'municipality_Troinex',
 'municipality_Turgi',
 'municipality_Tägerwilen',
 'municipality_Unterkulm',
 'municipality_Unterseen',
 'municipality_Urtenen-Schönbühl',
 'municipality_Utzenstorf',
 'municipality_Uzwil',
 'municipality_Val-de-Ruz',
 'municipality_Vandoeuvres',
 'municipality_Vaz/Obervaz',
 'municipality_Vechigen',
 'municipality_Veyrier',
 'municipality_Villars-sur-Glâne',
 'municipality_Volketswil',
 'municipality_Vuisternens-devant-Romont',
 'municipality_Vétroz',
 'municipality_Wagenhausen',
 'municipality_Walchwil',
 'municipality_Walkringen',
 'municipality_Wallisellen',
 'municipality_Walperswil',
 'municipality_Waltenschwil',
 'municipality_Wangen bei Olten',
 'municipality_Wangen-Brüttisellen',
 'municipality_Wattwil',
 'municipality_Weggis',
 'municipality_Weiach',
 'municipality_Weisslingen',
 'municipality_Welschenrohr',
 'municipality_Wettingen',
 'municipality_Wettswil am Albis',
 'municipality_Widnau',
 'municipality_Wiedlisbach',
 'municipality_Wil (SG)',
 'municipality_Wil (ZH)',
 'municipality_Wildhaus-Alt St. Johann',
 'municipality_Windisch',
 'municipality_Winkel',
 'municipality_Winterthur',
 'municipality_Wohlen (AG)',
 'municipality_Wollerau',
 'municipality_Würenlingen',
 'municipality_Würenlos',
 'municipality_Yvonand',
 'municipality_Zuchwil',
 'municipality_Zufikon',
 'municipality_Zug',
 'municipality_Zunzgen',
 'municipality_Zuzgen',
 'municipality_Zürich',
 'neighbourhood',
 'noise_level',
 'num_rooms',
 'ogroup_haus',
 'ogroup_wohnung',
 'otype_attikawohnung',
 'otype_bauernhaus',
 'otype_chalet',
 'otype_dachwohnung',
 'otype_doppel',
 'otype_doppeleinfamilienhaus',
 'otype_duplex',
 'otype_einfamilienhaus',
 'otype_etagenwohnung',
 'otype_loft',
 'otype_maisonette',
 'otype_maisonette / duplex',
 'otype_mansarde',
 'otype_moebliertes',
 'otype_möblierte wohnung',
 'otype_neubauwohnung',
 'otype_reihen',
 'otype_reihenfamilienhaus',
 'otype_reihenhaus',
 'otype_reihenmittehaus',
 'otype_terrassenhaus',
 'otype_terrassenwohnung',
 'otype_villa',
 'otype_villa / schloss',
 'otype_wohnung',
 'planning_region_id_1001',
 'planning_region_id_1002',
 'planning_region_id_1003',
 'planning_region_id_1005',
 'planning_region_id_1006',
 'planning_region_id_101',
 'planning_region_id_102',
 'planning_region_id_103',
 'planning_region_id_104',
 'planning_region_id_105',
 'planning_region_id_106',
 'planning_region_id_107',
 'planning_region_id_108',
 'planning_region_id_109',
 'planning_region_id_110',
 'planning_region_id_1101',
 'planning_region_id_1102',
 'planning_region_id_1103',
 'planning_region_id_1106',
 'planning_region_id_111',
 'planning_region_id_1201',
 'planning_region_id_1301',
 'planning_region_id_1401',
 'planning_region_id_1601',
 'planning_region_id_1706',
 'planning_region_id_1707',
 'planning_region_id_1708',
 'planning_region_id_1709',
 'planning_region_id_1710',
 'planning_region_id_1711',
 'planning_region_id_1801',
 'planning_region_id_1802',
 'planning_region_id_1803',
 'planning_region_id_1805',
 'planning_region_id_1808',
 'planning_region_id_1810',
 'planning_region_id_1811',
 'planning_region_id_1901',
 'planning_region_id_1903',
 'planning_region_id_1904',
 'planning_region_id_1905',
 'planning_region_id_1906',
 'planning_region_id_1907',
 'planning_region_id_1908',
 'planning_region_id_1910',
 'planning_region_id_1911',
 'planning_region_id_1912',
 'planning_region_id_1915',
 'planning_region_id_1916',
 'planning_region_id_1917',
 'planning_region_id_2001',
 'planning_region_id_2002',
 'planning_region_id_2003',
 'planning_region_id_2004',
 'planning_region_id_2006',
 'planning_region_id_205',
 'planning_region_id_206',
 'planning_region_id_207',
 'planning_region_id_2101',
 'planning_region_id_2102',
 'planning_region_id_2103',
 'planning_region_id_2104',
 'planning_region_id_2105',
 'planning_region_id_213',
 'planning_region_id_215',
 'planning_region_id_216',
 'planning_region_id_217',
 'planning_region_id_219',
 'planning_region_id_2201',
 'planning_region_id_2202',
 'planning_region_id_2204',
 'planning_region_id_2206',
 'planning_region_id_2207',
 'planning_region_id_2208',
 'planning_region_id_2212',
 'planning_region_id_2216',
 'planning_region_id_2217',
 'planning_region_id_2219',
 'planning_region_id_2301',
 'planning_region_id_2302',
 'planning_region_id_2304',
 'planning_region_id_2305',
 'planning_region_id_2306',
 'planning_region_id_2307',
 'planning_region_id_2401',
 'planning_region_id_2402',
 'planning_region_id_2403',
 'planning_region_id_2501',
 'planning_region_id_2601',
 'planning_region_id_301',
 'planning_region_id_302',
 'planning_region_id_303',
 'planning_region_id_305',
 'planning_region_id_306',
 'planning_region_id_401',
 'planning_region_id_501',
 'planning_region_id_502',
 'planning_region_id_503',
 'planning_region_id_601',
 'planning_region_id_701',
 'planning_region_id_803',
 'planning_region_id_901',
 'price',
 'tags_erdgeschoss',
 'tags_haus',
 'tags_keller',
 'tags_modern',
 'tags_neubau',
 'tags_raum',
 'tags_villa',
 'tags_wohnung',
 'tourism_region_id_1',
 'tourism_region_id_10',
 'tourism_region_id_11',
 'tourism_region_id_12',
 'tourism_region_id_13',
 'tourism_region_id_14',
 'tourism_region_id_2',
 'tourism_region_id_3',
 'tourism_region_id_4',
 'tourism_region_id_5',
 'tourism_region_id_6',
 'tourism_region_id_7',
 'tourism_region_id_8',
 'tourism_region_id_9',
 'urban_character_id_0',
 'urban_character_id_1',
 'urban_character_id_2',
 'urban_character_id_3',
 'urban_character_id_4',
 'urban_character_id_5',
 'urban_character_id_6',
 'was_renovated'}

In [9]:
"""
user Input:

  living_area
  num_rooms
  build_year
  last_renovation_year

  otype_id

  street 
  municipality_id

longitude, latitude --> look up from the address
noise_level --> look up

tags: let the user add them / offer a form???
"""

# Sample user input matching the field list in the notes above.
in_params = dict(
    living_area=120,
    num_rooms=4,
    otype_id=30,
    street="Paradeplatz",
    municipality_id=1,
    build_year=1900,
    last_renovation_year=1990,
    tags=[],
)

def get_noise_level(long, lat):
    """Placeholder for a noise-level lookup at a coordinate.

    Both arguments are currently ignored and the result is always ``None``.
    """
    # TODO: implement the actual lookup.
    return None

def get_long_lat(street, municipality_id):
    """Placeholder for resolving an address to a ``(longitude, latitude)`` pair.

    Both arguments are currently ignored and the result is always
    ``(None, None)``.
    """
    # TODO: implement the actual geocoding.
    return None, None

def user_input_to_df(parameters):
    """Build a single-row DataFrame from one set of user input.

    Parameters:
        parameters: the user input, either an object exposing the fields as
            attributes or a mapping such as ``in_params`` keyed by field name.
            Fields read: ``street``, ``municipality_id``, ``otype_id``,
            ``living_area``, ``num_rooms``, ``build_year``,
            ``last_renovation_year`` and ``tags``.

    Returns:
        The user-supplied values joined with the columns returned by
        ``load_additional``. When no noise level can be determined for the
        address, ``noise_level`` is taken from the ``m_noise_level`` column.

    Raises:
        AttributeError: a field is missing and ``parameters`` is not a mapping.
        KeyError: ``parameters`` is a mapping that lacks a field.
    """
    from collections.abc import Mapping

    def field(name):
        # Attribute access first, so inputs that worked before behave the
        # same; plain mappings are additionally supported by key lookup.
        try:
            return getattr(parameters, name)
        except AttributeError:
            if isinstance(parameters, Mapping):
                return parameters[name]
            raise

    # search long lat
    long, lat = get_long_lat(field('street'), field('municipality_id'))
    # search noise_level
    noise_level = get_noise_level(long, lat)

    df = pd.DataFrame([{
        'living_area': field('living_area'),
        'num_rooms': field('num_rooms'),
        'build_year': field('build_year'),
        'last_renovation_year': field('last_renovation_year'),
        'tags': field('tags'),
        'noise_level': noise_level,
    }]).join(load_additional(field('municipality_id'), field('otype_id')))

    # No noise level found for the address: use the municipality-level value.
    if noise_level is None:
        df['noise_level'] = df['m_noise_level']

    return df

In [ ]: