In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
from sklearn import datasets, linear_model
from sklearn import feature_selection
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler
import statsmodels.formula.api as smf
import statsmodels.regression.linear_model as sm
from statsmodels.compat import lzip
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
%matplotlib inline
Os dados para este trabalho foram coletados de um servidor web que hospeda um site. As observações são as médias das variáveis por minuto:
In [11]:
# Load the server metrics from the CSV file and take a first look at them.
df = pd.read_csv('servidor.csv')
# A notebook cell renders only its LAST expression automatically, so a bare
# `df.head()` in the middle of the cell would be computed and then discarded.
# `display` (available by default in IPython/Jupyter kernels) shows the
# preview explicitly.
display(df.head())
# info() prints column dtypes and non-null counts straight to stdout.
df.info()
# Summary statistics of the numeric columns; rendered as the cell's output.
df.describe()
Out[11]:
In [14]:
# Ordinary least squares fit of Duracao_media_ms on the three predictors
# named in the formula; the fitted results are kept in `results` because the
# later diagnostic cells read its residuals and design matrix.
results = smf.ols(
    formula='Duracao_media_ms ~ Perc_medio_CPU + Load_avg_minute + Requests_média',
    data=df,
).fit()
# Coefficient table and goodness-of-fit statistics, rendered as the cell output.
results.summary()
Out[14]:
In [18]:
# Multicollinearity check: variance inflation factor (VIF) of every regressor.
# Every column of `df` other than the response is treated as a predictor.
# NOTE(review): this assumes all remaining columns are numeric and are the
# same ones used in the OLS formula -- confirm against the CSV schema.
X = df.drop('Duracao_media_ms',axis=1)
# `add_constant` is taken from its public home (statsmodels.tools.tools).
# The notebook's `sm` alias points at statsmodels.regression.linear_model,
# which only exposed add_constant as an incidental re-import in older
# releases, so `sm.add_constant` raises AttributeError on current versions.
# The intercept column is prepended so each VIF is computed against a model
# that includes a constant.
Xe = add_constant(X, prepend=True)
# One VIF per column of the design matrix (the first entry belongs to the
# constant itself and is usually not interpreted).
vif = [variance_inflation_factor(Xe.values, i) for i in range(Xe.shape[1])]
vif_s = pd.Series(vif, index=Xe.columns)
print(vif_s)
In [29]:
# Breusch-Pagan heteroscedasticity test on the residuals of the fitted OLS
# model, using the model's own design matrix as the explanatory variables.
# The labels below are paired, in order, with the values the test returns.
name = [
    'Lagrange multiplier statistic',
    'p-value',
    'f-value',
    'f p-value',
]
testeH = het_breuschpagan(results.resid, results.model.exog)
# List of (label, value) tuples, rendered as the cell's output.
list(zip(name, testeH))
Out[29]:
In [ ]: