Regresión


In [22]:
import numpy as np #libreria de datos numericos

import matplotlib.pyplot as plt

import seaborn as sns
sns.set()

%matplotlib inline

import pandas as pd

In [36]:
# Simulate data from a simple linear model: y = a + B*x + noise.
a = 34    # true intercept of the simulated line
B = 100   # true slope of the simulated line

# Seed the global NumPy generator so that re-running the notebook from a
# fresh kernel reproduces exactly the same sample (and the same plot).
SEED = 42
np.random.seed(SEED)

# np.random.normal(loc, scale, size): `scale` is the standard deviation,
# not the variance.
x = np.random.normal(3, 4, 100)                    # predictor: mean 3, std 4, 100 draws
y = a + B*x + np.random.normal(0, 90, len(x))      # response: line plus zero-mean noise with std 90


# Collect both variables in one frame so they can be plotted by column name.
DF = pd.DataFrame({'x': x, 'y': y})

DF.head()


Out[36]:
x y
0 4.201274 523.079677
1 8.136716 732.464634
2 1.693414 370.752457
3 5.073713 630.349290
4 5.725034 666.353722

In [37]:
# Scatter plot of y against x with a fitted regression line drawn on top.
# (plt.scatter(x, y) would show the points only, without the fitted line.)
# x, y and data are passed by keyword: positional use was deprecated in
# seaborn 0.11 and is rejected by later releases.
sns.lmplot(x='x', y='y', data=DF)


Out[37]:
<seaborn.axisgrid.FacetGrid at 0x11c3c2150>

In [ ]:
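Correlación entre dos variables (modelo de regresión lineal simple): $y = c + Bx + e$, donde $c$ es el intercepto (`a` en el código), $B$ la pendiente y $e$ el error aleatorio.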

Tarea:
    Buscar datasets 
    
    
    
    statsmodels # https://github.com/statsmodels/statsmodels A good repository with examples!
    Funciones
        .ols (ordinary least squares) # http://statsmodels.sourceforge.net/0.6.0/examples/notebooks/generated/ols.html
        .summary #http://statsmodels.sourceforge.net/0.6.0/generated/statsmodels.iolib.summary.Summary.html

In [4]: