In [1]:
    
import sys
    
In [2]:
    
# Report which interpreter build is executing this notebook.
print("Following are your python version details:\n%s" % (sys.version,))
    
    
In [3]:
    
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
    
In [4]:
    
# Configure seaborn's plot appearance for the notebook: the "poster" scaling
# context and the "ticks" axes style.
sns.set_context("poster")
sns.set_style("ticks")
    
In [5]:
    
# Report the versions of the core libraries used by this notebook.
# print(...) with %-formatting runs unchanged on Python 2 and Python 3, unlike
# the Python 2-only print statement. The two spaces after each colon keep the
# text identical to what the statement form printed.
print("Numpy version:  %s" % np.__version__)
print("Pandas version:  %s" % pd.__version__)
print("Matplotlib version:  %s" % plt.matplotlib.__version__)
print("Seaborn version:  %s" % sns.__version__)
    
    
In [6]:
    
# Sample x on [-10, 10) in steps of 0.14 and evaluate the parabola y = x^2.
x = np.arange(-10, 10, 0.14)
y = x ** 2
# print(...) with %-formatting works on both Python 2 and Python 3. Each shape
# tuple is wrapped in a 1-tuple so that "%s" formats it as a single value.
print("x.shape:  %s" % (x.shape,))
print("y.shape:  %s" % (y.shape,))
    
    
More details at: http://matplotlib.org/users/pyplot_tutorial.html
In [7]:
    
# Line plot of y against x with red circular markers, drawn through an explicit
# Axes handle instead of the implicit pyplot "current axes".
fig, ax = plt.subplots()
ax.plot(x, y, marker="o", color="r", label="demo")
ax.set_xlabel("X axis")
ax.set_ylabel("Y axis")
ax.set_title("Demo plot")
ax.legend()
    
    Out[7]:
    
More details at: http://pandas.pydata.org/pandas-docs/stable/tutorials.html
In [8]:
    
# Build the demo frame: the (x, y) samples plus two random integer columns.
# A locally seeded generator makes G and E identical on every run
# (Restart & Run All) without touching NumPy's global random state.
rng = np.random.RandomState(42)
df = pd.DataFrame(
    {
        "X": x,
        "Y": y,
        "G": rng.randint(1, 10, size=x.shape),  # integers in [1, 10)
        "E": rng.randint(1, 5, size=x.shape),   # integers in [1, 5)
    },
    columns=["X", "Y", "G", "E"],  # pin the column order explicitly
)
# Last expression: display (rows, columns) as the cell output.
df.shape
    
    Out[8]:
In [9]:
    
# Preview the first five rows of the frame.
df.head(n=5)
    
    Out[9]:
In [10]:
    
# Summary statistics for the columns of df.
df.describe()
    
    Out[10]:
In [11]:
    
# Convert the two integer label columns to pandas' categorical dtype.
# Bracket indexing is used for both the read and the write of each column.
df["G"] = df["G"].astype("category")
df["E"] = df["E"].astype("category")
    
More details at: https://seaborn.pydata.org/ (formerly https://stanford.edu/~mwaskom/software/seaborn/index.html)
In [12]:
    
# Bar chart of Y aggregated with np.mean within each G group, in one colour.
sns.barplot(
    data=df,
    x="G",
    y="Y",
    estimator=np.mean,
    color="dodgerblue",
)
    
    Out[12]:
    
In [13]:
    
# Joint plot of X against Y with a regression fit (kind="reg").
# x and y are passed by keyword: later seaborn releases deprecate and then
# reject positional use of these arguments, while the keyword form is accepted
# by old and new releases alike.
# NOTE(review): later seaborn releases also rename `size` to `height` — confirm
# against the installed seaborn version before changing it.
g = sns.jointplot(x="X", y="Y", data=df, kind="reg",
                  color="r", size=7)
    
    
In [14]:
    
# Pairwise relationship plots for the columns of df, coloured by E.
sns.pairplot(data=df, hue="E")
    
    Out[14]:
    
In [15]:
    
# One panel per value of G (wrapped four to a row), with a hue level per E.
grid = sns.FacetGrid(
    df, col="G", hue="E",
    col_wrap=4, size=3, legend_out=True,
)
# Dotted horizontal reference line at y = 30 in every panel.
grid.map(plt.axhline, y=30, ls=":", c=".5")
# Y against X in each panel, with small circular markers.
t = grid.map(plt.plot, "X", "Y", marker="o", ms=4)
# Attach the legend for the hue levels; re-assigning keeps the cell output empty.
t = t.add_legend(title="E values")
# Optional layout tweak, left disabled:
#grid.fig.tight_layout(w_pad=1)
    
    
More details at: http://scikit-learn.org/stable/index.html
In [16]:
    
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import classification_report
    
In [17]:
    
# Regression inputs: a one-column feature frame ("X") and the target series ("Y").
# Both are copies, so later edits to X do not write back into df.
X = df[["X"]].copy()
y = df["Y"].copy()
# print(...) with %-formatting works on both Python 2 and Python 3. Each shape
# tuple is wrapped in a 1-tuple so that "%s" formats it as a single value.
print("X.shape:  %s" % (X.shape,))
print("Y.shape:  %s" % (y.shape,))
    
    
In [18]:
    
# Fit a plain linear regression of y on X; fit() hands back the estimator,
# which is then shown as the cell output.
model_linear = LinearRegression().fit(X, y)
model_linear
    
    Out[18]:
In [19]:
    
# In-sample predictions from the linear model.
y_pred = model_linear.predict(X)
# print(...) with %-formatting works on both Python 2 and Python 3; the shape
# tuple is wrapped in a 1-tuple so that "%s" formats it as a single value.
print("Y_pred.shape:  %s" % (y_pred.shape,))
    
    
In [20]:
    
# Add the squared term as a second feature column.
X["X^2"] = X["X"].pow(2)
    
In [21]:
    
# Show the column labels of X (expected to include the new "X^2" column).
X.columns
    
    Out[21]:
In [22]:
    
# Fit a second linear regression on the current columns of X and predict in-sample.
model_sqr = LinearRegression()
model_sqr.fit(X, y)
y_pred_sqr = model_sqr.predict(X)
# print(...) with %-formatting works on both Python 2 and Python 3; the shape
# tuple is wrapped in a 1-tuple so that "%s" formats it as a single value.
print("Y_pred_sqr.shape:  %s" % (y_pred_sqr.shape,))
    
    
In [23]:
    
# Overlay both fitted curves on the raw data, drawn through an explicit Axes
# handle instead of the implicit pyplot "current axes".
fig, ax = plt.subplots()
x_values = X["X"]
ax.scatter(x_values, y, marker="o", label="data", alpha=0.5, s=30)
ax.plot(x_values, y_pred, linestyle="--", linewidth=1.5, color="k", label="fit [linear]")
ax.plot(x_values, y_pred_sqr, linestyle="--", linewidth=1.5, color="r", label="fit [square]")
ax.set_xlabel("X")
ax.set_ylabel("Y")
ax.legend()
    
    Out[23]:
    
In [24]:
    
# Display the coefficients estimated by model_linear.
model_linear.coef_
    
    Out[24]:
In [25]:
    
# Display the coefficients estimated by model_sqr.
model_sqr.coef_
    
    Out[25]:
More details at: https://www.statsmodels.org/ (formerly http://statsmodels.sourceforge.net/)
In [26]:
    
import statsmodels.api as sm
    
In [27]:
    
# Ordinary least squares of y on the columns of X, using the array interface.
# NOTE(review): sm.OLS does not add an intercept column on its own — confirm
# that fitting without a constant term is intended here.
model = sm.OLS(endog=y, exog=X)
res = model.fit()
# Last expression: show the fit summary as the cell output.
res.summary2()
    
    Out[27]:
In [28]:
    
# The same kind of regression expressed as a formula on df: Y on X and X squared.
model = sm.OLS.from_formula(
    formula="Y ~ X + I(X**2)",
    data=df,
)
res = model.fit()
# Last expression: show the fit summary as the cell output.
res.summary2()
    
    Out[28]:
In [29]:
    
# Classification inputs: features are the X and Y columns, target is column E.
# This re-binds the names X and y used by the earlier regression cells.
X, y = df[["X", "Y"]], df["E"]
    
In [30]:
    
# Multinomial logistic regression of E on (X, Y), evaluated on the training data.
model = LogisticRegression(multi_class="multinomial", solver="lbfgs")
model.fit(X, y)
y_pred = model.predict(X)
# Calling print with one parenthesised argument behaves the same on Python 2
# and Python 3, whereas the bare print statement is a syntax error on Python 3.
print(classification_report(y, y_pred))
    
    
    
In [31]:
    
# Class-probability predictions from the fitted model for the same inputs X.
y_pred_p = model.predict_proba(X)
    
In [32]:
    
# Show the first ten entries of the predicted probabilities.
y_pred_p[:10]
    
    Out[32]:
In [33]:
    
# Multinomial logit of E on Y and X through the statsmodels formula interface.
model = sm.MNLogit.from_formula(
    formula="E ~ Y + X",
    data=df,
)
res = model.fit()
# Alternative summary layout, left disabled:
#res.summary2()
    
    
    
In [34]:
    
# Display the summary of the most recent statsmodels fit stored in res.
res.summary()
    
    Out[34]:
In [ ]: