notebook.community

Edit and run



In [1]:

    
import pandas as pd
import statsmodels.api as sm
import numpy as np
from scipy.stats import logistic
import math
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib






    



WARNING: pylab import has clobbered these variables: ['logistic']
`%matplotlib` prevents importing * from pylab and numpy



In [2]:

    
dob = pd.read_csv('snapshot_data/2014-09-17/property_indexes/dob-index.csv', index_col=0)



In [3]:

    
dob.fillna(value=0, inplace=True)



In [4]:

    
dob['total'] = dob.sum(axis=1)



In [23]:

    
dob.ix[1990:]['total']









    Out[23]:





1990    12987
1991    10692
1992     9168
1993     7059
1994     5114
1995     3019
1996     1674
1997      836
1998      471
1999      307
2000      199
2001      122
2002      100
2003       71
2004       57
2005       35
2006       28
2007       24
2008       13
2009       17
2010       23
2011       19
2012       20
2013       32
2014        7
2411        1
2426        2
Name: total, dtype: float64



In [5]:

    
dob['ratio'] = (dob['total'] - dob['male']) / dob['total']



In [6]:

    
dob['year'] = dob.index
dob['shift-year'] = dob['year'] - 1800



In [7]:

    
dob.ix[1800:1980]['ratio'].plot(kind='line')









    Out[7]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09a5ef5d10>



In [8]:

    
logit = sm.Logit(dob.ix[1800:1990]['ratio'], dob.ix[1800:1990]['shift-year'])



In [9]:

    
result = logit.fit()









    



Optimization terminated successfully.
         Current function value: 0.344498
         Iterations 5



In [10]:

    
result.summary()









    Out[10]:





Logit Regression Results

  Dep. Variable:        ratio         No. Observations:       191 


  Model:                Logit         Df Residuals:           190 


  Method:                MLE          Df Model:                 0 


  Date:           Thu, 08 Jan 2015    Pseudo R-squ.:       -0.6871


  Time:               15:17:15        Log-Likelihood:      -65.799


  converged:            True          LL-Null:             -39.002


                                    LLR p-value:          1.000 




                coef      std err       z       P>|z|  [95.0% Conf. Int.] 


  shift-year     -0.0146      0.002     -7.109   0.000     -0.019    -0.011



In [11]:

    
result.params[0]









    Out[11]:





-0.014622907346901763



In [12]:

    
result.model









    Out[12]:





<statsmodels.discrete.discrete_model.Logit at 0x7f09d06269d0>



In [13]:

    
def sigmoid(x):
    b0 = 1
    b1 = -result.params[0]
    exponent = (b0 + ((x)*b1))
    return  1 / (1 + math.exp(-1 * exponent) )

def invsigmoid(x):
    return 1 / sigmoid(x)



In [14]:

    
dob['logistic'] = dob['shift-year'].apply(sigmoid)



In [15]:

    
dob.ix[1800:1990][['logistic','ratio']].plot()









    Out[15]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f09a5ec05d0>



In [16]:

    
5*math.e**2









    Out[16]:





36.94528049465325



In [18]:

    
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import sympy as sym

"""
create a function to fit with your data. a, b, c and d are the coefficients
that curve_fit will calculate for you. 
In this part you need to guess and/or use mathematical knowledge to find
a function that resembles your data
"""
def mypoly(x, a, b, c, d):
    return a*x**3 + b*x**2 +c*x + d

def myexp(x, a, b,c, d):
    return (a**((b*x)+c)) +d

def mypow(x, a,b,c):
    return  ((x)**(b)) +c


"""
make the curve_fit
"""
for func in [mypoly, myexp, mypow]:
    x = list(dob.ix[1800:1980]['ratio'].index)
    y = list(dob.ix[1800:1980]['ratio'])

    
    popt, pcov = curve_fit(func, x, y, maxfev=1000000)
    print 'pcov', pcov

    """
    Plot your data
    """
    plt.plot(x, y, 'ro',label="Original Data")

    """
    brutal force to avoid errors
    """    
    x = [float(xn) for xn in x] #every element (xn) in x becomes a float
    y = [float(yn) for yn in y] #every element (yn) in y becomes a float
    x = np.array(x) #transform your data in a numpy array, 
    y = np.array(y) #so the curve_fit can work



    """
    The result is:
    popt[0] = a , popt[1] = b, popt[2] = c and popt[2] = d of the function,
    so f(x) = popt[0]*x**3 + popt[1]*x**2 + popt[2]*x + popt[3].
    """
    print "a = %s , b = %s, c = %s, d = %s" % (popt[0], popt[1], popt[2], popt[3] if len(popt)==4 else None)

    """
    Use sympy to generate the latex sintax of the function
    """
    xs = sym.Symbol('\lambda')    
    tex = sym.latex(func(xs,*popt)).replace('$', '')
    plt.title(r'$f(\lambda)= %s$' %(tex),fontsize=16)

    """
    Print the coefficients and plot the funcion.
    """

    plt.plot(x, func(x, *popt), label="Fitted Curve") #same as line above \/
    #plt.plot(x, popt[0]*x**3 + popt[1]*x**2 + popt[2]*x + popt[3], label="Fitted Curve") 

    plt.legend(loc='upper left')
    plt.show()









    



pcov [[  2.02482052e-17  -1.14807353e-13   2.16886454e-10  -1.36513136e-07]
 [ -1.14807353e-13   6.51000487e-10  -1.22990766e-06   7.74181850e-04]
 [  2.16886454e-10  -1.22990766e-06   2.32376497e-03  -1.46282103e+00]
 [ -1.36513136e-07   7.74181850e-04  -1.46282103e+00   9.20913276e+02]]
a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013






    












    



pcov [[  3.01147547e+04  -2.59627896e+06   5.42091393e+09  -1.76199201e-01]
 [ -2.59627896e+06   2.23832619e+08  -4.67352462e+11   1.51905520e+01]
 [  5.42091393e+09  -4.67352462e+11   9.75810965e+14  -3.17171987e+04]
 [ -1.76199183e-01   1.51905505e+01  -3.17171953e+04   7.07313684e-06]]
a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027






    












    



pcov [[ inf  inf  inf]
 [ inf  inf  inf]
 [ inf  inf  inf]]
a = 1.0 , b = 0.260677743372, c = -7.03174236265, d = None



In [ ]:

    
myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)(2034)



In [20]:

    
def myexp_f( a, b,c, d):
    return lambda x: (a**((b*x)+c)) +d

def mypoly_f(a,b,c,d):
    return lambda x: a*x**3 + b*x**2 +c*x + d


myexp_at_zero = lambda x: abs(myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)(x) - 0.5)

mypoly_at_zero = lambda x: abs(mypoly_f(a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013)(x) - 0.5)

from scipy.optimize import minimize

print minimize(myexp_at_zero, (2100))
print minimize(mypoly_at_zero,(2100))









    



   status: 2
  success: False
     njev: 44
     nfev: 144
 hess_inv: array([[ 643.99976828]])
      fun: 3.212685673048554e-11
        x: array([ 2034.17023164])
  message: 'Desired error not necessarily achieved due to precision loss.'
      jac: array([ 0.00232247])
   status: 2
  success: False
     njev: 49
     nfev: 159
 hess_inv: array([[ 16654.7000672]])
      fun: 7.298694981727749e-11
        x: array([ 2037.01251129])
  message: 'Desired error not necessarily achieved due to precision loss.'
      jac: array([-0.00400543])



In [ ]:



In [ ]:

Dep. Variable:	ratio	No. Observations:	191
Model:	Logit	Df Residuals:	190
Method:	MLE	Df Model:	0
Date:	Thu, 08 Jan 2015	Pseudo R-squ.:	-0.6871
Time:	15:17:15	Log-Likelihood:	-65.799
converged:	True	LL-Null:	-39.002
		LLR p-value:	1.000

	coef	std err	z	P>\|z\|	[95.0% Conf. Int.]
shift-year	-0.0146	0.002	-7.109	0.000	-0.019 -0.011