In [1]:

import pandas as pd
import statsmodels.api as sm
import numpy as np
from scipy.stats import logistic
import math
%pylab inline




Populating the interactive namespace from numpy and matplotlib

WARNING: pylab import has clobbered these variables: ['logistic']
%matplotlib prevents importing * from pylab and numpy




In [2]:




In [3]:

dob.fillna(value=0, inplace=True)




In [4]:

dob['total'] = dob.sum(axis=1)




In [23]:

dob.ix[1990:]['total']




Out[23]:

1990    12987
1991    10692
1992     9168
1993     7059
1994     5114
1995     3019
1996     1674
1997      836
1998      471
1999      307
2000      199
2001      122
2002      100
2003       71
2004       57
2005       35
2006       28
2007       24
2008       13
2009       17
2010       23
2011       19
2012       20
2013       32
2014        7
2411        1
2426        2
Name: total, dtype: float64




In [5]:

dob['ratio'] = (dob['total'] - dob['male']) / dob['total']




In [6]:

dob['year'] = dob.index
dob['shift-year'] = dob['year'] - 1800




In [7]:

dob.ix[1800:1980]['ratio'].plot(kind='line')




Out[7]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f09a5ef5d10>




In [8]:

logit = sm.Logit(dob.ix[1800:1990]['ratio'], dob.ix[1800:1990]['shift-year'])




In [9]:

result = logit.fit()




Optimization terminated successfully.
Current function value: 0.344498
Iterations 5




In [10]:

result.summary()




Out[10]:

Logit Regression Results

Dep. Variable:       ratio        No. Observations:       191

Model:               Logit        Df Residuals:           190

Method:               MLE         Df Model:                 0

Date:          Thu, 08 Jan 2015   Pseudo R-squ.:       -0.6871

Time:              15:17:15       Log-Likelihood:      -65.799

converged:           True         LL-Null:             -39.002

LLR p-value:          1.000

coef     std err      z      P>|z| [95.0% Conf. Int.]

shift-year    -0.0146     0.002    -7.109  0.000    -0.019    -0.011




In [11]:

result.params[0]




Out[11]:

-0.014622907346901763




In [12]:

result.model




Out[12]:

<statsmodels.discrete.discrete_model.Logit at 0x7f09d06269d0>




In [13]:

def sigmoid(x):
b0 = 1
b1 = -result.params[0]
exponent = (b0 + ((x)*b1))
return  1 / (1 + math.exp(-1 * exponent) )

def invsigmoid(x):
return 1 / sigmoid(x)




In [14]:

dob['logistic'] = dob['shift-year'].apply(sigmoid)




In [15]:

dob.ix[1800:1990][['logistic','ratio']].plot()




Out[15]:

<matplotlib.axes._subplots.AxesSubplot at 0x7f09a5ec05d0>




In [16]:

5*math.e**2




Out[16]:

36.94528049465325




In [18]:

import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import sympy as sym

"""
create a function to fit with your data. a, b, c and d are the coefficients
that curve_fit will calculate for you.
In this part you need to guess and/or use mathematical knowledge to find
a function that resembles your data
"""
def mypoly(x, a, b, c, d):
return a*x**3 + b*x**2 +c*x + d

def myexp(x, a, b,c, d):
return (a**((b*x)+c)) +d

def mypow(x, a,b,c):
return  ((x)**(b)) +c

"""
make the curve_fit
"""
for func in [mypoly, myexp, mypow]:
x = list(dob.ix[1800:1980]['ratio'].index)
y = list(dob.ix[1800:1980]['ratio'])

popt, pcov = curve_fit(func, x, y, maxfev=1000000)
print 'pcov', pcov

"""
"""
plt.plot(x, y, 'ro',label="Original Data")

"""
brutal force to avoid errors
"""
x = [float(xn) for xn in x] #every element (xn) in x becomes a float
y = [float(yn) for yn in y] #every element (yn) in y becomes a float
x = np.array(x) #transform your data in a numpy array,
y = np.array(y) #so the curve_fit can work

"""
The result is:
popt[0] = a , popt[1] = b, popt[2] = c and popt[2] = d of the function,
so f(x) = popt[0]*x**3 + popt[1]*x**2 + popt[2]*x + popt[3].
"""
print "a = %s , b = %s, c = %s, d = %s" % (popt[0], popt[1], popt[2], popt[3] if len(popt)==4 else None)

"""
Use sympy to generate the latex sintax of the function
"""
xs = sym.Symbol('\lambda')
tex = sym.latex(func(xs,*popt)).replace('$', '') plt.title(r'$f(\lambda)= %s\$' %(tex),fontsize=16)

"""
Print the coefficients and plot the funcion.
"""

plt.plot(x, func(x, *popt), label="Fitted Curve") #same as line above \/
#plt.plot(x, popt[0]*x**3 + popt[1]*x**2 + popt[2]*x + popt[3], label="Fitted Curve")

plt.legend(loc='upper left')
plt.show()




pcov [[  2.02482052e-17  -1.14807353e-13   2.16886454e-10  -1.36513136e-07]
[ -1.14807353e-13   6.51000487e-10  -1.22990766e-06   7.74181850e-04]
[  2.16886454e-10  -1.22990766e-06   2.32376497e-03  -1.46282103e+00]
[ -1.36513136e-07   7.74181850e-04  -1.46282103e+00   9.20913276e+02]]
a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013

pcov [[  3.01147547e+04  -2.59627896e+06   5.42091393e+09  -1.76199201e-01]
[ -2.59627896e+06   2.23832619e+08  -4.67352462e+11   1.51905520e+01]
[  5.42091393e+09  -4.67352462e+11   9.75810965e+14  -3.17171987e+04]
[ -1.76199183e-01   1.51905505e+01  -3.17171953e+04   7.07313684e-06]]
a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027

pcov [[ inf  inf  inf]
[ inf  inf  inf]
[ inf  inf  inf]]
a = 1.0 , b = 0.260677743372, c = -7.03174236265, d = None




In [ ]:

myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)(2034)




In [20]:

def myexp_f( a, b,c, d):
return lambda x: (a**((b*x)+c)) +d

def mypoly_f(a,b,c,d):
return lambda x: a*x**3 + b*x**2 +c*x + d

myexp_at_zero = lambda x: abs(myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)(x) - 0.5)

mypoly_at_zero = lambda x: abs(mypoly_f(a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013)(x) - 0.5)

from scipy.optimize import minimize

print minimize(myexp_at_zero, (2100))
print minimize(mypoly_at_zero,(2100))




status: 2
success: False
njev: 44
nfev: 144
hess_inv: array([[ 643.99976828]])
fun: 3.212685673048554e-11
x: array([ 2034.17023164])
message: 'Desired error not necessarily achieved due to precision loss.'
jac: array([ 0.00232247])
status: 2
success: False
njev: 49
nfev: 159
hess_inv: array([[ 16654.7000672]])
fun: 7.298694981727749e-11
x: array([ 2037.01251129])
message: 'Desired error not necessarily achieved due to precision loss.'
jac: array([-0.00400543])




In [ ]:




In [ ]: