In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
from scipy.stats import logistic
import math
%pylab inline
In [2]:
# Load the date-of-birth index snapshot; the first CSV column becomes the row index.
# NOTE(review): later cells treat that index as calendar years -- confirm against the file.
dob = pd.read_csv(
    'snapshot_data/2014-09-17/property_indexes/dob-index.csv',
    index_col=0,
)
In [3]:
# Treat missing cells as zero. Rebinding the name instead of using inplace=True keeps
# the cell a plain "input -> output" step and avoids mutating the loaded frame in place.
dob = dob.fillna(value=0)
In [4]:
# Row-wise total over the data columns.
# Columns that this notebook derives (here and in later cells) are excluded so that
# re-running the cell does not add 'total', 'ratio', 'year', ... into the sum.
# On a fresh top-to-bottom run none of them exist yet, so every column is summed.
derived_columns = ['total', 'ratio', 'year', 'shift-year', 'logistic']
count_columns = [col for col in dob.columns if col not in derived_columns]
dob['total'] = dob[count_columns].sum(axis=1)
In [23]:
# Show the totals for index labels 1990 and later.
# .loc performs the same label-based slice as the removed .ix indexer.
dob.loc[1990:, 'total']
Out[23]:
In [5]:
# Share of each row's total that is not in the 'male' column.
non_male = dob['total'] - dob['male']
dob['ratio'] = non_male / dob['total']
In [6]:
# Expose the row index as an ordinary column, plus a copy re-based so that 1800 maps to 0.
dob['year'] = dob.index
dob['shift-year'] = dob['year'].sub(1800)
In [7]:
# Line plot of the ratio for index labels 1800 through 1980 (both ends included).
# .loc performs the same label-based slice as the removed .ix indexer.
dob.loc[1800:1980, 'ratio'].plot(kind='line')
Out[7]:
In [8]:
# Logit model of 'ratio' on 'shift-year' over index labels 1800-1990.
# The window is sliced once with .loc (label-based, replacing the removed .ix indexer)
# so that both model inputs are guaranteed to cover the same rows.
# NOTE(review): no constant column is passed, so the model is fit without an intercept;
# the sigmoid cell below reads params[0] as the slope, so this is left unchanged -- confirm intended.
fit_window = dob.loc[1800:1990]
logit = sm.Logit(fit_window['ratio'], fit_window['shift-year'])
In [9]:
# Fit the model; the results object is reused by the cells below.
result = logit.fit()
In [10]:
# Display the fit summary as this cell's output.
result.summary()
Out[10]:
In [11]:
# Show the first fitted coefficient.
# Going through np.asarray makes the lookup explicitly positional: a bare [0] on a
# label-indexed Series is deprecated in recent pandas, and this form also works if
# params is already a plain array.
np.asarray(result.params)[0]
Out[11]:
In [12]:
# Display the model object attached to the fit results.
result.model
Out[12]:
In [13]:
def sigmoid(x, b0=1, b1=None):
    """Evaluate the logistic curve 1 / (1 + exp(-(b0 + b1 * x))) at a scalar x.

    Parameters
    ----------
    x : number
        Point at which to evaluate the curve.
    b0 : number, optional
        Intercept. Defaults to 1, the value that used to be hard-coded here.
    b1 : number, optional
        Slope. When omitted, the negated first coefficient of the global
        fitted ``result`` is used, exactly as before.

    Returns
    -------
    float
        Value in the closed interval [0, 1].
    """
    if b1 is None:
        # Explicit positional lookup; a bare [0] on a label-indexed Series is deprecated.
        b1 = -np.asarray(result.params)[0]
    exponent = b0 + x * b1
    if exponent >= 0:
        return 1 / (1 + math.exp(-exponent))
    # For negative exponents math.exp(-exponent) can overflow (OverflowError beyond
    # roughly 709); the algebraically equal form below only ever underflows to 0.0.
    growth = math.exp(exponent)
    return growth / (1 + growth)
def invsigmoid(x):
    """Return the reciprocal of sigmoid(x), i.e. 1 / sigmoid(x).

    NOTE(review): despite the name this is the multiplicative reciprocal, not the
    inverse function (logit) of the sigmoid -- confirm which one was intended.
    """
    value = sigmoid(x)
    return 1 / value
In [14]:
# Evaluate the hand-tuned logistic curve at every 'shift-year' value, element by element.
dob['logistic'] = dob['shift-year'].map(sigmoid)
In [15]:
# Overlay the logistic curve and the observed ratio for index labels 1800-1990.
# .loc performs the same label-based slice as the removed .ix indexer.
dob.loc[1800:1990, ['logistic', 'ratio']].plot()
Out[15]:
In [16]:
# Scratch calculation: five times e squared.
5*math.e**2
Out[16]:
In [18]:
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import numpy as np
import sympy as sym
"""
Define the candidate functions to fit to the data. a, b, c and d are the
coefficients that curve_fit will estimate.
Choosing a candidate takes guesswork and/or mathematical knowledge: pick a
functional form that resembles the data.
"""
def mypoly(x, a, b, c, d):
    """Cubic polynomial a*x**3 + b*x**2 + c*x + d evaluated at x."""
    cubic_term = a * x**3
    quadratic_term = b * x**2
    linear_term = c * x
    # Summed left to right, matching the order of the single-expression form.
    return cubic_term + quadratic_term + linear_term + d
def myexp(x, a, b, c, d):
    """Shifted exponential a**(b*x + c) + d evaluated at x."""
    exponent = (b * x) + c
    return (a ** exponent) + d
def mypow(x, a, b, c):
    """Scaled power law a * x**b + c evaluated at x.

    Parameters
    ----------
    x : number or array
        Point(s) at which to evaluate the model.
    a : number
        Multiplicative scale of the power term.
    b : number
        Exponent applied to x.
    c : number
        Additive offset.

    The scale `a` used to be accepted but ignored, which left curve_fit with a
    parameter that had no effect on the model and therefore could not be estimated.
    """
    return a * (x ** b) + c
"""
make the curve_fit
"""
# Fit every candidate model to the 1800-1980 ratio series and plot data plus fitted curve.
#
# The observations do not depend on the model, so they are extracted once, before the
# loop, and converted to float arrays *before* fitting (the conversion used to happen
# after curve_fit had already run). .loc is the label-based replacement for .ix.
ratio_window = dob.loc[1800:1980, 'ratio']
x = np.array(ratio_window.index, dtype=float)
y = np.array(ratio_window, dtype=float)

# Symbol used only to render the fitted formula in the plot title (raw string: '\l'
# is not a valid escape sequence).
xs = sym.Symbol(r'\lambda')

for func in [mypoly, myexp, mypow]:
    popt, pcov = curve_fit(func, x, y, maxfev=1000000)
    # Single-argument print(...) behaves the same as a Python 2 print statement
    # and as the Python 3 print function.
    print('pcov %s' % (pcov,))

    # Raw observations.
    plt.plot(x, y, 'ro', label="Original Data")

    # popt[0] = a, popt[1] = b, popt[2] = c and, for four-parameter models, popt[3] = d.
    # Three-parameter models report d as None.
    fourth = popt[3] if len(popt) == 4 else None
    print("a = %s , b = %s, c = %s, d = %s" % (popt[0], popt[1], popt[2], fourth))

    # Let sympy produce the LaTeX of the fitted function for the title.
    tex = sym.latex(func(xs, *popt)).replace('$', '')
    plt.title(r'$f(\lambda)= %s$' % (tex), fontsize=16)

    # Fitted curve evaluated at the observed x values.
    plt.plot(x, func(x, *popt), label="Fitted Curve")
    plt.legend(loc='upper left')
    plt.show()
In [ ]:
# Value of the exponential model at x = 2034 for the hard-coded coefficients below
# (presumably copied from the curve_fit output -- confirm).
# myexp from the curve-fitting cell is used because myexp_f is only defined in a later
# cell, so calling it here fails with NameError on a fresh top-to-bottom run; both
# evaluate the same expression a**(b*x + c) + d.
myexp(2034, a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)
In [20]:
def myexp_f(a, b, c, d):
    """Build a one-argument function x -> a**(b*x + c) + d with the coefficients fixed."""
    def curve(x):
        exponent = (b * x) + c
        return (a ** exponent) + d
    return curve
def mypoly_f(a, b, c, d):
    """Build a one-argument function x -> a*x**3 + b*x**2 + c*x + d with the coefficients fixed."""
    def curve(x):
        cubic_term = a * x**3
        quadratic_term = b * x**2
        linear_term = c * x
        # Summed left to right, matching the order of the single-expression form.
        return cubic_term + quadratic_term + linear_term + d
    return curve
def myexp_at_zero(x):
    """Absolute distance between the hard-coded exponential curve at x and 0.5.

    The value is zero where the curve equals 0.5, so minimising it locates that crossing.
    """
    curve = myexp_f(a = 0.987088150409 , b = -1.10594731976, c = 2309.16849805, d = 0.0383932763027)
    return abs(curve(x) - 0.5)
def mypoly_at_zero(x):
    """Absolute distance between the hard-coded cubic curve at x and 0.5.

    The value is zero where the curve equals 0.5, so minimising it locates that crossing.
    """
    curve = mypoly_f(a = 4.87416698802e-08 , b = -0.000269974433142, c = 0.498921947871, d = -307.558030013)
    return abs(curve(x) - 0.5)
from scipy.optimize import minimize
# Starting from an initial guess of 2100, search for the x at which each curve is closest to 0.5.
# Single-argument print(...) behaves the same as a Python 2 print statement and as the
# Python 3 print function. The parentheses formerly around 2100 did not make a tuple,
# so the plain number is passed.
print(minimize(myexp_at_zero, 2100))
print(minimize(mypoly_at_zero, 2100))
In [ ]:
In [ ]: