how_to_fit_data


如何使用Python擬合資料?

參考資料

準備工作:範例所需檔案下載及說明

  • O-C_ephemeris.txt (已在files4examples資料夾中無須下載):

    檔案說明(待補)

  • pulse-profile.txt (已在files4examples資料夾中無須下載):

    檔案說明(待補)

  • QSOnumber.txt (已在files4examples資料夾中無須下載):

    檔案說明(待補)

  • 其他範例檔案(待補)

範例1:以NumPy的polyfit 進行斜直線fitting


In [1]:
from astropy.io import ascii
radio_infrared_NLSy1 = ascii.read('../files4examples/Radio_Infrared_NLSy1.txt',names=['radio luminosity','infrared luminosity'])
radio_infrared_SG = ascii.read('../files4examples/Radio_Infrared_spiral.txt',names=['radio luminosity','infrared luminosity'])
radio_infrared_RG = ascii.read('../files4examples/Radio_Infrared_radio.txt',names=['radio luminosity','infrared luminosity'])

L_radio_NLSy1=radio_infrared_NLSy1['radio luminosity']
L_infrared_NLSy1=radio_infrared_NLSy1['infrared luminosity']

L_radio_SG=radio_infrared_SG['radio luminosity']
L_infrared_SG=radio_infrared_SG['infrared luminosity']

L_radio_RG=radio_infrared_RG['radio luminosity']
L_infrared_RG=radio_infrared_RG['infrared luminosity']

In [2]:
import numpy as np
x1=np.log10(L_radio_NLSy1)
x2=np.log10(L_radio_SG)
x3=np.log10(L_radio_RG)

y1=np.log10(L_infrared_NLSy1)
y2=np.log10(L_infrared_SG)
y3=np.log10(L_infrared_RG)

p_NLSy1 = np.poly1d(np.polyfit(x1,y1,1))
p_SG = np.poly1d(np.polyfit(x2,y2,1))
p_RG = np.poly1d(np.polyfit(x3,y3,1))
  • polyfit為NumPy中以多項式曲線來擬合資料的函式,回傳值為多項式的係數。 poly1d為NumPy中用來產生多項式物件的類別。

In [3]:
%matplotlib notebook
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111,aspect='equal')
plt.xlim(32,40)
plt.ylim(20,28)


x = np.linspace(32,40,100)
plt.scatter(x1,y1,facecolor='none',edgecolor='r',marker='D')
plt.plot(x,p_NLSy1(x),'r-')

plt.scatter(x2,y2,color='k',marker='+',s=80)
plt.plot(x,p_SG(x),'k-')

plt.scatter(x3,y3,facecolor='none',edgecolor='k',marker='^',s=80)
plt.plot(x,p_RG(x),'k--')


plt.xlabel(r'$\nu$ L$_\nu$ [22$\mu$m] log(W)')
plt.ylabel(r'$L_\nu$ [1.4GHz] log(W/Hz)')
plt.legend(('NLSy1','SG','RG'),loc='upper left')
plt.show()



In [4]:
print(np.polyfit(x1,y1,1))


[  1.20566047 -21.85036595]

In [5]:
print(np.polyfit(x2,y2,1))


[  1.0522583  -15.94596635]

In [6]:
print(np.polyfit(x3,y3,1))


[ 0.64201694  2.25599313]

範例2:以NumPy的linalg.lstsq 進行斜直線fitting


In [7]:
from astropy.io import ascii
radio_infrared_NLSy1 = ascii.read('../files4examples/Radio_Infrared_NLSy1.txt',names=['radio luminosity','infrared luminosity'])
radio_infrared_SG = ascii.read('../files4examples/Radio_Infrared_spiral.txt',names=['radio luminosity','infrared luminosity'])
radio_infrared_RG = ascii.read('../files4examples/Radio_Infrared_radio.txt',names=['radio luminosity','infrared luminosity'])

L_radio_NLSy1=radio_infrared_NLSy1['radio luminosity']
L_infrared_NLSy1=radio_infrared_NLSy1['infrared luminosity']

L_radio_SG=radio_infrared_SG['radio luminosity']
L_infrared_SG=radio_infrared_SG['infrared luminosity']

L_radio_RG=radio_infrared_RG['radio luminosity']
L_infrared_RG=radio_infrared_RG['infrared luminosity']

In [8]:
import numpy as np
x1=np.log10(L_radio_NLSy1)
x2=np.log10(L_radio_SG)
x3=np.log10(L_radio_RG)

y1=np.log10(L_infrared_NLSy1)
y2=np.log10(L_infrared_SG)
y3=np.log10(L_infrared_RG)


x = np.linspace(32,40,100)

In [9]:
A1 = np.vstack([x1, np.ones(len(x1))]).T
m1,c1 = np.linalg.lstsq(A1, y1)[0]

A2 = np.vstack([x2, np.ones(len(x2))]).T
m2,c2 = np.linalg.lstsq(A2, y2)[0]

A3 = np.vstack([x3, np.ones(len(x3))]).T
m3,c3 = np.linalg.lstsq(A3, y3)[0]

# Example
# y = Ap, where A = [[x 1]] and p = [[m], [c]]
# x = np.array([0, 1, 2, 3])
# A = np.vstack([x, np.ones(len(x))]).T
# A =
# array([[ 0.,  1.],
#        [ 1.,  1.],
#        [ 2.,  1.],
#        [ 3.,  1.]])

# len() : 求出矩陣中元素個數
# ones() : 令矩陣中所有元素的值等於 1
# vstack().T : 將兩個矩陣進行堆疊並轉置

In [10]:
%matplotlib notebook
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111,aspect='equal')
plt.xlim(32,40)
plt.ylim(20,28)


plt.scatter(x1, y1,facecolor='none',edgecolor='r',marker='D')
plt.plot(x, m1*x + c1, 'r',label='NLSy1')

plt.scatter(x2, y2,color='k',marker='+',s=80)
plt.plot(x, m2*x + c2, 'k', label='SG')

plt.scatter(x3, y3,facecolor='none',edgecolor='k',marker='^',s=80)
plt.plot(x, m3*x + c3, 'k--', label='RG')


plt.xlabel(r'$\nu$ $L_\nu$ [22$\mu$m] log(W)')
plt.ylabel(r'$L_\nu$ [1.4GHz] log(W/Hz)')
plt.legend(('NLSy1','SG','RG'),loc='upper left')
plt.show()



In [11]:
print(m1, c1)
print(m2, c2)
print(m3, c3)


1.20566047134 -21.8503659548
1.05225830172 -15.9459663514
0.64201694396 2.25599313427

範例3:以SciPy的linregress 進行斜直線fitting


In [12]:
from astropy.io import ascii
radio_infrared_NLSy1 = ascii.read('../files4examples/Radio_Infrared_NLSy1.txt',names=['radio luminosity','infrared luminosity'])
radio_infrared_SG = ascii.read('../files4examples/Radio_Infrared_spiral.txt',names=['radio luminosity','infrared luminosity'])
radio_infrared_RG = ascii.read('../files4examples/Radio_Infrared_radio.txt',names=['radio luminosity','infrared luminosity'])

L_radio_NLSy1=radio_infrared_NLSy1['radio luminosity']
L_infrared_NLSy1=radio_infrared_NLSy1['infrared luminosity']

L_radio_SG=radio_infrared_SG['radio luminosity']
L_infrared_SG=radio_infrared_SG['infrared luminosity']

L_radio_RG=radio_infrared_RG['radio luminosity']
L_infrared_RG=radio_infrared_RG['infrared luminosity']

In [13]:
import numpy as np
x1=np.log10(L_radio_NLSy1)
x2=np.log10(L_radio_SG)
x3=np.log10(L_radio_RG)

y1=np.log10(L_infrared_NLSy1)
y2=np.log10(L_infrared_SG)
y3=np.log10(L_infrared_RG)


from scipy import stats

slope_1, intercept_1, r_value_1, p_value_1, std_err_1 = stats.linregress(x1,y1)
slope_2, intercept_2, r_value_2, p_value_2, std_err_2 = stats.linregress(x2,y2)
slope_3, intercept_3, r_value_3, p_value_3, std_err_3 = stats.linregress(x3,y3)

In [14]:
%matplotlib notebook
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111,aspect='equal')
plt.xlim(32,40)
plt.ylim(20,28)


x = np.linspace(32,40,100)
plt.scatter(x1,y1,facecolor='none',edgecolor='r',marker='D')
plt.plot(x,slope_1*x+intercept_1,'r-')

plt.scatter(x2,y2,color='k',marker='+',s=80)
plt.plot(x,slope_2*x+intercept_2,'k-')

plt.scatter(x3,y3,facecolor='none',edgecolor='k',marker='^',s=80)
plt.plot(x,slope_3*x+intercept_3,'k--')


plt.xlabel(r'$\nu$ $L_\nu$ [22$\mu$m] log(W)')
plt.ylabel(r'$L_\nu$ [1.4GHz] log(W/Hz)')
plt.legend(('NLSy1','SG','RG'),loc='upper left')
plt.show()



In [15]:
print(slope_1, intercept_1, r_value_1, p_value_1, std_err_1)


1.20566047134 -21.8503659548 0.730529367991 5.89058883604e-25 0.0952507781472

In [16]:
print(slope_2, intercept_2, r_value_2, p_value_2, std_err_2)


1.05225830172 -15.9459663514 0.848667416498 1.01703572996e-15 0.091832278694

In [17]:
print(slope_3, intercept_3, r_value_3, p_value_3, std_err_3)


0.64201694396 2.25599313427 0.808741592761 3.11730887252e-11 0.0720461198017

範例4:以NumPy的polyfit多項式曲線擬合O-C ephemeris


In [18]:
from astropy.io import ascii
oc_ephemeris = ascii.read('../files4examples/O-C_ephemeris.txt', names=['cycles', 'delay', 'error'])
cycles = oc_ephemeris['cycles']
delay = oc_ephemeris['delay']
error = oc_ephemeris['error']
  • polyfit為NumPy中以多項式曲線來擬合資料的函式,回傳值為多項式的係數。 poly1d為NumPy中用來產生多項式物件的類別。

In [19]:
import numpy as np
p2 = np.poly1d(np.polyfit(cycles, delay, 2))
p3 = np.poly1d(np.polyfit(cycles, delay, 3))
x = np.linspace(-10000, 50000, 100)

In [20]:
%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
fig = plt.figure()
ax = fig.add_subplot(111)
plt.errorbar(cycles, delay, yerr=error, fmt='sk')
plt.hold(True)
plt.plot(x, p2(x), 'r-')
plt.plot(x, p3(x), '--')
plt.hold(False)
plt.ylim(-0.005, 0.03)
plt.legend(('Second', 'Third', 'Data'))
plt.xlabel('N (cycles)')
plt.ylabel('Delay (days)')
ax.xaxis.set_major_formatter(ScalarFormatter(useMathText=True))
ax.ticklabel_format(axis='x', style='sci', scilimits=(0,0))
plt.show()


範例5:以AstroML的LinearRegression來擬合pulse-profile

  • AstroML 是一個專門用於天文資料的統計、探勘與機器學習的Python套件,雖然Anaconda沒有預先安裝此套件,不過可透過pip指令安裝:
    pip install astroML
    pip install astroML_addons
    

In [21]:
from astropy.io import ascii
pulse_profile = ascii.read('../files4examples/pulse-profile.txt', names=['phase', 'rate', 'error'])
phase = pulse_profile['phase']
rate =pulse_profile['rate']
error = pulse_profile['error']

In [22]:
import numpy as np
from astroML.linear_model import LinearRegression

x = np.array([np.sin(2 * np.pi * phase), np.cos(2 * np.pi * phase), 
              np.sin(4 * np.pi * phase), np.cos(4 * np.pi * phase)]).T

model = LinearRegression()
model.fit(x, rate, error)
coef = model.coef_
y_pred = model.predict(x)
print(coef)


[ 88.89855452  -0.76346906   2.11349999   0.70749647   0.63919169]

In [23]:
%matplotlib notebook
import matplotlib.pyplot as plt
phase2 =  np.append(phase, phase + 1)
rate2 = np.append(rate, rate)
error2 = np.append(error, error)
y_pred2 = np.append(y_pred, y_pred)

plt.figure()
plt.errorbar(phase2, rate2, yerr=error2,  fmt="*k", label='Data')
plt.hold(True)
plt.plot(phase2, y_pred2, 'r-', label='Model')
plt.hold(False)
plt.xlabel('Phase')
plt.ylabel('Conuts/s')
plt.legend()
plt.show()


範例6:以SciPy的curve_fit來擬合pulse-profile


In [24]:
from astropy.io import ascii
pulse_profile = ascii.read('../files4examples/pulse-profile.txt', names=['phase', 'rate', 'error'])
phase = pulse_profile['phase']
rate =pulse_profile['rate']
error = pulse_profile['error']

In [25]:
from scipy.optimize import curve_fit
import numpy as np

# 定義模型
def model(x, a0, a1, a2, a3, a4):
    return ( a0 + a1 * np.sin(2 * np.pi * x) +  a2 * np.cos(2 * np.pi * x) +
            a3 * np.sin(4 * np.pi * x) + a4 * np.cos(4 * np.pi * x) )

# 曲線擬合
popt, pcov = curve_fit(model, phase, rate, sigma=error)
perr = np.sqrt(np.diag(pcov))
print(popt)
print(pcov)
print("a0 =", popt[0], "+/-", perr[0])
print("a1 =", popt[1], "+/-", perr[1])
print("a2 =", popt[2], "+/-", perr[2])
print("a3 =", popt[3], "+/-", perr[3])
print("a4 =", popt[4], "+/-", perr[4])


[ 88.89855452  -0.76346889   2.11350003   0.70749648   0.63919159]
[[ 0.04283942 -0.00036791  0.00101848  0.00034094  0.00030802]
 [-0.00036791  0.08536753  0.00034425  0.00092262  0.00060634]
 [ 0.00101848  0.00034425  0.08598668 -0.00013985  0.00110847]
 [ 0.00034094  0.00092262 -0.00013985  0.0858235  -0.00021854]
 [ 0.00030802  0.00060634  0.00110847 -0.00021854  0.08550514]]
a0 = 88.8985545178 +/- 0.206976851602
a1 = -0.763468891908 +/- 0.292177216423
a2 = 2.11350003189 +/- 0.293234852344
a3 = 0.707496479272 +/- 0.292956479157
a4 = 0.639191589345 +/- 0.292412616802

In [26]:
%matplotlib notebook
import matplotlib.pyplot as plt
phase2 =  np.append(phase, phase + 1)
rate2 = np.append(rate, rate)
error2 = np.append(error, error)
plt.figure()
plt.errorbar(phase2, rate2, yerr=error2, fmt="*k", label='Data')
plt.hold(True)
plt.plot(phase2, model(phase2, popt[0], popt[1], popt[2], popt[3], popt[4]), 'r-', label='Model')
plt.hold(False)
plt.xlabel('Phase')
plt.ylabel('Conuts/s')
plt.legend()
plt.show()


範例7:以astropy.modeling中的一維高斯曲線擬合QSO數量分佈


In [27]:
from astropy.io import ascii
data = ascii.read('../files4examples/QSOnumber.txt')
x = data['x']
y = data['y']

In [28]:
from astropy.modeling import models as mo, fitting as fit
import numpy as np
model_init = mo.Gaussian1D(amplitude=4220, mean=-0.25, stddev=0.1)
fitter = fit.LevMarLSQFitter()
fit_res = fitter(model_init, x, y)
print(fit_res.amplitude)
print(fit_res.mean)
print(fit_res.stddev)


Parameter('amplitude', value=4078.8970040617974)
Parameter('mean', value=-0.17176406050119747)
Parameter('stddev', value=0.045674628679433164)

In [29]:
%matplotlib notebook
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
plt.bar(x, y, width=x[1]-x[0], align='center', edgecolor='black', fill=False)
plt.hold(True)
plt.plot(x, fit_res(x), 'r')
plt.hold(False)
plt.xlim(-0.6, 0.6)
plt.ylim(0, 5000)
plt.xlabel('log($f_{\lambda_{3100A}}$ / $f_{\lambda_{2200A}}$)', fontsize=20)
plt.ylabel('Numbers of QSO', fontsize=20)
ax.tick_params(axis='y', labelsize=20)
plt.xticks(np.arange(-0.4,0.5,0.2), fontsize=20)
plt.show()


範例8:以astropy.modeling中多個sin的疊加來擬合pulse-profile


In [30]:
from astropy.io import ascii
pulse_profile = ascii.read('../files4examples/pulse-profile.txt', names=['phase', 'rate', 'error'])
phase = pulse_profile['phase']
rate =pulse_profile['rate']
error = pulse_profile['error']

In [31]:
from astropy.modeling import models as mo, fitting as fit
import numpy as np

# 不給初始值, 使用預設初始值
mo_init = (mo.Const1D() + 
           mo.Sine1D(frequency=1, fixed={'frequency':True, 'phase':True}) + 
           mo.Sine1D(frequency=1, phase=0.25, fixed={'frequency':True, 'phase':True}) + 
           mo.Sine1D(frequency=2, fixed={'frequency':True, 'phase':True}) +
           mo.Sine1D(frequency=2, phase=0.25, fixed={'frequency':True, 'phase':True}))
# 給初始值
# mo_init = (mo.Const1D(amplitude=np.mean(rate)) + 
#            mo.Sine1D(amplitude=-0.7, frequency=1, fixed={'frequency':True, 'phase':True}) + 
#            mo.Sine1D(amplitude=2, frequency=1, phase=0.25, fixed={'frequency':True, 'phase':True}) + 
#            mo.Sine1D(amplitude=0.7, frequency=2, fixed={'frequency':True, 'phase':True}) +
#            mo.Sine1D(amplitude=0.6, frequency=2, phase=0.25, fixed={'frequency':True, 'phase':True}))

fitter = fit.LevMarLSQFitter()
fit_res = fitter(mo_init, phase, rate)
a0 = fit_res.amplitude_0.value
a1 = fit_res.amplitude_1.value
a2 = fit_res.amplitude_2.value
a3 = fit_res.amplitude_3.value
a4 = fit_res.amplitude_4.value
print(a0, a1, a2, a3, a4)


88.9115655625 -0.760649544662 2.11718251275 0.717100412576 0.639133194806

In [32]:
%matplotlib notebook
import matplotlib.pyplot as plt
phase2 =  np.append(phase, phase + 1)
rate2 = np.append(rate, rate)
error2 = np.append(error, error)
plt.figure()
plt.errorbar(phase2, rate2, yerr=error2, fmt='*k')
plt.hold(True)
plt.plot(phase2, fit_res(phase2), 'r')
plt.hold(False)
plt.legend(('Model', 'Data'))
plt.xlabel('Phase')
plt.ylabel('Conuts/s')
plt.show()


範例9:以astropy.modeling中的二維高斯曲線擬合 (待博識補充)


In [33]:
# Gaussian fit 的 function
def get_gaussfit(bin_img):
    '''
    Propose: to fit an 2D image with an 2D gaussian function
             initial guess of parameters: 
                  x_mean & y_mean at the center of the image
                  x_width & y_width = 2 pixels
                  amplitude = max(image)
             best parameters were optimized by least-square method
    Parameter
    ----------------------
    bin_img: an 2D image
    
    ======================
    Output
    ----------------------
    object of fitting.LevMarLSQFitter()
    
    '''
    x_size=bin_img.shape[0]
    y_size=bin_img.shape[1]
    p_init = models.Gaussian2D(amplitude=np.max(bin_img),x_mean=0.5*x_size,y_mean=0.5*y_size,x_stddev=2,y_stddev=2)
    y, x = np.mgrid[:bin_img.shape[1], :bin_img.shape[0]]
    fit_p = fitting.LevMarLSQFitter()
    p = fit_p(p_init, x,y,bin_img)
    return p;

import numpy as np
import astropy.units as units
import matplotlib.pyplot as plt
from astropy.table import Table
from astroquery.vizier import Vizier
from astropy.coordinates import SkyCoord
from astropy.io import ascii
from astropy.modeling import models, fitting

#catalog_list = Vizier.find_catalogs('PPMXL')

#取消回傳的 row <= 50的限制
Vizier.ROW_LIMIT = -1

#建立格點
x=36 #全天
y=17 #全天
grid_x=np.arange(x)*10
grid_y=np.arange(y)*10-80

# 定義輸出的表格
result_table = Table(names=('RA', 'DE', 'mRA_offset','mDE_offset'), dtype=('f4', 'f4', 'f4', 'f4'))
result_table['RA'].unit=units.deg
result_table['DE'].unit=units.deg
result_table['mRA_offset'].unit=units.mas/units.year
result_table['mDE_offset'].unit=units.mas/units.year

#對每個格點在 Vizier上 query PPMXL, 搜尋半徑 10 arcminutes 
for x in grid_x:
    for y in grid_y:
        result = Vizier.query_region(SkyCoord(ra=x*units.deg, dec=y*units.deg,frame="icrs"), radius=10.0*units.arcmin, catalog=('I/317'))
        #name=str(x)+'_'+str(y)+'.csv'
        #ascii.write(result[0], name, format='csv')
        #plt.plot(result[0]['pmRA'],result[0]['pmDE'],'r.')
        bin_img, yedges, xedges = np.histogram2d(result[0]['pmRA'], result[0]['pmDE'], (20,20),range=[[-100,100],[-100,100]])
        extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
        #plt.imshow(bin_img, extent=extent, interpolation='nearest', cmap='gist_yarg', origin='lower')
        fit_results=get_gaussfit(bin_img)
        ra_off=(fit_results.x_mean-(bin_img.shape[0]*0.5))*10
        de_off=(fit_results.y_mean-(bin_img.shape[1]*0.5))*10
        #theta=1./np.cos(y)*
        result_table.add_row([x,y,ra_off,de_off])


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-33-aa5e01ca0323> in <module>()
     30 import matplotlib.pyplot as plt
     31 from astropy.table import Table
---> 32 from astroquery.vizier import Vizier
     33 from astropy.coordinates import SkyCoord
     34 from astropy.io import ascii

ImportError: No module named 'astroquery'

In [ ]:
result_table

In [ ]:
%matplotlib inline
plt.quiver(result_table['RA'],result_table['DE'],result_table['mRA_offset'],result_table['mDE_offset'])