In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pylab as plt

# Load the semicolon-delimited table; the path is relative to the notebook's
# working directory.
df_arac = pd.read_csv(
    u'../data/arac.csv',
    sep=';',
)

In [48]:
# Display the loaded table as the cell's output for inspection.
df_arac


Out[48]:
Year All Car Minibus Bus SmallTruck Truck Motorcycle SpecialVehicles Machinery Tractor
0 1966 231977 91469 10913 12041 31462 47931 32099 3610 2452 NaN
1 1967 284194 112367 16008 13332 39927 56889 39647 3641 2383 NaN
2 1968 318768 125375 18967 13948 43441 62616 47062 4033 3326 NaN
3 1969 354398 137345 20540 15529 48655 69478 52959 4568 5324 NaN
4 1970 369808 137771 20916 15980 52152 70730 60994 5070 6195 NaN
5 1971 403880 153676 22380 17140 57011 73433 68417 5349 6474 NaN
6 1972 460087 187272 25559 18504 62796 78920 74402 5747 6887 NaN
7 1973 543318 240360 30055 20011 71043 86780 80860 6420 7789 NaN
8 1974 647947 313160 34122 21404 81025 95309 86028 7338 9561 NaN
9 1975 785920 403546 40623 23763 98579 108381 91421 8450 11157 NaN
10 1976 920141 488894 46066 25388 116861 122176 96984 9224 14548 NaN
11 1977 1042239 560424 51999 27096 134213 138093 102127 10137 18150 NaN
12 1978 1142561 624438 56836 28559 144695 146551 109890 10698 20894 NaN
13 1979 1566405 688687 61596 30634 155278 157095 120378 11291 22875 318571
14 1980 1696681 742252 64707 32783 165821 164893 137931 11777 24090 352427
15 1981 1802742 776432 66514 33839 172269 172372 160557 12459 26246 382054
16 1982 1901926 811465 69598 35432 178762 180772 182795 13386 30160 399556
17 1983 2041244 856350 73585 38478 186427 190277 217327 14705 33532 430563
18 1984 2215174 919577 80697 43638 198106 197721 256338 16312 39445 463340
19 1985 2391357 983444 87951 47119 212505 205496 289052 17639 45561 502590
20 1986 2641353 1087234 97917 50798 224755 217111 327326 19448 50819 565945
21 1987 2887287 1193021 106314 53554 233480 225872 369894 21236 55129 628787
22 1988 3140265 1310257 112885 56172 240718 234166 420889 23301 58300 683577
23 1989 3388259 1434830 118026 58859 248567 241392 472853 25060 60191 728481
24 1990 3750678 1649879 125399 63700 263407 257353 531941 26519 63024 769456
25 1991 4101975 1864344 133632 68973 280891 273409 590488 28606 66981 794651
26 1992 4584717 2181388 145312 75592 308180 287160 655347 31158 72000 828580
27 1993 5250622 2619852 159900 84254 354290 305511 743320 33703 79233 870559
28 1994 5606712 2861640 166424 87545 374473 313771 788786 35495 83072 895506
29 1995 5922859 3058511 173051 90197 397743 321421 819922 37272 87214 937528
30 1996 6305707 3274156 182694 94978 442788 333269 854150 40212 95318 988142
31 1997 6863462 3570105 197057 101896 529838 353586 905121 45327 107151 1053381
32 1998 7371541 3838288 211495 108361 626004 371163 940935 49925 117913 1107457
33 1999 7758511 4072326 221683 112186 692935 378967 975746 52105 120937 1131626
34 2000 8320449 4422180 235885 118454 794459 394283 1011284 55677 129157 1159070
35 2001 8521956 4534803 239381 119306 833175 396493 1031221 57490 131019 1179068
36 2002 8655170 4600140 241700 120097 875381 399025 1046907 58790 133003 1180127
37 2003 8903843 4700343 245394 123500 973457 405034 1073415 60511 137933 1184256
38 2004 10236357 5400440 318954 152712 1259867 647420 1218677 28004 NaN 1210283
39 2005 11145826 5772745 338539 163390 1475057 676929 1441066 30333 NaN 1247767
40 2006 12227393 6140992 357523 175949 1695624 709535 1822831 34260 NaN 1290679
41 2007 13022945 6472156 372601 189128 1890459 729202 2003492 38573 NaN 1327334
42 2008 13765395 6796629 383548 199934 2066007 744217 2181383 35100 NaN 1358577
43 2009 14316700 7093964 384053 201033 2204951 727302 2303261 34104 NaN 1368032
44 2010 15095603 7544871 386973 208510 2399038 726359 2389488 35492 NaN 1404872
45 2011 16089528 8113111 389435 219906 2611104 728458 2527190 34116 NaN 1466208
46 2012 17033413 8648875 396119 235949 2794606 751650 2657722 33071 NaN 1515421
47 2013 17939447 9283923 421848 219885 2933050 755950 2722826 36148 NaN 1565817
48 2014 18828721 9857915 427264 211200 3062479 773728 2828466 40731 NaN 1626938
49 2015 19882069 10509258 446822 216566 3235304 802615 2938821 45138 NaN 1687545

In [59]:
# Build a sparse training set from the Minibus column and plot it.
#
# BaseYear is subtracted from the years so the regressor starts near 0 instead
# of ~2000, which keeps the polynomial powers used later much smaller.
BaseYear = 1966

# The slice 0:-1:5 takes every 5th row and stops before the final row.
# x and y are kept as column np.matrix objects because later cells rely on
# matrix semantics (`*` as matrix product).
x = np.matrix(df_arac.Year[0:-1:5]).T-BaseYear
# Counts are rescaled to millions. To study another column (e.g. `Car`),
# change the column here and update the y-axis label below to match.
y = np.matrix(df_arac.Minibus[0:-1:5]).T/1000000.0

plt.plot(x+BaseYear, y, 'o-')
plt.xlabel('Year')
# The plotted series is the Minibus column, so label it as such.
plt.ylabel('Number of Minibuses (Millions)')

plt.show()



In [13]:
# Show every entry of the Year column (positional slice starting at row 0).
df_arac['Year'].iloc[0:]


Out[13]:
0     1966
1     1967
2     1968
3     1969
4     1970
5     1971
6     1972
7     1973
8     1974
9     1975
10    1976
11    1977
12    1978
13    1979
14    1980
15    1981
16    1982
17    1983
18    1984
19    1985
20    1986
21    1987
22    1988
23    1989
24    1990
25    1991
26    1992
27    1993
28    1994
29    1995
30    1996
31    1997
32    1998
33    1999
34    2000
35    2001
36    2002
37    2003
38    2004
39    2005
40    2006
41    2007
42    2008
43    2009
44    2010
45    2011
46    2012
47    2013
48    2014
49    2015
Name: Year, dtype: int64

In [62]:
def poly_design_matrix(t, degree):
    """Return the Vandermonde (polynomial design) matrix for a column vector.

    Parameters
    ----------
    t : column vector (n x 1 np.matrix or array)
        Sample locations.
    degree : int
        Highest power to include.

    Returns
    -------
    n x (degree+1) matrix whose i-th column is ``t`` raised element-wise to
    the power ``i`` (columns ordered from power 0 up to ``degree``).
    """
    # np.hstack requires a real sequence (list/tuple); passing a generator
    # was deprecated in NumPy 1.16 and is rejected by current releases.
    return np.hstack([np.power(t, i) for i in range(degree+1)])


# Setup the vandermonde matrix
N = len(x)
degree = 6
A = poly_design_matrix(x, degree)

# Solve the least squares problem  min_w ||A w - y||^2
w_ls,E,rank,sigma = np.linalg.lstsq(A, y)

# Closed-form normal-equations alternative (valid for np.matrix operands):
#w_ls = (A.T*A).I*A.T*y

# Training points
plt.plot(x+BaseYear, y, 'o-')
plt.xlabel('Year')
# y holds the Minibus column scaled to millions, so label it accordingly.
plt.ylabel('Number of Minibuses (Millions)')


# Prediction on a monthly grid (step of 1/12 year) from 1960 up to, but not
# including, 2021. Part of this range lies outside the training years, so
# the curve there is extrapolation.
#TargetYear = np.matrix([2017, 2018, 2019])
TargetYear = np.arange(1960,2021,1/12.0)
x_test = np.matrix(TargetYear - BaseYear).T
A2 = poly_design_matrix(x_test, degree)

# A2 is an np.matrix, so `*` here is a matrix product, not element-wise.
f = A2*w_ls

plt.plot(x_test+BaseYear, f, 'r-')

plt.show()



In [29]:
# Scratch check of stacking: two length-2 rows joined vertically.
# The result is not the cell's last expression, so it is computed and discarded.
np.vstack([np.hstack([1, 1]), np.hstack([3, 2])])

# (i, i^2, i^3) for i = 0, 1, 2; the trailing bare name makes it the cell output.
s = [(i, i**2, i**3) for i in range(3)]
s


Out[29]:
[(0, 0, 0), (1, 1, 1), (2, 4, 8)]

In [43]:
# Rebuild the design matrices and evaluate the fitted polynomial.
# np.hstack is given a list rather than a generator: generators were
# deprecated as input in NumPy 1.16 and are rejected by current releases.

# Design matrix restricted to the first four training points.
A = np.hstack([np.power(x[0:4],i) for i in range(degree+1)])

# Design matrix for whatever x_test currently holds.
# NOTE(review): the recorded output is 1x1, so x_test presumably held a single
# point when this cell was last run — confirm the intended execution order.
A2 = np.hstack([np.power(x_test,i) for i in range(degree+1)])

# Predictions: matrix product of the test design matrix with the fitted weights.
A2*w_ls


Out[43]:
matrix([[ 13.17080038]])