In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Importing the dataset
train = pd.read_csv('train.csv')
test=pd.read_csv('test.csv')
X = train.iloc[:, :-1].values
y = train.iloc[:, -1].values

#obtain all the numeric features in both datasets
#here numric features is also a dataframe
numeric_features_train=train.select_dtypes(include=[np.number])
numeric_features_train.describe()

numeric_features_test=test.select_dtypes(include=[np.number])
numeric_features_test.describe()

#obtain categorical variables in both datasets
#Use one -hot encoding to encode these variables to numerical values
categoricals_train = train.select_dtypes(exclude=[np.number])
categoricals_train.describe()

categoricals_test = test.select_dtypes(exclude=[np.number])
categoricals_test.describe()



#obtain null values for each column in both datasets
nulls_train = pd.DataFrame(train.isnull().sum().sort_values(ascending=False)[:])
nulls_train.columns = ['Null Count']
nulls_train.index.name = 'Feature'
nulls_train[:16]

nulls_test = pd.DataFrame(test.isnull().sum().sort_values(ascending=False)[:])
nulls_test.columns = ['Null Count']
nulls_test.index.name = 'Feature'
nulls_test[:16]


      
    

#Delete the columns haivng most null values in Train as well as in test dataset
for elem in nulls_train.index[:16]:
    train=train.drop(elem,1)
    test=test.drop(elem,1)






#examine the correlation between attributes in train dataset
corr=numeric_features_train.corr()
#corr['SalePrice'].sort_values(ascending='False').index


#Delete all the columns which has the very less correlation with Target Variable
#Removing attributes which have correlation coeff b/w -0.2 to 0.4
del_corr=[]
for elem in corr['SalePrice'].sort_values(ascending=False).index:
    val=corr['SalePrice'][elem]
    if(val<0.400000 and val>(-0.20000)):
        del_corr.append(elem)
        
       
#check if label are present in dataset or not
for label in del_corr:
    if(label in train.columns):
       train=train.drop(label,axis=1)
       test=test.drop(label,axis=1)

    

        
categoricals_train = train.select_dtypes(exclude=[np.number])
categoricals_train.describe()

categoricals_test = test.select_dtypes(exclude=[np.number])
categoricals_test.describe()
        
 
#Remove the categorcial attributes which have categories<=6
#this is beacauese the wont affect much the dependent variable  

    
for column in categoricals_train.columns:
    if(len(train[column].unique())<=6):
        train=train.drop(column,axis=1)
        test=test.drop(column,axis=1)
        

          
  
#UP till here 
#Removed Null variable
#Removed less correlated variables
#Removed some categorical variables       
                   
                    
 


#Split Categorical variables into Dummy Variables with Corresponding Values as  0 or 1 
#Depending on whether that variable need to present for that particular record
l=[0,1,2,3,7,8,9,16,20] #index of categorical variables
from sklearn.preprocessing import LabelEncoder
labelencoder_train= LabelEncoder()
labelencoder_test= LabelEncoder()
for i in l:
    train.iloc[:,i]=labelencoder_train.fit_transform(train.iloc[:,i].factorize()[0])
    test.iloc[:,i]=labelencoder_test.fit_transform(test.iloc[:,i].factorize()[0])

#Encode the dataset to get dummy categories
#train=pd.get_dummies(train,columns=['Neighborhood','Condition1','Condition2','HouseStyle','RoofMatl','Exterior1st','Exterior2nd','FullBath','TotRmsAbvGrd','Functional','Fireplaces','GarageCars','SaleType']) 
#test=pd.get_dummies(test,columns=['Neighborhood','Condition1','Condition2','HouseStyle','RoofMatl','Exterior1st','Exterior2nd','FullBath','TotRmsAbvGrd','Functional','Fireplaces','GarageCars','SaleType'])    

train.isnull().any()

train['MasVnrArea']=train['MasVnrArea'].factorize()[0]
train.isnull().any()
test.isnull().any()
test['MasVnrArea']=test['MasVnrArea'].factorize()[0]
test['TotalBsmtSF']=test['TotalBsmtSF'].factorize()[0]
test['GarageCars']=test['GarageCars'].factorize()[0]
test['GarageArea']=test['GarageArea'].factorize()[0]
test.isnull().any()


Out[2]:
Neighborhood    False
Condition1      False
Condition2      False
HouseStyle      False
OverallQual     False
YearBuilt       False
YearRemodAdd    False
RoofMatl        False
Exterior1st     False
Exterior2nd     False
MasVnrArea      False
TotalBsmtSF     False
1stFlrSF        False
GrLivArea       False
FullBath        False
TotRmsAbvGrd    False
Functional      False
Fireplaces      False
GarageCars      False
GarageArea      False
SaleType        False
dtype: bool

In [3]:
from sklearn.model_selection import cross_val_predict
from sklearn import linear_model
from sklearn.linear_model import LinearRegression

lr = linear_model.LinearRegression()
lr.fit(train.iloc[:,:-1], train.iloc[:,-1])  
final_prediction=lr.predict(test)
#cross_val_predict(lr, )
#x =[[[1,3,5], [2, 8, 9]],[[7,5,9], [12, 8, 9]]]
#print(x[0:1,-1])

submission=open('/Users/abhinisinha/Documents/Kaggle_Compititions/House_Pricing/LinearRegressionSubmission.csv','w')
submission.write('Id'+','+'SalePrice'+'\n')
num = 0
for i in np.arange(1459):
    num = num + 1
    print(str(i+1461)+','+str(final_prediction[i])+'\n')
    submission.write(str(i+1461)+','+str(final_prediction[i])+'\n')
    #submission.write(str(i+1461)+','+str(final_prediction[i])+'\n')

print(num)


1461,70236.5285862

1462,112500.464271

1463,137968.045223

1464,154657.00476

1465,166382.000125

1466,152160.165076

1467,130166.405109

1468,145710.464777

1469,167288.259165

1470,62663.2711279

1471,167069.396535

1472,90105.3489961

1473,76855.978803

1474,134152.58786

1475,108712.013408

1476,272771.202088

1477,204798.094836

1478,234226.415445

1479,228586.826473

1480,311948.553124

1481,244423.108013

1482,190219.047866

1483,142301.812029

1484,147617.287337

1485,161665.682884

1486,181803.103023

1487,252489.27695

1488,212391.169249

1489,157460.568586

1490,160593.797517

1491,166098.922269

1492,63490.8592444

1493,158596.778809

1494,241651.006862

1495,233255.119274

1496,178944.188758

1497,167919.029073

1498,144635.032565

1499,140511.421246

1500,124656.797726

1501,137480.579308

1502,132546.796204

1503,213318.45481

1504,204108.359147

1505,178588.572743

1506,152326.336392

1507,200805.319291

1508,155708.635952

1509,136286.033194

1510,102479.061894

1511,105910.246178

1512,117655.83052

1513,139740.221702

1514,162706.807287

1515,190786.051166

1516,118728.07738

1517,127253.677783

1518,119799.517396

1519,172243.078165

1520,84060.6011928

1521,87966.657265

1522,158296.943533

1523,68978.1799041

1524,77652.7217062

1525,71709.2826708

1526,73605.6307824

1527,66585.625978

1528,106626.962919

1529,90312.5752013

1530,135117.20563

1531,98855.845783

1532,83598.6110411

1533,103325.393436

1534,107855.875332

1535,120987.016465

1536,69634.005393

1537,23134.4978357

1538,147964.630313

1539,186430.987558

1540,140780.682161

1541,112364.995508

1542,107764.387223

1543,178125.654861

1544,31420.6241877

1545,82097.9351035

1546,117187.498653

1547,103485.888863

1548,100171.24816

1549,76168.4785282

1550,98261.5469171

1551,87287.2148801

1552,105341.924222

1553,144176.671475

1554,103633.291043

1555,134623.982727

1556,51614.9752877

1557,115266.278785

1558,70654.6537796

1559,113983.04345

1560,97158.9477578

1561,127766.575863

1562,84439.3688804

1563,69971.7417017

1564,136055.649479

1565,124099.320594

1566,187252.932681

1567,39042.4040723

1568,169710.499724

1569,76199.8813119

1570,114841.646991

1571,97959.5722956

1572,98321.4482642

1573,206616.418192

1574,99655.8214546

1575,181616.60685

1576,187503.075498

1577,155011.999856

1578,110633.937837

1579,98533.2414223

1580,176134.057429

1581,94120.0284554

1582,79757.1161583

1583,237527.080043

1584,198666.539747

1585,112675.804574

1586,23367.4895392

1587,76946.0920489

1588,92282.9750808

1589,58889.7506707

1590,115534.061109

1591,78154.2781471

1592,92295.5526137

1593,92201.6784786

1594,148433.166603

1595,130105.960148

1596,189706.607535

1597,142909.493255

1598,165609.149088

1599,164220.459583

1600,169171.04118

1601,-8418.24602357

1602,88278.1216859

1603,32859.8761671

1604,200056.116475

1605,215715.104499

1606,131750.286707

1607,172003.451173

1608,178576.571647

1609,157670.845575

1610,117518.110086

1611,114244.028842

1612,130092.793239

1613,131975.447928

1614,88599.9680589

1615,82593.2521117

1616,82593.2521117

1617,60887.0398052

1618,86301.419412

1619,85229.2522689

1620,155399.695563

1621,106413.82457

1622,93123.4926812

1623,220734.444477

1624,165790.978859

1625,84014.6964032

1626,143661.197306

1627,156952.663976

1628,238962.394227

1629,174673.827221

1630,271050.735661

1631,188254.438849

1632,202261.837656

1633,160530.429685

1634,159611.380881

1635,148580.075343

1636,138155.083501

1637,145095.437515

1638,178750.248775

1639,161833.978951

1640,229638.98718

1641,168737.447652

1642,192391.56524

1643,199741.281852

1644,205340.379443

1645,146224.986081

1646,120492.19996

1647,143954.853772

1648,106651.545354

1649,88201.8291805

1650,63416.8810025

1651,75528.5593158

1652,89702.1393206

1653,89508.8873947

1654,141099.961925

1655,138018.914125

1656,135303.184295

1657,142250.558361

1658,135878.482513

1659,111541.057195

1660,118951.874673

1661,309036.368015

1662,290876.447918

1663,264011.068066

1664,305266.893464

1665,245319.648013

1666,238691.767162

1667,261797.450027

1668,256772.173533

1669,231320.388742

1670,249201.779358

1671,201209.213068

1672,289941.882321

1673,239985.006092

1674,218126.230009

1675,156223.39111

1676,152919.238016

1677,174684.741106

1678,312044.38449

1679,256052.772053

1680,250854.383224

1681,219218.969746

1682,223386.718644

1683,174203.213797

1684,157364.680221

1685,156444.553809

1686,166780.107913

1687,163644.050038

1688,181671.612515

1689,180146.694275

1690,168875.27288

1691,161811.348067

1692,224324.141782

1693,137913.419801

1694,155825.427237

1695,162395.903555

1696,217186.1692

1697,164658.077592

1698,276709.017907

1699,251793.062784

1700,200674.176405

1701,227738.463075

1702,213867.478105

1703,220478.697159

1704,225133.814815

1705,196862.045979

1706,278394.159232

1707,176106.770691

1708,170049.564361

1709,232711.584092

1710,200542.665289

1711,216234.020238

1712,228449.6549

1713,205873.825533

1714,183002.63589

1715,171316.869341

1716,158668.857208

1717,155540.893827

1718,154379.840067

1719,177486.332215

1720,194648.486637

1721,137521.173949

1722,117775.586795

1723,96122.4053347

1724,185080.316254

1725,201600.61287

1726,145414.76106

1727,142614.722765

1728,142134.050467

1729,126474.930462

1730,178992.839556

1731,85076.2919671

1732,78674.7394193

1733,66607.8369076

1734,82972.0090918

1735,69962.4994205

1736,76700.2593362

1737,215633.373992

1738,184584.492426

1739,251719.35008

1740,187794.892663

1741,155678.096839

1742,136569.31322

1743,154009.178962

1744,207440.873488

1745,185402.379683

1746,234695.087916

1747,200632.275967

1748,198076.503446

1749,119151.619187

1750,114664.402013

1751,223809.362796

1752,70360.107416

1753,129453.137876

1754,191385.665843

1755,129249.256831

1756,71884.6577056

1757,66014.7470414

1758,109315.52927

1759,125998.55106

1760,131406.794494

1761,131131.562467

1762,149222.898413

1763,151537.82792

1764,72731.5998227

1765,156511.658593

1766,141768.793901

1767,175726.941305

1768,86519.8330701

1769,158963.501466

1770,103038.585787

1771,99231.0083856

1772,105666.4485

1773,95674.5132417

1774,137941.152768

1775,100367.119721

1776,89438.8892412

1777,59036.169751

1778,97540.6719666

1779,91864.6460452

1780,162272.095344

1781,90030.5936071

1782,27444.7725435

1783,102197.758155

1784,58977.3235552

1785,69642.2872959

1786,124016.64897

1787,144751.97266

1788,6531.15734388

1789,59108.5429973

1790,18246.7780605

1791,188906.128723

1792,126159.59936

1793,108833.660575

1794,116917.708435

1795,100633.346378

1796,111155.976807

1797,98930.8156242

1798,88633.3972125

1799,57679.1337644

1800,84073.2285641

1801,77442.7823879

1802,129135.657

1803,146361.108726

1804,92165.8904952

1805,111424.914358

1806,119885.888467

1807,128172.281865

1808,101357.774677

1809,116414.829968

1810,111720.317849

1811,91238.1068312

1812,103470.015168

1813,88746.6426088

1814,51978.9434753

1815,-12839.1154243

1816,52494.4413158

1817,72260.2080867

1818,121190.582267

1819,116437.539781

1820,63753.0338419

1821,115834.828507

1822,136506.929975

1823,33940.5690235

1824,124575.521531

1825,101615.591268

1826,65966.9431873

1827,77754.9707593

1828,101839.625871

1829,113709.137833

1830,121400.514487

1831,157002.862464

1832,112000.601971

1833,125420.748779

1834,115958.692028

1835,172520.378914

1836,120490.980792

1837,57125.4951138

1838,90763.1426047

1839,53969.8263613

1840,118204.691511

1841,89906.9916041

1842,64394.9899184

1843,98954.8458892

1844,122126.937513

1845,122672.456058

1846,121896.822528

1847,154219.640749

1848,11975.0667567

1849,73301.7765949

1850,83142.8972033

1851,127806.266489

1852,92016.2201308

1853,88044.7561108

1854,147606.466879

1855,142095.67761

1856,191276.083956

1857,130787.756954

1858,156985.908297

1859,123655.043136

1860,141689.26358

1861,122204.343082

1862,261221.416519

1863,263520.962401

1864,263520.962401

1865,252140.369359

1866,245649.171753

1867,207986.65106

1868,236316.477469

1869,174776.062717

1870,185917.999668

1871,210428.898372

1872,148381.287956

1873,179954.99029

1874,95057.7012358

1875,166882.940852

1876,162767.618593

1877,170507.174821

1878,179984.349038

1879,83805.9625381

1880,92847.4667332

1881,191395.46636

1882,227044.055136

1883,170505.608815

1884,184541.181624

1885,204543.06262

1886,219457.208181

1887,171952.75646

1888,219386.549041

1889,142417.605133

1890,66897.502721

1891,130733.882135

1892,47504.9946194

1893,89663.0903018

1894,138815.593804

1895,114361.82166

1896,100638.323265

1897,95591.1830014

1898,104543.334972

1899,124489.759031

1900,126094.682707

1901,109990.673777

1902,99265.164143

1903,201640.160149

1904,98814.3906122

1905,148289.764501

1906,87887.2266406

1907,166380.409671

1908,121704.419023

1909,135706.158845

1910,131750.828526

1911,171831.010329

1912,237964.578188

1913,133283.039849

1914,14984.5252571

1915,214405.599009

1916,-13663.2770931

1917,226091.107336

1918,111422.229783

1919,153212.114212

1920,141570.867335

1921,263334.901377

1922,231347.955993

1923,193259.933516

1924,163643.582427

1925,192852.969743

1926,259041.139369

1927,78996.0304015

1928,130522.569188

1929,68985.4385447

1930,113538.383105

1931,113180.314445

1932,111733.562362

1933,131955.759422

1934,154434.450308

1935,147136.223388

1936,149006.223144

1937,166316.548262

1938,134465.270786

1939,208179.260629

1940,166382.000125

1941,165558.858317

1942,144753.10961

1943,188964.484898

1944,248975.130533

1945,255707.692379

1946,155830.309967

1947,216797.847178

1948,137114.252244

1949,184746.206722

1950,144288.409299

1951,219637.06935

1952,184837.811598

1953,130475.48569

1954,185483.611226

1955,97637.3785182

1956,249914.961534

1957,148451.408282

1958,246709.121368

1959,129257.928511

1960,61064.4795265

1961,81671.1677961

1962,88094.4023475

1963,89081.1213638

1964,86723.895439

1965,129598.513015

1966,132686.230278

1967,216093.275355

1968,280669.147603

1969,264877.254196

1970,298479.336329

1971,332310.084172

1972,274210.175077

1973,235754.727885

1974,245923.20743

1975,329654.142934

1976,219668.297266

1977,271355.499327

1978,276295.904104

1979,264500.515087

1980,177771.213068

1981,258217.608594

1982,197727.752282

1983,197889.682625

1984,180485.458306

1985,191886.290651

1986,201555.218008

1987,181657.073046

1988,152075.925173

1989,180394.499473

1990,195748.067327

1991,177792.353329

1992,195738.342769

1993,155244.035281

1994,204121.317039

1995,176260.869176

1996,227459.668636

1997,264984.843113

1998,245309.571752

1999,216236.513648

2000,250165.847401

2001,202711.31028

2002,187315.663003

2003,226557.797097

2004,224816.364057

2005,205014.708574

2006,190623.473137

2007,222482.87673

2008,160387.994038

2009,153875.715214

2010,167359.468807

2011,136106.05274

2012,159057.5798

2013,140761.537939

2014,156050.43918

2015,163198.858465

2016,154043.003561

2017,162833.21609

2018,77755.2633437

2019,92374.0643159

2020,60540.3537365

2021,74468.9686518

2022,152500.52683

2023,132117.57088

2024,221237.269815

2025,287424.01291

2026,161268.926853

2027,123115.285818

2028,157076.120275

2029,140191.390181

2030,225383.214623

2031,215180.588634

2032,220066.94627

2033,221604.327694

2034,156653.773896

2035,190915.326218

2036,179731.81188

2037,185667.833174

2038,281458.988387

2039,189739.899108

2040,284124.343779

2041,203951.568835

2042,192701.261021

2043,133127.079138

2044,171207.653935

2045,176432.711204

2046,116966.177252

2047,112211.249534

2048,98179.6249474

2049,129696.581788

2050,163470.41019

2051,81133.7930606

2052,101839.598416

2053,102500.651212

2054,17875.0234171

2055,117501.153728

2056,120795.997154

2057,71327.5994725

2058,175232.316546

2059,111848.968718

2060,115301.04301

2061,148655.768397

2062,90797.5334716

2063,62657.5615067

2064,94890.1015437

2065,82211.5498333

2066,173608.804519

2067,112804.896035

2068,137047.550306

2069,43356.0385477

2070,68548.1639287

2071,47506.0134361

2072,138920.54362

2073,113004.917078

2074,162044.651463

2075,119710.94363

2076,81770.4613418

2077,99208.2743318

2078,82372.2024676

2079,98577.6521185

2080,88903.7909725

2081,106914.352756

2082,141799.195748

2083,117001.094443

2084,68314.8868975

2085,84879.706333

2086,126508.625537

2087,102104.830086

2088,94842.6228503

2089,51869.7195272

2090,112575.17774

2091,104621.396138

2092,81982.6588092

2093,105829.494685

2094,156417.625712

2095,143680.167253

2096,58143.8076391

2097,96945.5809655

2098,126671.667383

2099,-15807.8378921

2100,78742.6280057

2101,110479.293592

2102,119302.570625

2103,67408.3626608

2104,145975.754495

2105,124095.526968

2106,5586.96581481

2107,211036.185046

2108,76026.5343547

2109,82615.3284268

2110,94821.4318249

2111,98569.7210109

2112,109206.419355

2113,96704.0117885

2114,100765.705421

2115,144768.368041

2116,105533.624805

2117,121514.637663

2118,107536.974195

2119,89507.1952816

2120,103514.986986

2121,54197.2177714

2122,49301.5316225

2123,70191.4502745

2124,166613.606305

2125,111840.086658

2126,144526.675444

2127,143139.542973

2128,112630.646984

2129,68334.6511203

2130,112338.621899

2131,144792.940763

2132,87581.4388005

2133,102543.335926

2134,96205.499414

2135,69393.238612

2136,62955.3450001

2137,94623.0835624

2138,93863.935403

2139,123931.959066

2140,99838.3121858

2141,125599.12521

2142,92536.1287238

2143,108038.115896

2144,81731.3683171

2145,92403.4553606

2146,109167.820697

2147,125141.375175

2148,98348.1607845

2149,94339.5572342

2150,184871.284016

2151,117477.974423

2152,161915.670923

2153,126789.869652

2154,94502.9595885

2155,118299.090515

2156,229293.805534

2157,193736.937278

2158,211155.483485

2159,205839.990692

2160,168335.431119

2161,220484.981952

2162,296783.429822

2163,264081.149074

2164,212850.196775

2165,155896.937856

2166,103532.852137

2167,176939.556299

2168,152815.616841

2169,167925.397731

2170,178161.78796

2171,101990.053257

2172,110515.441402

2173,121521.008598

2174,215216.07872

2175,251486.969697

2176,235583.96218

2177,220134.797868

2178,207947.040181

2179,111588.017255

2180,186146.861225

2181,169307.16613

2182,193956.676031

2183,171476.296908

2184,90413.5341712

2185,92902.335239

2186,131913.199065

2187,134723.526959

2188,145783.156756

2189,288893.173998

2190,77719.3404917

2191,81411.352342

2192,67047.8208764

2193,105407.916198

2194,99416.6968612

2195,53847.0664492

2196,66686.7264949

2197,86920.5044123

2198,134550.13213

2199,146267.49132

2200,111989.431636

2201,111082.932176

2202,163143.393023

2203,135402.892324

2204,136429.41897

2205,93964.6408557

2206,112828.156948

2207,194679.147322

2208,200166.036867

2209,156140.092343

2210,121172.177481

2211,93731.9839796

2212,99950.9871317

2213,90810.1562109

2214,106312.839141

2215,80253.5970196

2216,129354.624474

2217,-7047.00811042

2218,65896.3904777

2219,64680.268733

2220,30546.1170533

2221,215481.327325

2222,188204.987363

2223,212921.315432

2224,201623.225138

2225,126127.819068

2226,139477.166803

2227,179901.367715

2228,205227.573662

2229,211695.025874

2230,124429.749625

2231,185887.186206

2232,163584.658854

2233,141210.896717

2234,198056.854688

2235,173136.514812

2236,215443.189464

2237,246801.542929

2238,179306.916977

2239,107653.216076

2240,133053.677784

2241,90013.2976746

2242,99025.8234289

2243,116415.452957

2244,73366.9992752

2245,73461.6305703

2246,101212.296804

2247,124693.75296

2248,71717.0825165

2249,89663.3116485

2250,86703.032347

2251,77365.0072845

2252,151500.718651

2253,142382.677211

2254,167074.78525

2255,176284.339881

2256,158813.803614

2257,194952.11881

2258,141248.028043

2259,165303.942341

2260,127655.010053

2261,167606.371552

2262,191619.9748

2263,261024.883381

2264,304947.166643

2265,128632.970099

2266,239558.367853

2267,266847.101249

2268,271431.038381

2269,135842.771289

2270,165172.793426

2271,190526.759104

2272,161429.511508

2273,126476.557788

2274,151971.230881

2275,132727.230236

2276,161348.820616

2277,169254.87081

2278,127277.899021

2279,93600.106422

2280,63537.8974056

2281,156469.331768

2282,149893.405901

2283,93367.3150426

2284,108809.073235

2285,129653.339886

2286,82660.8583574

2287,254785.440359

2288,234139.86373

2289,265917.537088

2290,307588.74537

2291,260858.151914

2292,301715.440243

2293,316676.882746

2294,303429.913575

2295,312675.287945

2296,249229.220188

2297,256833.268269

2298,258371.669531

2299,295492.183247

2300,259614.270431

2301,231219.142725

2302,196076.169476

2303,218465.074761

2304,229501.611053

2305,173241.013074

2306,170133.400852

2307,168135.362664

2308,229497.622833

2309,229481.323898

2310,193162.799803

2311,179359.062555

2312,131771.964228

2313,144372.017971

2314,152962.188419

2315,155256.676836

2316,181327.577048

2317,158971.620807

2318,146708.263131

2319,162772.06621

2320,173082.345519

2321,227120.950924

2322,185021.226781

2323,146210.012556

2324,161697.898541

2325,193743.191743

2326,162260.926986

2327,193574.602499

2328,207782.390408

2329,177698.950078

2330,168103.17266

2331,282400.345289

2332,315842.24097

2333,271253.490032

2334,235369.663756

2335,221361.321489

2336,264241.49305

2337,184901.849965

2338,223369.945364

2339,205495.105747

2340,291371.07721

2341,185067.649713

2342,196994.990433

2343,193055.153516

2344,203354.382209

2345,225408.393006

2346,192059.532173

2347,180346.777532

2348,210287.300181

2349,136885.656998

2350,227994.237002

2351,225547.916415

2352,227859.32767

2353,214512.862588

2354,159130.185665

2355,169548.946744

2356,100052.463723

2357,186352.771253

2358,156405.146932

2359,141151.377632

2360,77319.6837822

2361,91396.0155281

2362,222711.057621

2363,86557.5772998

2364,127694.915756

2365,201800.142403

2366,169454.304594

2367,210851.370816

2368,191695.694912

2369,204655.730407

2370,145388.901141

2371,140715.082362

2372,170931.60762

2373,239864.04202

2374,252726.981984

2375,208913.051848

2376,235235.201579

2377,270163.443769

2378,120663.971546

2379,176448.148258

2380,112035.719419

2381,131485.371017

2382,177219.771464

2383,181912.306589

2384,200194.98297

2385,121986.360455

2386,107293.646389

2387,101670.895875

2388,90186.1993364

2389,88765.7011413

2390,118705.402323

2391,101401.366548

2392,70189.6663853

2393,159341.429162

2394,106822.168331

2395,181539.888823

2396,140448.853061

2397,178669.209703

2398,96999.4887533

2399,44023.5291093

2400,39491.9357525

2401,61850.9412012

2402,87244.8419145

2403,144954.887225

2404,115581.019784

2405,125828.998148

2406,101958.202772

2407,72691.1204573

2408,100856.168672

2409,102866.687549

2410,195552.503804

2411,77740.0830312

2412,130134.622996

2413,89824.7570774

2414,104033.717567

2415,122080.243029

2416,101301.200145

2417,101096.606812

2418,95057.9651264

2419,91741.4712167

2420,97254.2405539

2421,100441.454643

2422,65547.5719543

2423,99297.3331008

2424,146384.5853

2425,208100.013425

2426,63323.2005704

2427,126804.93562

2428,183306.946676

2429,73064.8587367

2430,94446.7978392

2431,96083.9662753

2432,99065.9244148

2433,117318.964419

2434,105422.219716

2435,138667.354121

2436,72774.503325

2437,69232.496812

2438,86572.456622

2439,77567.8912947

2440,106839.038813

2441,77963.144286

2442,91169.4219832

2443,107993.968896

2444,99235.7613763

2445,58461.1886074

2446,90342.7274499

2447,180761.172324

2448,130187.943528

2449,80118.0094969

2450,129148.619282

2451,113609.859774

2452,195032.157367

2453,57664.7124205

2454,76988.9536038

2455,71137.060718

2456,52986.1401171

2457,105665.248697

2458,111622.620179

2459,61664.708002

2460,120491.514813

2461,82789.2709346

2462,119062.282239

2463,84046.1694546

2464,165928.637328

2465,109035.951755

2466,88434.0034393

2467,160688.824427

2468,64983.1709044

2469,78263.4755538

2470,173256.663178

2471,201296.475142

2472,160221.138573

2473,79920.4368238

2474,62346.0548363

2475,219914.090399

2476,88009.3648128

2477,81047.7671021

2478,120704.019889

2479,77915.1880902

2480,101659.302765

2481,77684.3607967

2482,92130.0269868

2483,69325.8339705

2484,88634.4872855

2485,76968.5696568

2486,130425.63141

2487,176958.668354

2488,160174.983364

2489,156733.881387

2490,128802.309749

2491,57630.7562633

2492,165702.944031

2493,128242.398838

2494,114827.11634

2495,61408.0980442

2496,211683.45935

2497,124775.362316

2498,63947.2169176

2499,57266.9426868

2500,91148.0288408

2501,99977.5761299

2502,116655.717117

2503,56673.9651921

2504,163002.550559

2505,220343.372591

2506,231408.871108

2507,259806.839826

2508,231408.871108

2509,195636.201025

2510,214287.578649

2511,166869.703211

2512,188070.959601

2513,196304.320475

2514,209367.681039

2515,113666.829501

2516,144176.88315

2517,99176.7879466

2518,117671.880586

2519,194186.597182

2520,191557.200972

2521,173332.250917

2522,198708.727745

2523,80936.075183

2524,107518.661662

2525,99007.9428694

2526,103095.846014

2527,76550.9761871

2528,88291.2902841

2529,87959.5178736

2530,89489.7035708

2531,199501.449533

2532,203940.239231

2533,177937.980992

2534,203597.249398

2535,257608.096627

2536,211283.761847

2537,190803.833637

2538,167651.160317

2539,156479.548314

2540,163888.140372

2541,166619.541864

2542,126906.089506

2543,83942.9002199

2544,83191.9785527

2545,97110.3378171

2546,122306.795876

2547,121171.554593

2548,158455.509116

2549,131828.16794

2550,509259.563223

2551,129467.388578

2552,87838.568443

2553,76597.3945465

2554,107595.198698

2555,93900.3980597

2556,56336.5672042

2557,50094.0744524

2558,165758.56981

2559,93659.433401

2560,119678.164231

2561,132897.608013

2562,128294.442478

2563,132683.094653

2564,163293.330702

2565,90314.4413148

2566,105057.935489

2567,81190.2963127

2568,174303.065468

2569,177324.37952

2570,122029.409332

2571,128228.620292

2572,169042.973142

2573,182994.26914

2574,282673.79917

2575,98082.6782243

2576,116919.242671

2577,113089.124461

2578,48299.8721344

2579,-30566.8488047

2580,146565.769506

2581,80334.5123104

2582,93836.9145929

2583,218283.366542

2584,145850.378995

2585,154287.475643

2586,186482.765209

2587,178694.64844

2588,123131.948217

2589,120773.594834

2590,192074.411949

2591,198000.175744

2592,194683.706713

2593,215076.628383

2594,149639.707853

2595,187265.821665

2596,239822.959893

2597,173155.353135

2598,218175.820743

2599,244232.057889

2600,124868.963912

2601,124742.296308

2602,60863.8988686

2603,54098.9543894

2604,79620.6736794

2605,64774.2544455

2606,104821.57401

2607,134742.197793

2608,158210.370047

2609,127435.053015

2610,92456.1986155

2611,109728.539739

2612,121329.025054

2613,71719.5575829

2614,77710.5081307

2615,123899.446101

2616,114409.774052

2617,171154.529783

2618,145743.190517

2619,191955.698611

2620,160483.240349

2621,174793.340642

2622,181294.495544

2623,234354.777973

2624,249037.987003

2625,271954.038607

2626,152885.253575

2627,161161.039378

2628,299446.278883

2629,342282.03899

2630,281113.236838

2631,323140.616066

2632,282767.243397

2633,239502.481555

2634,286786.858223

2635,132883.246664

2636,161173.844866

2637,155915.985852

2638,208031.204744

2639,176821.323114

2640,123374.090418

2641,64654.0634023

2642,167530.484396

2643,100488.982145

2644,103219.610936

2645,99942.5476239

2646,87178.5307712

2647,95093.7409804

2648,128524.306049

2649,130396.767731

2650,125764.747325

2651,140096.583363

2652,303091.637775

2653,224960.770293

2654,212660.631041

2655,267116.456563

2656,269728.198279

2657,263401.788083

2658,243483.082391

2659,255218.755317

2660,260606.8622

2661,265313.854314

2662,258465.502009

2663,228910.125623

2664,234017.47321

2665,257616.401182

2666,243528.29735

2667,143127.864861

2668,149060.797828

2669,164390.101491

2670,249312.009258

2671,162858.073122

2672,177948.420103

2673,196618.300684

2674,189975.398696

2675,161423.121908

2676,176943.55133

2677,179409.83932

2678,232583.581154

2679,231318.29516

2680,245726.404828

2681,304471.334686

2682,268312.966674

2683,335764.025634

2684,262839.900111

2685,282247.38969

2686,219381.466936

2687,227898.798027

2688,204191.357245

2689,197900.311195

2690,308853.177744

2691,158291.632184

2692,162131.967477

2693,178261.350327

2694,148940.293609

2695,184713.171199

2696,177677.666085

2697,169133.459441

2698,149770.953899

2699,151457.914409

2700,137191.623397

2701,122144.189679

2702,63969.9037515

2703,124470.875754

2704,110084.169346

2705,84800.3350125

2706,59907.7573343

2707,70229.3839459

2708,80687.2490903

2709,97461.622567

2710,87339.2929822

2711,249141.556985

2712,307068.866463

2713,162308.749942

2714,140627.373221

2715,137360.604158

2716,132546.796204

2717,181000.855303

2718,206598.885728

2719,86452.2144252

2720,135710.254149

2721,105628.464496

2722,132427.879612

2723,137806.214103

2724,84798.6033138

2725,93470.5243781

2726,99162.1498585

2727,146664.520098

2728,143451.791764

2729,113908.494101

2730,95699.1787158

2731,81721.2421115

2732,103470.697674

2733,135308.089024

2734,121408.793729

2735,96465.574882

2736,127027.82255

2737,75835.5939644

2738,131703.293471

2739,147562.865312

2740,100548.624139

2741,132662.024051

2742,149876.36276

2743,109000.337958

2744,131839.582654

2745,120495.237876

2746,89337.2939608

2747,152049.428034

2748,105697.37779

2749,94392.4302515

2750,90879.833008

2751,88562.0730663

2752,223627.135775

2753,122192.421394

2754,170159.234508

2755,114707.610207

2756,44590.8153075

2757,11892.2145406

2758,36045.9932544

2759,115517.125912

2760,112203.797697

2761,125324.355723

2762,126276.98782

2763,170376.72779

2764,141331.196395

2765,239176.684295

2766,79579.082739

2767,46640.5554475

2768,145503.715268

2769,95743.6947358

2770,120028.237832

2771,62342.4523262

2772,105476.98361

2773,138156.599244

2774,102678.40961

2775,111282.938661

2776,117425.480267

2777,123233.283422

2778,111877.034511

2779,95738.5286873

2780,84229.7640273

2781,71500.9096319

2782,42616.0650587

2783,39117.4821297

2784,77316.7285918

2785,91532.7971243

2786,4609.40334278

2787,110878.156475

2788,25454.001501

2789,188376.749737

2790,86535.8229544

2791,79275.2701445

2792,53897.7529468

2793,147380.647996

2794,52910.7066095

2795,88804.6448604

2796,58290.4573156

2797,160512.167768

2798,79255.1231113

2799,93620.4694354

2800,82296.3573899

2801,94063.6777622

2802,111373.217968

2803,167849.426861

2804,123746.786968

2805,77389.7535555

2806,73775.955375

2807,140290.16406

2808,102314.304171

2809,93146.7991314

2810,94290.2891532

2811,132650.649971

2812,123065.303809

2813,162713.7952

2814,166822.052152

2815,57067.3072796

2816,201282.362314

2817,117219.417622

2818,73045.092205

2819,158799.772324

2820,102016.860249

2821,83160.2249628

2822,168904.387878

2823,262487.281018

2824,118252.044765

2825,145196.866045

2826,89175.5388972

2827,128470.595373

2828,204904.458538

2829,170300.635857

2830,217281.558672

2831,150982.944176

2832,191963.184564

2833,216848.865514

2834,188320.101928

2835,188384.253096

2836,166779.068532

2837,147606.36447

2838,118282.46942

2839,151807.575018

2840,172786.448297

2841,182356.679001

2842,193762.89563

2843,131510.670079

2844,138915.079112

2845,73703.2624465

2846,179308.43868

2847,160093.233592

2848,205710.276442

2849,190744.667521

2850,246651.889601

2851,209375.047163

2852,208724.097274

2853,192655.26007

2854,112675.804574

2855,165835.559318

2856,188490.807592

2857,168383.452831

2858,164591.212153

2859,68469.8761953

2860,120096.345248

2861,97940.0913189

2862,189197.154687

2863,137427.013242

2864,224449.83956

2865,133043.409097

2866,163082.142234

2867,49195.0033815

2868,85279.9344999

2869,82803.2591995

2870,111166.109484

2871,96793.7947191

2872,-1910.73750752

2873,72873.1223968

2874,87753.0780511

2875,98191.9500782

2876,161884.195255

2877,114154.643771

2878,148034.982146

2879,107420.105855

2880,87390.6542729

2881,125855.37437

2882,116295.638136

2883,144123.558596

2884,196359.085947

2885,154114.070919

2886,166372.503282

2887,69369.663822

2888,99443.7526191

2889,46491.685477

2890,40430.2367961

2891,121380.071371

2892,23547.2256899

2893,101856.078568

2894,46407.1142932

2895,227797.027792

2896,206954.754122

2897,160279.017766

2898,152593.703322

2899,193682.384369

2900,121771.596945

2901,167242.064679

2902,152636.463014

2903,256282.406113

2904,257873.81683

2905,9785.48447225

2906,182175.424084

2907,113497.640006

2908,80021.6582659

2909,142692.3604

2910,61267.0096621

2911,60863.8988686

2912,119492.91358

2913,59736.4433693

2914,82593.2521117

2915,82593.2521117

2916,59768.6590267

2917,116338.505342

2918,118081.051934

2919,210396.093126

1459

In [64]:
import statsmodels.api as sm
import matplotlib.pyplot as plot

#independent variables
ind_data = train.values[:,:-1]
dep_data = train.values[:,-1]
print(train.iloc[:,-1].shape)
print(train.iloc[:,:-1].shape)
#gamma_model = sm.GLM(dep_data, ind_data, family=sm.families.Gamma())
#gamma_model = sm.GLM(dep_data, ind_data, family=sm.families.Gamma(sm.families.links.log))
gamma_model = sm.GLM(dep_data, ind_data, family=sm.families.Gaussian(sm.families.links.log))
gamma_results = gamma_model.fit()
print(gamma_results.summary())
#plot(gamma_results.mu)
#print(test.values)

submission=open('/Users/abhinisinha/Documents/Kaggle_Compititions/House_Pricing/GeneralLinearRegressionSubmission.csv','w')
submission.write('Id'+','+'SalePrice'+'\n')

final_prediction=gamma_results.predict(test.values)
print(final_prediction.shape)
num = 0
#for prediction in np.nditer(final_prediction):
for prediction in final_prediction:
#     try:
    submission.write(str(num+1461)+','+str(prediction)+'\n')
    #print(str(num+1461)+','+str(prediction))
    num = num + 1
submission.close()


(1460,)
(1460, 21)
                 Generalized Linear Model Regression Results                  
==============================================================================
Dep. Variable:                      y   No. Observations:                 1460
Model:                            GLM   Df Residuals:                     1439
Model Family:                Gaussian   Df Model:                           20
Link Function:                    log   Scale:                   1072767062.36
Method:                          IRLS   Log-Likelihood:                -17240.
Date:                Sun, 09 Jul 2017   Deviance:                   1.5437e+12
Time:                        17:06:07   Pearson chi2:                 1.54e+12
No. Iterations:                    21                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
x1            -0.0015      0.001     -1.924      0.054      -0.003    2.79e-05
x2            -0.0041      0.004     -1.060      0.289      -0.012       0.004
x3            -0.1483      0.015     -9.819      0.000      -0.178      -0.119
x4             0.0048      0.004      1.213      0.225      -0.003       0.012
x5             0.0954      0.005     18.268      0.000       0.085       0.106
x6             0.0017      0.000      6.662      0.000       0.001       0.002
x7             0.0038      0.000     15.156      0.000       0.003       0.004
x8            -0.1524      0.011    -13.571      0.000      -0.174      -0.130
x9            -0.0008      0.003     -0.305      0.760      -0.006       0.004
x10            0.0050      0.003      1.993      0.046    8.47e-05       0.010
x11        -5.951e-05   4.86e-05     -1.226      0.220      -0.000    3.57e-05
x12         1.965e-05   2.06e-05      0.952      0.341   -2.08e-05    6.01e-05
x13         9.194e-05   2.45e-05      3.760      0.000     4.4e-05       0.000
x14            0.0002   1.54e-05     13.229      0.000       0.000       0.000
x15           -0.0212      0.012     -1.785      0.074      -0.044       0.002
x16            0.0095      0.004      2.127      0.033       0.001       0.018
x17           -0.0120      0.008     -1.489      0.137      -0.028       0.004
x18            0.0724      0.008      8.707      0.000       0.056       0.089
x19            0.0739      0.014      5.401      0.000       0.047       0.101
x20         5.048e-05   4.27e-05      1.183      0.237   -3.32e-05       0.000
x21            0.0088      0.006      1.543      0.123      -0.002       0.020
==============================================================================
(1459,)

In [ ]: