In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
%pylab inline
import seaborn as sns
sns.set(style="ticks")


Populating the interactive namespace from numpy and matplotlib

In [36]:
#set parameters
make = 'audi'
model = 'a3'
postcode = 'w1h1pg'
radius = '10'
price_to = '10000'
page = 1

#establish number of pages in result set
url = ('http://www.autotrader.co.uk/search/used/cars/%s/%s/postcode/%s/radius/%s/price-to/%s/onesearchad/used/sort/default/page/%s/searchcontext/default')%(make, model, postcode, radius, price_to, page)
x = requests.get(url)
soup = BeautifulSoup(x.text, 'html.parser')

pages = []
for i in soup.find('li', class_ = "paginationMini__count").find_all('strong'):
    pages.append(int(i.string))

In [37]:
#initiate master dataframe
master = []

for q in range(pages[0], pages[1] + 1):
    
    page = q
    url = ('http://www.autotrader.co.uk/search/used/cars/%s/%s/postcode/%s/radius/%s/price-to/%s/onesearchad/used/sort/default/page/%s/searchcontext/default')%(make, model, postcode, radius, price_to, page)
    x = requests.get(url)
    soup = BeautifulSoup(x.text, 'html.parser')
    
    for k in range(0, len(soup.find_all('article', class_ = 'search-result '))):
        ad = soup.find_all('article', class_ = 'search-result ')[k]

        #create list of ad attributes
        attributes = []
        attributes.append(ad.find('h1', class_ = "search-result__title").text)

        #add price to list
        attributes.append(ad.find_all('div', class_= "search-result__price")[0].string)

        #add additional attributes
        for i in ad.find_all('ul', class_="search-result__attributes")[0].find_all('li')[-7:]:
            attributes.append(i.string)

        #add seller details to list
        for i in ad.find_all('p', class_= "search-result__seller-details")[0].find_all('strong'):
            attributes.append(i.string)

        master.append(attributes)
    print 'page ' + str(q)


page 1
page 2
page 3
page 4
page 5
page 6
page 7
page 8
page 9
page 10
page 11
page 12
page 13
page 14
page 15
page 16
page 17
page 18
page 19
page 20
page 21
page 22
page 23

In [95]:
df = pd.DataFrame(master)
df.columns = ['model', 'price', 'year', 'type', 'mileage', 'transmission', 'engine_size', 'hp', 'fuel', 'seller_type', 'distance']
df = df.dropna()

In [96]:
#strip out text and convert columns to numerical values
df.mileage = pd.to_numeric(df.mileage.str.replace(',| miles', ''))
df.price = pd.to_numeric(df.price.str.replace('\\xa3|,', ''))
df.year = pd.to_numeric(df.year.str.replace(' \(.*\)', ''))
df.engine_size = pd.to_numeric(df.engine_size.str.replace('L', ''))
df.hp = pd.to_numeric(df.hp.str.replace(' bhp', ''))
df.distance = pd.to_numeric(df.distance.str.replace(' miles| mile', ''))
df['petrol'] = pd.get_dummies(df.fuel)['Petrol']
df['private'] = pd.get_dummies(df.seller_type)['Private seller']
df['manual'] = pd.get_dummies(df.transmission)['Manual']
df = pd.concat([df, pd.get_dummies(df.model.str[-3], prefix = 'doors')], axis = 1)

In [97]:
df


Out[97]:
model price year type mileage transmission engine_size hp fuel seller_type distance petrol private manual doors_3 doors_5
0 Audi A3 1.8 Sport 3dr 300 1999 Hatchback 115000 Manual 1.8 125 Petrol Private seller 5 1.0 1.0 1.0 1.0 0.0
1 Audi A3 1.8 SE 3dr 500 1999 Hatchback 112000 Manual 1.8 125 Petrol Trade seller 7 1.0 0.0 1.0 1.0 0.0
2 Audi A3 1.9 TDI 3dr 580 2000 Hatchback 134000 Manual 1.9 90 Diesel Private seller 7 0.0 1.0 1.0 1.0 0.0
3 Audi A3 1.6 SE 5dr 595 2001 Hatchback 130000 Manual 1.6 101 Petrol Trade seller 6 1.0 0.0 1.0 0.0 1.0
4 Audi A3 1.9 TDI SE 5dr 599 2000 Hatchback 222000 Manual 1.9 90 Diesel Private seller 7 0.0 1.0 1.0 0.0 1.0
5 Audi A3 1.8 Sport 3dr 600 1997 Hatchback 49600 Manual 1.8 125 Petrol Private seller 8 1.0 1.0 1.0 1.0 0.0
6 Audi A3 1.8 T Sport 3dr 634 2001 Hatchback 174000 Manual 1.8 150 Petrol Private seller 8 1.0 1.0 1.0 1.0 0.0
7 Audi A3 1.8 SE 3dr 640 2002 Hatchback 128000 Manual 1.8 125 Petrol Private seller 7 1.0 1.0 1.0 1.0 0.0
8 Audi A3 1.9 TDI SE 3dr 695 2002 Hatchback 210000 Manual 1.9 100 Diesel Trade seller 6 0.0 0.0 1.0 1.0 0.0
9 Audi A3 1.6 SE 3dr 790 2001 Hatchback 109000 Manual 1.6 101 Petrol Trade seller 6 1.0 0.0 1.0 1.0 0.0
10 Audi A3 1.6 5dr 790 2002 Hatchback 109000 Manual 1.6 101 Petrol Trade seller 9 1.0 0.0 1.0 0.0 1.0
11 Audi A3 1.9 TDI SE 5dr 1290 2002 Hatchback 114900 Manual 1.9 130 Diesel Trade seller 10 0.0 0.0 1.0 0.0 1.0
12 Audi A3 1.8 T Sport 5dr 1495 2003 Hatchback 120000 Automatic 1.8 150 Petrol Private seller 7 1.0 1.0 0.0 0.0 1.0
13 Audi A3 1.6 SE 3dr 1495 2003 Hatchback 78100 Automatic 1.6 101 Petrol Trade seller 10 1.0 0.0 0.0 1.0 0.0
14 Audi A3 1.9 TDI SE 3dr 1595 2004 Hatchback 140000 Manual 1.9 104 Diesel Private seller 10 0.0 1.0 1.0 1.0 0.0
15 Audi A3 2.0 TDI Sport 3dr 1690 2004 Hatchback 140000 Manual 2.0 140 Diesel Private seller 9 0.0 1.0 1.0 1.0 0.0
16 Audi A3 1.6 5dr 1695 2002 Hatchback 119000 Manual 1.6 101 Petrol Trade seller 9 1.0 0.0 1.0 0.0 1.0
17 Audi A3 1.6 Sport 3dr 1850 2004 Hatchback 102196 Manual 1.6 100 Petrol Private seller 10 1.0 1.0 1.0 1.0 0.0
18 Audi A3 1.6 SE 5dr 1850 2003 Hatchback 71000 Manual 1.6 101 Petrol Trade seller 7 1.0 0.0 1.0 0.0 1.0
19 Audi A3 2.0 TDI Sport 3dr 1950 2004 Hatchback 136325 Manual 2.0 140 Diesel Trade seller 5 0.0 0.0 1.0 1.0 0.0
20 Audi A3 2.0 FSI Sport 3dr 1950 2003 Hatchback 119000 Manual 2.0 150 Petrol Trade seller 7 1.0 0.0 1.0 1.0 0.0
21 Audi A3 2.0 TDI SE 3dr 1980 2005 Hatchback 125000 Manual 2.0 140 Diesel Trade seller 7 0.0 0.0 1.0 1.0 0.0
22 Audi A3 1.6 3dr 1995 2004 Hatchback 136650 Manual 1.6 100 Petrol Trade seller 10 1.0 0.0 1.0 1.0 0.0
23 Audi A3 1.6 Special Edition 3dr 2099 2005 Hatchback 119000 Manual 1.6 100 Petrol Private seller 8 1.0 1.0 1.0 1.0 0.0
24 Audi A3 1.6 MPI Special Edition 3dr 2099 2004 Hatchback 90000 Manual 1.6 100 Petrol Trade seller 7 1.0 0.0 1.0 1.0 0.0
25 Audi A3 2.0 TDI SE Sportback 5dr 2100 2005 Hatchback 176000 Manual 2.0 140 Diesel Private seller 10 0.0 1.0 1.0 0.0 1.0
26 Audi A3 1.9 TDI 3dr 2400 2004 Hatchback 125000 Manual 1.9 104 Diesel Private seller 6 0.0 1.0 1.0 1.0 0.0
27 Audi A3 1.6 Special Edition 3dr 2450 2004 Hatchback 109000 Manual 1.6 100 Petrol Trade seller 7 1.0 0.0 1.0 1.0 0.0
28 Audi A3 2.0 TDI Sport 3dr 2475 2004 Hatchback 144000 Manual 2.0 140 Diesel Trade seller 6 0.0 0.0 1.0 1.0 0.0
29 Audi A3 1.9 TDI Sportback 5dr 2480 2006 Hatchback 183000 Manual 1.9 104 Diesel Trade seller 7 0.0 0.0 1.0 0.0 1.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
189 Audi A3 2.0 TDi 140 SE Sportback 5dr 8500 2011 Hatchback 66138 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
190 Audi A3 1.6 TDi 105 Sportback 5dr 8599 2012 Hatchback 60429 Manual 1.6 104 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
191 Audi A3 2.0 TDi 170 Sport Sportback 5dr 8599 2012 Hatchback 82380 Manual 2.0 168 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
192 Audi A3 2.0 TDi 140 Sport Sportback 5dr 8799 2012 Hatchback 84147 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
193 Audi A3 2.0 TDi 140 SE Sportback 5dr 8799 2012 Hatchback 58719 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
194 Audi A3 2.0 TDi 140 SE Sportback S-Tronic 5dr 8799 2010 Hatchback 63908 Automatic 2.0 138 Diesel Trade seller 4 0.0 0.0 0.0 0.0 1.0
195 Audi A3 2.0 TDI Black Edition 3dr 8880 2012 Hatchback 123000 Manual 2.0 138 Diesel Trade seller 7 0.0 0.0 1.0 1.0 0.0
196 Audi A3 1.6 TDI S Line Sportback S Tronic 5dr 8995 2010 Hatchback 92000 Automatic 1.6 104 Diesel Trade seller 5 0.0 0.0 0.0 0.0 1.0
197 Audi A3 1.6 Sport S Tronic 3dr 8995 2010 Hatchback 62200 Automatic 1.6 101 Petrol Trade seller 1 1.0 0.0 0.0 1.0 0.0
198 Audi A3 2.0 TDi 140 Sport Sportback 5dr 9000 2012 Hatchback 66776 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
201 Audi A3 1.2 TFSI SE Sportback S Tronic 5dr 9000 2010 Hatchback 29000 Automatic 1.2 104 Petrol Trade seller 8 1.0 0.0 0.0 0.0 1.0
202 Audi A3 2.0 TDi 140 Sport Sportback 5dr 9000 2012 Hatchback 72402 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
203 Audi A3 2.0 TDi 140 S-Line 3dr 9299 2012 Hatchback 90273 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 1.0 0.0
204 Audi A3 2.0 TDI S Tronic 3dr 9495 2009 Hatchback 57000 Automatic 2.0 168 Diesel Trade seller 6 0.0 0.0 0.0 1.0 0.0
205 Audi A3 1.6 TDi 105 SE Sportback 5dr 9500 2012 Hatchback 69973 Manual 1.6 104 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
206 Audi A3 1.6 TDi 105 Sportback 5dr 9500 2012 Hatchback 35815 Manual 1.6 104 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
207 Audi A3 2.0 TDi 140 Sport Sportback 5dr 9500 2011 Hatchback 55046 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
208 Audi A3 1.6 TDi 105 SE Sportback S-Tronic 5dr 9500 2011 Hatchback 48167 Automatic 1.6 104 Diesel Trade seller 4 0.0 0.0 0.0 0.0 1.0
209 Audi A3 2.0 TDi 140 Sport Sportback 5dr 9500 2012 Hatchback 69757 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
210 Audi A3 1.6 TDi 105 S-Line 3dr 9500 2010 Hatchback 33771 Manual 1.6 104 Diesel Trade seller 4 0.0 0.0 1.0 1.0 0.0
211 Audi A3 1.6 TDi 105 S-Line Sportback 5dr 9500 2011 Hatchback 75889 Manual 1.6 104 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
212 Audi A3 2.0 TD Sport Sportback 5dr 9780 2011 Hatchback 57000 Manual 2.0 138 Diesel Trade seller 9 0.0 0.0 1.0 0.0 1.0
213 Audi A3 2.0 TDi 140 Sport Sportback 5dr 9799 2012 Hatchback 75344 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
214 Audi A3 2.0 TDi 140 SE Sportback 5dr 9799 2012 Hatchback 47436 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
215 Audi A3 2.0 TDi 140 Black Edition Sportback 5dr 9799 2011 Hatchback 95770 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
216 Audi A3 2.0 TDi 140 Black Edition Sportback 5dr 9799 2011 Hatchback 96400 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
217 Audi A3 1.6 TDi 105 Sport Sportback 5dr 10000 2012 Hatchback 60829 Manual 1.6 104 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
218 Audi A3 1.2 T FSi Sport S-Tronic 3dr 10000 2012 Hatchback 27677 Automatic 1.2 104 Petrol Trade seller 4 1.0 0.0 0.0 1.0 0.0
219 Audi A3 2.0 TDi 140 Sport Sportback 5dr 10000 2011 Hatchback 45467 Manual 2.0 138 Diesel Trade seller 4 0.0 0.0 1.0 0.0 1.0
220 Audi A3 2.0 T FSi Quattro Sport Sportback S-Tr... 10000 2010 Hatchback 51902 Automatic 2.0 197 Petrol Trade seller 4 1.0 0.0 0.0 0.0 1.0

217 rows × 16 columns


In [101]:
sns.lmplot('hp', 'price', data=df)


Out[101]:
<seaborn.axisgrid.FacetGrid at 0x14b174390>

In [16]:
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [98]:
results = smf.ols('price ~ mileage + hp + year + doors_3 + doors_5 + petrol + private + manual', data=df).fit()

# Inspect the results
print results.summary()


                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  price   R-squared:                       0.881
Model:                            OLS   Adj. R-squared:                  0.877
Method:                 Least Squares   F-statistic:                     222.0
Date:                Sat, 16 Apr 2016   Prob (F-statistic):           4.35e-93
Time:                        19:28:09   Log-Likelihood:                -1775.9
No. Observations:                 217   AIC:                             3568.
Df Residuals:                     209   BIC:                             3595.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept  -8.065e+05   3.53e+04    -22.864      0.000     -8.76e+05 -7.37e+05
mileage       -0.0213      0.002     -9.768      0.000        -0.026    -0.017
hp             6.5481      2.110      3.103      0.002         2.388    10.708
year         606.0062     26.277     23.062      0.000       554.205   657.808
doors_3    -4.034e+05   1.76e+04    -22.889      0.000     -4.38e+05 -3.69e+05
doors_5    -4.031e+05   1.77e+04    -22.839      0.000     -4.38e+05 -3.68e+05
petrol      -279.8032    148.861     -1.880      0.062      -573.264    13.658
private     -233.3731    143.909     -1.622      0.106      -517.073    50.327
manual      -240.6816    143.886     -1.673      0.096      -524.335    42.971
==============================================================================
Omnibus:                        6.511   Durbin-Watson:                   1.315
Prob(Omnibus):                  0.039   Jarque-Bera (JB):                6.294
Skew:                           0.409   Prob(JB):                       0.0430
Kurtosis:                       3.161   Cond. No.                     1.13e+21
==============================================================================

Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 1.7e-30. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.

In [ ]: