Plotting the cheapest and the most expensive houses for sale in Mexico City

elnortescrapper is a custom-made Python web scrapper for advertisements of houses for sale in Mexico that are listed in Avisos de ocasión.

It was used to retrieve the advertisements of houses for sale in Mexico City into a CSV file.

This notebook shows how to use that information to plot the 300 cheapest and the 300 most expensive houses for sale in Mexico City


In [2]:
import pandas
from pandas import DataFrame

In [3]:
df = pandas.read_csv('mexico_city_houses.csv')

In [4]:
df.count()


Out[4]:
timestamp     1909
precio        1909
zona          1909
colonia       1909
visitas       1226
plantas       1884
m2_terreno    1909
recámaras     1906
baños         1903
m2_constr     1909
fecha_pub     1909
latitude      1902
longitude     1902
url           1909
dtype: int64

In [5]:
df.columns


Out[5]:
Index(['timestamp', 'precio', 'zona', 'colonia', 'visitas', 'plantas',
       'm2_terreno', 'recámaras', 'baños', 'm2_constr', 'fecha_pub',
       'latitude', 'longitude', 'url'],
      dtype='object')

In [6]:
df.head()


Out[6]:
timestamp precio zona colonia visitas plantas m2_terreno recámaras baños m2_constr fecha_pub latitude longitude url
0 2017-02-05T23:24:28.235641 11,800,000 pesos TLALPAN AILES II 8,533 2.0 380 3.0 3.5 361 13 de Ene 19.295180 -99.187938 http://www.avisosdeocasion.com/reforma/Detalle...
1 2017-02-05T23:24:32.084964 1,900,000 pesos TLALPAN AMPLIACION MIGUEL HI 5,709 2.0 124 3.0 1.5 170 14 de Ene 19.282095 -99.196866 http://www.avisosdeocasion.com/reforma/Detalle...
2 2017-02-05T23:24:32.687735 6,175,000 pesos TLALPAN AMPLIACION MIGUEL HI 2,201 3.0 164 4.0 4.5 295 25 de Ene 19.271200 -99.209100 http://www.avisosdeocasion.com/reforma/Detalle...
3 2017-02-05T23:24:33.266031 4,300,000 pesos TLALPAN AMPLIACION MIGUEL HI 2,258 3.0 95 3.0 2.5 195 27 de Ene 19.278350 -99.205226 http://www.avisosdeocasion.com/reforma/Detalle...
4 2017-02-05T23:24:34.186730 3,420,000 pesos TLALPAN AMPLIACION MIGUEL HI 1,847 3.0 219 3.0 2.5 299 8 de Ene 19.277180 -99.210502 http://www.avisosdeocasion.com/reforma/Detalle...

In [7]:
# Prepare the column "precio" (price)

# Remove the word 'pesos'
df_with_pesos_removed = df[ df['precio'].str.contains(' pesos') ]['precio'].str.replace(' pesos', '')
df.ix[df['precio'].str.contains(' pesos'), 'precio'] = df_with_pesos_removed

 # Remove the commas
df.ix[:,'precio'] = df['precio'].str.replace(',','')

# Convert the prices in USD ("dólares") to MXN
from forex_python.converter import CurrencyRates
exchange_rates = CurrencyRates()
USD_to_MXN = exchange_rates.get_rate('USD','MXN')
df.ix[df['precio'].str.contains('dólares'),'precio'] = df[ df['precio'].str.contains('dólares')]['precio'].str.replace(' dólares','').str.replace(',','').astype('int')*USD_to_MXN

# Convert the 'precio' column to numeric
df.ix[:,'precio'] = pandas.to_numeric(df['precio'])

print("(Converted USD prices with 1 USD =",USD_to_MXN," MXN)")


(Converted USD prices with 1 USD = 18.724  MXN)

In [8]:
# We are going to plot the n most expensive and n cheapest houses for sale
n = 300

In [9]:
from string import Template

In [10]:
# Template for the final html with the embedded map
# It has two placeholders: one for the javascript code of the map and one for the google maps api key

html_template = Template("""
<!DOCTYPE html>
    <html>
      <head>
        <meta name="viewport" content="initial-scale=1.0, user-scalable=no">
        <meta charset="utf-8">
        <title>Map</title>
        <style>
          #map {
            height: 500px;
            width: 1000px;
          }
          html, body {
            height: 100%;
            margin: 0;
            padding: 0;
          }
        </style>
      </head>
      <body>
        <div id="map"></div>
        <script>
            $map_js
        </script>
        $google_maps_api_key
      </body>
    </html>
""")

In [11]:
# Template for the javascript code for the map

map_js_template = Template("""
function initMap() {
    var map = new google.maps.Map(document.getElementById('map'));

    var bounds = new google.maps.LatLngBounds();

    // Markers of most expensive houses
    var markers_set_1 = [$markers_1];

    for( i = 0; i < markers_set_1.length; i++ ) {
        marker_position = new google.maps.LatLng(markers_set_1[i][1], markers_set_1[i][2]);
        var marker = new google.maps.Marker({
            position: marker_position,
            map: map,
            title: markers_set_1[i][0],
            url: markers_set_1[i][3]
        });
        marker.setIcon('http://maps.google.com/mapfiles/ms/icons/green-dot.png')

        //  Update the bounds
        bounds.extend(marker_position)

        // Add the InfoWindow as a property of each marker in order
        // to ensure that it is displayed next to it
        marker.info = new google.maps.InfoWindow({
            content: markers_set_1[i][0] + '<br> <a target="_blank" href="'+markers_set_1[i][3]+'">Open ad</a>'
        });

        // Listener to open the InfoWindow
        google.maps.event.addListener(marker, 'click', function() {
            this.info.open(map, this);
        });

        // If closing an opened InfoWindow when another part of the map is clicked,
        // add a listener to the map here, and keep track of the last opened InfoWindow
    }

    // Markers of cheapest houses
    var markers_set_2 = [$markers_2];

    for( i = 0; i < markers_set_2.length; i++ ) {
        marker_position = new google.maps.LatLng(markers_set_2[i][1], markers_set_2[i][2]);
        var marker = new google.maps.Marker({
            position: marker_position,
            map: map,
            title: markers_set_1[i][0],
            url: markers_set_1[i][3]
        });
        marker.setIcon('http://maps.google.com/mapfiles/ms/icons/purple-dot.png')

        //  Update the bounds
        bounds.extend(marker_position)

        // Add the InfoWindow as a property of each marker in order
        // to ensure that it is displayed next to the marker
        // and not next to the last marker
        marker.info = new google.maps.InfoWindow({
            content: markers_set_2[i][0] + '<br> <a target="_blank" href="'+markers_set_2[i][3]+'">Open ad</a>'
        });

        google.maps.event.addListener(marker, 'click', function() {
            this.info.open(map, this);
        });

        // Adjust the bounds of the map
        map.fitBounds(bounds);
        map.setCenter(bounds.getCenter());
    }
  }"""
)

In [12]:
# Choose the entries that have location and sort the DataFrame by price
df_by_price_asc = df[ pandas.notnull(df['latitude'])].sort_values(['precio'], ascending=True)

# Markers for the most expensive houses, in the form of a javascript list
markers_1 = ""
for index, element in df_by_price_asc.ix[:,['precio', 'colonia', 'latitude', 'longitude', 'url']].tail(n).iterrows():
    precio = "$"+"{:,}".format(int(element['precio']))
    colonia = str(element['colonia'])
    latitude = str(element['latitude'])
    longitude = str(element['longitude'])
    url = element['url']
    markers_1 += "['"+precio+" ("+colonia+")',"+latitude+","+longitude+",'"+url+"'],\n"

# Markers for the cheapest houses, in the form of a javascript list
markers_2 = ""
for index, element in df_by_price_asc.ix[:,['precio', 'colonia', 'latitude', 'longitude', 'url']].head(n).iterrows():
    precio = "$"+"{:,}".format(int(element['precio']))
    colonia = str(element['colonia'])
    latitude = str(element['latitude'])
    longitude = str(element['longitude'])
    url = element['url']
    markers_2 += "['"+precio+" ("+colonia+")',"+latitude+","+longitude+",'"+url+"'],\n"
    
# Replace in the template of the map
map_js = map_js_template.safe_substitute({'markers_1':markers_1,'markers_2':markers_2})

In [13]:
# Replace the key and the javascript of the map in the final html template

google_maps_api_key = '''<script async defer
            src="https://maps.googleapis.com/maps/api/js?key=AIzaSyByfLrvUSff1YaEZq1r1vDT9xhW8-6nZOc&callback=initMap">
            </script>'''

final_html = html_template.safe_substitute({'map_js':map_js, 'google_maps_api_key':google_maps_api_key})

Purple: The cheapest houses for sale

Green: The most expensive houses for sale


In [15]:
from IPython.display import HTML
HTML(final_html)


Out[15]:
Map