elnortescrapper is a custom-made Python web scrapper for advertisements of houses for sale in Mexico that are listed in Avisos de ocasión.
It was used to retrieve the advertisements of houses for sale in Mexico City into a CSV file.
This notebook shows how to use that information to plot the 300 cheapest and the 300 most expensive houses for sale in Mexico City
In [2]:
import pandas
from pandas import DataFrame
In [3]:
df = pandas.read_csv('mexico_city_houses.csv')
In [4]:
df.count()
Out[4]:
In [5]:
df.columns
Out[5]:
In [6]:
df.head()
Out[6]:
In [7]:
# Prepare the column "precio" (price)
# Remove the word 'pesos'
df_with_pesos_removed = df[ df['precio'].str.contains(' pesos') ]['precio'].str.replace(' pesos', '')
df.ix[df['precio'].str.contains(' pesos'), 'precio'] = df_with_pesos_removed
# Remove the commas
df.ix[:,'precio'] = df['precio'].str.replace(',','')
# Convert the prices in USD ("dólares") to MXN
from forex_python.converter import CurrencyRates
exchange_rates = CurrencyRates()
USD_to_MXN = exchange_rates.get_rate('USD','MXN')
df.ix[df['precio'].str.contains('dólares'),'precio'] = df[ df['precio'].str.contains('dólares')]['precio'].str.replace(' dólares','').str.replace(',','').astype('int')*USD_to_MXN
# Convert the 'precio' column to numeric
df.ix[:,'precio'] = pandas.to_numeric(df['precio'])
print("(Converted USD prices with 1 USD =",USD_to_MXN," MXN)")
In [8]:
# We are going to plot the n most expensive and n cheapest houses for sale
n = 300
In [9]:
from string import Template
In [10]:
# Template for the final html with the embedded map
# It has two placeholders: one for the javascript code of the map and one for the google maps api key
html_template = Template("""
<!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="initial-scale=1.0, user-scalable=no">
<meta charset="utf-8">
<title>Map</title>
<style>
#map {
height: 500px;
width: 1000px;
}
html, body {
height: 100%;
margin: 0;
padding: 0;
}
</style>
</head>
<body>
<div id="map"></div>
<script>
$map_js
</script>
$google_maps_api_key
</body>
</html>
""")
In [11]:
# Template for the javascript code for the map
map_js_template = Template("""
function initMap() {
var map = new google.maps.Map(document.getElementById('map'));
var bounds = new google.maps.LatLngBounds();
// Markers of most expensive houses
var markers_set_1 = [$markers_1];
for( i = 0; i < markers_set_1.length; i++ ) {
marker_position = new google.maps.LatLng(markers_set_1[i][1], markers_set_1[i][2]);
var marker = new google.maps.Marker({
position: marker_position,
map: map,
title: markers_set_1[i][0],
url: markers_set_1[i][3]
});
marker.setIcon('http://maps.google.com/mapfiles/ms/icons/green-dot.png')
// Update the bounds
bounds.extend(marker_position)
// Add the InfoWindow as a property of each marker in order
// to ensure that it is displayed next to it
marker.info = new google.maps.InfoWindow({
content: markers_set_1[i][0] + '<br> <a target="_blank" href="'+markers_set_1[i][3]+'">Open ad</a>'
});
// Listener to open the InfoWindow
google.maps.event.addListener(marker, 'click', function() {
this.info.open(map, this);
});
// If closing an opened InfoWindow when another part of the map is clicked,
// add a listener to the map here, and keep track of the last opened InfoWindow
}
// Markers of cheapest houses
var markers_set_2 = [$markers_2];
for( i = 0; i < markers_set_2.length; i++ ) {
marker_position = new google.maps.LatLng(markers_set_2[i][1], markers_set_2[i][2]);
var marker = new google.maps.Marker({
position: marker_position,
map: map,
title: markers_set_1[i][0],
url: markers_set_1[i][3]
});
marker.setIcon('http://maps.google.com/mapfiles/ms/icons/purple-dot.png')
// Update the bounds
bounds.extend(marker_position)
// Add the InfoWindow as a property of each marker in order
// to ensure that it is displayed next to the marker
// and not next to the last marker
marker.info = new google.maps.InfoWindow({
content: markers_set_2[i][0] + '<br> <a target="_blank" href="'+markers_set_2[i][3]+'">Open ad</a>'
});
google.maps.event.addListener(marker, 'click', function() {
this.info.open(map, this);
});
// Adjust the bounds of the map
map.fitBounds(bounds);
map.setCenter(bounds.getCenter());
}
}"""
)
In [12]:
# Choose the entries that have location and sort the DataFrame by price
df_by_price_asc = df[ pandas.notnull(df['latitude'])].sort_values(['precio'], ascending=True)
# Markers for the most expensive houses, in the form of a javascript list
markers_1 = ""
for index, element in df_by_price_asc.ix[:,['precio', 'colonia', 'latitude', 'longitude', 'url']].tail(n).iterrows():
precio = "$"+"{:,}".format(int(element['precio']))
colonia = str(element['colonia'])
latitude = str(element['latitude'])
longitude = str(element['longitude'])
url = element['url']
markers_1 += "['"+precio+" ("+colonia+")',"+latitude+","+longitude+",'"+url+"'],\n"
# Markers for the cheapest houses, in the form of a javascript list
markers_2 = ""
for index, element in df_by_price_asc.ix[:,['precio', 'colonia', 'latitude', 'longitude', 'url']].head(n).iterrows():
precio = "$"+"{:,}".format(int(element['precio']))
colonia = str(element['colonia'])
latitude = str(element['latitude'])
longitude = str(element['longitude'])
url = element['url']
markers_2 += "['"+precio+" ("+colonia+")',"+latitude+","+longitude+",'"+url+"'],\n"
# Replace in the template of the map
map_js = map_js_template.safe_substitute({'markers_1':markers_1,'markers_2':markers_2})
In [13]:
# Replace the key and the javascript of the map in the final html template
google_maps_api_key = '''<script async defer
src="https://maps.googleapis.com/maps/api/js?key=AIzaSyByfLrvUSff1YaEZq1r1vDT9xhW8-6nZOc&callback=initMap">
</script>'''
final_html = html_template.safe_substitute({'map_js':map_js, 'google_maps_api_key':google_maps_api_key})
Purple: The cheapest houses for sale
Green: The most expensive houses for sale
In [15]:
from IPython.display import HTML
HTML(final_html)
Out[15]: