In [1]:
import wget
import os
import zipfile
import urllib3
import certifi
import sys
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls
import seaborn as sns
import plotly
plotly.offline.init_notebook_mode()
In [2]:
# Open the archive once; `zf` is deliberately left open because later cells
# read other CSV members from the same handle.
zf = zipfile.ZipFile('../data/GlobalLandTemperatures.zip')

# Close the member stream as soon as pandas has consumed it instead of
# leaking the handle returned by zf.open().
with zf.open('GlobalLandTemperaturesByCountry.csv') as country_csv:
    global_temp_country = pd.read_csv(country_csv)
In [3]:
# Labels removed outright. Four of them ('Denmark', 'France', 'Netherlands',
# 'United Kingdom') also exist in a "(Europe)" variant that is renamed below,
# so after this cell each of those countries appears under a single name.
# NOTE(review): the remaining labels look like continents / non-country
# regions - confirm against the dataset description.
dropped_labels = [
    'Denmark', 'Antarctica', 'France', 'Europe', 'Netherlands',
    'United Kingdom', 'Africa', 'South America',
]

# "(Europe)" variant -> plain country name.
europe_renames = {
    'Denmark (Europe)': 'Denmark',
    'France (Europe)': 'France',
    'Netherlands (Europe)': 'Netherlands',
    'United Kingdom (Europe)': 'United Kingdom',
}

keep_rows = ~global_temp_country['Country'].isin(dropped_labels)
global_temp_country_clean = global_temp_country[keep_rows].replace(europe_renames)
In [4]:
# Average temperature per country.
# A single groupby replaces the per-country boolean-mask scan of the whole
# frame. The group keys come back sorted, and NaN readings are skipped by
# mean(), exactly as Series.mean() did in the loop.
country_means = global_temp_country_clean.groupby('Country')['AverageTemperature'].mean()

# Parallel sequences consumed by the choropleth cells below:
# countries[i] is the country name, mean_temp[i] its mean temperature.
countries = country_means.index.values
mean_temp = country_means.tolist()
Here we are creating a mercator projection of the world map taking into account only the average temperature for each country. Users can hover over the countries in order to visualize the name of the Country and the relative average temperature:
In [5]:
# Flat world choropleth: one polygon per entry of `countries`, coloured by the
# matching entry of `mean_temp` (both built in the previous cell).
data = [dict(
    type='choropleth',
    locations=countries,
    z=mean_temp,
    text=countries,                  # hover text: the country name
    locationmode='country names',    # `locations` holds names, not ISO codes
    autocolorscale=True,
    reversescale=False,
    marker=dict(
        line=dict(
            color='rgb(180,180,180)',
            width=0.5,
        ),
    ),
    colorbar=dict(
        # NOTE(review): `autotick` looks like a legacy plotly attribute that
        # only gets through because of validate=False below - confirm it
        # still has an effect with the installed plotly version.
        autotick=False,
        title='Average land temperature',
    ),
)]

layout = dict(
    title='Average land temperature by country',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        showocean=True,
        oceancolor='rgb(10,200,255)',
        projection=dict(
            # Projection names are lowercase in plotly; 'Mercator' is not a
            # valid value, so the map would not be drawn as Mercator.
            type='mercator',
        ),
    ),
)

fig = dict(data=data, layout=layout)
# validate=False skips plotly.py's schema validation of the figure dict.
py.iplot(fig, validate=False, filename='d3-world-map')
Here we are creating a globe graph taking into account only the average temperature for each country. Users can hover over the countries in order to visualize the name of the Country and the relative average temperature:
In [6]:
# Create globe graph: the same per-country averages as the flat map, drawn on
# an orthographic (globe) projection.
data = [dict(
    type='choropleth',
    locations=countries,
    z=mean_temp,
    locationmode='country names',    # `locations` holds names, not ISO codes
    text=countries,                  # hover text: the country name
    marker=dict(
        line=dict(color='rgb(0,0,0)', width=1),
    ),
    colorbar=dict(
        # NOTE(review): `autotick` looks like a legacy plotly attribute that
        # only gets through because of validate=False below - confirm.
        autotick=True,
        tickprefix='',
        # plotly text breaks lines with <br>; a literal "\n" does not
        # produce a line break in the rendered title.
        title='# Average<br>Temperature,<br>°C',
    ),
)]

layout = dict(
    title='Average land temperature by countries',
    geo=dict(
        showframe=False,
        showocean=True,
        oceancolor='rgb(10,200,255)',
        projection=dict(
            type='orthographic',
            # Initial orientation of the globe.
            rotation=dict(
                lon=60,
                lat=10,
            ),
        ),
        lonaxis=dict(
            showgrid=True,
            gridcolor='rgb(102, 102, 102)',
        ),
        lataxis=dict(
            showgrid=True,
            gridcolor='rgb(102, 102, 102)',
        ),
    ),
)

fig = dict(data=data, layout=layout)
# validate=False skips plotly.py's schema validation of the figure dict.
py.iplot(fig, validate=False, filename='worldmap')
Here we are plotting the average land temperature taking into account the uncertainty related to the average temperature over the years:
In [7]:
# Load the world-level series; close the archive member once it has been read.
with zf.open('GlobalTemperatures.csv') as world_csv:
    global_temp = pd.read_csv(world_csv)

# Extract the year from a date: 'dt' is read as text here (no parse_dates),
# and its first four characters are taken as the year.
year_key = global_temp['dt'].str[:4]
years = np.unique(year_key)  # sorted year labels; reused by later cells

# One groupby yields both yearly means, instead of re-deriving the year column
# and rescanning the frame twice for every year.
yearly_means = (
    global_temp
    .groupby(year_key)[['LandAverageTemperature', 'LandAverageTemperatureUncertainty']]
    .mean()
    .reindex(years)  # keep the rows aligned with `years`
)
mean_temp_world = yearly_means['LandAverageTemperature'].tolist()
mean_temp_world_uncertainty = yearly_means['LandAverageTemperatureUncertainty'].tolist()

# Uncertainty envelope: mean +/- mean uncertainty for each year.
upper_band = np.array(mean_temp_world) + np.array(mean_temp_world_uncertainty)
lower_band = np.array(mean_temp_world) - np.array(mean_temp_world_uncertainty)

trace0 = go.Scatter(
    x=years,
    y=upper_band,
    fill=None,
    mode='lines',
    name='Uncertainty top',
    line=dict(
        # RGB channels range over 0-255; the previous value used 256.
        color='rgb(255, 100, 0)',
    ),
)
trace1 = go.Scatter(
    x=years,
    y=lower_band,
    fill='tonexty',  # shade the area between this trace and the previous one
    mode='lines',
    name='Uncertainty bot',
    line=dict(
        color='rgb(0, 10, 0)',
    ),
)
trace2 = go.Scatter(
    x=years,
    y=mean_temp_world,
    name='Average Temperature',
    line=dict(
        color='rgb(0, 0, 200)',
    ),
)

data = [trace0, trace1, trace2]
layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='Average land temperature of the world over the years',
    showlegend=False,
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)
Here we are trying to have a more cleaned up visualization of the average land temperature taking into account the uncertainty related to the average temperature over the years:
In [8]:
# Re-read the world series with 'dt' parsed as datetimes so the year can be
# taken via the .dt accessor; close the archive member once it has been read.
with zf.open('GlobalTemperatures.csv') as world_csv:
    globaltemp = pd.read_csv(world_csv, parse_dates=['dt'])

# Mean of every column per calendar year.
year_temp = globaltemp.groupby(globaltemp['dt'].dt.year).mean()

plt.figure(figsize=(12, 6))
# pd.stats.moments.ewma was removed from pandas; Series.ewm(com=5).mean() is
# its replacement (the old second positional argument was `com`).
year_temp['LandAverageTemperature'].ewm(com=5).mean().plot()
# Raw yearly means drawn as a thinner line on the same axes.
year_temp['LandAverageTemperature'].plot(linewidth=1)
plt.title('Average temperature by year')
plt.xlabel('year')
Out[8]:
Now we are analyzing how temperature varies in different countries and comparing how developing countries fare against developed countries.
In [9]:
developing = ['India', 'Mexico', 'Turkey', 'South Africa', 'Brazil']

# `years` comes from the world-level cell above; the first 70 entries are
# skipped. NOTE(review): presumably to drop the sparse early records - confirm.
plot_years = years[70:]

# mean_temp_year_country[k] holds the yearly means of developing[k], aligned
# with plot_years. One groupby per country replaces re-deriving the year
# column for every (country, year) pair; reindex leaves NaN for years in
# which the country has no rows, as the empty-selection mean did before.
mean_temp_year_country = []
for country in developing:
    country_rows = global_temp_country_clean[global_temp_country_clean['Country'] == country]
    # 'dt' is text in this frame; its first four characters are the year.
    yearly_mean = country_rows.groupby(country_rows['dt'].str[:4])['AverageTemperature'].mean()
    mean_temp_year_country.append(yearly_mean.reindex(plot_years).tolist())

colors = ['rgb(0, 255, 255)', 'rgb(255, 0, 255)', 'rgb(0, 0, 0)',
          'rgb(255, 0, 0)', 'rgb(0, 255, 0)', 'rgb(0, 0, 255)']

# One line per country; zip stops at the shortest input, so surplus colours
# are simply unused.
traces = [
    go.Scatter(
        x=plot_years,
        y=country_series,
        mode='lines',
        name=country_name,
        line=dict(color=line_color),
    )
    for country_name, country_series, line_color in zip(developing, mean_temp_year_country, colors)
]

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='Average land temperature of developing countries',
)
fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)
In [10]:
developed = ['United States', 'Russia']

# `years` comes from the world-level cell above; the first 70 entries are
# skipped. NOTE(review): presumably to drop the sparse early records - confirm.
plot_years = years[70:]

# mean_temp_country[k] holds the yearly means of developed[k], aligned with
# plot_years. One groupby per country replaces re-deriving the year column
# for every (country, year) pair; reindex leaves NaN for years in which the
# country has no rows, as the empty-selection mean did before.
mean_temp_country = []
for country in developed:
    country_rows = global_temp_country_clean[global_temp_country_clean['Country'] == country]
    # 'dt' is text in this frame; its first four characters are the year.
    yearly_mean = country_rows.groupby(country_rows['dt'].str[:4])['AverageTemperature'].mean()
    mean_temp_country.append(yearly_mean.reindex(plot_years).tolist())

colors = ['rgb(0, 255, 255)', 'rgb(255, 0, 255)', 'rgb(0, 0, 0)',
          'rgb(255, 0, 0)', 'rgb(0, 255, 0)', 'rgb(0, 0, 255)']

# One line per country; zip stops at the shortest input, so surplus colours
# are simply unused.
traces = [
    go.Scatter(
        x=plot_years,
        y=country_series,
        mode='lines',
        name=country_name,
        line=dict(color=line_color),
    )
    for country_name, country_series, line_color in zip(developed, mean_temp_country, colors)
]

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='Average land temperature of developed countries',
)
fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)
As we can see from the graph above, the developed countries show a huge spike in average land temperatures over the past 40 years. The developing countries also show a rising trend in average land temperatures, but not on the scale of the developed countries.
Now we compare the average temperature for manufacturing countries against non-manufacturing countries. On one hand, we take the United States and China, which are heavily industrialized; on the other hand, we have Nepal, which has far fewer industries.
In [11]:
compare = ['United States', 'China', 'Nepal']

# `years` comes from the world-level cell above; the first 70 entries are
# skipped. NOTE(review): presumably to drop the sparse early records - confirm.
plot_years = years[70:]

# mean_temp_country[k] holds the yearly means of compare[k], aligned with
# plot_years. One groupby per country replaces re-deriving the year column
# for every (country, year) pair; reindex leaves NaN for years in which the
# country has no rows, as the empty-selection mean did before.
mean_temp_country = []
for country in compare:
    country_rows = global_temp_country_clean[global_temp_country_clean['Country'] == country]
    # 'dt' is text in this frame; its first four characters are the year.
    yearly_mean = country_rows.groupby(country_rows['dt'].str[:4])['AverageTemperature'].mean()
    mean_temp_country.append(yearly_mean.reindex(plot_years).tolist())

colors = ['rgb(0, 255, 255)', 'rgb(255, 0, 255)', 'rgb(0, 0, 0)',
          'rgb(255, 0, 0)', 'rgb(0, 255, 0)', 'rgb(0, 0, 255)']

# One line per country; zip stops at the shortest input, so surplus colours
# are simply unused.
traces = [
    go.Scatter(
        x=plot_years,
        y=country_series,
        mode='lines',
        name=country_name,
        line=dict(color=line_color),
    )
    for country_name, country_series, line_color in zip(compare, mean_temp_country, colors)
]

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='comparing temperature rise between Industrial countries vs non industrial',
)
fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)
From the graph above, we can see the manufacturing countries have a huge spike in temperature, while Nepal not so much.
While the pollutants have a greater local impact, they in turn impact the entire globe. Here we let users see the temperature variation of 6 random cities at once; the cities change every time the script runs.
In [12]:
# Load the per-city series with 'dt' parsed as datetimes; close the archive
# member once pandas has read it.
with zf.open('GlobalLandTemperaturesByCity.csv') as city_csv:
    bycities = pd.read_csv(city_csv, parse_dates=['dt'])

# There are some cities with the same name but in different countries, so the
# country is appended to the city name to make the label unambiguous.
# (The earlier drop_duplicates() call was removed: its result was discarded,
# so it had no effect on `bycities`.)
bycities['City'] = bycities['City'].str.cat(bycities['Country'], sep=' ')

# Keep only readings from 1900 onwards.
bycities = bycities[bycities['dt'].dt.year >= 1900]
bycities.head()
Out[12]:
In [13]:
# Group the temperature readings by (city, calendar year) once and derive the
# three yearly statistics from the same grouped selection. unstack() turns the
# year level into columns, giving one row per city and one column per year.
temps_by_city_year = bycities.groupby(['City', bycities.dt.dt.year])['AverageTemperature']

city_means = temps_by_city_year.mean().unstack()
city_mins = temps_by_city_year.min().unstack()
city_maxs = temps_by_city_year.max().unstack()
city_means.head()
Out[13]:
In [14]:
# Per-city baseline: the mean over the first five year-columns of city_means.
first_years_mean = city_means.iloc[:, :5].mean(axis='columns')
# Express every yearly mean relative to that city's own baseline.
city_means_shifted = city_means.sub(first_years_mean, axis='index')
def plot_temps(cities, city_ser, ax):
    """Plot smoothed temperature change for the given cities on one axes.

    Each row of ``city_ser`` is shifted by the mean of its own first five
    columns, so the curves show change relative to that baseline, and is then
    smoothed with an exponentially weighted mean (``com=10``).

    Parameters
    ----------
    cities : iterable
        Row labels of ``city_ser`` to draw; one line is plotted per label.
    city_ser : pandas.DataFrame
        One row per city, one column per period (the callers in this notebook
        pass the city-by-year tables).
    ax : matplotlib axes
        Axes to draw on. Its x label is cleared and a legend is added.
    """
    baseline = city_ser.iloc[:, :5].mean(axis=1)
    shifted = city_ser.subtract(baseline, axis=0)

    # Iterate the `cities` argument. The previous version looped over the
    # global `random_cities`, silently ignoring the parameter and failing
    # whenever that global was not defined.
    for city in cities:
        # pd.stats.moments.ewma was removed from pandas; ewm(com=10).mean()
        # is its replacement (the old second positional argument was `com`).
        smoothed = shifted.loc[city].ewm(com=10).mean()
        smoothed.plot(label=city, ax=ax)

    ax.set_xlabel('')
    ax.legend(loc='best')
The users can change the number of cities that they want to see at a glance by changing "n".
In [16]:
n = 6  # number of random cities you want to see

# Three stacked panels: yearly mean, yearly min and yearly max.
fig, axes = plt.subplots(3, 1, figsize=(10, 10))

# Draw the sample once so all three panels show the same cities. No seed is
# set, so the selection differs on every run.
random_cities = city_means_shifted.sample(n).index

for panel, yearly_stat in zip(axes, (city_means, city_mins, city_maxs)):
    plot_temps(random_cities, yearly_stat, panel)

axes[0].set_title("Year's mean temperature increase for random cities")
axes[1].set_title("Year's min temperature increase for random cities")
axes[2].set_title("Year's max temperature increase for random cities")
Out[16]:
In [ ]: