Temperature demo



In [1]:

    
import wget
    import os
    import zipfile
    
    import urllib3
    import certifi
    import sys
    import glob
    
    import numpy as np
    import pandas as pd
    
    import matplotlib.pyplot as plt
    
    %matplotlib inline


    import plotly.offline as py
    import plotly.graph_objs as go
    import plotly.tools as tls
    import seaborn as sns
    
    import plotly
    plotly.offline.init_notebook_mode()



In [2]:

    
zf = zipfile.ZipFile('../data/GlobalLandTemperatures.zip')
global_temp_country = pd.read_csv(zf.open('GlobalLandTemperaturesByCountry.csv'))



In [3]:

    
global_temp_country_clean = global_temp_country[~global_temp_country['Country'].isin(
    ['Denmark', 'Antarctica', 'France', 'Europe', 'Netherlands',
     'United Kingdom', 'Africa', 'South America'])]

    global_temp_country_clean = global_temp_country_clean.replace(
       ['Denmark (Europe)', 'France (Europe)', 'Netherlands (Europe)', 'United Kingdom (Europe)'],
       ['Denmark', 'France', 'Netherlands', 'United Kingdom'])

Average temperature for each country



In [4]:

    
#average temperature 
    countries = np.unique(global_temp_country_clean['Country'])
    mean_temp = []
    for country in countries:
        mean_temp.append(global_temp_country_clean[global_temp_country_clean['Country'] ==
                                                   country]['AverageTemperature'].mean())

Create a mercator projection of world map showing the average land temperature

Here we are creating a mercator projection of the world map taking into account only the average temperature for each country. Users can hover over the countries in order to visualize the name of the Country and the relative average temperature:



In [5]:

    
data = [ dict(
        type = 'choropleth',
        locations = countries,
        z = mean_temp,
        text = countries,
        locationmode = 'country names',
        autocolorscale = True,
        reversescale = False,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            title = 'Average land temperature'),
      ) ]

layout = dict(
    title = 'Average land temperature by country',
    geo = dict(
        showframe = False,
        showcoastlines = False,
        showocean = True,
        oceancolor = 'rgb(10,200,255)',
        projection = dict(
            type = 'Mercator'
        )
    )
)

fig = dict( data=data, layout=layout )
py.iplot( fig, validate=False, filename='d3-world-map' )

Create globe graph showing average temperature

Here we are creating a globe graph taking into account only the average temperature for each country. Users can hover over the countries in order to visualize the name of the Country and the relative average temperature:



In [6]:

    
#create globe graph

    data = [ dict(
            type = 'choropleth',
            locations = countries,
            z = mean_temp,
            locationmode = 'country names',
            text = countries,
            marker = dict(
                line = dict(color = 'rgb(0,0,0)', width = 1)),
                colorbar = dict(autotick = True, tickprefix = '',
                title = '# Average\nTemperature,\n°C')
                )
           ]

    layout = dict(
        title = 'Average land temperature by countries',
        geo = dict(
            showframe = False,
            showocean = True,
            oceancolor = 'rgb(10,200,255)',
            projection = dict(
            type = 'orthographic',
                rotation = dict(
                        lon = 60,
                        lat = 10),
            ),
            lonaxis =  dict(
                    showgrid = True,
                    gridcolor = 'rgb(102, 102, 102)'
                ),
            lataxis = dict(
                    showgrid = True,
                    gridcolor = 'rgb(102, 102, 102)'
                    )
                ),
            )

    fig = dict(data=data, layout=layout)
    py.iplot(fig, validate=False, filename='worldmap')

Average land temperature over the years

Here we are plotting the average land temperature taking into account the uncertainty related to the average temperature over the years:



In [7]:

    
global_temp = pd.read_csv(zf.open('GlobalTemperatures.csv'))

#Extract the year from a date
years = np.unique(global_temp['dt'].apply(lambda x: x[:4]))
mean_temp_world = []
mean_temp_world_uncertainty = []

for year in years:
    mean_temp_world.append(global_temp[global_temp['dt'].apply(
        lambda x: x[:4]) == year]['LandAverageTemperature'].mean())
    mean_temp_world_uncertainty.append(global_temp[global_temp['dt'].apply(
                lambda x: x[:4]) == year]['LandAverageTemperatureUncertainty'].mean())

trace0 = go.Scatter(
    x = years, 
    y = np.array(mean_temp_world) + np.array(mean_temp_world_uncertainty),
    fill= None,
    mode='lines',
    name='Uncertainty top',
    line=dict(
        color='rgb(256, 100, 0)',
    )
)
trace1 = go.Scatter(
    x = years, 
    y = np.array(mean_temp_world) - np.array(mean_temp_world_uncertainty),
    fill='tonexty',
    mode='lines',
    name='Uncertainty bot',
    line=dict(
        color='rgb(0, 10, 0)',
    )
)

trace2 = go.Scatter(
    x = years, 
    y = mean_temp_world,
    name='Average Temperature',
    line=dict(
        color='rgb(0, 0, 200)',
    )
)
data = [trace0, trace1, trace2]

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='Average land temperature of the world over the years',
    showlegend = False)

fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

Here we are trying to have a more cleaned up visualization of the average land temperature taking into account the uncertainty related to the average temperature over the years:



In [8]:

    
globaltemp = pd.read_csv(zf.open('GlobalTemperatures.csv'), parse_dates=['dt'])
year_temp = globaltemp.groupby(globaltemp.dt.dt.year).mean()
plt.figure(figsize = (12, 6))
pd.stats.moments.ewma(year_temp.LandAverageTemperature,5).plot()
year_temp.LandAverageTemperature.plot(linewidth=1)
plt.title('Average temperature by year')
plt.xlabel('year')









    



/Users/sinapi/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:4: FutureWarning:

pd.ewm_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.ewm(min_periods=0,ignore_na=False,adjust=True,com=5).mean()







    Out[8]:





<matplotlib.text.Text at 0x1178e7828>

Temperature by country

Now we are analyzing how temperature varies in different countries and comparing how developing countries fair against developed countries.

Developing countries



In [9]:

    
developing = ['India', 'Mexico', 'Turkey', 'South Africa', 'Brazil']
mean_temp_year_country = [ [0] * len(years[70:]) for i in range(len(developing))]
j = 0
for country in developing:
    all_temp_country = global_temp_country_clean[global_temp_country_clean['Country'] == country]
    i = 0
    for year in years[70:]:
        mean_temp_year_country[j][i] = all_temp_country[all_temp_country['dt'].apply(
                lambda x: x[:4]) == year]['AverageTemperature'].mean()
        i +=1
    j += 1

traces = []
colors = ['rgb(0, 255, 255)', 'rgb(255, 0, 255)', 'rgb(0, 0, 0)',
          'rgb(255, 0, 0)', 'rgb(0, 255, 0)', 'rgb(0, 0, 255)']
for i in range(len(developing)):
    traces.append(go.Scatter(
        x=years[70:],
        y=mean_temp_year_country[i],
        mode='lines',
        name=developing[i],
        line=dict(color=colors[i]),
    ))

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='Average land temperature of developing countries',)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)

Developed countries



In [10]:

    
developed = ['United States','Russia']
mean_temp_country = [ [0] * len(years[70:]) for i in range(len(developed))]
j = 0
for country in developed:
    all_temp_country = global_temp_country_clean[global_temp_country_clean['Country'] == country]
    i = 0
    for year in years[70:]:
        mean_temp_country[j][i] = all_temp_country[all_temp_country['dt'].apply(
                lambda x: x[:4]) == year]['AverageTemperature'].mean()
        i +=1
    j += 1

traces = []
colors = ['rgb(0, 255, 255)', 'rgb(255, 0, 255)', 'rgb(0, 0, 0)',
          'rgb(255, 0, 0)', 'rgb(0, 255, 0)', 'rgb(0, 0, 255)']
for i in range(len(developed)):
    traces.append(go.Scatter(
        x=years[70:],
        y=mean_temp_country[i],
        mode='lines',
        name=developed[i],
        line=dict(color=colors[i]),
    ))

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='Average land temperature of developed countries',)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)

As we can see from the graph above, the developed countries have a huge spike and average land temperatures over the past 40 years. The developing countries also show a rising trend in average land temperatures, but not to the scale of the developed countries.

Manufacturing vs non-manufacturing countries

Now we compare the average temperature for manufacturing countries against non-manufacturing countries. On one hand we have taken the examples of United States and China who are high on manufacturing; on the other hand, we have Nepal which has much less industries.



In [11]:

    
compare = ['United States','China', 'Nepal']
mean_temp_country = [ [0] * len(years[70:]) for i in range(len(compare))]
j = 0
for country in compare:
    all_temp_country = global_temp_country_clean[global_temp_country_clean['Country'] == country]
    i = 0
    for year in years[70:]:
        mean_temp_country[j][i] = all_temp_country[all_temp_country['dt'].apply(
                lambda x: x[:4]) == year]['AverageTemperature'].mean()
        i +=1
    j += 1

traces = []
colors = ['rgb(0, 255, 255)', 'rgb(255, 0, 255)', 'rgb(0, 0, 0)',
          'rgb(255, 0, 0)', 'rgb(0, 255, 0)', 'rgb(0, 0, 255)']
for i in range(len(compare)):
    traces.append(go.Scatter(
        x=years[70:],
        y=mean_temp_country[i],
        mode='lines',
        name=compare[i],
        line=dict(color=colors[i]),
    ))

layout = go.Layout(
    xaxis=dict(title='year'),
    yaxis=dict(title='Average Temperature, °C'),
    title='comparing temperature rise between Industrial countries vs non industrial',)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig)

From the graph above, we can see the manufacturing countries have a huge spike in temperature, while Nepal not so much.

Temperature by City

While the pollutants have a greater local impact, they in-turn impact the entire globe. Here we give users to see the temperature variation of 6 random cities at a go, cities change everytime the script runs.



In [12]:

    
bycities = pd.read_csv(zf.open('GlobalLandTemperaturesByCity.csv'), parse_dates=['dt'])
# there are some cities with the same name but in different countries 
bycities[['City', 'Country']].drop_duplicates()
bycities.City = bycities.City.str.cat(bycities.Country, sep=' ')
bycities = bycities[bycities.dt.dt.year >= 1900]
bycities.head()









    Out[12]:






  
    
      
      dt
      AverageTemperature
      AverageTemperatureUncertainty
      City
      Country
      Latitude
      Longitude
    
  
  
    
      1874
      1900-01-01
      -0.989
      0.588
      Århus Denmark
      Denmark
      57.05N
      10.33E
    
    
      1875
      1900-02-01
      -2.799
      0.882
      Århus Denmark
      Denmark
      57.05N
      10.33E
    
    
      1876
      1900-03-01
      0.592
      0.429
      Århus Denmark
      Denmark
      57.05N
      10.33E
    
    
      1877
      1900-04-01
      4.630
      0.417
      Århus Denmark
      Denmark
      57.05N
      10.33E
    
    
      1878
      1900-05-01
      9.576
      0.521
      Århus Denmark
      Denmark
      57.05N
      10.33E



In [13]:

    
city_means = bycities.groupby(['City', bycities.dt.dt.year])['AverageTemperature'].mean().unstack()
city_mins = bycities.groupby(['City', bycities.dt.dt.year])['AverageTemperature'].min().unstack()
city_maxs = bycities.groupby(['City', bycities.dt.dt.year])['AverageTemperature'].max().unstack()
city_means.head()









    Out[13]:






  
    
      dt
      1900
      1901
      1902
      1903
      1904
      1905
      1906
      1907
      1908
      1909
      ...
      2004
      2005
      2006
      2007
      2008
      2009
      2010
      2011
      2012
      2013
    
    
      City
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      A Coruña Spain
      13.267917
      12.773417
      12.828333
      13.028167
      13.349083
      12.688000
      13.107333
      12.900583
      13.235250
      12.694667
      ...
      13.928500
      13.977667
      14.540917
      13.806167
      13.682333
      14.076167
      13.946500
      14.585500
      13.625667
      13.983750
    
    
      Aachen Germany
      9.132500
      8.339750
      8.133583
      8.994000
      8.987333
      8.571083
      8.877917
      8.479833
      8.108750
      7.979000
      ...
      9.657917
      10.046333
      10.471667
      10.551833
      9.863083
      9.993750
      8.703750
      10.654667
      9.788500
      9.531500
    
    
      Aalborg Denmark
      7.375250
      7.875667
      6.542667
      7.830833
      7.577083
      7.808917
      8.229000
      7.273000
      7.796667
      6.878167
      ...
      8.919000
      8.952333
      9.514750
      9.528583
      9.628000
      8.938667
      6.965000
      9.181167
      8.399833
      8.497625
    
    
      Aba Nigeria
      26.418833
      26.455333
      26.001750
      25.875917
      25.342083
      26.315250
      26.145000
      26.029000
      26.117500
      26.082250
      ...
      27.427333
      27.422167
      27.551083
      27.551583
      27.351583
      27.631167
      27.696667
      27.388083
      27.253500
      27.513625
    
    
      Abadan Iran
      25.016167
      25.770750
      25.459083
      24.242750
      24.777750
      24.393500
      24.314667
      23.778750
      24.683250
      25.284333
      ...
      26.108583
      25.901083
      26.278583
      26.281833
      26.216917
      26.392083
      27.592833
      26.019833
      26.607833
      27.095625
    
  

5 rows × 114 columns



In [14]:

    
first_years_mean = city_means.iloc[:, :5].mean(axis=1) # mean temperature for the first 5 years
city_means_shifted = city_means.subtract(first_years_mean, axis=0)

def plot_temps(cities, city_ser, ax):
    first_years_mean = city_ser.iloc[:, :5].mean(axis=1)
    city_ser = city_ser.subtract(first_years_mean, axis=0)
    for city in random_cities:
        row = city_ser.loc[city]
        pd.stats.moments.ewma(row, 10).plot(label=row.name, ax=ax)
    ax.set_xlabel('')
    ax.legend(loc='best')

The users can change the number of cities that they want to see at glance by changing "n".



In [16]:

    
fig, axes = plt.subplots(3,1, figsize=(10,10))

n = 6 # number of random cities you want to see
random_cities = city_means_shifted.sample(n).index

plot_temps(random_cities, city_means, axes[0])
plot_temps(random_cities, city_mins, axes[1])
plot_temps(random_cities, city_maxs, axes[2])

axes[0].set_title("Year's mean temperature increase for random cities")
axes[1].set_title("Year's min temperature increase for random cities")
axes[2].set_title("Year's max temperature increase  for random cities")









    



/Users/sinapi/miniconda3/lib/python3.5/site-packages/ipykernel/__main__.py:9: FutureWarning:

pd.ewm_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.ewm(min_periods=0,ignore_na=False,adjust=True,com=10).mean()







    Out[16]:





<matplotlib.text.Text at 0x136311358>



In [ ]:

	dt	AverageTemperature	AverageTemperatureUncertainty	City	Country	Latitude	Longitude
1874	1900-01-01	-0.989	0.588	Århus Denmark	Denmark	57.05N	10.33E
1875	1900-02-01	-2.799	0.882	Århus Denmark	Denmark	57.05N	10.33E
1876	1900-03-01	0.592	0.429	Århus Denmark	Denmark	57.05N	10.33E
1877	1900-04-01	4.630	0.417	Århus Denmark	Denmark	57.05N	10.33E
1878	1900-05-01	9.576	0.521	Århus Denmark	Denmark	57.05N	10.33E

dt	1900	1901	1902	1903	1904	1905	1906	1907	1908	1909	...	2004	2005	2006	2007	2008	2009	2010	2011	2012	2013
City
A Coruña Spain	13.267917	12.773417	12.828333	13.028167	13.349083	12.688000	13.107333	12.900583	13.235250	12.694667	...	13.928500	13.977667	14.540917	13.806167	13.682333	14.076167	13.946500	14.585500	13.625667	13.983750
Aachen Germany	9.132500	8.339750	8.133583	8.994000	8.987333	8.571083	8.877917	8.479833	8.108750	7.979000	...	9.657917	10.046333	10.471667	10.551833	9.863083	9.993750	8.703750	10.654667	9.788500	9.531500
Aalborg Denmark	7.375250	7.875667	6.542667	7.830833	7.577083	7.808917	8.229000	7.273000	7.796667	6.878167	...	8.919000	8.952333	9.514750	9.528583	9.628000	8.938667	6.965000	9.181167	8.399833	8.497625
Aba Nigeria	26.418833	26.455333	26.001750	25.875917	25.342083	26.315250	26.145000	26.029000	26.117500	26.082250	...	27.427333	27.422167	27.551083	27.551583	27.351583	27.631167	27.696667	27.388083	27.253500	27.513625
Abadan Iran	25.016167	25.770750	25.459083	24.242750	24.777750	24.393500	24.314667	23.778750	24.683250	25.284333	...	26.108583	25.901083	26.278583	26.281833	26.216917	26.392083	27.592833	26.019833	26.607833	27.095625