In [1]:
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
root = logging.getLogger()
root.addHandler(logging.StreamHandler())
%matplotlib inline
In [2]:
# download from Google Drive: https://drive.google.com/open?id=0B9cazFzBtPuCOFNiUHYwcVFVODQ
# Representative example with multiple polygons in the shapefile, and a lot of point-records (also outside rangemaps)
from iSDM.species import IUCNSpecies
salmo_trutta = IUCNSpecies(name_species='Salmo trutta')
salmo_trutta.load_shapefile("../data/fish/selection/salmo_trutta")
In [3]:
rasterized = salmo_trutta.rasterize(raster_file="./salmo_trutta_full.tif", pixel_size=0.5)
In [4]:
plt.figure(figsize=(25,20))
plt.imshow(rasterized, cmap="hot", interpolation="none")
Out[4]:
In [5]:
from iSDM.environment import RasterEnvironmentalLayer
biomes_adf = RasterEnvironmentalLayer(file_path="../data/rebioms/w001001.adf", name_layer="Biomes")
biomes_adf.load_data()
Out[5]:
In [6]:
biomes_adf.plot()
In [7]:
from iSDM.environment import ContinentsLayer
from iSDM.environment import Source
continents = ContinentsLayer(file_path="../data/continents/continent.shp", source=Source.ARCGIS)
continents.load_data()
fig, ax = plt.subplots(1,1, figsize=(30,20))
continents.data_full.plot(column="continent", colormap="hsv")
Out[7]:
In [8]:
continents_rasters = continents.rasterize(raster_file="../data/continents/continents_raster.tif", pixel_size=0.5)
In [9]:
continents_rasters.shape # stacked raster with 8 bands, one for each continent.
Out[9]:
As agreed, we will merge Europe and Asia to be a bit closer to the biogeographical regions. We do this user-specific patching for now, until a better solution is found.
In [10]:
continents_rasters[0] = continents_rasters[0] + continents_rasters[2] # combine Europe and Asia
In [11]:
continents_rasters[0].max() # where the continents touch, we have overlap! that's why max is not 1, but 2.
Out[11]:
Set all values >1 to 1. (we only care about presence/absence)
In [12]:
continents_rasters[0][continents_rasters[0] > 1] = 1
Delete band 2 (Europe, previously merged with layer 0==Asia)
In [13]:
continents_rasters = np.delete(continents_rasters, 2, 0)
This is how the band 0 looks like now.
In [14]:
plt.figure(figsize=(25,20))
plt.imshow(continents_rasters[0], cmap="hot", interpolation="none")
Out[14]:
In [15]:
continents_rasters.shape # now total of 7 band rather than 8
Out[15]:
In [16]:
selected_layers, pseudo_absences = biomes_adf.sample_pseudo_absences(species_raster_data=rasterized,continents_raster_data=continents_rasters, number_of_pseudopoints=1000)
In [17]:
plt.figure(figsize=(25,20))
plt.imshow(selected_layers, cmap="hot", interpolation="none")
Out[17]:
In [18]:
plt.figure(figsize=(25,20))
plt.imshow(pseudo_absences, cmap="hot", interpolation="none")
Out[18]:
In [19]:
all_coordinates = biomes_adf.pixel_to_world_coordinates(raster_data=np.zeros_like(rasterized), filter_no_data_value=False)
In [20]:
all_coordinates
Out[20]:
In [21]:
base_dataframe = pd.DataFrame([all_coordinates[0], all_coordinates[1]]).T
base_dataframe.columns=['decimallatitude', 'decimallongitude']
base_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
In [22]:
base_dataframe.head()
Out[22]:
In [23]:
base_dataframe.tail()
Out[23]:
In [24]:
presence_coordinates = salmo_trutta.pixel_to_world_coordinates()
In [25]:
presence_coordinates
Out[25]:
In [26]:
presences_dataframe = pd.DataFrame([presence_coordinates[0], presence_coordinates[1]]).T
presences_dataframe.columns=['decimallatitude', 'decimallongitude']
presences_dataframe[salmo_trutta.name_species] = 1 # fill presences with 1's
presences_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
presences_dataframe.head()
Out[26]:
In [27]:
presences_dataframe.tail()
Out[27]:
In [28]:
pseudo_absence_coordinates = biomes_adf.pixel_to_world_coordinates(raster_data=pseudo_absences)
In [29]:
pseudo_absences_dataframe = pd.DataFrame([pseudo_absence_coordinates[0], pseudo_absence_coordinates[1]]).T
pseudo_absences_dataframe.columns=['decimallatitude', 'decimallongitude']
pseudo_absences_dataframe[salmo_trutta.name_species] = 0 # fill pseudo-absences with 0
pseudo_absences_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
In [30]:
pseudo_absences_dataframe.head()
Out[30]:
In [31]:
pseudo_absences_dataframe.tail()
Out[31]:
In [32]:
from iSDM.environment import ClimateLayer
water_min_layer = ClimateLayer(file_path="../data/watertemp/min_wt_2000.tif")
water_min_reader = water_min_layer.load_data()
# HERE: should we ignore cells with no-data values for temperature? They are set to a really big negative number
# for now we keep them, otherwise could be NaN
water_min_coordinates = water_min_layer.pixel_to_world_coordinates(filter_no_data_value=False)
In [33]:
water_min_coordinates
Out[33]:
In [34]:
mintemp_dataframe = pd.DataFrame([water_min_coordinates[0], water_min_coordinates[1]]).T
mintemp_dataframe.columns=['decimallatitude', 'decimallongitude']
water_min_matrix = water_min_reader.read(1)
mintemp_dataframe['MinT'] = water_min_matrix.reshape(np.product(water_min_matrix.shape))
mintemp_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
mintemp_dataframe.head()
Out[34]:
In [35]:
mintemp_dataframe.tail()
Out[35]:
In [36]:
water_max_layer = ClimateLayer(file_path="../data/watertemp/max_wt_2000.tif")
water_max_reader = water_max_layer.load_data()
# HERE: should we ignore cells with no-data values for temperature? They are set to a really big negative number
# for now we keep them, otherwise could be NaN
water_max_coordinates = water_max_layer.pixel_to_world_coordinates(filter_no_data_value=False)
In [37]:
maxtemp_dataframe = pd.DataFrame([water_max_coordinates[0], water_max_coordinates[1]]).T
maxtemp_dataframe.columns=['decimallatitude', 'decimallongitude']
water_max_matrix = water_max_reader.read(1)
maxtemp_dataframe['MaxT'] = water_max_matrix.reshape(np.product(water_max_matrix.shape))
maxtemp_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
maxtemp_dataframe.head()
Out[37]:
In [38]:
maxtemp_dataframe.tail()
Out[38]:
In [39]:
water_mean_layer = ClimateLayer(file_path="../data/watertemp/mean_wt_2000.tif")
water_mean_reader = water_mean_layer.load_data()
# HERE: should we ignore cells with no-data values for temperature? They are set to a really big negative number
# for now we keep them, otherwise could be NaN
water_mean_coordinates = water_mean_layer.pixel_to_world_coordinates(filter_no_data_value=False)
In [40]:
meantemp_dataframe = pd.DataFrame([water_mean_coordinates[0], water_mean_coordinates[1]]).T
meantemp_dataframe.columns=['decimallatitude', 'decimallongitude']
water_mean_matrix = water_mean_reader.read(1)
meantemp_dataframe['MeanT'] = water_mean_matrix.reshape(np.product(water_mean_matrix.shape))
meantemp_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
meantemp_dataframe.head()
Out[40]:
In [41]:
meantemp_dataframe.tail()
Out[41]:
In [42]:
# merge base with presences
merged = base_dataframe.combine_first(presences_dataframe)
In [43]:
merged.head()
Out[43]:
In [44]:
merged.tail()
Out[44]:
In [45]:
# merge based+presences with pseudo-absences
# merged2 = pd.merge(merged1, pseudo_absences_dataframe, on=["decimallatitude", "decimallongitude", salmo_trutta.name_species], how="outer")
merged = merged.combine_first(pseudo_absences_dataframe)
http://pandas.pydata.org/pandas-docs/stable/merging.html
For this, use the combine_first method.
Note that this method only takes values from the right DataFrame if they are missing in the left DataFrame. A related method, update, alters non-NA values inplace
In [46]:
merged.head()
Out[46]:
In [47]:
merged.tail()
Out[47]:
In [48]:
# merge base+presences+pseudo-absences with min temperature
#merged3 = pd.merge(merged2, mintemp_dataframe, on=["decimallatitude", "decimallongitude"], how="outer")
merged = merged.combine_first(mintemp_dataframe)
In [49]:
merged.head()
Out[49]:
In [50]:
merged.tail()
Out[50]:
In [51]:
# merged4 = pd.merge(merged3, maxtemp_dataframe, on=["decimallatitude", "decimallongitude"], how="outer")
merged = merged.combine_first(maxtemp_dataframe)
In [52]:
merged.head()
Out[52]:
In [53]:
merged.tail()
Out[53]:
In [54]:
# merged5 = pd.merge(merged4, meantemp_dataframe, on=["decimallatitude", "decimallongitude"], how="outer")
merged = merged.combine_first(meantemp_dataframe)
In [55]:
merged.tail()
Out[55]:
In [58]:
merged.to_csv("../data/fish/selection/salmo_trutta.csv")
In [56]:
merged[merged['Salmo trutta']==0].shape[0] # should be equal to number of pseudo absences below
Out[56]:
In [57]:
pseudo_absence_coordinates[0].shape[0]
Out[57]:
In [58]:
merged[merged['Salmo trutta']==1].shape[0] # should be equal to number of presences below
Out[58]:
In [59]:
presence_coordinates[0].shape[0]
Out[59]:
In [60]:
merged[merged['Salmo trutta'].isnull()].shape[0] # all that's left
Out[60]:
In [61]:
360 * 720 == merged[merged['Salmo trutta']==0].shape[0] + merged[merged['Salmo trutta']==1].shape[0] + merged[merged['Salmo trutta'].isnull()].shape[0]
Out[61]:
In [62]:
# == all pixels in 360 x 720 matrix
In [63]:
# Download from Google Drive: https://drive.google.com/open?id=0B9cazFzBtPuCaW0wRkk2N0g5d1k
lepidomeda_mollispinis = IUCNSpecies(name_species='Lepidomeda mollispinis')
lepidomeda_mollispinis.load_shapefile("../data/fish/selection/lepidomeda_mollispinis")
In [64]:
rasterized_lm = lepidomeda_mollispinis.rasterize(raster_file="./lepidomeda_mollispinis_full.tif", pixel_size=0.5)
In [65]:
plt.figure(figsize=(25,20))
plt.imshow(rasterized_lm, cmap="hot", interpolation="none")
Out[65]:
In [66]:
selected_layers_lm, pseudo_absences_lm = biomes_adf.sample_pseudo_absences(species_raster_data=rasterized_lm, continents_raster_data=continents_rasters, number_of_pseudopoints=1000)
In [67]:
plt.figure(figsize=(25,20))
plt.imshow(selected_layers_lm, cmap="hot", interpolation="none")
Out[67]:
In [68]:
plt.figure(figsize=(25,20))
plt.imshow(pseudo_absences_lm, cmap="hot", interpolation="none")
Out[68]:
In [69]:
presence_coordinates_lm = lepidomeda_mollispinis.pixel_to_world_coordinates()
In [70]:
presences_dataframe = pd.DataFrame([presence_coordinates_lm[0], presence_coordinates_lm[1]]).T
presences_dataframe.columns=['decimallatitude', 'decimallongitude']
presences_dataframe[lepidomeda_mollispinis.name_species] = 1 # fill presences with 1's
presences_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
presences_dataframe.head()
Out[70]:
In [71]:
presences_dataframe.tail()
Out[71]:
In [72]:
pseudo_absence_coordinates_lm = biomes_adf.pixel_to_world_coordinates(raster_data=pseudo_absences_lm)
In [73]:
pseudo_absences_dataframe = pd.DataFrame([pseudo_absence_coordinates_lm[0], pseudo_absence_coordinates_lm[1]]).T
pseudo_absences_dataframe.columns=['decimallatitude', 'decimallongitude']
pseudo_absences_dataframe[lepidomeda_mollispinis.name_species] = 0
pseudo_absences_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
In [74]:
pseudo_absences_dataframe.head()
Out[74]:
In [75]:
pseudo_absences_dataframe.tail()
Out[75]:
In [76]:
merged1 = merged.combine_first(presences_dataframe)
In [77]:
merged1.tail()
Out[77]:
In [78]:
merged1 = merged1.combine_first(pseudo_absences_dataframe)
In [79]:
merged1.tail()
Out[79]:
In [80]:
merged1['Lepidomeda mollispinis'].unique()
Out[80]:
In [81]:
merged1[merged1['Lepidomeda mollispinis']==0].shape # pseudo-absences
Out[81]:
In [82]:
merged1[merged1['Lepidomeda mollispinis']==1].shape # presences
Out[82]:
In [83]:
merged1[merged1['Lepidomeda mollispinis'].isnull()].shape
Out[83]:
In [84]:
merged1[merged1['Lepidomeda mollispinis'].isnull()].shape[0] + merged1[merged1['Lepidomeda mollispinis']==1].shape[0] + merged1[merged1['Lepidomeda mollispinis']==0].shape[0]
Out[84]:
In [85]:
salmo_trutta.get_data().shape_area.sum()
Out[85]:
In [86]:
lepidomeda_mollispinis.get_data().shape_area.sum()
Out[86]:
Third species.... (largest IUCN area, also plenty of occurrences in GBIF)
In [87]:
# Download from Google drive: https://drive.google.com/open?id=0B9cazFzBtPuCamEwWlZxV3lBZmc
esox_lucius = IUCNSpecies(name_species='Esox lucius')
esox_lucius.load_shapefile("../data/fish/selection/esox_lucius/")
In [88]:
rasterized_el = esox_lucius.rasterize(raster_file="./esox_lucius_full.tif", pixel_size=0.5)
In [89]:
plt.figure(figsize=(25,20))
plt.imshow(rasterized_el, cmap="hot", interpolation="none")
Out[89]:
In [90]:
selected_layers_el, pseudo_absences_el = biomes_adf.sample_pseudo_absences(species_raster_data=rasterized_el, continents_raster_data=continents_rasters, number_of_pseudopoints=1000)
In [91]:
plt.figure(figsize=(25,20))
plt.imshow(selected_layers_el, cmap="hot", interpolation="none")
Out[91]:
Hmm, why does it take South Afrika into account?
In [92]:
np.where((continents_rasters[2]+rasterized_el)>1)
Out[92]:
This above adds two bands with ones and zeroes: The raster for the species, and the raster for the South-Afrika continent (id=2). Then it finds pixel positions where the "summed band" has value >1. (np.where(...)
)
Result indicates the pixel x/y positions. Basically they overlap at one pixel position. There, the value is 2.
In [93]:
(continents_rasters[2]+rasterized_el)[108,348]
Out[93]:
In [94]:
plt.figure(figsize=(25,20))
plt.imshow(pseudo_absences_el, cmap="hot", interpolation="none")
Out[94]:
In [95]:
presence_coordinates_el = esox_lucius.pixel_to_world_coordinates()
In [96]:
presences_dataframe = pd.DataFrame([presence_coordinates_el[0], presence_coordinates_el[1]]).T
presences_dataframe.columns=['decimallatitude', 'decimallongitude']
presences_dataframe[esox_lucius.name_species] = 1 # fill presences with 1's
presences_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
presences_dataframe.head()
Out[96]:
In [97]:
pseudo_absence_coordinates_el = biomes_adf.pixel_to_world_coordinates(raster_data=pseudo_absences_el)
In [98]:
pseudo_absences_dataframe = pd.DataFrame([pseudo_absence_coordinates_el[0], pseudo_absence_coordinates_el[1]]).T
pseudo_absences_dataframe.columns=['decimallatitude', 'decimallongitude']
pseudo_absences_dataframe[esox_lucius.name_species] = 0 # fill pseudo-absences with 0
pseudo_absences_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
pseudo_absences_dataframe.head()
Out[98]:
In [99]:
merged2 = merged1.combine_first(presences_dataframe)
In [100]:
merged2.head()
Out[100]:
In [101]:
merged2 = merged2.combine_first(pseudo_absences_dataframe)
In [102]:
merged2.head()
Out[102]:
In [103]:
merged2['Esox lucius'].unique()
Out[103]:
In [104]:
merged2[merged2['Esox lucius']==0].shape # pseudo-absences
Out[104]:
In [105]:
merged2[merged2['Esox lucius']==1].shape # presences
Out[105]:
In [106]:
merged2[merged2['Esox lucius'].isnull()].shape
Out[106]:
In [107]:
merged2[merged2['Esox lucius'].isnull()].shape[0] + merged2[merged2['Esox lucius']==1].shape[0] + merged2[merged2['Esox lucius']==0].shape[0]
Out[107]:
In [108]:
merged2.tail()
Out[108]:
In [109]:
# rearange columns (nothing critical)
In [110]:
cols = merged2.columns.values
cols1 = [cols[4], cols[2], cols[3], cols[0], cols[1], cols[5]]
In [111]:
merged2 = merged2[cols1]
In [112]:
merged2.to_csv("../data/fish/selection/dataframe_merged_all_touching_false.csv")
In [113]:
merged2.columns.values
Out[113]:
Add continents column
In [114]:
for idx, band in enumerate(continents_rasters):
continents_coordinates = biomes_adf.pixel_to_world_coordinates(raster_data=band)
continent_dataframe = pd.DataFrame([continents_coordinates[0], continents_coordinates[1]]).T
continent_dataframe.columns=['decimallatitude', 'decimallongitude']
continent_dataframe['Continent'] = idx
continent_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
continent_dataframe.head()
merged2 = merged2.combine_first(continent_dataframe)
In [115]:
merged2[merged2.Continent==0].shape
Out[115]:
In [116]:
np.count_nonzero(continents_rasters[0]) # good!
Out[116]:
In [117]:
merged2[merged2.Continent==2].shape
Out[117]:
In [118]:
np.count_nonzero(continents_rasters[2]) # good!
Out[118]:
In [119]:
merged2.columns.values
Out[119]:
In [120]:
# rearange columns again
In [121]:
cols = merged2.columns.values
cols1 = [cols[5], cols[3], cols[4], cols[1], cols[2], cols[6], cols[0]]
In [122]:
merged2 = merged2[cols1]
In [123]:
merged2.head()
Out[123]:
In [124]:
merged2.to_csv("../data/fish/selection/dataframe_merged_with_continents_rasterized_2.csv")
In [125]:
np.where((continents_rasters[2]+rasterized_el)>1)
Out[125]:
In [ ]: