In [1]:
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Route log records to the notebook output through a stream handler on the
# root logger (StreamHandler writes to sys.stderr by default).
root = logging.getLogger()
# Re-running this cell must not stack handlers: every additional handler makes
# each log record print one more time. Only attach a handler when no plain
# StreamHandler is installed on the root logger yet.
if not any(type(handler) is logging.StreamHandler for handler in root.handlers):
    root.addHandler(logging.StreamHandler())
%matplotlib inline
In [2]:
# Input shapefile can be downloaded from Google Drive:
# https://drive.google.com/open?id=0B9cazFzBtPuCOFNiUHYwcVFVODQ
# Salmo trutta serves as a representative example: its shapefile contains
# multiple polygons, and there are many point-records (also outside rangemaps).
from iSDM.species import IUCNSpecies

salmo_trutta = IUCNSpecies(name_species="Salmo trutta")
salmo_trutta.load_shapefile(
    "../data/fish/selection/salmo_trutta"
)
In [3]:
# Rasterize the loaded species shapefile.
# NOTE(review): pixel_size=0.5 is presumably in degrees (consistent with the
# 360 x 720 grid checked at the end of the notebook) -- confirm against iSDM.
rasterized = salmo_trutta.rasterize(
    raster_file="./salmo_trutta_full.tif",
    pixel_size=0.5,
    all_touched=True,
)
In [4]:
# Display the rasterized species data; interpolation is switched off so each
# raster cell is drawn as-is.
fig, ax = plt.subplots(figsize=(25, 20))
ax.imshow(rasterized, cmap="hot", interpolation="none")
Out[4]:
In [5]:
from iSDM.environment import RasterEnvironmentalLayer

# Biomes layer; its raster is used further down for pseudo-absence sampling
# and for pixel -> world coordinate conversion.
biomes_adf = RasterEnvironmentalLayer(
    file_path="../data/rebioms/w001001.adf",
    name_layer="Biomes",
)
biomes_adf.load_data()
Out[5]:
In [6]:
biomes_adf.plot()
In [7]:
from iSDM.environment import ContinentsLayer
from iSDM.environment import Source

# Load the continents shapefile and draw it, colored by the "continent" column.
continents = ContinentsLayer(file_path="../data/continents/continent.shp", source=Source.ARCGIS)
continents.load_data()
fig, ax = plt.subplots(1, 1, figsize=(30, 20))
# Pass the axes created above explicitly. Previously `ax` was never handed to
# plot(), so the (30, 20) figure configured here was not the one being drawn on.
# NOTE(review): newer geopandas releases spell `colormap` as `cmap` -- switch
# the keyword if the installed version rejects `colormap`.
continents.data_full.plot(column="continent", colormap="hsv", ax=ax)
Out[7]:
In [8]:
# Rasterize the continents with the same pixel_size / all_touched settings
# that were used for the species raster.
continents_rasters = continents.rasterize(
    raster_file="../data/continents/continents_raster.tif",
    pixel_size=0.5,
    all_touched=True,
)
In [9]:
continents_rasters.shape # stacked raster with 8 bands, one for each continent.
Out[9]:
In [10]:
# Sample pseudo-absence cells from the biomes layer, given the species raster
# and the stacked continents raster. Two rasters come back: the selected
# layers and the sampled pseudo-absences (both are plotted below).
# NOTE(review): no random seed is set in this notebook; if the sampling is
# random, results will differ between runs -- confirm.
selected_layers, pseudo_absences = biomes_adf.sample_pseudo_absences(
    species_raster_data=rasterized,
    continents_raster_data=continents_rasters,
    number_of_pseudopoints=1000,
)
In [11]:
# Display the layers selected for pseudo-absence sampling.
fig, ax = plt.subplots(figsize=(25, 20))
ax.imshow(selected_layers, cmap="hot", interpolation="none")
Out[11]:
In [12]:
# Display the sampled pseudo-absence cells.
fig, ax = plt.subplots(figsize=(25, 20))
ax.imshow(pseudo_absences, cmap="hot", interpolation="none")
Out[12]:
In [13]:
# World coordinates of every cell of the grid: an all-zero raster with the
# shape of `rasterized` is passed in, and no-data filtering is turned off so
# that no cell is dropped.
all_coordinates = biomes_adf.pixel_to_world_coordinates(
    raster_data=np.zeros_like(rasterized),
    filter_no_data_value=False,
)
In [14]:
all_coordinates
Out[14]:
In [19]:
# One row per grid cell with its two coordinate columns. The labels are given
# as row labels of the 2 x N frame and become column labels once transposed.
base_dataframe = pd.DataFrame(
    [all_coordinates[0], all_coordinates[1]],
    index=['decimallatitude', 'decimallongitude'],
).T
In [20]:
# Move both coordinate columns into a MultiIndex; the frame is left with no
# data columns and acts purely as the skeleton that later merges fill in.
base_dataframe = base_dataframe.set_index(
    ['decimallatitude', 'decimallongitude'],
    drop=True,
)
In [22]:
base_dataframe.head()
Out[22]:
In [23]:
base_dataframe.tail()
Out[23]:
In [24]:
# World coordinates of the species' presence cells. Called without arguments,
# unlike the environmental-layer calls in this notebook that pass raster_data
# explicitly; presumably it uses the raster produced by the earlier
# salmo_trutta.rasterize() call -- TODO confirm.
presence_coordinates = salmo_trutta.pixel_to_world_coordinates()
In [25]:
presence_coordinates
Out[25]:
In [26]:
# Presence table: one row per presence cell, indexed by its coordinates, with
# a single column named after the species and filled with 1's.
presences_dataframe = (
    pd.DataFrame(
        [presence_coordinates[0], presence_coordinates[1]],
        index=['decimallatitude', 'decimallongitude'],
    )
    .T
    .assign(**{salmo_trutta.name_species: 1})
    .set_index(['decimallatitude', 'decimallongitude'], drop=True)
)
In [27]:
presences_dataframe.head()
Out[27]:
In [28]:
presences_dataframe.tail()
Out[28]:
In [29]:
# World coordinates of the sampled pseudo-absence cells.
pseudo_absence_coordinates = biomes_adf.pixel_to_world_coordinates(
    raster_data=pseudo_absences,
)
In [30]:
# Pseudo-absence table: same layout as the presence table, but the species
# column is filled with 0's.
pseudo_absences_dataframe = (
    pd.DataFrame(
        [pseudo_absence_coordinates[0], pseudo_absence_coordinates[1]],
        index=['decimallatitude', 'decimallongitude'],
    )
    .T
    .assign(**{salmo_trutta.name_species: 0})
    .set_index(['decimallatitude', 'decimallongitude'], drop=True)
)
In [31]:
pseudo_absences_dataframe.head()
Out[31]:
In [32]:
pseudo_absences_dataframe.tail()
Out[32]:
In [33]:
from iSDM.environment import ClimateLayer

# Minimum water temperature layer.
water_min_layer = ClimateLayer(file_path="../data/watertemp/min_wt_2000.tif")
water_min_reader = water_min_layer.load_data()
# TODO: decide whether cells holding the temperature no-data value (a really
# big negative number) should be ignored. They are kept for now
# (filter_no_data_value=False); otherwise they could end up as NaN.
water_min_coordinates = water_min_layer.pixel_to_world_coordinates(
    filter_no_data_value=False,
)
In [34]:
water_min_coordinates
Out[34]:
In [35]:
# Table of minimum water temperature per grid cell, indexed by coordinates.
mintemp_dataframe = pd.DataFrame([water_min_coordinates[0], water_min_coordinates[1]]).T
mintemp_dataframe.columns = ['decimallatitude', 'decimallongitude']
water_min_matrix = water_min_reader.read(1)  # first band of the raster
# Flatten the 2-D band into one value per cell. reshape(-1) replaces
# reshape(np.product(shape)): np.product was deprecated in NumPy 1.25 and
# removed in NumPy 2.0, and reshape(-1) yields the same 1-D array.
# NOTE(review): assumes the flattened cell order matches the order of the
# coordinates returned by pixel_to_world_coordinates -- confirm.
mintemp_dataframe['MinT'] = water_min_matrix.reshape(-1)
mintemp_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
mintemp_dataframe.head()
Out[35]:
In [36]:
mintemp_dataframe.tail()
Out[36]:
In [37]:
# Maximum water temperature layer.
water_max_layer = ClimateLayer(file_path="../data/watertemp/max_wt_2000.tif")
water_max_reader = water_max_layer.load_data()
# TODO: decide whether cells holding the temperature no-data value (a really
# big negative number) should be ignored. They are kept for now
# (filter_no_data_value=False); otherwise they could end up as NaN.
water_max_coordinates = water_max_layer.pixel_to_world_coordinates(
    filter_no_data_value=False,
)
In [38]:
# Table of maximum water temperature per grid cell, indexed by coordinates.
maxtemp_dataframe = pd.DataFrame([water_max_coordinates[0], water_max_coordinates[1]]).T
maxtemp_dataframe.columns = ['decimallatitude', 'decimallongitude']
water_max_matrix = water_max_reader.read(1)  # first band of the raster
# Flatten the 2-D band into one value per cell. reshape(-1) replaces
# reshape(np.product(shape)): np.product was deprecated in NumPy 1.25 and
# removed in NumPy 2.0, and reshape(-1) yields the same 1-D array.
# NOTE(review): assumes the flattened cell order matches the order of the
# coordinates returned by pixel_to_world_coordinates -- confirm.
maxtemp_dataframe['MaxT'] = water_max_matrix.reshape(-1)
maxtemp_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
maxtemp_dataframe.head()
Out[38]:
In [39]:
maxtemp_dataframe.tail()
Out[39]:
In [40]:
# Mean water temperature layer.
water_mean_layer = ClimateLayer(file_path="../data/watertemp/mean_wt_2000.tif")
water_mean_reader = water_mean_layer.load_data()
# TODO: decide whether cells holding the temperature no-data value (a really
# big negative number) should be ignored. They are kept for now
# (filter_no_data_value=False); otherwise they could end up as NaN.
water_mean_coordinates = water_mean_layer.pixel_to_world_coordinates(
    filter_no_data_value=False,
)
In [41]:
# Table of mean water temperature per grid cell, indexed by coordinates.
meantemp_dataframe = pd.DataFrame([water_mean_coordinates[0], water_mean_coordinates[1]]).T
meantemp_dataframe.columns = ['decimallatitude', 'decimallongitude']
water_mean_matrix = water_mean_reader.read(1)  # first band of the raster
# Flatten the 2-D band into one value per cell. reshape(-1) replaces
# reshape(np.product(shape)): np.product was deprecated in NumPy 1.25 and
# removed in NumPy 2.0, and reshape(-1) yields the same 1-D array.
# NOTE(review): assumes the flattened cell order matches the order of the
# coordinates returned by pixel_to_world_coordinates -- confirm.
meantemp_dataframe['MeanT'] = water_mean_matrix.reshape(-1)
meantemp_dataframe.set_index(['decimallatitude', 'decimallongitude'], inplace=True, drop=True)
meantemp_dataframe.head()
Out[41]:
In [42]:
meantemp_dataframe.tail()
Out[42]:
In [45]:
# Merge the base grid with the presences. base_dataframe carries only the
# coordinate MultiIndex (no data columns), so combine_first produces the union
# of both indexes with the species column taken from presences_dataframe;
# grid cells without a presence record are left as NaN in that column.
merged = base_dataframe.combine_first(presences_dataframe)
In [46]:
merged.head()
Out[46]:
In [47]:
merged.tail()
Out[47]:
In [48]:
# Merge base+presences with the pseudo-absences. combine_first only fills
# values that are still missing in `merged`, so a presence (1) that is already
# set is never overwritten by a pseudo-absence (0).
# (An earlier attempt used an outer pd.merge on the coordinate and species
# columns; combine_first is used instead.)
merged = merged.combine_first(pseudo_absences_dataframe)
Reference: http://pandas.pydata.org/pandas-docs/stable/merging.html
To patch missing values in one DataFrame with values from another, use the `combine_first` method.
Note that this method only takes values from the right DataFrame if they are missing in the left DataFrame. A related method, `update`, alters non-NA values in place.
In [49]:
merged.head()
Out[49]:
In [50]:
merged.tail()
Out[50]:
In [51]:
# Merge base+presences+pseudo-absences with the minimum temperature.
# `merged` has no 'MinT' column yet, so combine_first adds it from
# mintemp_dataframe, aligned on the coordinate index.
# (An earlier attempt used an outer pd.merge on the coordinate columns.)
merged = merged.combine_first(mintemp_dataframe)
In [52]:
merged.head()
Out[52]:
In [53]:
merged.tail()
Out[53]:
In [54]:
# Add the maximum temperature ('MaxT') column, aligned on the coordinate index.
# (An earlier attempt used an outer pd.merge on the coordinate columns.)
merged = merged.combine_first(maxtemp_dataframe)
In [55]:
merged.head()
Out[55]:
In [56]:
merged.tail()
Out[56]:
In [57]:
# Add the mean temperature ('MeanT') column, aligned on the coordinate index.
# (An earlier attempt used an outer pd.merge on the coordinate columns.)
merged = merged.combine_first(meantemp_dataframe)
In [58]:
merged.tail()
Out[58]:
In [59]:
# Persist the merged table. to_csv writes the index by default, so the
# (decimallatitude, decimallongitude) MultiIndex ends up as the first two
# columns of the CSV file.
merged.to_csv("../data/fish/selection/salmo_trutta_again.csv")
In [60]:
merged[merged['Salmo trutta']==0].shape[0] # should be equal to number of pseudo absences below
Out[60]:
In [61]:
pseudo_absence_coordinates[0].shape[0]
Out[61]:
In [62]:
merged[merged['Salmo trutta']==1].shape[0] # should be equal to number of presences below
Out[62]:
In [63]:
presence_coordinates[0].shape[0]
Out[63]:
In [64]:
merged[merged['Salmo trutta'].isnull()].shape[0] # all that's left
Out[64]:
In [65]:
# Sanity check: pseudo-absences (0), presences (1) and unlabeled cells (NaN)
# together must account for every pixel of the 360 x 720 grid.
(
    len(merged.loc[merged['Salmo trutta'] == 0])
    + len(merged.loc[merged['Salmo trutta'] == 1])
    + len(merged.loc[merged['Salmo trutta'].isnull()])
) == 360 * 720
Out[65]:
In [66]:
# == all pixels in 360 x 720 matrix
In [69]:
merged[merged['Salmo trutta']==0.0]
Out[69]:
In [70]:
pseudo_absences_dataframe
Out[70]:
In [ ]: