In [1]:
import logging
import numpy as np
import pandas as pd
root = logging.getLogger()
root.addHandler(logging.StreamHandler())
import datetime
%matplotlib inline
from shapely.prepared import prep
from shapely import speedups
speedups.enable()

In [3]:
import pandas as pd
important_columns1 = ['species', 'dateidentified', 'eventdate', 'basisofrecord', 'decimallatitude','decimallongitude', 'day', 'month', 'year' ]
result_with_lat_long = pd.DataFrame(columns=important_columns1)
counter = 0
for df in pd.read_msgpack("../data/fish/selection/merged.msg", iterator=True):
    counter += 1
    if (counter%100==0):
        print("%s Processing.. %s " % (datetime.datetime.now().time().isoformat(), counter))
    if "decimallatitude" in df.columns.tolist() and "decimallongitude" in df.columns.tolist():
        common_columns = list(set(important_columns1).intersection(set(df.columns.tolist())))
        result_with_lat_long = result_with_lat_long.append(df[common_columns], ignore_index=True)


15:04:05.390324 Processing.. 100 
15:04:17.992654 Processing.. 200 
15:04:33.019456 Processing.. 300 
15:04:51.353167 Processing.. 400 
15:05:11.014846 Processing.. 500 
15:05:32.764214 Processing.. 600 
15:05:56.665404 Processing.. 700 
15:06:23.198995 Processing.. 800 
15:06:51.932699 Processing.. 900 
15:07:26.564767 Processing.. 1000 
15:08:03.810959 Processing.. 1100 
15:08:47.837072 Processing.. 1200 
15:09:31.719969 Processing.. 1300 
15:10:18.034893 Processing.. 1400 
15:11:12.971453 Processing.. 1500 
15:12:08.266557 Processing.. 1600 
15:13:10.106589 Processing.. 1700 
15:14:13.539186 Processing.. 1800 
15:15:17.949457 Processing.. 1900 
15:16:26.108047 Processing.. 2000 
15:17:40.069562 Processing.. 2100 
15:19:08.791963 Processing.. 2200 
15:20:36.307465 Processing.. 2300 
15:22:18.219172 Processing.. 2400 
15:23:52.060455 Processing.. 2500 
15:25:36.458143 Processing.. 2600 
15:27:14.736542 Processing.. 2700 
15:28:38.109453 Processing.. 2800 
15:30:07.359079 Processing.. 2900 
15:31:46.442218 Processing.. 3000 
15:33:21.544590 Processing.. 3100 
15:34:57.250393 Processing.. 3200 
15:36:48.141724 Processing.. 3300 
15:38:48.266840 Processing.. 3400 
15:41:12.213891 Processing.. 3500 
15:43:05.406919 Processing.. 3600 
15:45:11.116797 Processing.. 3700 
15:47:26.784023 Processing.. 3800 
15:49:44.004158 Processing.. 3900 
15:51:58.750333 Processing.. 4000 
15:54:21.594642 Processing.. 4100 
15:56:45.095902 Processing.. 4200 
15:59:17.855855 Processing.. 4300 
16:01:54.443593 Processing.. 4400 
16:04:41.506282 Processing.. 4500 
16:07:03.937957 Processing.. 4600 
16:09:36.701213 Processing.. 4700 
16:12:12.768436 Processing.. 4800 
16:15:02.770409 Processing.. 4900 
16:18:01.434213 Processing.. 5000 
16:21:26.162147 Processing.. 5100 
16:24:55.575311 Processing.. 5200 
16:28:31.539922 Processing.. 5300 
16:31:54.363220 Processing.. 5400 
16:35:06.648524 Processing.. 5500 
16:38:46.682096 Processing.. 5600 
16:42:29.907429 Processing.. 5700 
16:45:58.895379 Processing.. 5800 

1. Collect and filter all observations

Recrods with latitude/longitude


In [4]:
result_with_lat_long = result_with_lat_long[result_with_lat_long.decimallatitude.notnull() & result_with_lat_long.decimallongitude.notnull()]

How many unique species have occurrence records with latitude/longitude?


In [5]:
result_with_lat_long['species'].unique().size


Out[5]:
5242

Best to take into account all observations which have either "year" or "eventdate" present. (or both) Let's group them by species name, and count the number of observation records.


In [6]:
grouped_lat_long_year_or_eventdate = pd.DataFrame()
grouped_lat_long_year_or_eventdate['count'] = result_with_lat_long[result_with_lat_long.eventdate.notnull() | result_with_lat_long.year.notnull()].groupby(['species']).apply(lambda x: x['species'].count())
grouped_lat_long_year_or_eventdate.head(10) # peak at the top 10 only


Out[6]:
count
species
Aaptosyax grypus 2
Aborichthys elongatus 1
Abramis brama 114212
Acantharchus pomotis 477
Acanthobrama centisquama 1
Acanthobrama lissneri 58
Acanthobrama marmid 18
Acanthobrama telavivensis 39
Acanthobrama terraesanctae 11
Acanthobrama tricolor 1

How many unique species HAVE records with latitude/longitude, AND date of event (at least year)


In [7]:
result_with_lat_long['species'].unique().size


Out[7]:
5242

How many unique species with latitude/longitude, AND event date after 1990?


In [8]:
year_or_eventdate_1990 = result_with_lat_long[['species', 'year', 'eventdate', 'basisofrecord', 'decimallatitude', 'decimallongitude']][(result_with_lat_long.year>1990) | (result_with_lat_long.eventdate>"1990")]

grouped_year_or_eventdate_1990 = pd.DataFrame()
grouped_year_or_eventdate_1990['numobservations'] = year_or_eventdate_1990.groupby(['species']).apply(lambda x: x['species'].count())
grouped_year_or_eventdate_1990.shape[0]


Out[8]:
4036

In [9]:
year_or_eventdate_1990.basisofrecord.unique()


Out[9]:
array(['PRESERVED_SPECIMEN', 'UNKNOWN', 'HUMAN_OBSERVATION', 'OBSERVATION',
       'MACHINE_OBSERVATION', 'LITERATURE', 'MATERIAL_SAMPLE',
       'LIVING_SPECIMEN', 'FOSSIL_SPECIMEN'], dtype=object)

I guess we should keep only observations of type 'OBSERVATION', 'MACHINE_OBSERVATION' and 'HUMAN_OBSERVATION'?


In [10]:
final_selection = year_or_eventdate_1990[(year_or_eventdate_1990.basisofrecord=='OBSERVATION') | (year_or_eventdate_1990.basisofrecord=='HUMAN_OBSERVATION') | (year_or_eventdate_1990.basisofrecord=='MACHINE_OBSERVATION')]

In [11]:
final_selection.species.unique().shape


Out[11]:
(1001,)

In [12]:
final_selection


Out[12]:
species year eventdate basisofrecord decimallatitude decimallongitude
399 Haplochromis elegans 2007.0 2007-01-29T23:00:00.000+0000 HUMAN_OBSERVATION 0.05810 30.15430
400 Haplochromis elegans 2007.0 2007-01-27T23:00:00.000+0000 HUMAN_OBSERVATION -0.18930 29.94970
401 Haplochromis elegans 2007.0 2007-01-28T23:00:00.000+0000 HUMAN_OBSERVATION 0.05620 30.07970
402 Haplochromis elegans 2007.0 2007-01-31T23:00:00.000+0000 HUMAN_OBSERVATION -0.08050 30.18710
403 Haplochromis elegans 2006.0 2006-11-21T23:00:00.000+0000 HUMAN_OBSERVATION 0.05670 30.14480
404 Haplochromis elegans 2006.0 2006-11-21T23:00:00.000+0000 HUMAN_OBSERVATION 0.05670 30.14480
561 Cirrhinus molitorella 2013.0 2013-10-12T17:13:31.000+0000 HUMAN_OBSERVATION 40.41930 -79.92587
593 Cirrhinus molitorella 1990.0 1990-06-26T22:00:00.000+0000 OBSERVATION 23.18000 120.35000
1497 Nannocharax macropterus 2015.0 2015-05-03T22:00:00.000+0000 HUMAN_OBSERVATION -15.56737 23.28333
1589 Nannocharax macropterus 2000.0 2000-06-07T22:00:00.000+0000 HUMAN_OBSERVATION -18.27400 21.78700
1592 Nannocharax macropterus 2000.0 2000-06-07T22:00:00.000+0000 HUMAN_OBSERVATION -18.27200 21.81000
1593 Nannocharax macropterus 2000.0 2000-06-04T22:00:00.000+0000 HUMAN_OBSERVATION -18.99000 23.42900
1594 Nannocharax macropterus 2000.0 2000-06-08T22:00:00.000+0000 HUMAN_OBSERVATION -18.44000 21.91200
1595 Nannocharax macropterus 2000.0 2000-06-06T22:00:00.000+0000 HUMAN_OBSERVATION -18.40700 21.88600
1598 Nannocharax macropterus 2000.0 2000-06-06T22:00:00.000+0000 HUMAN_OBSERVATION -18.40700 21.88600
1601 Nannocharax macropterus 2000.0 2000-06-08T22:00:00.000+0000 HUMAN_OBSERVATION -18.42400 21.93900
5716 Rhinichthys falcatus 1993.0 1993-10-23T23:00:00.000+0000 HUMAN_OBSERVATION 50.11667 -120.80000
5717 Rhinichthys falcatus 1993.0 1993-10-07T23:00:00.000+0000 HUMAN_OBSERVATION 50.43333 -121.31667
5718 Rhinichthys falcatus 1992.0 1992-09-25T22:00:00.000+0000 HUMAN_OBSERVATION 50.11667 -120.80000
6227 Salaria fluviatilis 2015.0 2015-06-09T22:00:00.000+0000 HUMAN_OBSERVATION 42.12337 12.17802
6237 Salaria fluviatilis 2011.0 2011-08-24T22:00:00.000+0000 HUMAN_OBSERVATION 38.98880 -0.18757
6238 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 45.33556 8.89028
6239 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 45.23167 9.02583
6240 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 46.01317 9.26097
6241 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 45.91123 8.56348
6242 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 45.19056 9.12639
6243 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 45.59969 10.63168
6244 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 45.33556 8.89028
6245 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 45.23167 9.02583
6246 Salaria fluviatilis 2011.0 NaN HUMAN_OBSERVATION 46.01317 9.26097
... ... ... ... ... ... ...
4710646 Capoeta turani 2007.0 2007-11-05T23:00:00.000+0000 HUMAN_OBSERVATION 37.13850 35.14217
4710647 Capoeta turani 2007.0 2007-11-05T23:00:00.000+0000 HUMAN_OBSERVATION 37.09617 35.11700
4710849 Schistura kongphengi 2000.0 2000-03-19T23:00:00.000+0000 HUMAN_OBSERVATION 16.31083 107.19350
4710850 Schistura kongphengi 2000.0 2000-03-19T23:00:00.000+0000 HUMAN_OBSERVATION 16.34283 107.15683
4710851 Schistura kongphengi 2000.0 2000-03-19T23:00:00.000+0000 HUMAN_OBSERVATION 16.20517 107.29017
4710852 Schistura kongphengi 1996.0 1996-03-19T23:00:00.000+0000 HUMAN_OBSERVATION 18.11867 105.26750
4711398 Proterorhinus tataricus 2003.0 2003-06-18T22:00:00.000+0000 HUMAN_OBSERVATION 44.57033 33.63750
4711399 Proterorhinus tataricus 2003.0 2003-06-17T22:00:00.000+0000 HUMAN_OBSERVATION 44.54500 33.66517
4711400 Proterorhinus tataricus 2003.0 2003-06-18T22:00:00.000+0000 HUMAN_OBSERVATION 44.57033 33.63750
4711401 Proterorhinus tataricus 2002.0 2002-06-12T22:00:00.000+0000 HUMAN_OBSERVATION 44.61550 33.60367
4711402 Proterorhinus tataricus 2002.0 2002-06-12T22:00:00.000+0000 HUMAN_OBSERVATION 44.54467 33.66500
4712104 Eremichthys acros 1999.0 1998-12-31T23:00:00.000+0000 HUMAN_OBSERVATION 41.39184 -119.16352
4712810 Gobiomorphus basalis 2013.0 2013-03-31T08:50:00.000+0000 HUMAN_OBSERVATION -37.07322 175.65800
4712811 Gobiomorphus basalis 2013.0 2013-03-31T08:50:00.000+0000 HUMAN_OBSERVATION -37.07322 175.65799
4715035 Etheostoma fusiforme 2007.0 2007-06-11T22:00:00.000+0000 HUMAN_OBSERVATION 35.45639 -82.56834
4715037 Etheostoma fusiforme 2007.0 2007-06-13T22:00:00.000+0000 HUMAN_OBSERVATION 35.66694 -82.99111
4715038 Etheostoma fusiforme 2007.0 2007-06-13T22:00:00.000+0000 HUMAN_OBSERVATION 35.66694 -82.99111
4715040 Etheostoma fusiforme 2007.0 2007-06-11T22:00:00.000+0000 HUMAN_OBSERVATION 35.45639 -82.56834
4715041 Etheostoma fusiforme 2007.0 2007-06-11T22:00:00.000+0000 HUMAN_OBSERVATION 35.45639 -82.56834
4715042 Etheostoma fusiforme 2007.0 2007-06-13T22:00:00.000+0000 HUMAN_OBSERVATION 35.66694 -82.99111
4715185 Etheostoma fusiforme 2002.0 2002-06-03T22:00:00.000+0000 HUMAN_OBSERVATION 35.30722 -82.42973
4715187 Etheostoma fusiforme 2002.0 2002-06-03T22:00:00.000+0000 HUMAN_OBSERVATION 35.30722 -82.42973
4715189 Etheostoma fusiforme 2002.0 2002-06-03T22:00:00.000+0000 HUMAN_OBSERVATION 35.30722 -82.42973
4715484 Etheostoma fusiforme 1991.0 1990-12-31T23:00:00.000+0000 HUMAN_OBSERVATION 35.71195 -82.44827
4725965 Macropodus erythropterus 2000.0 2000-03-17T23:00:00.000+0000 HUMAN_OBSERVATION 16.70033 106.87783
4725966 Macropodus erythropterus 2000.0 2000-03-17T23:00:00.000+0000 HUMAN_OBSERVATION 16.71983 106.86883
4725967 Macropodus erythropterus 2000.0 2000-03-17T23:00:00.000+0000 HUMAN_OBSERVATION 16.76183 106.86050
4725974 Macropodus erythropterus 2000.0 2000-04-07T22:00:00.000+0000 HUMAN_OBSERVATION 17.21933 106.65133
4725976 Macropodus erythropterus 2000.0 2000-04-07T22:00:00.000+0000 HUMAN_OBSERVATION 17.23533 106.62517
4725983 Macropodus erythropterus 1999.0 1999-03-13T23:00:00.000+0000 HUMAN_OBSERVATION 16.78333 106.89767

1939676 rows × 6 columns


In [13]:
from iSDM.species import GBIFSpecies

In [14]:
all_species = GBIFSpecies(name_species='All')


Enabled Shapely speedups for performance.

In [15]:
all_species.set_data(final_selection)

In [16]:
all_species.get_data().species.unique().shape # these many different species


Out[16]:
(1001,)

In [17]:
all_species.get_data().shape[0] # 1939675? this many observations satisfying our criteria (after 1990, with the correct observation type)


Out[17]:
1939676

In [18]:
year_or_eventdate_1990.shape[0] # total number, before filtering out observations that match our criteria


Out[18]:
2349853

In [19]:
all_species.geometrize()


Data geometrized: converted into GeoPandas dataframe.
Points with NaN coordinates ignored. 

In [20]:
all_species.get_data().species.unique().shape


Out[20]:
(1001,)

In [22]:
final_observations = all_species.get_data()[['species', 'year','eventdate', 'basisofrecord','geometry']]

In [32]:
final_observations.to_file("../data/bias_grid/final_observations", driver="ESRI Shapefile")

In [21]:
import gc
gc.collect()


Out[21]:
162

In [2]:
from geopandas import GeoDataFrame
final_observations = GeoDataFrame.from_file("../data/bias_grid/final_observations/")

In [4]:
final_observations.head()


Out[4]:
basisofrec eventdate geometry species year
0 HUMAN_OBSERVATION 2007-01-29T23:00:00.000+0000 POINT (30.1543 0.0581) Haplochromis elegans 2007.0
1 HUMAN_OBSERVATION 2007-01-27T23:00:00.000+0000 POINT (29.9497 -0.1893) Haplochromis elegans 2007.0
2 HUMAN_OBSERVATION 2007-01-28T23:00:00.000+0000 POINT (30.0797 0.0562) Haplochromis elegans 2007.0
3 HUMAN_OBSERVATION 2007-01-31T23:00:00.000+0000 POINT (30.1871 -0.0805) Haplochromis elegans 2007.0
4 HUMAN_OBSERVATION 2006-11-21T23:00:00.000+0000 POINT (30.1448 0.0567) Haplochromis elegans 2006.0

2. Create a bias grid at a resolution of 5arcmin (for the moment it's just counts of observations per pixel)

5 arcmin = 5/60 pixel = 0.083333333 => 'height': 2160, 'width': 4320 for a global map

(180/0.083333333) ~ 2160
(360/0.083333333) ~ 4320

Generate 2D array and use it as a basis for bias grid.


In [5]:
x_min, y_min, x_max, y_max = -180, -90, 180, 90
pixel_size = 0.0083333333 # changed from 0.083333333 to 30arcsec
x_res = int((x_max - x_min) / pixel_size)
y_res = int((y_max - y_min) / pixel_size)

In [6]:
bias_grid=np.zeros(shape=(y_res, x_res)).astype('int32')

In [7]:
def increase_pixel_value(row):
    bias_grid[np.abs(int((row.y - 90) / pixel_size)),
              np.abs(int((row.x + 180) / pixel_size))]+=1

In [8]:
here = final_observations.geometry.apply(lambda row: increase_pixel_value(row))

In [22]:
bias_grid.max()


Out[22]:
42848

In [22]:
bias_grid.std()


---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-22-62a0d89003df> in <module>()
----> 1 bias_grid.std()

/home/daniela/anaconda/envs/biodiversity_py3/lib/python3.4/site-packages/numpy/core/_methods.py in _std(a, axis, dtype, out, ddof, keepdims)
    122 def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    123     ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
--> 124                keepdims=keepdims)
    125 
    126     if isinstance(ret, mu.ndarray):

/home/daniela/anaconda/envs/biodiversity_py3/lib/python3.4/site-packages/numpy/core/_methods.py in _var(a, axis, dtype, out, ddof, keepdims)
     99     # Note that x may not be inexact and that we need it to be an array,
    100     # not a scalar.
--> 101     x = asanyarray(arr - arrmean)
    102     if issubclass(arr.dtype.type, nt.complexfloating):
    103         x = um.multiply(x, um.conjugate(x), out=x).real

MemoryError: 

In [10]:
bias_grid.sum()


Out[10]:
1939676

In [11]:
# is the sum of the bias grid equal to the total number of observations?
bias_grid.sum() == final_observations.shape[0]


Out[11]:
True

In [12]:
import gc
gc.collect()


Out[12]:
142

Normalize using log10?


In [13]:
# bias_grid_plus_1 = bias_grid + 1
bias_grid_log = np.log10(bias_grid + 1)
# bias_grid_log[np.isneginf(bias_grid_log)] = 0 # is this a good idea, setting to 0? log10(0) = infinity otherwise
bias_grid_log.max()


Out[13]:
4.6319406909138356

In [12]:
bias_grid_log.min()


Out[12]:
0.0

In [13]:
bias_grid_log.std()


---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-13-c346a1bc185d> in <module>()
----> 1 bias_grid_log.std()

/home/daniela/anaconda/envs/biodiversity_py3/lib/python3.4/site-packages/numpy/core/_methods.py in _std(a, axis, dtype, out, ddof, keepdims)
    122 def _std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    123     ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
--> 124                keepdims=keepdims)
    125 
    126     if isinstance(ret, mu.ndarray):

/home/daniela/anaconda/envs/biodiversity_py3/lib/python3.4/site-packages/numpy/core/_methods.py in _var(a, axis, dtype, out, ddof, keepdims)
     99     # Note that x may not be inexact and that we need it to be an array,
    100     # not a scalar.
--> 101     x = asanyarray(arr - arrmean)
    102     if issubclass(arr.dtype.type, nt.complexfloating):
    103         x = um.multiply(x, um.conjugate(x), out=x).real

MemoryError: 

Standardize?


In [ ]:
bias_grid_standardized = (bias_grid - bias_grid.mean()) / bias_grid.std()
bias_grid_standardized.max()

In [ ]:
bias_grid_standardized.min()

In [ ]:
bias_grid_standardized.std()

Min-Max scaling?


In [ ]:
bias_grid_minmax_scale = (bias_grid - bias_grid.min()) / (bias_grid.max() - bias_grid.min())
bias_grid_minmax_scale.max()

In [ ]:
bias_grid_minmax_scale.min()

In [ ]:
bias_grid_minmax_scale.std()

In [42]:
import matplotlib.pyplot as plt
plt.figure(figsize=(25,20))
plt.imshow(bias_grid_log, cmap="hot", interpolation="none")


Out[42]:
<matplotlib.image.AxesImage at 0x7f4cc2ee2978>

In [17]:
import pickle
pickle.dump(bias_grid, open("../data/bias_grid/bias_grid_30arcsec.pkl", "wb"))


---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-17-5a0024330dc7> in <module>()
      1 import pickle
----> 2 pickle.dump(bias_grid, open("../data/bias_grid/bias_grid_30arcsec.pkl", "wb"))

MemoryError: 

In [ ]:
np.count_nonzero(bias_grid)

In [ ]:
np.product(bias_grid.shape)

In [49]:
41881/9331200


Out[49]:
0.004488275891632373

In [26]:
bias_grid.shape


Out[26]:
(2160, 4320)

In [21]:
import rasterio
from rasterio.transform import Affine
x_min, y_min, x_max, y_max = -180, -90, 180, 90
pixel_size = 0.083333333
x_res = int((x_max - x_min) / pixel_size)
y_res = int((y_max - y_min) / pixel_size)
crs = {'init': "EPSG:4326"}
transform = Affine.translation(x_min, y_max) * Affine.scale(pixel_size, -pixel_size)

In [37]:
with rasterio.open("../data/bias_grid/bias_grid_minmax_scale.tif", 'w', driver='GTiff', width=x_res, height=y_res,
                   count=1,
                   dtype=np.uint16,
                   nodata=0,
                   transform=transform,
                   crs=crs) as out:
    out.write(bias_grid_minmax_scale.astype(np.uint16), indexes=1)
    out.close()

In [27]:
pixel_size


Out[27]:
0.083333333

In [39]:
bias_grid_minmax_scale.std()


Out[39]:
0.00063270764269115793

In [19]:
bias_grid_log.shape


Out[19]:
(21600, 43200)

In [20]:
import pickle
pickle.dump(bias_grid_log, open("../data/bias_grid/bias_grid_log_30arcsec.pkl", "wb"), protocol=4)


---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-20-f18e82bcb7e8> in <module>()
      1 import pickle
----> 2 pickle.dump(bias_grid_log, open("../data/bias_grid/bias_grid_log_30arcsec.pkl", "wb"), protocol=4)

MemoryError: 

In [43]:
pickle.dump(bias_grid_standardized, open("../data/bias_grid/bias_grid_standardized.pkl", "wb"))

In [44]:
pickle.dump(bias_grid_minmax_scale, open("../data/bias_grid/bias_grid_minmax_scale.pkl", "wb"))

numpy array memory map


In [26]:
# bias_grid_mm=np.zeros(shape=(y_res, x_res)).astype('int32')
bias_grid_mm = np.memmap("../data/bias_grid/bias_grid_mm.dat", dtype='int32', mode='w+', shape=(y_res,x_res))

In [27]:
def increase_pixel_value(row):
    bias_grid_mm[np.abs(int((row.y - 90) / pixel_size)),
              np.abs(int((row.x + 180) / pixel_size))]+=1

In [28]:
here = final_observations.geometry.apply(lambda row: increase_pixel_value(row))

In [33]:
bias_grid_mm.flush()

In [35]:
bias_grid_mm.max()


Out[35]:
memmap(42848, dtype=int32)

In [36]:
bias_grid_mm.std()


Out[36]:
memmap(1.8395193632862339)

In [40]:
del bias_grid_mm

In [41]:
gc.collect()


Out[41]:
87

In [42]:
fpr = np.memmap("../data/bias_grid/bias_grid_mm.dat", dtype='int32', mode='r', shape=(y_res, x_res))

In [44]:
fpr.max()


Out[44]:
memmap(42848, dtype=int32)

In [47]:
fpr.flags.writeable


Out[47]:
False

Can read any tif file as memory map??


In [81]:
anything = np.memmap("/home/daniela/git/iSDM/data/GLWD/downscaled/original_corrected.tif", dtype='uint8',  mode='r', shape=(y_res,x_res))

In [82]:
np.unique(anything)


Out[82]:
memmap([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
       182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194,
       195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
       208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
       221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
       234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246,
       247, 248, 249, 250, 251, 252, 253, 254, 255], dtype=uint8)

In [67]:
type(anything)


Out[67]:
numpy.core.memmap.memmap

In [78]:
gc.collect()


Out[78]:
3190

In [77]:
del anything

In [101]:
some_data = np.memmap("../data/bias_grid/some_data.tif", dtype='float64', mode='w+', shape=(y_res,x_res))

In [97]:
type(some_data)


Out[97]:
numpy.core.memmap.memmap

In [93]:
some_data.flags.writeable


Out[93]:
True

In [99]:
isinstance(some_data, np.ndarray)


Out[99]:
True

In [106]:
some_data.shape


Out[106]:
(21600, 43200)

In [109]:
some_data[:,:] = 1

In [110]:
some_data._mmap


Out[110]:
<mmap.mmap at 0x7f885a06f618>

In [112]:
some_data


Out[112]:
memmap([[ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       ..., 
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.],
       [ 1.,  1.,  1., ...,  1.,  1.,  1.]])

In [ ]: