notebook.community

Edit and run



In [1]:

    
import sys, os
import math, random
import numpy as np
import pandas as pd
import operator

from scipy.sparse import csr_matrix, lil_matrix
from scipy.sparse.csgraph import dijkstra

from tqdm import tqdm



In [2]:

    
class Raster(np.ndarray):
    def __new__(cls, input_array, cellsize, xllcorner, yllcorner):
        obj = np.asarray(input_array).view(cls)
        obj.cellsize  = cellsize
        obj.xllcorner = xllcorner
        obj.yllcorner = yllcorner
        
        # 1-D list of the raster's values 
        v = obj.flatten()
        obj.values = v[~np.isnan(v)]
        
        return obj
    
    @classmethod
    def load(cls, filename):
        data  = [ ]
        attrs = { }
        with open(filename) as f:
            for i in range(6):
                key,value = next(f).strip().split()
                attrs[key] = value
            NODATA_value = attrs.get("NODATA_value", "-9999")
        
            for line in f:
                row = list(map(lambda x: np.nan if x == NODATA_value else float(x), line.split()))
                #row = [ x if x < 1e3 else np.nan for x in row ]
                if row:
                    data.append(row)

        return Raster(np.asarray(data), float(attrs["cellsize"])
                                      , float(attrs["xllcorner"])
                                      , float(attrs["yllcorner"]))



In [3]:

    
%%time
I = Raster.load("../bb450_final.asc")









    



CPU times: user 4.08 s, sys: 341 ms, total: 4.42 s
Wall time: 5.54 s



In [4]:

    
%%time
R = Raster.load("../../BB450/bb_resist450.asc")









    



CPU times: user 3.89 s, sys: 289 ms, total: 4.18 s
Wall time: 4.48 s



In [5]:

    
nodes = Raster.load("../../BB450/bb_nodes450.asc")
nodes = np.argwhere(~np.isnan(nodes))+1  # only care about the indicies
len(nodes)









    Out[5]:





245



In [6]:

    
N = len(I.values)
print("N = {:,}\nsqr(N) = {:,}\n9N = {:,}".format(N, N**2, 9*N))









    



N = 2,806,114
sqr(N) = 7,874,275,780,996
9N = 25,255,026



In [7]:

    
%%time
indices = { (int(x),int(y)):index for index,(x,y) in enumerate(np.argwhere(~np.isnan(I))) }
print(len(indices))









    



2806114
CPU times: user 6.18 s, sys: 503 ms, total: 6.68 s
Wall time: 6.95 s



In [8]:

    
%%time
coordinates = [ (int(x),int(y)) for (x,y) in np.argwhere(~np.isnan(I)) ]









    



CPU times: user 5.23 s, sys: 320 ms, total: 5.55 s
Wall time: 5.92 s



In [9]:

    
G1 = lil_matrix((N, N))
for (i,j),id1 in tqdm(indices.items()):
    try:
        id2 = indices[i+1,j]
        value = (R[i,j] + R[i+1,j]) / 2.
        G1[id1,id2] = value
        G1[id2,id1] = value
    except KeyError:
        pass
    try:
        id2 = indices[i,j+1]
        value = (R[i,j] + R[i,j+1]) / 2.
        G1[id1,id2] = value
        G1[id2,id1] = value
    except KeyError:
        pass

G1









    



100%|██████████| 2806114/2806114 [01:41<00:00, 27534.45it/s]






    Out[9]:





<2806114x2806114 sparse matrix of type '<class 'numpy.float64'>'
	with 11211754 stored elements in LInked List format>



In [10]:

    
%%time
G = csr_matrix(G1)









    



CPU times: user 9.9 s, sys: 356 ms, total: 10.3 s
Wall time: 10.6 s



In [11]:

    
G









    Out[11]:





<2806114x2806114 sparse matrix of type '<class 'numpy.float64'>'
	with 11211754 stored elements in Compressed Sparse Row format>



In [12]:

    
nindices = list(filter(lambda i: i is not None, [ indices.get((y,x)) for y,x in nodes ]))



In [13]:

    
len(nindices)









    Out[13]:





245



In [14]:

    
%%time
dist_matrix,predecessors = dijkstra(G, directed=False, return_predecessors=True, indices=nindices)









    



CPU times: user 16min 50s, sys: 36.4 s, total: 17min 26s
Wall time: 21min 42s



In [15]:

    
print(repr(predecessors))









    



array([[     34,      35,       1, ..., 2806112, 2806107, 2806110],
       [     34,      35,       1, ..., 2806112, 2806107, 2806110],
       [     34,      35,       1, ..., 2806112, 2806107, 2806110],
       ..., 
       [     34,      35,       1, ..., 2806112, 2806107, 2806110],
       [     34,      35,       1, ..., 2806112, 2806107, 2806110],
       [     34,      35,       1, ..., 2806112, 2806107, 2806110]], dtype=int32)



In [16]:

    
def getPaths(origin):
    lines  = [ ]
    for index,start in enumerate(nindices):
        if index == origin:
            continue
        i = start
        lines.append((dist_matrix[origin][i], [ ], [ ]))
        while i != -9999:
            lines[-1][1].append(coordinates[i][1])
            lines[-1][2].append(coordinates[i][0])
            i = predecessors[origin][i]
    return lines



In [17]:

    
def savePaths(origin, output_name):
    paths = getPaths(origin)
    rmin  = min(map(operator.itemgetter(0), paths))
    rmax  = max(map(operator.itemgetter(0), paths))

    with open(output_name.format(origin), "w") as f:
        print(len(paths), rmin, rmax, file=f)
        for rdist,xs,ys in paths:
            print(len(xs), rdist, file=f)
            print(*xs, file=f)
            print(*ys, file=f)



In [18]:

    
for i in tqdm(range(len(nindices))):
    savePaths(i, "/Users/eduffy/Research/BB450/LCP/BB450_{:06d}.lcp")









    



100%|██████████| 245/245 [10:08<00:00,  2.97s/it]



In [ ]: