NumPy Data Access Using ArcPy


In [1]:
import arcpy as ARCPY
import arcpy.da as DA
inputFC = r'../data/CA_Polygons.shp'
fieldNames = ['PCR2000', 'POP2000', 'PERCNOHS']
tab = DA.TableToNumPyArray(inputFC, fieldNames)
print(tab)


[(1.30711491471, 1449840, 37.0) (0.949885742909, 1209, 38.3)
 (0.814953582949, 35153, 41.4) (0.763664715408, 203807, 42.9)
 (0.867384795299, 40645, 48.1) (0.813593366955, 18817, 44.3)
 (1.5038884687, 952810, 32.1) (0.612516024774, 27440, 50.0)
 (1.23401339141, 157162, 36.3) (0.758988619134, 801288, 47.3)
 (0.666868957024, 26416, 46.4) (0.791296201062, 126462, 44.3)
 (0.625993159441, 142410, 56.9) (0.838241159573, 17955, 38.3)
 (0.709664321446, 663803, 48.3) (0.558059878736, 129835, 50.9)
 (0.795906059028, 58538, 49.6) (0.59516742156, 33719, 43.3)
 (0.98591007865, 9538191, 38.0) (0.626400277293, 123587, 58.4)
 (2.30772253595, 247520, 20.9) (0.763503906916, 17101, 44.5)
 (0.856496344376, 86407, 43.1) (0.659142469682, 212258, 49.3)
 (0.696038086886, 9406, 40.1) (0.906145315496, 12921, 31.1)
 (1.04151492927, 402990, 37.5) (1.28209385709, 124565, 36.6)
 (1.06427625094, 92520, 39.0) (1.2656840848, 2854513, 29.5)
 (1.25503460751, 251012, 36.9) (0.871632187308, 20754, 39.4)
 (0.809361084377, 1558985, 41.6) (0.970339838629, 1229940, 34.2)
 (0.97698382771, 53781, 55.9) (0.746540559206, 1718037, 43.3)
 (1.11463689739, 2827366, 34.7) (1.87815228123, 777885, 38.2)
 (0.829782920213, 567885, 50.6) (0.94607588002, 247839, 40.2)
 (2.00222842151, 707820, 28.5) (1.0993818875, 399990, 28.7)
 (1.83427209221, 1684947, 31.0) (1.35152165122, 255835, 36.9)
 (0.83764148855, 163615, 41.1) (0.788376107359, 3574, 41.7)
 (0.771636055401, 44218, 43.7) (0.93624192685, 396974, 37.3)
 (1.24377672351, 460421, 36.4) (0.8013076198, 449471, 49.6)
 (0.844111222501, 79192, 42.7) (0.64214819647, 56170, 44.2)
 (0.676074973366, 12980, 43.3) (0.662256627258, 368627, 55.8)
 (0.800930337009, 54658, 41.2) (1.13167786774, 756506, 36.2)
 (0.894016777176, 169835, 38.4) (0.644656909946, 60372, 43.4)]

SSDataObject

  1. Environment Settings (Except Extent)
  2. Bad Records
  3. Error/Warning Messages
  4. Localization
  5. Feature Accounting

    • Cursors and DataAccess are not assured to read attributes in order.

    • Keeps track of the shapes and their attributes so that one can create output features w/o post-joins.

    • Unique ID works with Spatial Weights Formats in ArcGIS, PySAL, R, Matlab, GeoDa etc..


In [2]:
import SSDataObject as SSDO
ssdo = SSDO.SSDataObject(inputFC)
ssdo.obtainData("MYID", fieldNames)
print(ssdo.fields['PCR2000'].data)


[ 1.30711491  0.94988574  0.81495358  0.76366472  0.8673848   0.81359337
  1.50388847  0.61251602  1.23401339  0.75898862  0.66686896  0.7912962
  0.62599316  0.83824116  0.70966432  0.55805988  0.79590606  0.59516742
  0.98591008  0.62640028  2.30772254  0.76350391  0.85649634  0.65914247
  0.69603809  0.90614532  1.04151493  1.28209386  1.06427625  1.26568408
  1.25503461  0.87163219  0.80936108  0.97033984  0.97698383  0.74654056
  1.1146369   1.87815228  0.82978292  0.94607588  2.00222842  1.09938189
  1.83427209  1.35152165  0.83764149  0.78837611  0.77163606  0.93624193
  1.24377672  0.80130762  0.84411122  0.6421482   0.67607497  0.66225663
  0.80093034  1.13167787  0.89401678  0.64465691]

Using PANDAS to get that R Feel


In [3]:
import pandas as PANDAS
df = ssdo.getDataFrame()
print(df)


      PCR2000  PERCNOHS  POP2000
158  1.307115      37.0  1449840
159  0.949886      38.3     1209
160  0.814954      41.4    35153
161  0.763665      42.9   203807
162  0.867385      48.1    40645
163  0.813593      44.3    18817
164  1.503888      32.1   952810
165  0.612516      50.0    27440
166  1.234013      36.3   157162
167  0.758989      47.3   801288
168  0.666869      46.4    26416
169  0.791296      44.3   126462
170  0.625993      56.9   142410
171  0.838241      38.3    17955
172  0.709664      48.3   663803
173  0.558060      50.9   129835
174  0.795906      49.6    58538
175  0.595167      43.3    33719
176  0.985910      38.0  9538191
177  0.626400      58.4   123587
178  2.307723      20.9   247520
179  0.763504      44.5    17101
180  0.856496      43.1    86407
181  0.659142      49.3   212258
182  0.696038      40.1     9406
183  0.906145      31.1    12921
184  1.041515      37.5   402990
185  1.282094      36.6   124565
186  1.064276      39.0    92520
187  1.265684      29.5  2854513
188  1.255035      36.9   251012
189  0.871632      39.4    20754
190  0.809361      41.6  1558985
191  0.970340      34.2  1229940
192  0.976984      55.9    53781
193  0.746541      43.3  1718037
194  1.114637      34.7  2827366
195  1.878152      38.2   777885
196  0.829783      50.6   567885
197  0.946076      40.2   247839
198  2.002228      28.5   707820
199  1.099382      28.7   399990
200  1.834272      31.0  1684947
201  1.351522      36.9   255835
202  0.837641      41.1   163615
203  0.788376      41.7     3574
204  0.771636      43.7    44218
205  0.936242      37.3   396974
206  1.243777      36.4   460421
207  0.801308      49.6   449471
208  0.844111      42.7    79192
209  0.642148      44.2    56170
210  0.676075      43.3    12980
211  0.662257      55.8   368627
212  0.800930      41.2    54658
213  1.131678      36.2   756506
214  0.894017      38.4   169835
215  0.644657      43.4    60372

Advanced Analysis [SciPy Example - KMeans]


In [4]:
import numpy as NUM
import scipy.cluster.vq as CLUST
import arcgisscripting as ARC
X = df.as_matrix()
whiteData = CLUST.whiten(X)
centers, distortion = CLUST.kmeans(whiteData, 6)
groups = ARC._ss.closest_centroid(whiteData, centers)
print(groups)


[4 3 3 3 2 3 4 2 4 2 2 3 1 3 2 2 2 3 5 1 0 3 3 2 3 4 4 4 3 4 4 3 3 4 1 2 4
 0 2 3 0 4 0 4 3 3 3 3 4 2 3 3 3 1 3 4 3 3]

Max-P Regions Using PySAL


In [5]:
import pysal as PYSAL
import WeightsUtilities as WU
import SSUtilities as UTILS

def swm2pysal(swmfile):
    neighbors = {}
    weights = {}
    swm = WU.SWMReader(swmfile)
    N = swm.numObs
    for r in UTILS.ssRange(N):
            info = swm.swm.readEntry()
            masterID, nn, nhs, whs, sumUnstandard = info
            if nn != 0:
                neighbors[masterID] = nhs
                weights[masterID] = whs
    swm.close()
    ids = list(neighbors.keys())
    ids.sort()
    w = PYSAL.W(neighbors, weights, ids)
    return w

In [6]:
swmFile = r"C:\Data\Conferences\esri_stat_summit_16\zzQueen.swm"
w = swm2pysal(swmFile)
maxp = PYSAL.region.Maxp(w, X[:,0:2], 3000000., floor_variable = X[:,2])
maxpGroups = NUM.empty((ssdo.numObs,), int)
for regionID, masterIDs in enumerate(maxp.regions):
    orderIDs = [ssdo.master2Order[i] for i in masterIDs]
    maxpGroups[orderIDs] = regionID
    print((regionID, orderIDs))


(0, [0, 38, 33, 6])
(1, [44, 24, 46, 31, 51, 45, 10, 52, 22, 16, 7, 11, 27, 5, 48, 56, 57, 47, 30, 20, 17, 28, 50, 8, 3, 1, 54, 2, 21])
(2, [40, 37, 43, 42, 26])
(3, [13, 35, 29, 25])
(4, [41, 55, 18])
(5, [39, 14, 53, 15, 34, 9, 23, 19, 49, 4])
(6, [12, 32, 36])

SKATER for Comparison


In [8]:
import Partition as PART
skater = PART.Partition(ssdo, fieldNames, spaceConcept = "GET_SPATIAL_WEIGHTS_FROM_FILE",
                        weightsFile = swmFile, kPartitions = 6)
print(skater.partition)


[1 4 4 4 5 4 1 4 4 5 4 4 5 4 5 5 4 4 0 5 2 5 4 5 4 4 5 4 4 3 4 4 5 4 5 5 3
 1 5 5 1 5 1 1 4 4 4 4 4 5 4 4 4 5 4 5 4 4]

In [9]:
ARCPY.env.overwriteOutput = True
outputFC = r'C:\Data\Conferences\esri_stat_summit_16\PYDemo\PYDemo.gdb\cluster_output'
outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)
outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)
outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)
outFields = {'KMEANS': outK, 'MAXP': outMax, 'SKATER': outSKATER}
appendFields = fieldNames + ["NEW_NAME"]
ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)

In [ ]: