NumPy Data Access Using ArcPy


In [1]:
import arcpy as ARCPY
import arcpy.da as DA
inputFC = r'../data/CA_Polygons.shp'
fieldNames = ['PCR2000', 'POP2000', 'PERCNOHS']
tab = DA.TableToNumPyArray(inputFC, fieldNames)
print(tab)


[(1.30711491471, 1449840, 37.0) (0.949885742909, 1209, 38.3)
 (0.814953582949, 35153, 41.4) (0.763664715408, 203807, 42.9)
 (0.867384795299, 40645, 48.1) (0.813593366955, 18817, 44.3)
 (1.5038884687, 952810, 32.1) (0.612516024774, 27440, 50.0)
 (1.23401339141, 157162, 36.3) (0.758988619134, 801288, 47.3)
 (0.666868957024, 26416, 46.4) (0.791296201062, 126462, 44.3)
 (0.625993159441, 142410, 56.9) (0.838241159573, 17955, 38.3)
 (0.709664321446, 663803, 48.3) (0.558059878736, 129835, 50.9)
 (0.795906059028, 58538, 49.6) (0.59516742156, 33719, 43.3)
 (0.98591007865, 9538191, 38.0) (0.626400277293, 123587, 58.4)
 (2.30772253595, 247520, 20.9) (0.763503906916, 17101, 44.5)
 (0.856496344376, 86407, 43.1) (0.659142469682, 212258, 49.3)
 (0.696038086886, 9406, 40.1) (0.906145315496, 12921, 31.1)
 (1.04151492927, 402990, 37.5) (1.28209385709, 124565, 36.6)
 (1.06427625094, 92520, 39.0) (1.2656840848, 2854513, 29.5)
 (1.25503460751, 251012, 36.9) (0.871632187308, 20754, 39.4)
 (0.809361084377, 1558985, 41.6) (0.970339838629, 1229940, 34.2)
 (0.97698382771, 53781, 55.9) (0.746540559206, 1718037, 43.3)
 (1.11463689739, 2827366, 34.7) (1.87815228123, 777885, 38.2)
 (0.829782920213, 567885, 50.6) (0.94607588002, 247839, 40.2)
 (2.00222842151, 707820, 28.5) (1.0993818875, 399990, 28.7)
 (1.83427209221, 1684947, 31.0) (1.35152165122, 255835, 36.9)
 (0.83764148855, 163615, 41.1) (0.788376107359, 3574, 41.7)
 (0.771636055401, 44218, 43.7) (0.93624192685, 396974, 37.3)
 (1.24377672351, 460421, 36.4) (0.8013076198, 449471, 49.6)
 (0.844111222501, 79192, 42.7) (0.64214819647, 56170, 44.2)
 (0.676074973366, 12980, 43.3) (0.662256627258, 368627, 55.8)
 (0.800930337009, 54658, 41.2) (1.13167786774, 756506, 36.2)
 (0.894016777176, 169835, 38.4) (0.644656909946, 60372, 43.4)]

SSDataObject

  1. Environment Settings (Except Extent)
  2. Bad Records
  3. Error/Warning Messages
  4. Localization
  5. Feature Accounting

    • Cursors and DataAccess are not assured to read attributes in order.

    • Keeps track of the shapes and their attributes so that one can create output features w/o post-joins.

    • Unique ID works with Spatial Weights Formats in ArcGIS, PySAL, R, Matlab, GeoDa etc..


In [38]:
import SSDataObject as SSDO
import os as OS
inputFC = r'../data/CA_Polygons.shp'
fullFC = OS.path.abspath(inputFC)
fullPath, fcName = OS.path.split(fullFC)
ssdo = SSDO.SSDataObject(inputFC)
uniqueIDField = "MYID"
fieldNames = ['PCR2010', 'POP2010', 'PERCNOHS']
ssdo.obtainData(uniqueIDField, fieldNames)
ssdo = SSDO.SSDataObject(inputFC)
ssdo.obtainData("MYID", fieldNames)
print(ssdo.fields['POP2010'].data)


[1513043    1162   38026  220000   45485   21475 1052605   28634  181151
  933168   28112  134762  175136   18519  842813  153183   64753   34811
 9826773  151468  252789   18267   87812  256754    9695   14224  416366
  136840   98765 3017598  350233   19949 2202978 1422094   55520 2042027
 3105115  805340  687661  270112  719604  424630 1786267  262880  177308
    3229   44897  414125  484712  515420   94859   63637   13768  443509
   55221  825445  201105   72364]

Using PANDAS to get that R Feel


In [39]:
import pandas as PANDAS
df = ssdo.getDataFrame()
print(df)


      PCR2010  PERCNOHS  POP2010
158  1.206422      37.0  1513043
159  1.079837      38.3     1162
160  0.886305      41.4    38026
161  0.816018      42.9   220000
162  0.877746      48.1    45485
163  1.130213      44.3    21475
164  1.391533      32.1  1052605
165  0.675817      50.0    28634
166  1.209810      36.3   181151
167  0.775344      47.3   933168
168  0.832085      46.4    28112
169  0.818885      44.3   134762
170  0.685975      56.9   175136
171  0.931374      38.3    18519
172  0.742849      48.3   842813
173  0.666717      50.9   153183
174  0.816463      49.6    64753
175  0.692517      43.3    34811
176  1.048479      38.0  9826773
177  0.646304      58.4   151468
178  2.080740      20.9   252789
179  0.839421      44.5    18267
180  0.890045      43.1    87812
181  0.685651      49.3   256754
182  0.849918      40.1     9695
183  0.984652      31.1    14224
184  1.022456      37.5   416366
185  1.249204      36.6   136840
186  1.081794      39.0    98765
187  1.250990      29.5  3017598
188  1.179444      36.9   350233
189  0.973480      39.4    19949
190  0.733145      41.6  2202978
191  0.945829      34.2  1422094
192  0.889421      55.9    55520
193  0.742851      43.3  2042027
194  1.144717      34.7  3105115
195  1.760952      38.2   805340
196  0.771014      50.6   687661
197  0.978288      40.2   270112
198  1.705111      28.5   719604
199  1.100690      28.7   424630
200  1.455588      31.0  1786267
201  1.189412      36.9   262880
202  0.879299      41.1   177308
203  0.833698      41.7     3229
204  0.842544      43.7    44897
205  0.951722      37.3   414125
206  1.108553      36.4   484712
207  0.782673      49.6   515420
208  0.837630      42.7    94859
209  0.664463      44.2    63637
210  0.739619      43.3    13768
211  0.703468      55.8   443509
212  0.897840      41.2    55221
213  1.120266      36.2   825445
214  0.915843      38.4   201105
215  0.747482      43.4    72364

Advanced Analysis [SciPy Example - KMeans]


In [40]:
import numpy as NUM
import scipy.cluster.vq as CLUST
import arcgisscripting as ARC
X = df.as_matrix()
whiteData = CLUST.whiten(X)
centers, distortion = CLUST.kmeans(whiteData, 6)
groups = ARC._ss.closest_centroid(whiteData, centers)
print(groups)


[1 1 2 2 2 1 3 0 1 0 2 2 4 1 0 4 0 2 5 4 3 2 2 0 2 1 1 1 1 3 1 1 0 1 4 0 3
 3 0 1 3 1 3 1 2 2 2 1 1 0 2 2 2 4 2 1 1 2]
C:\ArcGIS\bin\Python\envs\arcgispro-py3\lib\site-packages\ipykernel\__main__.py:7: DeprecationWarning: PyArray_FromDims: use PyArray_SimpleNew.
C:\ArcGIS\bin\Python\envs\arcgispro-py3\lib\site-packages\ipykernel\__main__.py:7: DeprecationWarning: PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr.

Max-P Regions Using PySAL


In [41]:
import pysal as PYSAL
import pysal2ArcGIS as PYSAL_UTILS
swmFile = OS.path.join(fullPath, "rook_bin.swm")
w = PYSAL_UTILS.swm2Weights(ssdo, swmFile)
maxp = PYSAL.region.Maxp(w, X[:,0:2], 3000000., floor_variable = X[:,2])
maxpGroups = NUM.empty((ssdo.numObs,), int)
for regionID, orderIDs in enumerate(maxp.regions):
    maxpGroups[orderIDs] = regionID
maxpGroups


Out[41]:
array([2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 5, 2, 1, 1, 2, 2, 4, 1, 0, 2, 2,
       1, 2, 2, 4, 0, 2, 7, 2, 2, 5, 0, 1, 5, 6, 3, 2, 4, 3, 4, 3, 4, 2, 2,
       2, 0, 0, 1, 0, 2, 2, 1, 2, 4, 0, 2])

SKATER for Comparison


In [42]:
import Partition as PART
skater = PART.Partition(ssdo, fieldNames, spaceConcept = "GET_SPATIAL_WEIGHTS_FROM_FILE",
                        weightsFile = swmFile, kPartitions = 6)
print(skater.partition)


[1 4 4 4 5 4 1 4 4 5 4 4 5 4 5 5 4 4 0 5 2 5 4 5 4 4 5 4 4 3 4 4 5 4 5 5 3
 1 5 5 1 5 1 5 4 4 4 4 4 5 4 4 4 5 4 5 4 4]

In [43]:
ARCPY.env.overwriteOutput = True
outputFC = r'../data/cluster_output.shp'
outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)
outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)
outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)
outFields = {'KMEANS': outK, 'MAXP': outMax, 'SKATER': outSKATER}
appendFields = fieldNames + ["NEW_NAME"]
ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)

In [ ]: