In [1]:
import arcpy as ARCPY
import arcpy.da as DA
# Attribute columns to pull from the polygon feature class.
fieldNames = [
    'PCR2000',
    'POP2000',
    'PERCNOHS',
]
inputFC = r'../data/CA_Polygons.shp'

# Read the selected fields straight into a NumPy array via the data-access module.
tab = DA.TableToNumPyArray(inputFC, fieldNames)
print(tab)
Feature Accounting
Cursors and the DataAccess module are not guaranteed to read attributes in order.
Feature accounting keeps track of the shapes and their attributes so that output features can be created without post-joins.
The unique ID works with the spatial weights formats used by ArcGIS, PySAL, R, MATLAB, GeoDa, etc.
In [2]:
import SSDataObject as SSDO
# Wrap the feature class in an SSDataObject, then load the analysis fields
# with "MYID" passed as the ID field.
ssdo = SSDO.SSDataObject(inputFC)
ssdo.obtainData("MYID", fieldNames)

# Show the values held for one of the loaded fields.
pcr2000Field = ssdo.fields['PCR2000']
print(pcr2000Field.data)
In [3]:
import pandas as PANDAS
# Ask the data object for its loaded fields as a DataFrame and show it.
df = ssdo.getDataFrame()
print(df)
In [4]:
import numpy as NUM
import scipy.cluster.vq as CLUST
import arcgisscripting as ARC
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values yields the same NumPy array on both old and new pandas versions.
X = df.values

# Rescale each column before clustering so no single field dominates the distances.
whiteData = CLUST.whiten(X)

# NOTE: kmeans starts from randomly chosen centroids, so the groups can differ
# between runs unless NumPy's global random state is seeded beforehand.
centers, distortion = CLUST.kmeans(whiteData, 6)

# Label every observation with a centroid index.
# NOTE(review): ARC._ss is a private arcgisscripting helper - confirm it is still available.
groups = ARC._ss.closest_centroid(whiteData, centers)
print(groups)
In [5]:
import pysal as PYSAL
import WeightsUtilities as WU
import SSUtilities as UTILS
def swm2pysal(swmfile):
    """Build a PySAL weights object from a spatial weights (.swm) file.

    Args:
        swmfile: Path handed to ``WU.SWMReader``.

    Returns:
        A ``PYSAL.W`` built from the neighbor and weight lists of every
        entry that reports at least one neighbor, with the IDs supplied in
        ascending order.
    """
    neighbors = {}
    weights = {}
    swm = WU.SWMReader(swmfile)
    try:
        # Read one entry per observation; the loop index itself is not needed.
        for _entry in UTILS.ssRange(swm.numObs):
            masterID, nn, nhs, whs, _ = swm.swm.readEntry()
            # Entries without neighbors are left out of the weights object.
            if nn != 0:
                neighbors[masterID] = nhs
                weights[masterID] = whs
    finally:
        # Release the reader even when an entry cannot be read.
        swm.close()

    ids = sorted(neighbors)
    return PYSAL.W(neighbors, weights, ids)
In [6]:
swmFile = r"C:\Data\Conferences\esri_stat_summit_16\zzQueen.swm"
w = swm2pysal(swmFile)

# Max-p regionalization on the first two columns of X, using the third column
# as the floor variable with a floor of 3,000,000.
maxp = PYSAL.region.Maxp(w, X[:,0:2], 3000000., floor_variable = X[:,2])

# Start every observation at -1 rather than leaving the array uninitialised:
# swm2pysal omits observations without neighbors, so any feature that never
# appears in a region keeps this sentinel instead of an arbitrary memory value.
maxpGroups = NUM.full((ssdo.numObs,), -1, dtype=int)
for regionID, masterIDs in enumerate(maxp.regions):
    # Translate the IDs used in the weights file into row positions in ssdo.
    orderIDs = [ssdo.master2Order[i] for i in masterIDs]
    maxpGroups[orderIDs] = regionID
    print((regionID, orderIDs))
In [8]:
import Partition as PART
# Partition the features into six groups, taking the spatial relationships
# from the same weights file used above.
skater = PART.Partition(
    ssdo,
    fieldNames,
    spaceConcept="GET_SPATIAL_WEIGHTS_FROM_FILE",
    weightsFile=swmFile,
    kPartitions=6,
)
print(skater.partition)
In [9]:
# Allow an existing output feature class to be replaced on re-run.
ARCPY.env.overwriteOutput = True
outputFC = r'C:\Data\Conferences\esri_stat_summit_16\PYDemo\PYDemo.gdb\cluster_output'

# One candidate output field per clustering result. The k-means and Max-p
# labels are shifted by one before being written; the SKATER output is used as is.
outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)
outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)
outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)
outFields = {
    'KMEANS': outK,
    'MAXP': outMax,
    'SKATER': outSKATER,
}

# Carry the analysis fields plus "NEW_NAME" over to the output features.
appendFields = fieldNames + ["NEW_NAME"]
ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)
In [ ]: