This content was created to investigate a problem that originally looked more complex than it was. This example shows something to look out for when using np.where() for the simplest test: finding the index of a floating point number in a numpy array. The real problem here isn't with np.where() but rather with getting Python to display the true underlying floating point number so we can test with it. These tests should shed some light on the level of precision one can expect when using np.where() with floating point numbers.
Though this content was created in Python 3.6, it is expected to work the same in Python 2.7. Most of it was originally created and tested under Python 2.7 first before simplifying the content into a more cohesive story for this write-up.
In [2]:
import numpy as np
In [3]:
# Build a 3x4x5 array of random floats to experiment with.
m3d = np.random.rand(3, 4, 5)
# The np.where() cells further down search for the literal 0.64059390171803487,
# the value that sat at m3d[0][1][3] when this write-up was first run.
# A fresh random draw will not reproduce it, so pin that one element to keep
# the later "found" / "not found" searches reproducible on every run.
m3d[0][1][3] = 0.64059390171803487
m3d
Out[3]:
In [4]:
# pick out a single element of the array to experiment with
m3d[0, 1, 3]
Out[4]:
In [79]:
# None of these displays will show us the full floating point number:
sample = m3d[0, 1, 3]
print(sample)
print("%s" % (sample,))
print(sample * 1.0000000000000000)
str(sample)
Out[79]:
In [80]:
# Worse: what is the full number? Use a fixed-point formatter and keep
# increasing the number of decimal places.
# 17 places is the level of precision that works with the where tests that
# follow; 20 places and beyond show there is still more to uncover.
for places in (17, 20, 25, 30, 60):
    print("{0:.{1}f}".format(m3d[0][1][3], places))
In [81]:
# One final idea to see the whole number: ask for its repr().
# This gets enough digits for an np.where() test even though there are more
# hidden digits; this is illustrated further in the content that follows.
print(repr(m3d[0][1][3]))
In [9]:
# not found because the literal is not an exact match (too few decimal places)
truncated_hits = np.where(m3d == 0.640593901718)
print(truncated_hits)
In [10]:
# found: the literal is an exact match (on a sufficient number of decimal places)
exact_hits = np.where(m3d == 0.64059390171803487)
print(exact_hits)
In [47]:
located = m3d[0, 1, 3]  # the index indicated above
print(located)
# Same element with a precision of 30. There is no "f" type in the format spec,
# so the 30 counts significant digits rather than fixed decimal places.
print(format(located, ".30"))
# note that we found the number by matching to about 17 decimal places
In [32]:
# A hand-typed value in a plain list confirms the problem is with floating
# points, not specifically random numbers.
exact_value = 0.64059390171803487
simpTest = [exact_value]
# Specifically, print() rounds the floating point number, so we don't know its
# true value to a high enough degree of precision to work with np.where().
print(simpTest[0])
np.where(np.array(simpTest) == exact_value)
Out[32]:
In [29]:
# Convert to an array and add a second copy of the same literal in one step.
simpTest = np.append(np.array(simpTest), [0.64059390171803487])
# both numbers are as entered above, note how they display
simpTest
Out[29]:
In [31]:
# search the array (after the append in the previous cell) for the full-precision literal
np.where(simpTest==0.64059390171803487) # this works: matches on both copies of the number
Out[31]:
In [38]:
# Hand-built 3x4x5 array: the full-precision value is planted three times and
# its 12-decimal truncation once, among otherwise unrelated values.
full_value = 0.64059390171803487
short_value = 0.640593901718
complexTest = np.array([
    [[0.72005073, 0.11109093, 0.68028471, 0.95046754, 0.12395542],
     [full_value, 0.93291227, 0.88059964, short_value, 0.44233101],
     [0.30116737, 0.29999162, 0.15315462, 0.89932621, 0.85077063],
     [0.86296231, 0.16234297, 0.36499936, 0.70494879, 0.44010862]],
    [[0.01374351, 0.5563092, 0.60554606, 0.17335924, 0.80520799],
     [0.35663633, 0.51720075, 0.67551499, 0.88688033, 0.00112873],
     [0.27817067, 0.29122932, full_value, 0.82422563, 0.35326511],
     [0.72938731, 0.85729267, 0.7299159, 0.02867732, 0.37095061]],
    [[0.69498848, 0.03304285, 0.92535693, 0.67561394, 0.53754546],
     [0.7359454, 0.2463966, 0.02519301, 0.86923853, 0.98044879],
     [0.0297628, 0.21868526, 0.26389986, 0.39412533, 0.92344829],
     [0.85994709, 0.54386487, 0.47746197, 0.44148437, full_value]],
])
print(type(complexTest))
In [39]:
# look up one of the short values in the array
short_hits = np.where(complexTest == 0.11109093)
print(short_hits)
In [40]:
# this number is in the data structure once
truncated_hits = np.where(complexTest == 0.640593901718)
print(truncated_hits)
In [41]:
# this more specific number is in the data structure 3 times
exact_hits = np.where(complexTest == 0.64059390171803487)
print(exact_hits)
In [48]:
# Collect the np.where() result for each of the three searches, in order:
# the short value, the truncated value, then the full-precision value.
answer = [
    np.where(complexTest == wanted)
    for wanted in (0.11109093, 0.640593901718, 0.64059390171803487)
]
In [49]:
# display the three np.where() results collected in the previous cell
answer
Out[49]:
In [59]:
# Assembling the answers provided above into index expressions.
# The first two searches are printed as one expression each, joining the
# index arrays exactly as np.where() returned them.
for search_result in answer[:2]:
    print("complexTest" + "".join(str(axis) for axis in search_result))
# The third search matched three locations: print one expression per match.
for k in range(3):
    print("complexTest" + "".join(str([axis[k]]) for axis in answer[2]))
In [62]:
# Confirming they are right: read back each location reported above.
print(complexTest[0, 0, 1])
print(complexTest[0, 1, 3])
# the three full-precision locations are shown with a precision of 17
for location in ((0, 1, 0), (1, 2, 2), (2, 3, 4)):
    print(format(complexTest[location], ".17"))
In [78]:
# Determining the level of precision
# Column 0 holds the same number written with fewer and fewer digits.
testFrag = np.array([
    [0.640593901718134878, 0.64059390171813486, 0.88059969, 0.95046754, 0.12395542],
    [0.64059390171813487, 0.93291227, 0.88059964, 0.86296231, 0.44233101],
    [0.6405939017181348, 0.29999162, 0.15315462, 0.89932621, 0.85077063],
    [0.640593901718134, 0.16234297, 0.30116737, 0.70494879, 0.44010862],
    [0.64059390171813, 0.93291228, 0.88059964, 0.86296231, 0.44233101],
    [0.6405939017181, 0.29999188, 0.15315462, 0.89932621, 0.85077063],
    [0.640593901718, 0.16234288, 0.30116737, 0.70494879, 0.44010862],
])
print(np.where(testFrag == 0.93291227))  # occurs once
# The shortest literal occurs once; each successive test then adds a digit.
for probe in (
    0.640593901718,
    0.6405939017181,
    0.64059390171813,
    0.640593901718134,
    0.6405939017181348,
    0.64059390171813486,
    0.64059390171813487,
    0.640593901718134878,
):
    print(np.where(testFrag == probe))
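The above shows that np.where() cannot distinguish between these 3 numbers: 0.64059390171813486, 0.64059390171813487 and 0.640593901718134878 (a search for any one of them matches all three array entries, because the three literals are stored as the same floating point value). When less precision is needed, we can use np.isclose() to find all like numbers in the array. The level of precision on floating point numbers that it can provide is shown here: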
In [84]:
# np.isclose() generates a boolean mask of T/F for whether the values in the
# test array "are close" to the given number or not:
close_mask = np.isclose(testFrag, 0.93291)
print(close_mask)
In [86]:
# feed the np.isclose() mask to np.where() to get the location(s) of matches:
close_locations = np.where(np.isclose(testFrag, 0.93291))
print(close_locations)
In [88]:
# the close answers found above, read back from the array:
for row in (1, 4):
    print(testFrag[row, 1])
In [89]:
# Looking for pieces of this number, let's see how np.isclose() behaves on
# this array (called with its default tolerances).
# test number: 0.640593901718134878
# It needs at least 5 digits to get a match; once a match is found, precision
# is the same no matter how many digits are added.
for probe in (
    0.6,
    0.64,
    0.640,
    0.6405,
    0.64059,
    0.640593,
    0.6405939,
    0.64059390,
    0.640593901,
    0.6405939017,
    0.64059390171,
    0.640593901718,
    0.6405939017181,
    0.64059390171813,
    0.640593901718134,
    0.6405939017181348,
    0.64059390171813487,
    0.640593901718134878,
):
    print(np.where(np.isclose(testFrag, probe)))
In [90]:
# This fragment has all test cases from the previous cell plus a second copy
# where the last digit of each case is changed.
probes = [
    0.6, 0.64, 0.640, 0.6405, 0.64059, 0.640593,
    0.6405939, 0.64059390, 0.640593901, 0.6405939017,
    0.64059390171, 0.640593901718, 0.6405939017181, 0.64059390171813,
    0.640593901718134, 0.6405939017181348, 0.64059390171813487, 0.640593901718134878,
]
neighbours = [
    0.7, 0.63, 0.641, 0.6404, 0.64058, 0.640594,
    0.6405938, 0.64059391, 0.640593902, 0.6405939018,
    0.64059390172, 0.640593901717, 0.6405939017182, 0.64059390171814,
    0.640593901718135, 0.6405939017181349, 0.64059390171813488, 0.640593901718134879,
]
testFrag2 = np.array([probes, neighbours])
# This comparison corroborates assumptions about the previous test: it needs at
# least 5 digits to get a match, and once a match is found, precision is the
# same no matter how many digits are added.
for probe in probes:
    print(np.where(np.isclose(testFrag2, probe)))
Additional research and help topics:
In [ ]: