In [1]:
# The empty string, 0, 0.0 and False all hash to the same value, 0,
# so every link in this chained comparison holds.
hash('') == hash(0) == hash(0.0) == hash(False) == 0


Out[1]:
True

In [2]:
import pandas as pd

In [3]:
# 'f8' is the NumPy type code for an 8-byte float, so the integers are stored as float64
pd.Series([42, 43, 44], dtype='f8')


Out[3]:
0    42
1    43
2    44
dtype: float64

In [4]:
# label each value with a particle name instead of the default 0..n-1 index
s = pd.Series(
    data=[42, 43, 44],
    index=["electron", "proton", "neutron"],
)

In [5]:
s


Out[5]:
electron    42
proton      43
neutron     44
dtype: int64

In [6]:
s['electron']


Out[6]:
42

In [7]:
# label-based slices include both endpoints, so 'proton' is part of the result
s['electron':'proton']


Out[7]:
electron    42
proton      43
dtype: int64

In [8]:
# integer slices still select by position, even though the index holds string labels
s[1:]


Out[8]:
proton     43
neutron    44
dtype: int64

In [9]:
# a dict initializer uses its keys as the index labels
t = pd.Series(
    data={
        'electron': 6,
        'neutron': 28,
        'proton': 496,
        'neutrino': 8128,
    }
)

In [10]:
t


Out[10]:
electron       6
neutrino    8128
neutron       28
proton       496
dtype: int64

In [11]:
# arithmetic aligns on index labels: 'neutrino' exists only in t, so its sum
# is NaN, and the result is upcast to float64 to hold that missing value
s + t


Out[11]:
electron     48
neutrino    NaN
neutron      72
proton      539
dtype: float64

In [12]:
# build a frame from the two series: dict keys become the column names and
# the rows are the union of both indexes (missing entries become NaN)
df = pd.DataFrame({'S': s, 'T': t})

In [13]:
df


Out[13]:
S T
electron 42 6
neutrino NaN 8128
neutron 44 28
proton 43 496

In [14]:
# a bare slice on a frame selects rows: this keeps every second row
df[::2]


Out[14]:
S T
electron 42 6
neutron 44 28

In [15]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so use
# pd.concat to add rows. Columns the new row does not supply (here 'T')
# are filled with NaN.
dg = pd.concat([df, pd.DataFrame({'S': [-8128]}, index=['antineutrino'])])
dg


Out[15]:
S T
electron 42 6
neutrino NaN 8128
neutron 44 28
proton 43 496
antineutrino -8128 NaN

In [16]:
# drop() returns a new frame without the row labeled 'neutron'
dh = dg.drop('neutron')
dh


Out[16]:
S T
electron 42 6
neutrino NaN 8128
proton 43 496
antineutrino -8128 NaN

In [17]:
# .T transposes the frame: particle labels become the columns, S and T the rows
df.T


Out[17]:
electron neutrino neutron proton
S 42 NaN 44 43
T 6 8128 28 496

In [18]:
# comparison is element-wise and yields a boolean frame; the NaN entry compares as False
df < 42


Out[18]:
S T
electron False True
neutrino False False
neutron False True
proton False False

In [19]:
# accessing a single column 
# will return a series
df['T']


Out[19]:
electron       6
neutrino    8128
neutron       28
proton       496
Name: T, dtype: int64

In [20]:
# assigning a series (or an expression
# that evaluates to one) to a new column
# name adds that column to the frame
df['small'] = df['T'] < 100
df


Out[20]:
S T small
electron 42 6 True
neutrino NaN 8128 False
neutron 44 28 True
proton 43 496 False

In [21]:
# deleting a column will
# remove it from the frame
del df['small']
df


Out[21]:
S T
electron 42 6
neutrino NaN 8128
neutron 44 28
proton 43 496

In [22]:
from blist import sorteddict

In [23]:
# a sorteddict displays its keys in sorted order,
# not in the order they were supplied
b = sorteddict(first="Albert", 
               last="Einstein",
               birthday=[1879, 3, 14])
b


Out[23]:
sorteddict({'birthday': [1879, 3, 14], 'first': 'Albert', 'last': 'Einstein'})

In [24]:
# a newly inserted key lands in its sorted position, not at the end
b['died'] = [1955, 4, 18]
b


Out[24]:
sorteddict({'birthday': [1879, 3, 14], 'died': [1955, 4, 18], 'first': 'Albert', 'last': 'Einstein'})

In [25]:
list(b.keys())


Out[25]:
['birthday', 'died', 'first', 'last']

In [26]:
class Node(object):
    """Binary tree node holding a point plus optional left and right subtrees."""

    def __init__(self, point, left=None, right=None):
        self.point = point
        self.left = left
        self.right = right

    def __repr__(self):
        """Render the subtree as indented text.

        A leaf is shown as just its point. An interior node is wrapped in
        brackets, with each existing child on its own line and indented two
        spaces deeper than its parent.
        """
        text = repr(self.point)
        if self.left is None and self.right is None:
            return text
        pieces = ["[", text, ":"]
        for label, child in (("left", self.left), ("right", self.right)):
            if child is not None:
                pieces.append("\n  " + label + " = ")
                # push every continuation line of the child's repr one level deeper
                pieces.append(repr(child).replace("\n", "\n  "))
        pieces.append("\n  ]")
        return "".join(pieces)


def kdtree(points, depth=0):
    """Recursively build a k-d tree of Node objects from a sequence of points.

    The number of axes is taken from the first point, and the splitting axis
    cycles through them as depth increases. At each level the points are
    sorted along the current axis; the middle one becomes the node, those
    sorted before it form the left subtree and those after it the right.

    Returns the root Node, or None when points is empty.
    """
    if len(points) == 0:
        return None
    axis = depth % len(points[0])
    ordered = sorted(points, key=lambda pt: pt[axis])
    mid = len(ordered) // 2  # middle index, rounded down
    return Node(
        ordered[mid],
        kdtree(ordered[:mid], depth + 1),
        kdtree(ordered[mid + 1:], depth + 1),
    )

In [27]:
# six sample 2-D points to build the tree from
points = [(1, 2), (3, 2), 
          (5, 5), (2, 1), 
          (4, 3), (1, 5)]
root = kdtree(points)
# printing the root shows the whole tree via Node.__repr__,
# with children indented under their parent
print(root)


[(3, 2):
  left = [(1, 2):
    left = (2, 1)
    right = (1, 5)
    ]
  right = [(5, 5):
    left = (4, 3)
    ]
  ]

In [28]:
from scipy.spatial import KDTree
# build SciPy's k-d tree over the same sample points
tree = KDTree(points)

In [29]:
# the tree exposes the points it was built from as a 2-D array, one row per point
tree.data


Out[29]:
array([[1, 2],
       [3, 2],
       [5, 5],
       [2, 1],
       [4, 3],
       [1, 5]])

In [30]:
# query() defaults to only the closest point; for each query point it
# returns the distance to that neighbor and the neighbor's row index in tree.data
dist, idx = tree.query([(4.5, 1.25)])

In [31]:
dist


Out[31]:
array([ 1.67705098])

In [32]:
idx


Out[32]:
array([1])

In [33]:
# fancy index by idx to get the point
tree.data[idx]


Out[33]:
array([[3, 2]])