Sparse Search


In [1]:
import qualified Data.Array as A
import qualified Data.Sequence as S

import Data.Maybe (listToMaybe, isNothing)

In [2]:
{-# LANGUAGE ScopedTypeVariables #-}

In [3]:
type StringArray i = A.Array i String

type Interval i = (i, i)
type IntervalQueue i = S.Seq (Interval i)

sparseSearch :: forall i. (Integral i, A.Ix i, Show i) => String -> StringArray i -> Maybe i
sparseSearch x xs
    | x /= stopWord =
        modifiedBinary bounds $ S.singleton bounds
    | otherwise =
        listToMaybe $ map snd $ filter ((== stopWord) . fst) $ zip (A.elems xs) (A.indices xs)
    where
        stopWord = ""

        bounds = A.bounds xs

        popInterval :: IntervalQueue i -> (Interval i, IntervalQueue i)
        popInterval intervals = (S.index intervals 0, S.drop 1 intervals)

        modifiedBinary :: (i, i) -> IntervalQueue i -> Maybe i
        modifiedBinary (leftBound, rightBound) intervals
            | S.null intervals =
                Nothing
            | leftIndex > rightIndex || leftIndex > rightBound || rightIndex < leftBound =
                modifiedBinary (leftBound, rightBound) remaining
            | x' /= stopWord =
                case compare x x' of
                    EQ -> Just middleIndex
                    LT -> modifiedBinary (leftBound, pred middleIndex) $ remaining S.|> leftHalf
                    GT -> modifiedBinary (succ middleIndex, rightBound) $ remaining S.|> rightHalf
            | otherwise =
                    modifiedBinary (leftBound, rightBound) $ remaining S.|> leftHalf S.|> rightHalf
            where
                ((leftIndex, rightIndex), remaining) = popInterval intervals

                middleIndex = leftIndex + (rightIndex - leftIndex) `div` 2
                    
                leftHalf = (leftIndex, pred middleIndex)
                rightHalf = (succ middleIndex, rightIndex)
                
                x' = xs A.! middleIndex

In [4]:
xs = ["at", "", "", "", "ball", "", "", "car", "", "", "dad", "", ""]
xs' = A.listArray (0, length xs - 1) xs

"" `sparseSearch` xs'
"a" `sparseSearch` xs'
"dad" `sparseSearch` xs'


Just1
Nothing
Just10

Tests


In [5]:
import Test.QuickCheck

In [6]:
import Data.List (nub, sort)

In [7]:
testSparseSearchRandomized :: ([String], [Int]) -> String -> Bool
testSparseSearchRandomized (strings, positions) extra =
    let
        prepare :: [String] -> [Int] -> [String]
        prepare [] ps = replicate (length ps) ""
        prepare xs [] = xs
        prepare xs (p:ps) =
            let
                (us, vs) = splitAt (p `mod` length xs) xs
            in
                prepare (us ++ [""] ++ vs) ps
        
        strings' = filter (/= extra) $ prepare (sort strings) positions
        
        haystack = A.listArray (1, length strings') strings'
        needles = nub strings'
        
        checkIndex :: (String, Maybe Int) -> Bool 
        checkIndex (needle, index) =
            maybe False (\i -> haystack A.! i == needle) index
    in
        all checkIndex (zip needles $ map (`sparseSearch` haystack) needles) &&
            isNothing (extra `sparseSearch` haystack)

In [8]:
quickCheck testSparseSearchRandomized


+++ OK, passed 100 tests.