diff --git a/src/EditDistance.elm b/src/EditDistance.elm
index ef62d17..0eb84d9 100644
--- a/src/EditDistance.elm
+++ b/src/EditDistance.elm
@@ -13,10 +13,14 @@ module EditDistance
 between two lists, or the actual edit steps required to go from one to the
 other.
 
+
 # Edit Steps
+
 @docs EditStep, edits, editsFromStrings, editsWithCostFunc, editsWithCostFuncFromStrings
 
+
 # Levenshtein
+
 @docs levenshtein, levenshteinFromStrings
 
 -}
@@ -47,10 +51,11 @@ required to turn one given list into another.
         , Move 'r' 2 3
         ]
 
-The resulting indices reflect edits where _deletions are made first_, before
+The resulting indices reflect edits where *deletions are made first*, before
 insertions and substitutions. That is, indices for deletions refer to the
 source list, whereas indices for insertions and substitutions refer to the
 latter, intermediate lists.
+
 -}
 edits : List comparable -> List comparable -> List (EditStep comparable)
 edits source target =
@@ -77,9 +82,10 @@ Int).
         , Delete 'b' 1
         ]
 
-(Note that the cost function is applied _before_ insertions and deletions are
+(Note that the cost function is applied *before* insertions and deletions are
 converted into moves, meaning it will never receive an EditStep of type Move
 as an argument.)
+
 -}
 editsWithCostFunc : (EditStep comparable -> Int) -> List comparable -> List comparable -> List (EditStep comparable)
 editsWithCostFunc costFunc source target =
@@ -253,6 +259,7 @@ moveFromSteps editSteps step =
         [ Delete 'g' 0
         , Move 'r' 2 3
         ]
+
 -}
 editsFromStrings : String -> String -> List (EditStep Char)
 editsFromStrings source target =
@@ -276,6 +283,7 @@ editsFromStrings source target =
         [ Insert 'd' 1
         , Delete 'b' 1
         ]
+
 -}
 editsWithCostFuncFromStrings : (EditStep Char -> Int) -> String -> String -> List (EditStep Char)
 editsWithCostFuncFromStrings costFunc source target =
@@ -294,27 +302,127 @@ into another.
     levenshtein (String.toList "garvey") (String.toList "avery") == 3
+
 -}
 levenshtein : List comparable -> List comparable -> Int
 levenshtein source target =
     case ( source, target ) of
-        ( source, [] ) ->
+        ( [], _ ) ->
+            List.length target
+
+        ( _, [] ) ->
             List.length source
 
-        ( [], target ) ->
-            List.length target
+        ( [ src ], _ ) ->
+            if List.any ((==) src) target then
+                List.length target - 1
+            else
+                List.length target
+
+        ( _, [ tgt ] ) ->
+            if List.any ((==) tgt) source then
+                List.length source - 1
+            else
+                List.length source
 
         ( src_hd :: src_tail, tgt_hd :: tgt_tail ) ->
             if src_hd == tgt_hd then
                 levenshtein src_tail tgt_tail
             else
-                Maybe.withDefault 0
-                    (List.minimum
-                        [ (levenshtein src_tail target) + 1
-                        , (levenshtein source tgt_tail) + 1
-                        , (levenshtein src_tail tgt_tail) + 1
-                        ]
-                    )
+                initTextLoop source target
+
+
+{-| First element of a list of Ints, or 0 when the list is empty.
+-}
+hdOrZero : List Int -> Int
+hdOrZero lst =
+    case lst of
+        hd :: tl ->
+            hd
+
+        [] ->
+            0
+
+
+{-| Everything after the first element, or [] when the list is empty.
+-}
+tlOrEmpty : List a -> List a
+tlOrEmpty lst =
+    case lst of
+        hd :: tl ->
+            tl
+
+        [] ->
+            []
+
+
+{-| Compute one column of the edit-distance table: the distances between the
+text prefix ending in `tChar` and every non-empty prefix of `pattern`.
+
+`b0` is the previous column's entry for the empty pattern prefix and `prevCol`
+is the rest of that column, first row first. Inside `recLoop`, `b0`, `b1` and
+`b2` are the diagonal, previous-column and current-column neighbours of the
+cell being filled in. The new column is returned reversed (last row first).
+-}
+patternLoop : comparable -> List comparable -> Int -> List Int -> List Int
+patternLoop tChar pattern b0 prevCol =
+    let
+        recLoop : List comparable -> Int -> Int -> Int -> Int -> List Int -> List Int -> List Int
+        recLoop pattern idx b0 b1 b2 prevCol revCurCol =
+            case pattern of
+                pChar :: pTail ->
+                    let
+                        b0_ : Int
+                        b0_ =
+                            b1
+
+                        b1_ : Int
+                        b1_ =
+                            hdOrZero prevCol
+
+                        b2_ : Int
+                        b2_ =
+                            if pChar == tChar then
+                                b0
+                            else
+                                1 + (min b1 b2 |> min b0)
+                    in
+                        recLoop pTail (idx + 1) b0_ b1_ b2_ (tlOrEmpty prevCol) (b2_ :: revCurCol)
+
+                [] ->
+                    revCurCol
+    in
+        recLoop pattern 1 b0 (hdOrZero prevCol) (b0 + 1) (tlOrEmpty prevCol) []
+
+
+{-| Fold `patternLoop` over every element of `text`, threading the reversed
+column through. `idx` is the 1-based position of the next text element.
+-}
+textLoop : List comparable -> List comparable -> Int -> List Int -> List Int
+textLoop text pattern idx revCol =
+    case text of
+        tChar :: tTail ->
+            List.reverse revCol
+                |> patternLoop tChar pattern (idx - 1)
+                |> textLoop tTail pattern (idx + 1)
+
+        [] ->
+            revCol
+
+
+{-| Levenshtein distance between `text` and `pattern`, built up one column at
+a time. The starting column holds the distance from the empty text to each
+non-empty prefix of `pattern`, i.e. 1 up to its length. An empty `pattern`
+yields 0, so `levenshtein` deals with empty lists before calling this.
+-}
+initTextLoop : List comparable -> List comparable -> Int
+initTextLoop text pattern =
+    let
+        initCol : List Int
+        initCol =
+            List.range 1 (List.length pattern) |> List.reverse
+    in
+        textLoop text pattern 1 initCol |> hdOrZero
 
 
 {-| Same as the `levenshtein` function, but for String values.
@@ -324,6 +432,7 @@ levenshtein source target =
     levenshtein "preterit" "zeitgeist" == 6
     levenshtein "garvey" "avery" == 3
+
 -}
 levenshteinFromStrings : String -> String -> Int
 levenshteinFromStrings source target =
diff --git a/tests/LevenshteinFuzz.elm b/tests/LevenshteinFuzz.elm
new file mode 100644
index 0000000..e79deb0
--- /dev/null
+++ b/tests/LevenshteinFuzz.elm
@@ -0,0 +1,55 @@
+module LevenshteinFuzz exposing (all)
+
+import EditDistance exposing (levenshtein)
+import Test exposing (Test, describe, fuzz2)
+import Fuzz exposing (Fuzzer, list, char)
+import Expect
+
+
+all : Test
+all =
+    describe "The optimized levenshtein implementation"
+        [ fuzz2 shortList shortList "should match the old implementation" <|
+            \source target ->
+                Expect.equal (levenshtein source target)
+                    (safeLevenshtein source target)
+        ]
+
+
+{-| Lists of at most six characters. `safeLevenshtein` below is exponential in
+the length of its inputs, so the fuzzed lists are truncated to keep it fast.
+-}
+shortList : Fuzzer (List Char)
+shortList =
+    Fuzz.map (List.take 6) (list char)
+
+
+{-| Smallest of three values.
+-}
+min3 : comparable -> comparable -> comparable -> comparable
+min3 a b c =
+    min a b |> min c
+
+
+{-| The original, naive recursive Levenshtein implementation, kept as an
+independent reference. It must only ever recurse into itself: calling the
+optimized `levenshtein` here would compare the new code against itself.
+-}
+safeLevenshtein : List comparable -> List comparable -> Int
+safeLevenshtein source target =
+    case ( source, target ) of
+        ( source, [] ) ->
+            List.length source
+
+        ( [], target ) ->
+            List.length target
+
+        ( src_hd :: src_tail, tgt_hd :: tgt_tail ) ->
+            if src_hd == tgt_hd then
+                safeLevenshtein src_tail tgt_tail
+            else
+                min3
+                    (safeLevenshtein src_tail target)
+                    (safeLevenshtein source tgt_tail)
+                    (safeLevenshtein src_tail tgt_tail)
+                    + 1
diff --git a/tests/Main.elm b/tests/Main.elm
deleted file mode 100644
index 7d62c1a..0000000
--- a/tests/Main.elm
+++ /dev/null
@@ -1,13 +0,0 @@
-port module Main exposing (..)
-
-import Tests
-import Test.Runner.Node exposing (run, TestProgram)
-import Json.Encode exposing (Value)
-
-
-main : TestProgram
-main =
-    run emit Tests.all
-
-
-port emit : ( String, Value ) -> Cmd msg
diff --git a/tests/Tests.elm b/tests/Tests.elm
index e56331b..7082877 100644
--- a/tests/Tests.elm
+++ b/tests/Tests.elm
@@ -91,7 +91,7 @@ all =
                     Expect.equal (edits abc adc)
                         [ Substitute 'd' 1
                         ]
-            , test "abc -> adc (without cost function) edit steps" <|
+            , test "abc -> adc (with cost function) edit steps" <|
                 \() ->
                     let
                         costFunc editStep =
@@ -170,6 +170,18 @@ all =
         , describe "when getting Levenshtein distance between strings"
             [ test "kitten <-> sitting Levenshtein distance" <|
                 \() -> Expect.equal (levenshteinFromStrings "kitten" "sitting") 3
+            , test "a <-> abc Levenshtein distance" <|
+                \() -> Expect.equal (levenshteinFromStrings "a" "abc") 2
+            , test "abc <-> a Levenshtein distance" <|
+                \() -> Expect.equal (levenshteinFromStrings "abc" "a") 2
+            , test "ab <-> abc Levenshtein distance" <|
+                \() -> Expect.equal (levenshteinFromStrings "ab" "abc") 1
+            , test "abc <-> ab Levenshtein distance" <|
+                \() -> Expect.equal (levenshteinFromStrings "abc" "ab") 1
+            , test "a <-> xyz Levenshtein distance" <|
+                \() -> Expect.equal (levenshteinFromStrings "a" "xyz") 3
+            , test "xyz <-> a Levenshtein distance" <|
+                \() -> Expect.equal (levenshteinFromStrings "xyz" "a") 3
             , test "sitting <-> kitten Levenshtein distance" <|
                 \() -> Expect.equal (levenshteinFromStrings "sitting" "kitten") 3
             , test "kitten <-> empty string Levenshtein distance" <|
diff --git a/tests/elm-package.json b/tests/elm-package.json
index ea659db..88811c3 100644
--- a/tests/elm-package.json
+++ b/tests/elm-package.json
@@ -1,20 +1,19 @@
 {
     "version": "1.0.0",
-    "summary": "Sample Elm Test",
-    "repository": "https://github.com/user/project.git",
-    "license": "BSD-3-Clause",
+    "summary": "Test Suites",
+    "repository": "https://github.com/erwald/elm-edit-distance.git",
+    "license": "MIT",
     "source-directories": [
-        ".",
-        "../src"
+        "..",
+        "../src",
+        "."
     ],
     "exposed-modules": [],
     "dependencies": {
-        "elm-community/json-extra": "2.0.0 <= v < 3.0.0",
-        "elm-lang/html": "2.0.0 <= v < 3.0.0",
-        "mgold/elm-random-pcg": "4.0.2 <= v < 5.0.0",
+        "eeue56/elm-html-test": "5.1.2 <= v < 6.0.0",
+        "elm-community/elm-test": "4.0.0 <= v < 5.0.0",
         "elm-lang/core": "5.0.0 <= v < 6.0.0",
-        "elm-community/elm-test": "3.0.0 <= v < 4.0.0",
-        "rtfeldman/node-test-runner": "3.0.0 <= v < 4.0.0"
+        "elm-lang/html": "2.0.0 <= v < 3.0.0"
     },
     "elm-version": "0.18.0 <= v < 0.19.0"
 }