| /** |
| * Spell checker |
| * |
| * Does not have any dependencies on the rest of DMD. |
| * |
| * Copyright: Copyright (C) 1999-2023 by The D Language Foundation, All Rights Reserved |
| * Authors: Walter Bright, https://www.digitalmars.com |
| * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) |
| * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/speller.d, root/_speller.d) |
| * Documentation: https://dlang.org/phobos/dmd_root_speller.html |
| * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/speller.d |
| */ |
| |
| module dmd.root.speller; |
| |
| /************************************************** |
| * Looks for correct spelling. |
| * Looks a distance of up to two. |
| * This does an exhaustive search, so can potentially be very slow. |
| * Params: |
| * seed = wrongly spelled word |
| * dg = search delegate of the form `T delegate(const(char)[] p, out int cost)` |
| * Returns: |
| * T.init = no correct spellings found, |
| * otherwise the value returned by dg() for first possible correct spelling |
| */ |
| auto speller(alias dg)(const(char)[] seed) |
| if (isSearchFunction!dg) |
| { |
| const size_t maxdist = seed.length < 4 ? seed.length / 2 : 2; |
| foreach (distance; 0 .. maxdist) |
| { |
| if (auto p = spellerX!dg(seed, distance != 0)) |
| return p; |
| // if (seedlen > 10) |
| // break; |
| } |
| return null; // didn't find it |
| } |
| |
| private: |
| |
| import core.stdc.stdlib; |
| import core.stdc.string; |
| import dmd.common.string : SmallBuffer; |
| |
| enum isSearchFunction(alias fun) = is(searchFunctionType!fun); |
| alias searchFunctionType(alias fun) = typeof(() {int x; return fun("", x);}()); |
| |
| /************************************* |
| * Spell check level 1. |
| * Params: |
| * dg = delegate that looks up string in dictionary AA and returns value found |
| * seed = starting string |
| * flag = if true, do 2 level lookup otherwise 1 level |
| * Returns: |
| * whatever dg returns, null if no match |
| */ |
| auto spellerX(alias dg)(const(char)[] seed, bool flag) |
| { |
| if (!seed.length) |
| return null; |
| |
| /* Need buffer to store trial strings in |
| */ |
| char[30] tmp = void; |
| auto sb = SmallBuffer!char(seed.length + 1, tmp[]); |
| char[] buf = sb[]; |
| |
| int cost = int.max; |
| searchFunctionType!dg p = null; |
| |
| /* Deletions */ |
| buf[0 .. seed.length - 1] = seed[1 .. $]; |
| foreach (i; 0 .. seed.length) |
| { |
| //printf("del buf = '%s'\n", buf); |
| int ncost; |
| auto np = flag ? spellerY!dg(buf[0 .. seed.length - 1], i, ncost) |
| : dg(buf[0 .. seed.length - 1], ncost); |
| if (combineSpellerResult(p, cost, np, ncost)) |
| return p; |
| buf[i] = seed[i]; |
| } |
| |
| /* Transpositions */ |
| if (!flag) |
| { |
| buf[0 .. seed.length] = seed; |
| for (size_t i = 0; i + 1 < seed.length; i++) |
| { |
| // swap [i] and [i + 1] |
| buf[i] = seed[i + 1]; |
| buf[i + 1] = seed[i]; |
| //printf("tra buf = '%s'\n", buf); |
| int ncost; |
| auto np = dg(buf[0 .. seed.length], ncost); |
| if (combineSpellerResult(p, cost, np, ncost)) |
| return p; |
| buf[i] = seed[i]; |
| } |
| } |
| |
| /* Substitutions */ |
| buf[0 .. seed.length] = seed; |
| foreach (i; 0 .. seed.length) |
| { |
| foreach (s; idchars) |
| { |
| buf[i] = s; |
| //printf("sub buf = '%s'\n", buf); |
| int ncost; |
| auto np = flag ? spellerY!dg(buf[0 .. seed.length], i + 1, ncost) |
| : dg(buf[0 .. seed.length], ncost); |
| if (combineSpellerResult(p, cost, np, ncost)) |
| return p; |
| } |
| buf[i] = seed[i]; |
| } |
| |
| /* Insertions */ |
| buf[1 .. seed.length + 1] = seed; |
| foreach (i; 0 .. seed.length + 1) // yes, do seed.length+1 iterations |
| { |
| foreach (s; idchars) |
| { |
| buf[i] = s; |
| //printf("ins buf = '%s'\n", buf); |
| int ncost; |
| auto np = flag ? spellerY!dg(buf[0 .. seed.length + 1], i + 1, ncost) |
| : dg(buf[0 .. seed.length + 1], ncost); |
| if (combineSpellerResult(p, cost, np, ncost)) |
| return p; |
| } |
| if (i < seed.length) |
| buf[i] = seed[i]; |
| } |
| |
| return p; // return "best" result |
| } |
| |
| /********************************************** |
| * Do second level of spell matching. |
| * Params: |
| * dg = delegate that looks up string in dictionary AA and returns value found |
| * seed = starting string |
| * index = index into seed[] that is where we will mutate it |
| * cost = set to cost of match |
| * Returns: |
| * whatever dg returns, null if no match |
| */ |
| auto spellerY(alias dg)(const(char)[] seed, size_t index, out int cost) |
| { |
| if (!seed.length) |
| return null; |
| |
| /* Allocate a buf to store the new string to play with, needs |
| * space for an extra char for insertions |
| */ |
| char[30] tmp = void; // stack allocations are fastest |
| auto sb = SmallBuffer!char(seed.length + 1, tmp[]); |
| char[] buf = sb[]; |
| buf[0 .. index] = seed[0 .. index]; |
| |
| cost = int.max; // start with worst possible match |
| searchFunctionType!dg p = null; |
| |
| /* Delete character at seed[index] */ |
| if (index < seed.length) |
| { |
| buf[index .. seed.length - 1] = seed[index + 1 .. $]; // seed[] with deleted character |
| int ncost; |
| auto np = dg(buf[0 .. seed.length - 1], ncost); // look it up |
| if (combineSpellerResult(p, cost, np, ncost)) // compare with prev match |
| return p; // cannot get any better |
| } |
| |
| /* Substitute character at seed[index] */ |
| if (index < seed.length) |
| { |
| buf[0 .. seed.length] = seed; |
| foreach (s; idchars) |
| { |
| buf[index] = s; // seed[] with substituted character |
| //printf("sub buf = '%s'\n", buf); |
| int ncost; |
| auto np = dg(buf[0 .. seed.length], ncost); |
| if (combineSpellerResult(p, cost, np, ncost)) |
| return p; |
| } |
| } |
| |
| /* Insert character at seed[index] */ |
| buf[index + 1 .. seed.length + 1] = seed[index .. $]; |
| foreach (s; idchars) |
| { |
| buf[index] = s; |
| //printf("ins buf = '%s'\n", buf); |
| int ncost; |
| auto np = dg(buf[0 .. seed.length + 1], ncost); |
| if (combineSpellerResult(p, cost, np, ncost)) |
| return p; |
| } |
| return p; // return "best" result |
| } |
| |
| |
| /* Characters used to substitute ones in the string we're checking |
| * the spelling on. |
| */ |
| immutable string idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; |
| |
| /************************************************** |
| * Combine a new result from the spell checker to |
| * find the one with the closest symbol with |
| * respect to the cost defined by the search function |
| * Params: |
| * p = best found spelling so far, T.init if none found yet. |
| * If np is better, p is replaced with np |
| * cost = cost of p (int.max if none found yet). |
| * If np is better, cost is replaced with ncost |
| * np = current spelling to check against p, T.init if none |
| * ncost = cost of np if np is not T.init |
| * Returns: |
| * true if the cost is less or equal 0, meaning we can stop looking |
| * false otherwise |
| */ |
| bool combineSpellerResult(T)(ref T p, ref int cost, T np, int ncost) |
| { |
| if (np && ncost < cost) // if np is better |
| { |
| p = np; // np is new best match |
| cost = ncost; |
| if (cost <= 0) |
| return true; // meaning we can stop looking |
| } |
| return false; |
| } |
| |
| /************************************* Tests ***********************/ |
| |
| unittest |
| { |
| static immutable string[][] cases = |
| [ |
| ["hello", "hell", "y"], |
| ["hello", "hel", "y"], |
| ["hello", "ello", "y"], |
| ["hello", "llo", "y"], |
| ["hello", "hellox", "y"], |
| ["hello", "helloxy", "y"], |
| ["hello", "xhello", "y"], |
| ["hello", "xyhello", "y"], |
| ["hello", "ehllo", "y"], |
| ["hello", "helol", "y"], |
| ["hello", "abcd", "n"], |
| ["hello", "helxxlo", "y"], |
| ["hello", "ehlxxlo", "n"], |
| ["hello", "heaao", "y"], |
| ["_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y"], |
| ]; |
| //printf("unittest_speller()\n"); |
| |
| string dgarg; |
| |
| string speller_test(const(char)[] s, ref int cost) |
| { |
| assert(s[$-1] != '\0'); |
| //printf("speller_test(%s, %s)\n", dgarg, s); |
| cost = 0; |
| if (dgarg == s) |
| return dgarg; |
| return null; |
| } |
| |
| dgarg = "hell"; |
| auto p = speller!speller_test("hello"); |
| assert(p !is null); |
| foreach (testCase; cases) |
| { |
| //printf("case [%d]\n", i); |
| dgarg = testCase[1]; |
| auto p2 = speller!speller_test(testCase[0]); |
| if (p2) |
| assert(testCase[2][0] == 'y'); |
| else |
| assert(testCase[2][0] == 'n'); |
| } |
| //printf("unittest_speller() success\n"); |
| } |