| /* Find near-matches for strings and identifiers. |
| Copyright (C) 2015-2017 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #ifndef GCC_SPELLCHECK_H |
| #define GCC_SPELLCHECK_H |
| |
| typedef unsigned int edit_distance_t; |
| const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; |
| |
| /* spellcheck.c */ |
| extern edit_distance_t |
| levenshtein_distance (const char *s, int len_s, |
| const char *t, int len_t); |
| |
| extern edit_distance_t |
| levenshtein_distance (const char *s, const char *t); |
| |
| extern const char * |
| find_closest_string (const char *target, |
| const auto_vec<const char *> *candidates); |
| |
| /* A traits class for describing a string-like type usable by |
| class best_match. |
| Specializations should provide the implementations of the following: |
| |
| static size_t get_length (TYPE); |
| static const char *get_string (TYPE); |
| |
| get_string should return a non-NULL ptr, which does not need to be |
| 0-terminated. */ |
| |
| template <typename TYPE> |
| struct edit_distance_traits {}; |
| |
| /* Specialization of edit_distance_traits for C-style strings. */ |
| |
| template <> |
| struct edit_distance_traits<const char *> |
| { |
| static size_t get_length (const char *str) |
| { |
| gcc_assert (str); |
| return strlen (str); |
| } |
| |
| static const char *get_string (const char *str) |
| { |
| gcc_assert (str); |
| return str; |
| } |
| }; |
| |
| /* A type for use when determining the best match against a string, |
| expressed as a template so that we can match against various |
| string-like types (const char *, frontend identifiers, and preprocessor |
| macros). |
| |
| This type accumulates the best possible match against GOAL_TYPE for |
| a sequence of elements of CANDIDATE_TYPE, whilst minimizing the |
| number of calls to levenshtein_distance and to |
| edit_distance_traits<T>::get_length. */ |
| |
| template <typename GOAL_TYPE, typename CANDIDATE_TYPE> |
| class best_match |
| { |
| public: |
| typedef GOAL_TYPE goal_t; |
| typedef CANDIDATE_TYPE candidate_t; |
| typedef edit_distance_traits<goal_t> goal_traits; |
| typedef edit_distance_traits<candidate_t> candidate_traits; |
| |
| /* Constructor. */ |
| |
| best_match (GOAL_TYPE goal, |
| edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE) |
| : m_goal (goal_traits::get_string (goal)), |
| m_goal_len (goal_traits::get_length (goal)), |
| m_best_candidate (NULL), |
| m_best_distance (best_distance_so_far) |
| {} |
| |
| /* Compare the edit distance between CANDIDATE and m_goal, |
| and if it's the best so far, record it. */ |
| |
| void consider (candidate_t candidate) |
| { |
| size_t candidate_len = candidate_traits::get_length (candidate); |
| |
| /* Calculate a lower bound on the candidate's distance to the goal, |
| based on the difference in lengths; it will require at least |
| this many insertions/deletions. */ |
| edit_distance_t min_candidate_distance |
| = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len); |
| |
| /* If the candidate's length is sufficiently different to that |
| of the goal string, then the number of insertions/deletions |
| may be >= the best distance so far. If so, we can reject |
| the candidate immediately without needing to compute |
| the exact distance, since it won't be an improvement. */ |
| if (min_candidate_distance >= m_best_distance) |
| return; |
| |
| /* If the candidate will be unable to beat the criterion in |
| get_best_meaningful_candidate, reject it without computing |
| the exact distance. */ |
| unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2; |
| if (min_candidate_distance > cutoff) |
| return; |
| |
| /* Otherwise, compute the distance and see if the candidate |
| has beaten the previous best value. */ |
| edit_distance_t dist |
| = levenshtein_distance (m_goal, m_goal_len, |
| candidate_traits::get_string (candidate), |
| candidate_len); |
| if (dist < m_best_distance) |
| { |
| m_best_distance = dist; |
| m_best_candidate = candidate; |
| m_best_candidate_len = candidate_len; |
| } |
| } |
| |
| /* Assuming that BEST_CANDIDATE is known to be better than |
| m_best_candidate, update (without recomputing the edit distance to |
| the goal). */ |
| |
| void set_best_so_far (CANDIDATE_TYPE best_candidate, |
| edit_distance_t best_distance, |
| size_t best_candidate_len) |
| { |
| gcc_assert (best_distance < m_best_distance); |
| m_best_candidate = best_candidate; |
| m_best_distance = best_distance; |
| m_best_candidate_len = best_candidate_len; |
| } |
| |
| /* Get the best candidate so far, but applying a filter to ensure |
| that we return NULL if none of the candidates are close to the goal, |
| to avoid offering nonsensical suggestions to the user. */ |
| |
| candidate_t get_best_meaningful_candidate () const |
| { |
| /* If more than half of the letters were misspelled, the suggestion is |
| likely to be meaningless. */ |
| if (m_best_candidate) |
| { |
| unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2; |
| if (m_best_distance > cutoff) |
| return NULL; |
| } |
| |
| /* If the goal string somehow makes it into the candidate list, offering |
| it as a suggestion will be nonsensical e.g. |
| 'constexpr' does not name a type; did you mean 'constexpr'? |
| Ultimately such suggestions are due to bugs in constructing the |
| candidate list, but as a band-aid, do not offer suggestions for |
| distance == 0 (where candidate == goal). */ |
| if (m_best_distance == 0) |
| return NULL; |
| |
| return m_best_candidate; |
| } |
| |
| edit_distance_t get_best_distance () const { return m_best_distance; } |
| size_t get_best_candidate_length () const { return m_best_candidate_len; } |
| |
| private: |
| const char *m_goal; |
| size_t m_goal_len; |
| candidate_t m_best_candidate; |
| edit_distance_t m_best_distance; |
| size_t m_best_candidate_len; |
| }; |
| |
| #endif /* GCC_SPELLCHECK_H */ |