"""Spellwise library wrapper."""
import warnings
from enum import Enum
from typing import Dict
from typing import Iterable
from typing import List
from typing import Optional

from spellwise import CaverphoneOne
from spellwise import CaverphoneTwo
from spellwise import Editex
from spellwise import Levenshtein
from spellwise import Soundex
from spellwise import Typox
from typing_extensions import Protocol
from typing_extensions import TypedDict

from iamsystem.fuzzy.api import FuzzyAlgo
from iamsystem.fuzzy.api import NormLabelAlgo
from iamsystem.fuzzy.api import SynType
class Suggestions(TypedDict):
    """Shape of one entry in the list returned by a spellwise algorithm."""

    # The suggested word; this is the value turned into a synonym
    # by the wrapper.
    word: str
    # Distance value attached to the suggestion
    # (see spellwise documentation for its exact meaning).
    distance: int
class ESpellWiseAlgo(Enum):
    """Enumerated list of spellwise library algorithms.

    The value of each member is the corresponding spellwise class
    (not an instance); it is instantiated without argument by
    the wrapper that receives the member.
    See spellwise documentation for more information on each algorithm.
    """

    LEVENSHTEIN = Levenshtein
    SOUNDEX = Soundex
    EDITEX = Editex
    TYPOX = Typox
    CAVERPHONE_1 = CaverphoneOne
    CAVERPHONE_2 = CaverphoneTwo
class ISpellWiseAlgo(Protocol):
    """Structural interface expected from a spellwise algorithm."""

    def add_words(self, words: List[str]) -> None:
        """Register ``words`` with the spellwise algorithm."""
        ...

    def get_suggestions(
        self, query_word: str, max_distance: int = 2
    ) -> List[Suggestions]:
        """Return the suggestions found for ``query_word``.

        :param query_word: the word to look up.
        :param max_distance: distance threshold
            (see spellwise documentation).
        :return: a list of :class:`Suggestions`.
        """
        ...
class SpellWiseWrapper(NormLabelAlgo):
    """A :class:`~iamsystem.FuzzyAlgo` that wraps an algorithm from
    the spellwise library.

    Words shorter than ``min_nb_char`` and words registered with
    :py:meth:`add_words_to_ignore` are never handed to spellwise,
    neither when adding words nor when searching synonyms.
    """

    def __init__(
        self,
        spellwise_algo: ESpellWiseAlgo,
        max_distance: int,
        min_nb_char: int = 5,
        name: Optional[str] = None,
    ):
        """Create an instance to leverage a spellwise algorithm.

        :param spellwise_algo: A value from :class:`ESpellWiseAlgo`
            enumerated list.
        :param max_distance: maximum edit distance
            (see spellwise documentation).
        :param min_nb_char: the minimum number of characters a word
            must have not to be ignored.
        :param name: a name given to this algorithm.
            Default: the name of the ``spellwise_algo`` enum member.
        """
        algo_name = spellwise_algo.name if name is None else name
        super().__init__(algo_name)
        # A dict with None values, used only for membership tests.
        self._tokens2ignore: Dict[str, None] = {}
        # The enum value is the spellwise class: instantiate it here.
        self._suggester: ISpellWiseAlgo = spellwise_algo.value()
        self._max_distance = max_distance
        self._min_nb_char = min_nb_char

    @property
    def max_distance(self) -> int:
        """Maximum edit distance (see spellwise documentation)."""
        return self._max_distance

    @max_distance.setter
    def max_distance(self, value: int) -> None:
        """Set the maximum edit distance."""
        self._max_distance = value

    @property
    def min_nb_char(self) -> int:
        """The minimum number of characters a word must have
        not to be ignored."""
        return self._min_nb_char

    @min_nb_char.setter
    def min_nb_char(self, value: int) -> None:
        """Set the minimum number of characters a word must have."""
        self._min_nb_char = value

    def add_words(self, words: Iterable[str], warn: bool = False) -> None:
        """A list of possible word synonyms, in general all the tokens
        of your keywords. An easy way to provide these tokens is to call
        :py:meth:`~iamsystem.Matcher.get_keywords_unigrams` method after
        you added your keywords to the matcher instance.

        Words that are too short or that were declared with
        :py:meth:`add_words_to_ignore` are filtered out before being
        passed to the spellwise algorithm.

        :param words: A list of possible synonyms.
        :param warn: raise a warning if a word added is ignored.
            Default False.
        :return: None.
        """
        # Materialize once: the iterable is counted and filtered, and it
        # may be a one-shot generator.
        words = list(words)
        words_filtered = [
            word for word in words if not self._is_a_word_to_ignore(word=word)
        ]
        n_removed = len(words) - len(words_filtered)
        if warn and n_removed:
            # stacklevel=2 makes the warning point at the caller's line
            # rather than at this library function.
            warnings.warn(
                f"{n_removed} words weren't added to fuzzy algo '{self.name}'"
                f" after filtering",
                stacklevel=2,
            )
        self._suggester.add_words(words=words_filtered)

    def add_words_to_ignore(self, words: Iterable[str]) -> None:
        """Add words that the algorithm will ignore: no string distance
        will be computed.

        :param words: the words to ignore.
        :return: None.
        """
        for word in words:
            self._tokens2ignore[word] = None

    def _is_a_word_to_ignore(self, word: str) -> bool:
        """Check if this word must be ignored right away.

        A word is ignored when it has fewer than ``min_nb_char``
        characters or when it was added to the words to ignore.
        """
        return len(word) < self._min_nb_char or word in self._tokens2ignore

    def get_syns_of_word(self, word: str) -> Iterable[SynType]:
        """Returns closest words if the word is not a word to ignore.

        :param word: the word for which synonyms are searched.
        :return: ``FuzzyAlgo.NO_SYN`` if the word is ignored or if
            spellwise has no suggestion, otherwise one synonym per
            suggested word.
        """
        if self._is_a_word_to_ignore(word):
            return FuzzyAlgo.NO_SYN
        suggs: List[Suggestions] = self._suggester.get_suggestions(
            query_word=word, max_distance=self._max_distance
        )
        if not suggs:
            return FuzzyAlgo.NO_SYN
        return [self.word_to_syn(sugg.get("word", "")) for sugg in suggs]