spaCy
This package provides a stateful spaCy component that adds the iamsystem algorithm to a spaCy pipeline. Since a Matcher configuration is not JSON serializable, the matcher’s parameters are passed through registered functions:
from typing import Iterable
from typing import List
import spacy
from spacy.lang.fr import French
from iamsystem import Abbreviations
from iamsystem import FuzzyAlgo
from iamsystem.spacy import IAMsystemSpacy # noqa
from iamsystem.spacy import IsStopSpacy
from iamsystem.spacy import TokenSpacyAdapter
from iamsystem import IKeyword
from iamsystem import IStopwords
from iamsystem import Term
from iamsystem import Terminology
from iamsystem import french_tokenizer
@spacy.registry.misc("umls_terms.v1")
def get_termino_umls() -> Iterable[IKeyword]:
    """Build a small, made-up terminology standing in for UMLS terms.

    Returns:
        A ``Terminology`` holding two heart-failure ``Term`` entries,
        each made of a French label and a code.
    """
    keywords = [
        Term("Insuffisance Cardiaque", "I50.9"),
        Term("Insuffisance Cardiaque Gauche", "I50.1"),
    ]
    terminology = Terminology()
    terminology.add_keywords(keywords=keywords)
    return terminology
@spacy.registry.misc("fuzzy_algos_short_notes.v1")
def get_fuzzy_algos_short_notes() -> List[FuzzyAlgo]:
    """Build a made-up set of fuzzy algorithms for medical short notes.

    Returns:
        A one-element list containing an ``Abbreviations`` algorithm
        loaded with two French abbreviations.
    """
    abbreviations = Abbreviations(name="French medical abbreviations")
    tokenizer = french_tokenizer()
    # (short form, long form) pairs, added in the same order as before.
    short_to_long = (
        ("ins", "insuffisance"),
        ("ic", "insuffisance cardiaque"),
    )
    for short_form, long_form in short_to_long:
        abbreviations.add(
            short_form=short_form,
            long_form=long_form,
            tokenizer=tokenizer,
        )
    return [abbreviations]
@spacy.registry.misc("stopwords_spacy.v1")
def get_stopwords_short_notes() -> IStopwords[TokenSpacyAdapter]:
    """Provide stopwords based on spaCy's stopword list.

    Returns:
        A new ``IsStopSpacy`` instance.
    """
    return IsStopSpacy()
# Settings for the "iamsystem" component. Each "@misc" entry names a
# function registered with ``spacy.registry.misc``; spaCy resolves it
# when the component is created.
iamsystem_config = {
    "keywords": {"@misc": "umls_terms.v1"},
    "stopwords": {"@misc": "stopwords_spacy.v1"},
    "fuzzy_algos": {"@misc": "fuzzy_algos_short_notes.v1"},
    "w": 1,
    "remove_nested_annots": True,
}

nlp = French()
nlp.add_pipe(
    "iamsystem", name="iamsystem", last=True, config=iamsystem_config
)

# Run the pipeline on a short note and print every annotation stored
# under the "iamsystem" span group.
doc = nlp("ic gauche")
spans = doc.spans["iamsystem"]
for span in spans:
    print(span._.iamsystem)
# ic gauche 0 9 Insuffisance Cardiaque Gauche (I50.1)
See IAMsystemSpacy for the configuration options of this component.