spaCy

This package provides a stateful spaCy component that adds the iamsystem algorithm to a spaCy pipeline. Since a Matcher configuration is not JSON serializable, the matcher’s parameters are passed through registered functions:

 1from typing import Iterable
 2from typing import List
 3
 4import spacy
 5
 6from spacy.lang.fr import French
 7
 8from iamsystem import Abbreviations
 9from iamsystem import Entity
10from iamsystem import FuzzyAlgo
11from iamsystem import IKeyword
12from iamsystem import IStopwords
13from iamsystem import Terminology
14from iamsystem import french_tokenizer
15from iamsystem.spacy import IAMsystemSpacy  # noqa
16from iamsystem.spacy import IsStopSpacy
17from iamsystem.spacy import TokenSpacyAdapter
18
@spacy.registry.misc("umls_ents.v1")
def get_termino_umls() -> Iterable[IKeyword]:
    """Return an imaginary set of UMLS entities.

    Registered in spaCy's ``misc`` registry as ``"umls_ents.v1"`` so that a
    pipeline config can reference it by name.

    Returns:
        A ``Terminology`` holding two entities: "Insuffisance Cardiaque"
        (I50.9) and "Insuffisance Cardiaque Gauche" (I50.1).
    """
    terminology = Terminology()
    terminology.add_keywords(
        keywords=[
            Entity("Insuffisance Cardiaque", "I50.9"),
            Entity("Insuffisance Cardiaque Gauche", "I50.1"),
        ]
    )
    return terminology
27
@spacy.registry.misc("fuzzy_algos_short_notes.v1")
def get_fuzzy_algos_short_notes() -> List[FuzzyAlgo]:
    """Return an imaginary set of fuzzy algorithms for medical short notes.

    Registered in spaCy's ``misc`` registry as
    ``"fuzzy_algos_short_notes.v1"``.

    Returns:
        A one-element list containing an ``Abbreviations`` instance with
        the entries "ins" -> "insuffisance" and
        "ic" -> "insuffisance cardiaque".
    """
    fr_tokenizer = french_tokenizer()
    abbreviations = Abbreviations(name="French medical abbreviations")
    # (short form, long form) pairs, added in this order.
    known_abbreviations = (
        ("ins", "insuffisance"),
        ("ic", "insuffisance cardiaque"),
    )
    for short_form, long_form in known_abbreviations:
        abbreviations.add(
            short_form=short_form,
            long_form=long_form,
            tokenizer=fr_tokenizer,
        )
    return [abbreviations]
42
@spacy.registry.misc("stopwords_spacy.v1")
def get_stopwords_short_notes() -> IStopwords[TokenSpacyAdapter]:
    """Return a stopwords object that relies on spaCy's stopword list.

    Registered in spaCy's ``misc`` registry as ``"stopwords_spacy.v1"``.
    """
    return IsStopSpacy()
48
# Build a French pipeline and append the iamsystem component last.
# Matcher parameters that are not JSON serializable (keywords, stopwords,
# fuzzy algorithms) are supplied as "@misc" references to the functions
# registered above; spaCy resolves them when the component is created.
nlp = French()
nlp.add_pipe(
    "iamsystem",
    name="iamsystem",
    last=True,
    config={
        "keywords": {"@misc": "umls_ents.v1"},
        "stopwords": {"@misc": "stopwords_spacy.v1"},
        "fuzzy_algos": {"@misc": "fuzzy_algos_short_notes.v1"},
        "w": 1,
        "remove_nested_annots": True,
    },
)
doc = nlp("ic gauche")
# The component stores its annotations in the "iamsystem" span group.
spans = doc.spans["iamsystem"]
# Exactly one annotation is expected for this text. A plain assert is used
# because this snippet runs at module level, where no TestCase `self` exists.
assert len(spans) == 1
for span in spans:
    print(span._.iamsystem)
# ic gauche	0 9	Insuffisance Cardiaque Gauche (I50.1)

See IAMsystemSpacy to configure this component.