spaCy
This package provides a stateful spaCy component that adds the iamsystem algorithm to a spaCy pipeline. Since a Matcher configuration is not JSON-serializable, the matcher’s parameters are passed through registered functions:
1from typing import Iterable
2from typing import List
3
4import spacy
5
6from spacy.lang.fr import French
7
8from iamsystem import Abbreviations
9from iamsystem import Entity
10from iamsystem import FuzzyAlgo
11from iamsystem import IKeyword
12from iamsystem import IStopwords
13from iamsystem import Terminology
14from iamsystem import french_tokenizer
15from iamsystem.spacy import IAMsystemSpacy # noqa
16from iamsystem.spacy import IsStopSpacy
17from iamsystem.spacy import TokenSpacyAdapter
18
@spacy.registry.misc("umls_ents.v1")
def get_termino_umls() -> Iterable[IKeyword]:
    """Build an imaginary set of UMLS entities.

    Registered in spaCy's ``misc`` registry as ``"umls_ents.v1"`` so that a
    pipeline config can refer to it by name.

    Returns:
        A ``Terminology`` holding two entities: "Insuffisance Cardiaque"
        (I50.9) and "Insuffisance Cardiaque Gauche" (I50.1).
    """
    termino = Terminology()
    entities = [
        Entity("Insuffisance Cardiaque", "I50.9"),
        Entity("Insuffisance Cardiaque Gauche", "I50.1"),
    ]
    termino.add_keywords(keywords=entities)
    return termino
27
@spacy.registry.misc("fuzzy_algos_short_notes.v1")
def get_fuzzy_algos_short_notes() -> List[FuzzyAlgo]:
    """Build an imaginary set of fuzzy algorithms for medical short notes.

    Registered in spaCy's ``misc`` registry as
    ``"fuzzy_algos_short_notes.v1"``.

    Returns:
        A one-element list containing an ``Abbreviations`` instance with two
        short forms: "ins" -> "insuffisance" and
        "ic" -> "insuffisance cardiaque".
    """
    tokenizer = french_tokenizer()
    abbs = Abbreviations(name="French medical abbreviations")
    # The same tokenizer is passed for every abbreviation that is added.
    abbs.add(
        short_form="ins",
        long_form="insuffisance",
        tokenizer=tokenizer,
    )
    abbs.add(
        short_form="ic",
        long_form="insuffisance cardiaque",
        tokenizer=tokenizer,
    )
    return [abbs]
42
@spacy.registry.misc("stopwords_spacy.v1")
def get_stopwords_short_notes() -> IStopwords[TokenSpacyAdapter]:
    """Use spaCy's stopword list.

    Registered in spaCy's ``misc`` registry as ``"stopwords_spacy.v1"``.

    Returns:
        A new ``IsStopSpacy`` instance.
    """
    return IsStopSpacy()
48
# Build a French pipeline and append the "iamsystem" component to it.
nlp = French()
nlp.add_pipe(
    "iamsystem",
    name="iamsystem",
    last=True,
    config={
        # Non-JSON-serializable parameters are supplied by naming the
        # functions registered under "@misc".
        "keywords": {"@misc": "umls_ents.v1"},
        "stopwords": {"@misc": "stopwords_spacy.v1"},
        "fuzzy_algos": {"@misc": "fuzzy_algos_short_notes.v1"},
        "w": 1,
        "remove_nested_annots": True,
    },
)
doc = nlp("ic gauche")
spans = doc.spans["iamsystem"]
# Plain assert: this is module-level code, so there is no TestCase instance.
assert len(spans) == 1
for span in spans:
    print(span._.iamsystem)
# ic gauche 0 9 Insuffisance Cardiaque Gauche (I50.1)
See IAMsystemSpacy to configure this component.