""" Brat connectors. """
from typing import Any
from typing import Callable
from typing import Iterable
from typing import List
from typing import Optional

from iamsystem.brat.formatter import ContSeqFormatter
from iamsystem.matcher.api import IAnnotation
from iamsystem.matcher.api import IBratFormatter
[docs]
class BratEntity:
    r"""A text-bound Brat entity annotation.

    The Brat standoff documentation (https://brat.nlplab.org/standoff.html)
    describes it this way: 'Each entity annotation has a unique ID and is
    defined by type (e.g. Person or Organization). and the span of
    characters containing the entity mention (represented as a "start end"
    offset pair).'

    Serialized form: ID \t TYPE START END[;START END]* \t TEXT.
    """

    def __init__(
        self,
        entity_id: str,
        brat_type: str,
        offsets: str,
        text: str,
    ):
        """Build a Brat entity.

        :param entity_id: the unique entity ID (^T[0-9]+$). Only the
            leading 'T' is verified.
        :param brat_type: the Brat entity type (see Brat documentation).
        :param offsets: the "start end" offsets, already formatted as the
            string to write in the annotation line.
        :param text: the document substring covered by the offsets.
        :raises ValueError: if entity_id is empty or does not begin
            with 'T'.
        """
        # Validate first so that an invalid ID never yields an entity.
        self.entity_id = self._check_entity_id(entity_id)
        self.brat_type, self.offsets = brat_type, offsets
        self.text = text

    @staticmethod
    def _check_entity_id(entity_id: str) -> str:
        """Return entity_id unchanged when it begins with 'T'.

        :raises ValueError: if entity_id is empty or begins with
            another character.
        """
        if len(entity_id) > 0 and entity_id[0] == "T":
            return entity_id
        raise ValueError("Brat entity ID must start by a T")

    def __str__(self):
        """Serialize the entity as one line of a Brat '.ann' file."""
        # A tab separates the three fields; type and offsets share the
        # middle field, separated by a single space.
        head = f"{self.entity_id}\t{self.brat_type}"
        tail = f"{self.offsets}\t{self.text}"
        return f"{head} {tail}"
[docs]
class BratNote:
    r"""A Brat note attached to another annotation.

    See https://brat.nlplab.org/standoff.html. Brat notes store additional
    information on a detected entity.

    Serialized form: #ID \t TYPE REFID \t NOTE
    """

    #: BratNote type. Replace by 'AnnotatorNotes' to be human writable
    #: in Brat interface.
    TYPE = "IAMSYSTEM"

    def __init__(self, note_id: str, ref_id: str, note: str):
        """Build a Brat note.

        :param note_id: the unique note ID (^#[0-9]+$). Only the leading
            '#' is verified.
        :param ref_id: the ID of the annotated element. For a BratEntity
            the format is (^T[0-9]+$). Only the leading 'T' is verified.
        :param note: any string comment.
        :raises ValueError: if note_id does not begin with '#' or ref_id
            does not begin with 'T'.
        """
        # The note ID is validated before the reference ID, so an invalid
        # note ID is the error reported when both are wrong.
        checked_note_id = self._check_note_id(note_id)
        checked_ref_id = self._check_ref_id(ref_id)
        self.note_id = checked_note_id
        self.ref_id = checked_ref_id
        self.note = note

    @staticmethod
    def _check_note_id(note_id: str) -> str:
        """Return note_id unchanged when it begins with '#'.

        :raises ValueError: if note_id is empty or begins with another
            character.
        """
        if len(note_id) > 0 and note_id[0] == "#":
            return note_id
        raise ValueError("Brat note ID must start by a #")

    @staticmethod
    def _check_ref_id(ref_id: str) -> str:
        """Return ref_id unchanged when it begins with 'T'.

        :raises ValueError: if ref_id is empty or begins with another
            character.
        """
        if len(ref_id) > 0 and ref_id[0] == "T":
            return ref_id
        raise ValueError("Brat ref ID must start by a T")

    def __str__(self):
        """Serialize the note as one line of a Brat '.ann' file."""
        # The type is read from BratNote itself, not from the instance.
        head = f"{self.note_id}\t{BratNote.TYPE}"
        tail = f"{self.ref_id}\t{self.note}"
        return f"{head} {tail}"
# Type alias for a function that receives an annotation and returns a string;
# BratDocument calls such a function to build the text of each Brat note.
get_note_fun = Callable[[IAnnotation], str]
def get_note_keyword_label(annot: IAnnotation) -> str:
    """Build a note from the first keyword of an annotation.

    :param annot: the annotation whose 'keywords' sequence is read.
    :return: str() applied to the first keyword of the annotation.
    """
    first_keyword = annot.keywords[0]
    return str(first_keyword)
[docs]
class BratDocument:
    """Class representing a Brat Document containing Brat's annotations,
    namely Brat Entity and Brat Note in this package.

    A BratDocument should be linked to a single text document.
    Entities and notes can be serialized in a text file with 'ann' extension,
    one per line. See https://brat.nlplab.org/standoff.html
    """

    def __init__(self, brat_formatter: Optional[IBratFormatter] = None):
        """Create a Brat Document.

        :param brat_formatter: a strategy to create Brat annotations span
            (text and offsets) from an annotation. Default to None, in
            which case a ContSeqFormatter instance is used.
        """
        self.brat_entities: List[BratEntity] = []
        self.brat_notes: List[BratNote] = []
        # Function building the note text of each annotation added with
        # add_annots; reassign this attribute to customize the notes.
        self.get_note: get_note_fun = get_note_keyword_label
        self.brat_formatter = brat_formatter or ContSeqFormatter()

    def add_annots(
        self,
        annots: List[IAnnotation],
        keyword_attr: Optional[str] = None,
        brat_type: Optional[str] = None,
    ) -> None:
        """Add iamsystem annotations to convert them to Brat format.

        One Brat entity and one Brat note referencing it are created for
        each annotation.

        :param annots: a list of :class:`~iamsystem.Annotation`,
            :class:`~iamsystem.Matcher` output.
        :param keyword_attr: the attribute name of a
            :class:`~iamsystem.IKeyword` that stores brat_type. It is read
            on the first keyword of each annotation and takes precedence
            over brat_type. Default to None. If None, brat_type parameter
            must be used.
        :param brat_type: A string, the Brat entity type for all these
            annotations. Default to None. If None, keyword_attr parameter
            must be used.
        :return: None
        :raises ValueError: if both keyword_attr and brat_type are None.
        """
        if keyword_attr is None and brat_type is None:
            raise ValueError("keyword_attr or brat_type argument must be set.")
        for annot in annots:
            if keyword_attr is not None:
                b_type = getattr(annot.keywords[0], keyword_attr)
            else:
                b_type = brat_type
            text, offsets = self.brat_formatter.get_text_and_offsets(annot)
            brat_entity = BratEntity(
                entity_id=self._get_entity_id(),
                brat_type=b_type,
                offsets=offsets,
                text=self._escape_text(text),
            )
            self.brat_entities.append(brat_entity)
            brat_note = BratNote(
                note_id=self._get_note_id(),
                ref_id=brat_entity.entity_id,
                note=self.get_note(annot),
            )
            self.brat_notes.append(brat_note)

    def add_entity(self, brat_type: str, offsets: str, text: str) -> None:
        """Add a Brat Entity. No Brat note is created.

        :param brat_type: A Brat entity type (see Brat documentation).
        :param offsets: the annotation offsets, already formatted as a
            Brat string: 'START END[;START END]*'. Several (start,end)
            pairs are possible since the tokens can be discontinuous.
        :param text: document substring using (start,end) offsets
            (not the document itself). Line breaks are escaped, as done
            by add_annots, to keep the entity on a single line.
        :return: None
        """
        brat_entity = BratEntity(
            entity_id=self._get_entity_id(),
            brat_type=brat_type,
            offsets=offsets,
            text=self._escape_text(text),
        )
        self.brat_entities.append(brat_entity)

    def get_entities(self) -> Iterable[BratEntity]:
        """An iterable of Brat entities."""
        return iter(self.brat_entities)

    def get_notes(self) -> Iterable[BratNote]:
        """An iterable of Brat notes."""
        return iter(self.brat_notes)

    def entities_to_string(self) -> str:
        """Brat entities in the Brat format ready to be serialized
        to '.ann' text file, one entity per line."""
        return "\n".join(str(brat_entity) for brat_entity in self.brat_entities)

    def notes_to_string(self) -> str:
        """Brat notes in the Brat format ready to be serialized
        to '.ann' text file, one note per line."""
        return "\n".join(str(brat_note) for brat_note in self.brat_notes)

    def __str__(self):
        """Return the Brat string format of all entities then all notes."""
        # Empty sections are skipped instead of stripping the whole output:
        # stripping would also remove whitespace that belongs to the last
        # annotation (e.g. the trailing tab of an empty note).
        sections = (self.entities_to_string(), self.notes_to_string())
        return "\n".join(section for section in sections if section)

    @staticmethod
    def _escape_text(text: str) -> str:
        """Escape line breaks so that an annotation stays on one line."""
        return text.replace("\n", "\\n")

    def _get_entity_id(self) -> str:
        """Build an entity id from the number of entities already stored."""
        return f"T{len(self.brat_entities) + 1}"

    def _get_note_id(self) -> str:
        """Build a note id from the number of notes already stored."""
        return f"#{len(self.brat_notes) + 1}"
[docs]
class BratWriter:
    """Helpers writing IAMsystem annotations in Brat format through a
    caller-supplied write function (typically the one of a text file)."""

    @classmethod
    def saveEntities(
        cls, brat_entities: Iterable[BratEntity], write: Callable[[str], Any]
    ) -> None:
        """Write Brat entities, one per line.

        :param brat_entities: an iterable of Brat entities.
        :param write: a write function (ex: f.write from
            'with(open(filename, 'w')) as f:')
        :return: None
        """
        # Each entity is written first, then its line terminator.
        for entity_line in map(str, brat_entities):
            write(entity_line)
            write("\n")

    @classmethod
    def saveNotes(
        cls, brat_notes: Iterable[BratNote], write: Callable[[str], Any]
    ) -> None:
        """Write Brat notes, one per line.

        :param brat_notes: an iterable of Brat notes.
        :param write: a write function (ex: f.write from
            'with(open(filename, 'w')) as f:')
        :return: None
        """
        # Each note is written first, then its line terminator.
        for note_line in map(str, brat_notes):
            write(note_line)
            write("\n")