From 732329abbf6916e3465cf2ba797ab72a2f33c7ec Mon Sep 17 00:00:00 2001 From: Jules SINTES Date: Fri, 30 Jun 2023 16:53:06 +0200 Subject: [PATCH 1/5] refactor: Rewrite to_rttm/lab and write_rttm/lab as partial methods from generic _serialize and _write methods --- pyannote/core/annotation.py | 62 ++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index fc2d17a..b3e6742 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -107,9 +107,11 @@ See :class:`pyannote.core.Annotation` for the complete reference. """ import itertools +import functools import warnings from collections import defaultdict from typing import ( + Callable, Hashable, Optional, Dict, @@ -388,31 +390,6 @@ def _iter_rttm(self) -> Iterator[Text]: f" {label} \n" ) - def to_rttm(self) -> Text: - """Serialize annotation as a string using RTTM format - - Returns - ------- - serialized: str - RTTM string - """ - return "".join([line for line in self._iter_rttm()]) - - def write_rttm(self, file: TextIO): - """Dump annotation to file using RTTM format - - Parameters - ---------- - file : file object - - Usage - ----- - >>> with open('file.rttm', 'w') as file: - ... annotation.write_rttm(file) - """ - for line in self._iter_rttm(): - file.write(line) - def _iter_lab(self) -> Iterator[Text]: """Generate lines for a LAB file for this annotation @@ -430,30 +407,43 @@ def _iter_lab(self) -> Iterator[Text]: raise ValueError(msg) yield f"{segment.start:.3f} {segment.start + segment.duration:.3f} {label}\n" - def to_lab(self) -> Text: - """Serialize annotation as a string using LAB format + def _serialize(self, iter_func : Callable) -> Text : + """Serialize annotation as a string given an iter function + Parameters + ---------- + iter_func : function + Function generating lines for a given format, e.g. "_iter_rttm","_iter_lab",etc. 
Returns ------- serialized: str - LAB string + String in the specified format """ - return "".join([line for line in self._iter_lab()]) - - def write_lab(self, file: TextIO): - """Dump annotation to file using LAB format + return "".join([line for line in iter_func(self)]) + + to_rttm = functools.partialmethod(_serialize, iter_func = _iter_rttm) # Serialize to RTTM string + to_lab = functools.partialmethod(_serialize, iter_func = _iter_lab) # Serialize to LAB string + to_audacity = functools.partialmethod(_serialize, iter_func = _iter_audacity) # Serialize to Audacity marker string + + def _write(self, file: TextIO, iter_func: Callable): + """Dump annotation to file using specified format Parameters ---------- file : file object - + iter_func : function Usage ----- - >>> with open('file.lab', 'w') as file: - ... annotation.write_lab(file) + >>> with open('file.txt', 'w') as file: + ... annotation.write_audacity(file) """ - for line in self._iter_lab(): + for line in iter_func(self): file.write(line) + + write_rttm = functools.partialmethod(_write, iter_func = _iter_rttm) # Dump annotation to RTTM file + write_lab = functools.partialmethod(_write, iter_func = _iter_lab) # Dump annotation to LAB file + write_audacity = functools.partialmethod(_write, iter_func = _iter_audacity) # Dump annotation to Audacity marker file + def crop(self, support: Support, mode: CropMode = "intersection") -> "Annotation": """Crop annotation to new support From 7aa2fb0d414eb8cfe58ad0b561dc93fcb3c53d72 Mon Sep 17 00:00:00 2001 From: Jules SINTES Date: Fri, 30 Jun 2023 16:56:47 +0200 Subject: [PATCH 2/5] feat: Add method to support audacity marker file serialization --- pyannote/core/annotation.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index b3e6742..6df837b 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -407,6 +407,17 @@ def _iter_lab(self) -> Iterator[Text]: raise 
ValueError(msg) yield f"{segment.start:.3f} {segment.start + segment.duration:.3f} {label}\n" + def _iter_audacity(self) -> Iterator[Text]: + """Generate lines for a audacity marker file for this annotation + + Returns + ------- + iterator: Iterator[str] + An iterator over audacity text lines + """ + for segment, _, label in self.itertracks(yield_label=True): + yield f"{segment.start:.3f}\t{segment.start + segment.duration:.3f}\t{label}\n" + def _serialize(self, iter_func : Callable) -> Text : """Serialize annotation as a string given an iter function From e6d86344d0be8334ada93764a831833de4beef25 Mon Sep 17 00:00:00 2001 From: Jules SINTES Date: Fri, 30 Jun 2023 16:59:48 +0200 Subject: [PATCH 3/5] feat: Add classmethods to create annotation from audacity marker and rttm files --- pyannote/core/annotation.py | 69 +++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index 6df837b..f915b4b 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -161,6 +161,75 @@ class Annotation: """ + @classmethod + def from_rttm( + cls, + rttm_file: TextIO, + uri: Optional[str] = None, + modality: Optional[str] = None, + ) -> "Annotation": + """Create annotation from rttm + + Parameters + ---------- + rttm_file : string, + path to the rttm file + uri : string, optional + name of annotated resource (e.g. 
audio or video file) + modality : string, optional + name of annotated modality + + Returns + ------- + annotation : Annotation + New annotation + + """ + segment_list = [] + for line in rttm_file: + splitted_line = line.rstrip().split(" ") + uri = uri if isinstance(uri,str) else splitted_line[1] + segment_list.append( + ( + Segment(start=float(splitted_line[3]), end=float(splitted_line[3]) + float(splitted_line[4])), + int(splitted_line[2]), + str(splitted_line[7]), + ) + ) + return Annotation.from_records(segment_list, uri, modality) + + @classmethod + def from_audacity( + cls, + audacity_file: str, + uri: Optional[str] = None, + modality: Optional[str] = None, + ) -> "Annotation": + """Create annotation from audacity marker file + + Parameters + ---------- + audacity_txt_file : string, + path to the rttm file + uri : string, optional + name of annotated resource (e.g. audio or video file) + modality : string, optional + name of annotated modality + + Returns + ------- + annotation : Annotation + New annotation + + """ + segment_list = [] + for line in audacity_file: + start, end, label = line.rstrip().split("\t") + segment_list.append( + (Segment(start=float(start), end=float(end)), 1, str(label)) + ) + return Annotation.from_records(segment_list, uri, modality) + @classmethod def from_df( cls, From 1c658296dbb7c152db2be66e08ee75af513b25ee Mon Sep 17 00:00:00 2001 From: Jules SINTES Date: Fri, 30 Jun 2023 17:29:37 +0200 Subject: [PATCH 4/5] fix: Handle whitespace inconsistency when creating annotation from rttm or audacity --- pyannote/core/annotation.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index f915b4b..39eef09 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -187,15 +187,17 @@ def from_rttm( """ segment_list = [] for line in rttm_file: - splitted_line = line.rstrip().split(" ") - uri = uri if isinstance(uri,str) else 
splitted_line[1] - segment_list.append( - ( - Segment(start=float(splitted_line[3]), end=float(splitted_line[3]) + float(splitted_line[4])), - int(splitted_line[2]), - str(splitted_line[7]), + if not line.isspace() : + line = ' '.join(line.split()) # Remove eventual multiple and trailing spaces in rttm line + splitted_line = line.rstrip().split(" ") + uri = uri if isinstance(uri,str) else splitted_line[1] + segment_list.append( + ( + Segment(start=float(splitted_line[3]), end=float(splitted_line[3]) + float(splitted_line[4])), + int(splitted_line[2]), + str(splitted_line[7]), + ) ) - ) return Annotation.from_records(segment_list, uri, modality) @classmethod @@ -224,10 +226,11 @@ def from_audacity( """ segment_list = [] for line in audacity_file: - start, end, label = line.rstrip().split("\t") - segment_list.append( - (Segment(start=float(start), end=float(end)), 1, str(label)) - ) + if not line.isspace() : + start, end, label = line.rstrip().split("\t") + segment_list.append( + (Segment(start=float(start), end=float(end)), 1, str(label)) + ) return Annotation.from_records(segment_list, uri, modality) @classmethod From fb603b7d61dfd3fbddda23313f5ad7ba55cd2334 Mon Sep 17 00:00:00 2001 From: Jules SINTES Date: Mon, 17 Jul 2023 11:10:37 +0200 Subject: [PATCH 5/5] fix : add exception in from_rttm to handle multiple uri in rttm file --- pyannote/core/annotation.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pyannote/core/annotation.py b/pyannote/core/annotation.py index 39eef09..c351a17 100755 --- a/pyannote/core/annotation.py +++ b/pyannote/core/annotation.py @@ -186,19 +186,22 @@ def from_rttm( """ segment_list = [] + default_uri = uri if isinstance(uri,str) else rttm_file.readline().split(" ")[1] # if not specified, default uri is read from the first line of the file for line in rttm_file: - if not line.isspace() : + if not line.isspace() and line.startswith("SPEAKER"): line = ' '.join(line.split()) # Remove eventual 
multiple and trailing spaces in rttm line - splitted_line = line.rstrip().split(" ") - uri = uri if isinstance(uri,str) else splitted_line[1] - segment_list.append( - ( - Segment(start=float(splitted_line[3]), end=float(splitted_line[3]) + float(splitted_line[4])), - int(splitted_line[2]), - str(splitted_line[7]), + _,segment_uri,channel,start,length,_,_,label,_,_ = line.rstrip().split(" ") + if segment_uri != default_uri and not isinstance(uri,str): + raise Exception("Provided input rttm file contains data for more than one audio file, please specify uri. Found at least 2: {} and {}".format(default_uri,segment_uri)) + elif segment_uri == default_uri : + segment_list.append( + ( + Segment(start=float(start), end=float(start) + float(length)), + int(channel), + str(label), + ) ) - ) - return Annotation.from_records(segment_list, uri, modality) + return Annotation.from_records(segment_list, default_uri, modality) @classmethod def from_audacity(