From 134398bdcde35591a0472a5b3c3b76683adf561e Mon Sep 17 00:00:00 2001 From: kirle Date: Fri, 6 Jun 2025 02:14:24 +0200 Subject: [PATCH] Modified beam_search() to work with multi-character tokens --- src/duplex.rs | 9 ++++++--- src/search.rs | 10 +++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/duplex.rs b/src/duplex.rs index 4a46e60..f89075a 100644 --- a/src/duplex.rs +++ b/src/duplex.rs @@ -635,15 +635,18 @@ pub fn beam_search, E: Data>( } } - let mut sequence = String::new(); + let mut tokens: Vec<&str> = Vec::new(); if beam[0].node != ROOT_NODE { for label in suffix_tree.iter_from_no_data(beam[0].node) { - sequence.push_str(&alphabet[label + 1]); + tokens.push(&alphabet[label + 1]); } } - Ok(sequence.chars().rev().collect()) + tokens.reverse(); + let sequence = tokens.concat(); + + Ok(sequence) } pub fn crf_beam_search, E: Data>( diff --git a/src/search.rs b/src/search.rs index aa8d0b6..bf2f023 100644 --- a/src/search.rs +++ b/src/search.rs @@ -283,17 +283,21 @@ pub fn beam_search>( } let mut path = Vec::new(); - let mut sequence = String::new(); + let mut tokens: Vec<&str> = Vec::new(); if beam[0].node != ROOT_NODE { for (label, &time) in suffix_tree.iter_from(beam[0].node) { path.push(time); - sequence.push_str(&alphabet[label + 1]); + tokens.push(&alphabet[label + 1]); } } path.reverse(); - Ok((sequence.chars().rev().collect::(), path)) + tokens.reverse(); + + let sequence = tokens.concat(); + + Ok((sequence, path)) } fn find_max(