From dad70eb68d37a705a5b114f82d52edc653bccf5e Mon Sep 17 00:00:00 2001 From: Excosy Date: Thu, 2 Jul 2026 22:32:04 +0800 Subject: [PATCH 1/2] Migrate obsoleted editdistance to rapidfuzz --- funasr/metrics/common.py | 10 ++++------ setup.py | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/funasr/metrics/common.py b/funasr/metrics/common.py index 2443e0dc5..49eef6856 100644 --- a/funasr/metrics/common.py +++ b/funasr/metrics/common.py @@ -10,6 +10,7 @@ import logging import sys +from rapidfuzz.distance import Levenshtein from itertools import groupby import numpy as np import six @@ -155,7 +156,6 @@ def calculate_cer_ctc(self, ys_hat, ys_pad): :return: average sentence-level CER score :rtype float """ - import editdistance cers, char_ref_lens = [], [] for i, y in enumerate(ys_hat): @@ -175,7 +175,7 @@ def calculate_cer_ctc(self, ys_hat, ys_pad): hyp_chars = "".join(seq_hat) ref_chars = "".join(seq_true) if len(ref_chars) > 0: - cers.append(editdistance.eval(hyp_chars, ref_chars)) + cers.append(Levenshtein.distance(hyp_chars, ref_chars)) char_ref_lens.append(len(ref_chars)) cer_ctc = float(sum(cers)) / sum(char_ref_lens) if cers else None @@ -214,14 +214,13 @@ def calculate_cer(self, seqs_hat, seqs_true): :return: average sentence-level CER score :rtype float """ - import editdistance char_eds, char_ref_lens = [], [] for i, seq_hat_text in enumerate(seqs_hat): seq_true_text = seqs_true[i] hyp_chars = seq_hat_text.replace(" ", "") ref_chars = seq_true_text.replace(" ", "") - char_eds.append(editdistance.eval(hyp_chars, ref_chars)) + char_eds.append(Levenshtein.distance(hyp_chars, ref_chars)) char_ref_lens.append(len(ref_chars)) return float(sum(char_eds)) / sum(char_ref_lens) @@ -233,13 +232,12 @@ def calculate_wer(self, seqs_hat, seqs_true): :return: average sentence-level WER score :rtype float """ - import editdistance word_eds, word_ref_lens = [], [] for i, seq_hat_text in enumerate(seqs_hat): seq_true_text = seqs_true[i] hyp_words = seq_hat_text.split() ref_words = seq_true_text.split() - word_eds.append(editdistance.eval(hyp_words, ref_words)) + word_eds.append(Levenshtein.distance(hyp_words, ref_words)) word_ref_lens.append(len(ref_words)) return float(sum(word_eds)) / sum(word_ref_lens) diff --git a/setup.py b/setup.py index fdc693e1d..88d7a71ab 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ "jaconv", # Speaker & evaluation "umap_learn", - "editdistance>=0.5.2", + "rapidfuzz", # Optional (training/enhancement) "torch_complex", "tensorboardX", @@ -44,7 +44,7 @@ ], # train: The modules invoked when training only. "train": [ - "editdistance", + "rapidfuzz", ], # all: The modules should be optionally installled due to some reason. # Please consider moving them to "install" occasionally From 71af8ebada96a5c0f64872c8d59c27a5bfaf3ede Mon Sep 17 00:00:00 2001 From: Excosy Date: Thu, 2 Jul 2026 23:18:48 +0800 Subject: [PATCH 2/2] Adopt gemini suggestions --- funasr/metrics/common.py | 6 +++--- setup.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/funasr/metrics/common.py b/funasr/metrics/common.py index 49eef6856..11bd2cdfb 100644 --- a/funasr/metrics/common.py +++ b/funasr/metrics/common.py @@ -9,9 +9,9 @@ import json import logging import sys +from itertools import groupby from rapidfuzz.distance import Levenshtein -from itertools import groupby import numpy as np import six @@ -222,7 +222,7 @@ def calculate_cer(self, seqs_hat, seqs_true): ref_chars = seq_true_text.replace(" ", "") char_eds.append(Levenshtein.distance(hyp_chars, ref_chars)) char_ref_lens.append(len(ref_chars)) - return float(sum(char_eds)) / sum(char_ref_lens) + return float(sum(char_eds)) / sum(char_ref_lens) if char_eds else None def calculate_wer(self, seqs_hat, seqs_true): """Calculate sentence-level WER score. @@ -240,4 +240,4 @@ def calculate_wer(self, seqs_hat, seqs_true): ref_words = seq_true_text.split() word_eds.append(Levenshtein.distance(hyp_words, ref_words)) word_ref_lens.append(len(ref_words)) - return float(sum(word_eds)) / sum(word_ref_lens) + return float(sum(word_eds)) / sum(word_ref_lens) if word_eds else None diff --git a/setup.py b/setup.py index 88d7a71ab..de2f5d4be 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ "jaconv", # Speaker & evaluation "umap_learn", - "rapidfuzz", + "rapidfuzz>=3.0.0", # Optional (training/enhancement) "torch_complex", "tensorboardX", @@ -44,7 +44,7 @@ ], # train: The modules invoked when training only. "train": [ - "rapidfuzz", + "rapidfuzz>=3.0.0", ], # all: The modules should be optionally installled due to some reason. # Please consider moving them to "install" occasionally