Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion .github/workflows/diarizer-benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ jobs:
path: ~/FluidAudioDatasets/ami_official
key: ${{ runner.os }}-ami-dataset-${{ hashFiles('Sources/FluidAudioCLI/DatasetParsers/DatasetDownloader.swift') }}

- name: Cache AMI annotations
uses: actions/cache@v4
with:
path: Datasets/ami_public_1.6.2
key: ${{ runner.os }}-ami-annotations-1.6.2

- name: Build package
run: swift build -c release

Expand Down Expand Up @@ -123,7 +129,9 @@ jobs:
fi

- name: Comment PR with Benchmark Results
if: always()
# Only comment when metrics were actually extracted — a failed run must
# show up as a red check, not a comment with garbage numbers (issue #752)
if: steps.extract.outcome == 'success'
uses: actions/github-script@v7
with:
script: |
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/offline-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ jobs:
with:
swift-version: "6.1"

- name: Cache AMI annotations
uses: actions/cache@v4
with:
path: Datasets/ami_public_1.6.2
key: ${{ runner.os }}-ami-annotations-1.6.2

- name: Build package
run: swift build -c release

Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/sortformer-benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ jobs:
path: ~/FluidAudioDatasets/ami_official
key: ${{ runner.os }}-ami-dataset

- name: Cache AMI annotations
uses: actions/cache@v4
with:
path: Datasets/ami_public_1.6.2
key: ${{ runner.os }}-ami-annotations-1.6.2

- name: Build package
run: swift build -c release

Expand Down
176 changes: 109 additions & 67 deletions Documentation/Benchmarks.md

Large diffs are not rendered by default.

17 changes: 13 additions & 4 deletions Sources/FluidAudioCLI/Commands/DiarizationBenchmark.swift
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,13 @@ enum StreamDiarizationBenchmark {
}
}

// Fail loudly if no meeting produced a result (e.g. missing ground truth
// annotations) instead of exiting cleanly with empty metrics (issue #752).
guard !allResults.isEmpty else {
logger.error("❌ Benchmark produced no results — see errors above")
exit(1)
}

// Print final summary
printFinalSummary(results: allResults)

Expand Down Expand Up @@ -765,8 +772,9 @@ enum StreamDiarizationBenchmark {
let totalElapsed = Date().timeIntervalSince(startTime)
let finalRTFx = totalDuration / totalElapsed

// Load ground truth
let groundTruth = await AMIParser.loadAMIGroundTruth(
// Load ground truth (throws if annotations are missing — never scores
// against placeholder data, see issue #752)
let groundTruth = try AMIParser.loadAMIGroundTruth(
for: meetingName,
duration: Float(totalDuration)
)
Expand Down Expand Up @@ -870,8 +878,9 @@ enum StreamDiarizationBenchmark {
logger.info(" RTFx: \(String(format: "%.1f", finalRTFx))x")
}

// Load ground truth
let groundTruth = await AMIParser.loadAMIGroundTruth(
// Load ground truth (throws if annotations are missing — never scores
// against placeholder data, see issue #752)
let groundTruth = try AMIParser.loadAMIGroundTruth(
for: meetingName,
duration: Float(totalDuration)
)
Expand Down
2 changes: 1 addition & 1 deletion Sources/FluidAudioCLI/Commands/LSEENDBenchmark.swift
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ enum LSEENDBenchmark {

if dataset == .ami {
print(" [REF] Using AMI word-aligned annotations")
referenceSegments = await AMIParser.loadWordAlignedDERReference(
referenceSegments = try AMIParser.loadWordAlignedDERReference(
for: meetingName,
duration: duration
)
Expand Down
2 changes: 1 addition & 1 deletion Sources/FluidAudioCLI/Commands/SortformerBenchmark.swift
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ enum SortformerBenchmark {
// Fall back to AMI word-aligned annotations if no RTTM available (AMI only)
if groundTruth.isEmpty && dataset == .ami {
print(" [RTTM] No RTTM file, falling back to AMI word-aligned annotations")
groundTruth = await AMIParser.loadWordAlignedGroundTruth(
groundTruth = try AMIParser.loadWordAlignedGroundTruth(
for: meetingName,
duration: duration
)
Expand Down
190 changes: 78 additions & 112 deletions Sources/FluidAudioCLI/DatasetParsers/AMIParser.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,25 @@
import FluidAudio
import Foundation

/// Failures raised while locating AMI reference annotations on disk.
///
/// Benchmarks are expected to abort on these rather than continue with a
/// synthetic reference: a transient annotation download failure once produced
/// a bogus 80.8% DER report scored against placeholder ground truth
/// (issue #752).
enum AMIParserError: Error, LocalizedError {
    /// No searched location contained the required annotation layout.
    /// `subdirectory` names the annotation folder that was being looked for
    /// and is echoed back in the error message.
    case annotationsNotFound(subdirectory: String)

    /// Human-readable description: what was missing, the directory layout
    /// that is expected, and how to obtain the annotations.
    var errorDescription: String? {
        switch self {
        case .annotationsNotFound(let missingDirectory):
            // Three sentences joined by single spaces.
            let sentences = [
                "AMI annotations not found in any expected location.",
                "Expected structure: [path]/\(missingDirectory)/ AND [path]/corpusResources/meetings.xml.",
                "Run with --auto-download or download manually from https://groups.inf.ed.ac.uk/ami/download/",
            ]
            return sentences.joined(separator: " ")
        }
    }
}

/// AMI annotation parser and ground truth handling
struct AMIParser {
private static let logger = AppLogger(category: "AMIParser")
Expand Down Expand Up @@ -40,41 +59,31 @@ struct AMIParser {
return 4 // AMI meetings typically have 4 speakers
}

/// Load AMI ground truth annotations for a specific meeting
/// Load AMI ground truth annotations for a specific meeting.
///
/// Throws if annotations are missing or unparsable — never substitutes a
/// placeholder reference, so callers cannot silently score against fake data.
static func loadAMIGroundTruth(
for meetingId: String, duration: Float
) async
-> [TimedSpeakerSegment]
{
guard let validAmiDir = findAnnotationRoot(requiringSubdirectory: "segments") else {
logger.warning(" AMI annotations not found in any expected location")
logger.warning(
" 📁 Expected structure: [path]/segments/ AND [path]/corpusResources/meetings.xml"
)
logger.warning(
" 🔧 To download annotations: visit https://groups.inf.ed.ac.uk/ami/download/"
)
logger.warning(
" 📋 Using simplified placeholder ground truth (causes poor DER performance)"
)
return generateSimplifiedGroundTruth(duration: duration, speakerCount: 4)
for meetingId: String,
duration: Float,
searchRoots: [URL]? = nil
) throws -> [TimedSpeakerSegment] {
guard
let validAmiDir = findAnnotationRoot(
requiringSubdirectory: "segments", searchRoots: searchRoots)
else {
throw AMIParserError.annotationsNotFound(subdirectory: "segments")
}

logger.info(" 📖 Loading AMI annotations for meeting: \(meetingId)")

do {
let allSegments = try loadAMIGroundTruth(
for: meetingId,
in: validAmiDir,
duration: duration
)
logger.info(" Total segments loaded: \(allSegments.count)")
return allSegments
} catch {
logger.warning(" Failed to parse AMI annotations: \(error)")
logger.warning(" Using simplified placeholder instead")
return generateSimplifiedGroundTruth(duration: duration, speakerCount: 4)
}
let allSegments = try loadAMIGroundTruth(
for: meetingId,
in: validAmiDir,
duration: duration
)
logger.info(" Total segments loaded: \(allSegments.count)")
return allSegments
}

/// Internal hook for tests and benchmark helpers that need deterministic parsing
Expand Down Expand Up @@ -163,35 +172,22 @@ struct AMIParser {
static func loadFrameAlignedDERReference(
for meetingId: String,
duration: Float,
frameStep: Double = defaultReferenceFrameStepSeconds
) async -> [DERSpeakerSegment] {
guard let validAmiDir = findAnnotationRoot(requiringSubdirectory: "segments") else {
logger.warning(" AMI annotations not found in any expected location")
logger.warning(
" 📁 Expected structure: [path]/segments/ AND [path]/corpusResources/meetings.xml"
)
logger.warning(" 📋 Falling back to simplified placeholder ground truth")
return frameAlignedDERReference(
from: generateSimplifiedGroundTruth(duration: duration, speakerCount: 4),
frameStep: frameStep
)
frameStep: Double = defaultReferenceFrameStepSeconds,
searchRoots: [URL]? = nil
) throws -> [DERSpeakerSegment] {
guard
let validAmiDir = findAnnotationRoot(
requiringSubdirectory: "segments", searchRoots: searchRoots)
else {
throw AMIParserError.annotationsNotFound(subdirectory: "segments")
}

do {
return try loadFrameAlignedDERReference(
for: meetingId,
in: validAmiDir,
duration: duration,
frameStep: frameStep
)
} catch {
logger.warning(" Failed to parse AMI annotations: \(error)")
logger.warning(" Falling back to simplified placeholder ground truth")
return frameAlignedDERReference(
from: generateSimplifiedGroundTruth(duration: duration, speakerCount: 4),
frameStep: frameStep
)
}
return try loadFrameAlignedDERReference(
for: meetingId,
in: validAmiDir,
duration: duration,
frameStep: frameStep
)
}

static func loadFrameAlignedDERReference(
Expand All @@ -216,29 +212,22 @@ struct AMIParser {
static func loadWordAlignedGroundTruth(
for meetingId: String,
duration: Float,
mergeGap: Double = defaultMergeGapSeconds
) async -> [TimedSpeakerSegment] {
guard let validAmiDir = findAnnotationRoot(requiringSubdirectory: "words") else {
logger.warning(" AMI word annotations not found in any expected location")
logger.warning(
" 📁 Expected structure: [path]/words/ AND [path]/corpusResources/meetings.xml"
)
logger.warning(" 📋 Falling back to simplified placeholder ground truth")
return generateSimplifiedGroundTruth(duration: duration, speakerCount: 4)
mergeGap: Double = defaultMergeGapSeconds,
searchRoots: [URL]? = nil
) throws -> [TimedSpeakerSegment] {
guard
let validAmiDir = findAnnotationRoot(
requiringSubdirectory: "words", searchRoots: searchRoots)
else {
throw AMIParserError.annotationsNotFound(subdirectory: "words")
}

do {
return try loadWordAlignedGroundTruth(
for: meetingId,
in: validAmiDir,
duration: duration,
mergeGap: mergeGap
)
} catch {
logger.warning(" Failed to parse AMI word annotations: \(error)")
logger.warning(" Falling back to simplified placeholder ground truth")
return generateSimplifiedGroundTruth(duration: duration, speakerCount: 4)
}
return try loadWordAlignedGroundTruth(
for: meetingId,
in: validAmiDir,
duration: duration,
mergeGap: mergeGap
)
}

/// Internal hook for tests and benchmark helpers that need deterministic parsing
Expand Down Expand Up @@ -293,12 +282,14 @@ struct AMIParser {
static func loadWordAlignedDERReference(
for meetingId: String,
duration: Float,
mergeGap: Double = defaultMergeGapSeconds
) async -> [DERSpeakerSegment] {
let segments = await loadWordAlignedGroundTruth(
mergeGap: Double = defaultMergeGapSeconds,
searchRoots: [URL]? = nil
) throws -> [DERSpeakerSegment] {
let segments = try loadWordAlignedGroundTruth(
for: meetingId,
duration: duration,
mergeGap: mergeGap
mergeGap: mergeGap,
searchRoots: searchRoots
)
return segments.map {
DERSpeakerSegment(
Expand Down Expand Up @@ -330,34 +321,6 @@ struct AMIParser {
}
}

/// Generate synthetic placeholder segments for testing.
///
/// The span from 0 to `duration` is cut into `speakerCount * 2` equal slices
/// and slice `i` is labelled `"Speaker ((i % speakerCount) + 1)"`, so speakers
/// take turns round-robin and each one gets two turns. Every segment carries
/// the same constant 512-value embedding and a quality score of 1.0.
///
/// The output is not derived from any annotation data. Do not score a
/// benchmark against it: doing so once produced a bogus 80.8% DER report
/// (issue #752).
///
/// - Parameters:
///   - duration: Total length, in seconds, to cover with segments.
///   - speakerCount: Number of distinct placeholder speakers.
/// - Returns: `speakerCount * 2` back-to-back segments, or an empty array when
///   `speakerCount` is zero or negative.
static func generateSimplifiedGroundTruth(
    duration: Float, speakerCount: Int
)
    -> [TimedSpeakerSegment]
{
    // Zero speakers yields no segments; a negative count would otherwise trap
    // when the half-open range below is formed.
    guard speakerCount > 0 else { return [] }

    let segmentCount = speakerCount * 2
    let segmentDuration = duration / Float(segmentCount)
    let dummyEmbedding = [Float](repeating: 0.1, count: 512)

    return (0..<segmentCount).map { index in
        let startTime = Float(index) * segmentDuration
        return TimedSpeakerSegment(
            speakerId: "Speaker \((index % speakerCount) + 1)",
            embedding: dummyEmbedding,
            startTimeSeconds: startTime,
            // Clamp so no slice ends after `duration`.
            endTimeSeconds: min(startTime + segmentDuration, duration),
            qualityScore: 1.0
        )
    }
}

/// Generate consistent placeholder embeddings for each speaker
static func generatePlaceholderEmbedding(for participantId: String) -> [Float] {
// Generate a consistent embedding based on participant ID
Expand Down Expand Up @@ -388,8 +351,11 @@ struct AMIParser {
]
}

private static func findAnnotationRoot(requiringSubdirectory subdirectory: String) -> URL? {
for path in possibleAnnotationRoots() {
private static func findAnnotationRoot(
requiringSubdirectory subdirectory: String,
searchRoots: [URL]? = nil
) -> URL? {
for path in searchRoots ?? possibleAnnotationRoots() {
let requiredDir = path.appendingPathComponent(subdirectory)
let meetingsFile = path.appendingPathComponent("corpusResources/meetings.xml")
let hasRequiredDir = FileManager.default.fileExists(atPath: requiredDir.path)
Expand Down
19 changes: 16 additions & 3 deletions Sources/FluidAudioCLI/DatasetParsers/DatasetDownloader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -191,14 +191,27 @@ struct DatasetDownloader {
return
}

// Download and extract AMI manual annotations v1.6.2
// Download and extract AMI manual annotations v1.6.2.
// The Edinburgh server is occasionally flaky, so retry with backoff —
// a single transient failure here once poisoned a CI benchmark run
// with placeholder ground truth (issue #752).
let zipURL =
"https://groups.inf.ed.ac.uk/ami/AMICorpusAnnotations/ami_public_manual_1.6.2.zip"
let zipFile = annotationsDir.appendingPathComponent("ami_public_manual_1.6.2.zip")
let zipSuccess = await downloadAnnotationFile(from: zipURL, to: zipFile)

var zipSuccess = false
let maxAttempts = 3
for attempt in 1...maxAttempts {
zipSuccess = await downloadAnnotationFile(from: zipURL, to: zipFile)
if zipSuccess { break }
logger.warning("Annotation download attempt \(attempt)/\(maxAttempts) failed")
if attempt < maxAttempts {
try? await Task.sleep(nanoseconds: UInt64(attempt) * 2_000_000_000)
}
}

if !zipSuccess {
logger.error("Failed to download AMI annotations")
logger.error("Failed to download AMI annotations after \(maxAttempts) attempts")
return
}

Expand Down
Loading
Loading