diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj index 8f398e3..2b5550d 100644 --- a/Cotabby.xcodeproj/project.pbxproj +++ b/Cotabby.xcodeproj/project.pbxproj @@ -14,6 +14,7 @@ 046C133967B32BBF9205EBB1 /* LLMIOFileHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8D610FCA3A97249DCCE7D0B8 /* LLMIOFileHandler.swift */; }; 078FDE669437D756678E9AB7 /* SettingsRowLabel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 907549CB913B40C28B953A5D /* SettingsRowLabel.swift */; }; 07D046D406411ED85AC5758A /* InputMonitorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = BAC01317B0B68E3C4125E421 /* InputMonitorTests.swift */; }; + 097B59F01FEC03651D5732A3 /* RepetitionGuard.swift in Sources */ = {isa = PBXBuildFile; fileRef = 04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */; }; 0A2DDD946654076675AC0FC6 /* LanguageCatalog.swift in Sources */ = {isa = PBXBuildFile; fileRef = BF4BB93056F291FD24EFAD22 /* LanguageCatalog.swift */; }; 0A3443AEE6540F11E5E6BF8F /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3E8E86A14090BC7BD13BA76 /* AppDelegate.swift */; }; 0A658BF137DBD0898E40B87F /* AcknowledgementsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B7A28471B8526C2693FFF65 /* AcknowledgementsView.swift */; }; @@ -234,6 +235,7 @@ E9E4CC657771DF9F4C56183C /* VisualContextCoordinator.swift in Sources */ = {isa = PBXBuildFile; fileRef = A854CAFB1F557BC4CAED8819 /* VisualContextCoordinator.swift */; }; EB13A392BFA5349DD8A0DD25 /* EmojiUsageStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = FE35C7770405ED368AA02448 /* EmojiUsageStore.swift */; }; ED0843752B297D7E9DB2C468 /* EmojiTriggerStateMachineTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 723E1EFA85D2E61B6C5F33E8 /* EmojiTriggerStateMachineTests.swift */; }; + ED642B8D6D0EAF52E3907DE5 /* RepetitionGuardTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */; }; 
ED9C51B0D7056F0753AADF2D /* GhostSuggestionLayout.swift in Sources */ = {isa = PBXBuildFile; fileRef = 043E8AA850F930222DD112C0 /* GhostSuggestionLayout.swift */; }; EE87886AC1BFC8BB3DE09762 /* HuggingFaceModelBrowserView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 78E49BDA7F3A42455C4C5350 /* HuggingFaceModelBrowserView.swift */; }; EF0DE5E045F328F1E912A02A /* AppsPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D9C1C921A1CDA2ADFC39EA01 /* AppsPaneView.swift */; }; @@ -271,6 +273,7 @@ 043E8AA850F930222DD112C0 /* GhostSuggestionLayout.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GhostSuggestionLayout.swift; sourceTree = ""; }; 04D853218B0A77B0CE090828 /* BrowserAppDetectorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BrowserAppDetectorTests.swift; sourceTree = ""; }; 04E25414C307A20B6F9F20EC /* FocusSnapshotResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusSnapshotResolver.swift; sourceTree = ""; }; + 04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RepetitionGuard.swift; sourceTree = ""; }; 050D929E13BE52E6282B64D2 /* VisualContextStartCoalescerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisualContextStartCoalescerTests.swift; sourceTree = ""; }; 06FF2B0A3094A952A8EBA9B5 /* ConfidenceSuppressionPolicyTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConfidenceSuppressionPolicyTests.swift; sourceTree = ""; }; 07480CE96ED0EBD94817C6B1 /* GeneralPaneView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeneralPaneView.swift; sourceTree = ""; }; @@ -351,6 +354,7 @@ 5C4E5869D103865486AAAEEC /* ModelFileValidator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelFileValidator.swift; sourceTree = ""; }; 
5C9FDF029F7828CAF3FE8850 /* FocusTracker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusTracker.swift; sourceTree = ""; }; 5D0AEFF86F8210CBE7CFCBAD /* SettingsCategory.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsCategory.swift; sourceTree = ""; }; + 5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RepetitionGuardTests.swift; sourceTree = ""; }; 5EED3CD2BC7B48DF35DEE562 /* OCRTextHygieneTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OCRTextHygieneTests.swift; sourceTree = ""; }; 5F2C764D29C8D50D0C854FF8 /* PermissionGuidanceController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionGuidanceController.swift; sourceTree = ""; }; 5F34AE24BB7C99D66E1F3904 /* InputModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputModels.swift; sourceTree = ""; }; @@ -775,6 +779,7 @@ 0D80CC2CCAAFE3F23FB8C37A /* PromptContextSanitizerTests.swift */, 4696A84D17890B154533A08F /* PromptPolicyTests.swift */, E260C4D08C786CDBD527B329 /* PromptSectionBudgetTests.swift */, + 5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */, B2BFD19A159680A495EE02FD /* ScreenshotContextGeneratorTests.swift */, 2D7360A6D4261989A66658ED /* SentenceBoundaryClassifierTests.swift */, 2BC293F6125E2B14DCF05AD9 /* SettingsAttentionEvaluatorTests.swift */, @@ -928,6 +933,7 @@ E6423D6CC8CC371D2DA899DE /* PermissionOverlayTracker.swift */, FA4B45B91D4DEAC979C3113E /* PromptContextSanitizer.swift */, AFCFCCCB69C29A86E726B10A /* PromptSectionBudget.swift */, + 04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */, 6DC693E00430F46E41CB56E6 /* RequestID.swift */, D4B56C250DDEF3E81F9DCBD7 /* SentenceBoundaryClassifier.swift */, 2A02336442BB735EE2E8D064 /* SettingsAttentionEvaluator.swift */, @@ -1180,6 +1186,7 @@ 
39571AB31481959CD5C223AE /* PermissionsPaneView.swift in Sources */, 98E2E14A069384C1088CDB44 /* PromptContextSanitizer.swift in Sources */, 3C561CD717064F9250200667 /* PromptSectionBudget.swift in Sources */, + 097B59F01FEC03651D5732A3 /* RepetitionGuard.swift in Sources */, A5A6CE0EF01CA6A9AFA7A400 /* RequestID.swift in Sources */, 82D4ADEAF05337ABDE4C586C /* RuntimeBootstrapModel.swift in Sources */, 2C6159231472A849F15BD0AE /* ScreenFrameReader.swift in Sources */, @@ -1291,6 +1298,7 @@ 934885ACC2DEA20B27F10948 /* PromptContextSanitizerTests.swift in Sources */, 3CF1A4E39F24917DF0470A7D /* PromptPolicyTests.swift in Sources */, 7EB20783E0D36715D1230A5C /* PromptSectionBudgetTests.swift in Sources */, + ED642B8D6D0EAF52E3907DE5 /* RepetitionGuardTests.swift in Sources */, 1B3FFCB9A979F49BF86EAAD4 /* ScreenshotContextGeneratorTests.swift in Sources */, 1D1C6FF0B8F50AC14A1000F4 /* SentenceBoundaryClassifierTests.swift in Sources */, C618C5595DA9C57C806A3E03 /* SettingsAttentionEvaluatorTests.swift in Sources */, diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift index a113a68..b71e967 100644 --- a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift +++ b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift @@ -205,6 +205,11 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { // MARK: - Decoders + /// No-repeat-ngram order for the constrained decoder: forbid re-emitting any 3-gram already in the + /// output. 3 is the conventional choice — it breaks phrase loops ("I think that I think that") and + /// single-token runs after a few repeats, without blocking ordinary short repeats like "very very". + private static let noRepeatNgramSize = 3 + /// The shipping decoder: delegates token selection to the engine's built-in sampler /// (`sampleNext`), which applies temperature / top-k / top-p / min-p and commits each token. 
private func runEngineSampledDecode(sequenceID: Int32, options: LlamaGenerationOptions) -> String { @@ -275,6 +280,9 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { let topK = options.topK > 0 ? options.topK : vocabSize var generatedBytes: [UInt8] = [] + // Token-id history feeds the no-repeat-ngram guard; tracked separately from bytes because the + // guard reasons over token ids, not decoded text. + var generatedTokenIDs: [Int] = [] var tokensGenerated = 0 var sumLogprob = 0.0 var stopReason = "budget_exhausted" @@ -294,11 +302,18 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { break } + // Block any token that would close an n-gram already emitted, so greedy argmax cannot fall + // into a repetition loop (the engine's repetition penalty does not reach this raw-logit path). + let blockedTokenIDs = RepetitionGuard.blockedTokens( + history: generatedTokenIDs, + ngramSize: Self.noRepeatNgramSize + ) guard let tokenID = ConstrainedSampler.selectToken( logits: logits, profile: profile, admissibleTokenIDs: nil, - topK: topK + topK: topK, + blockedTokenIDs: blockedTokenIDs ) else { stopReason = "no_admissible_token" break @@ -321,6 +336,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable { sumLogprob += logProb } generatedBytes.append(contentsOf: profile.bytes(for: tokenID)) + generatedTokenIDs.append(tokenID) tokensGenerated += 1 if engine.acceptToken(sequenceID, Int32(tokenID)) != .ok { diff --git a/Cotabby/Support/ConstrainedSampler.swift b/Cotabby/Support/ConstrainedSampler.swift index 0fe5031..1e6949a 100644 --- a/Cotabby/Support/ConstrainedSampler.swift +++ b/Cotabby/Support/ConstrainedSampler.swift @@ -28,11 +28,16 @@ enum ConstrainedSampler { /// /// Determinism note: ties on logit are broken by the lower token id, so equal-logit inputs still /// produce a single stable result. 
+    ///
+    /// `blockedTokenIDs` is a per-step block-list (defaults to empty) layered on top of the static
+    /// profile exclusions: a blocked id is skipped exactly like a control token. The decoder uses it
+    /// for dynamic constraints such as no-repeat-ngram, which the static profile cannot express.
     static func selectToken(
         logits: [Float],
         profile: TokenProfile,
         admissibleTokenIDs: Set<Int>?,
-        topK: Int
+        topK: Int,
+        blockedTokenIDs: Set<Int> = []
     ) -> Int? {
         guard topK > 0, !logits.isEmpty else {
             return nil
@@ -50,6 +55,9 @@ enum ConstrainedSampler {
             if profile.isExcluded(id) {
                 continue
             }
+            if blockedTokenIDs.contains(id) {
+                continue
+            }
             if let admissible = admissibleTokenIDs, !admissible.contains(id) {
                 continue
             }
diff --git a/Cotabby/Support/RepetitionGuard.swift b/Cotabby/Support/RepetitionGuard.swift
new file mode 100644
index 0000000..b899f23
--- /dev/null
+++ b/Cotabby/Support/RepetitionGuard.swift
@@ -0,0 +1,59 @@
+import Foundation
+
+/// File overview:
+/// Pure no-repeat-ngram logic for the deterministic constrained decoder. Given the tokens generated
+/// so far, it returns the token ids that must not be emitted next because doing so would repeat an
+/// n-gram that already appeared in the output.
+///
+/// Why this file exists:
+/// The constrained decoder selects each token by raw-logit argmax. Greedy argmax has no inherent
+/// resistance to repetition (the engine's `repetition_penalty` lives in its own sampler, which the
+/// constrained path bypasses), so a base model can fall into a loop — "I think that I think that …"
+/// or a single token emitted forever. A hard no-repeat-ngram block is the standard, deterministic
+/// remedy: it forbids closing any n-gram that the output already contains. Keeping it pure makes
+/// the rule exhaustively testable and keeps the decode loop a thin driver.
+enum RepetitionGuard {
+    /// Returns the token ids that would, if emitted next, repeat an `ngramSize`-gram already present
+    /// in `history`.
+    ///
+    /// A token `t` is blocked when the last `ngramSize - 1` tokens of `history` (the pending prefix)
+    /// already occur earlier in `history` immediately followed by `t`; emitting `t` would reproduce
+    /// that whole n-gram a second time. An earlier occurrence may overlap the pending prefix itself,
+    /// which is what stops a single-token run: with `ngramSize == 3`, `[5, 5, 5]` blocks `5`.
+    ///
+    /// Operates on token ids, not text, so it is independent of detokenization and works the same
+    /// for any vocabulary.
+    ///
+    /// - Parameters:
+    ///   - history: Token ids emitted so far, oldest first.
+    ///   - ngramSize: Length `n` of the n-grams that must not repeat.
+    /// - Returns: The set of blocked token ids. Empty when `ngramSize < 2` (a 1-gram block would
+    ///   forbid every token that ever appeared, killing normal repetition like "the … the") or when
+    ///   `history` is too short to contain the pending prefix plus a follower.
+    static func blockedTokens(history: [Int], ngramSize: Int) -> Set<Int> {
+        // Validate the order before deriving the prefix length: computing `ngramSize - 1` first
+        // would trap on overflow for `Int.min` instead of returning the documented empty set.
+        guard ngramSize >= 2 else {
+            return []
+        }
+        let prefixLength = ngramSize - 1
+        // A match needs the prefix plus one follower, so a shorter history cannot contain one.
+        guard history.count > prefixLength else {
+            return []
+        }
+
+        // The pending prefix is the suffix of history that a next token would extend into an n-gram.
+        // Kept as a slice: it is only compared, never mutated, so no copy is needed.
+        let pendingPrefix = history.suffix(prefixLength)
+
+        var blocked: Set<Int> = []
+        // Every earlier window equal to the pending prefix contributes the token that followed it:
+        // emitting that token now would repeat the n-gram. The range stops early enough that the
+        // follower index `start + prefixLength` is always inside `history`.
+        for start in 0 ..< history.count - prefixLength
+        where history[start ..< start + prefixLength].elementsEqual(pendingPrefix) {
+            blocked.insert(history[start + prefixLength])
+        }
+        return blocked
+    }
+}
diff --git a/CotabbyTests/ConstrainedSamplerTests.swift b/CotabbyTests/ConstrainedSamplerTests.swift
index 08eb25c..4af68a8 100644
--- a/CotabbyTests/ConstrainedSamplerTests.swift
+++ b/CotabbyTests/ConstrainedSamplerTests.swift
@@ -162,6 +162,32 @@ final class ConstrainedSamplerTests: XCTestCase {
         XCTAssertEqual(first, 1)
     }
 
+    func test_select_skipsBlockedTokens() {
+        // Token 1 has the highest logit but is blocked (e.g. by the repetition guard), so the
+        // next-highest unblocked token wins.
+        let logits: [Float] = [0.1, 5.0, 2.0, 1.0]
+        let id = ConstrainedSampler.selectToken(
+            logits: logits,
+            profile: plainProfile(count: 4),
+            admissibleTokenIDs: nil,
+            topK: 4,
+            blockedTokenIDs: [1]
+        )
+        XCTAssertEqual(id, 2)
+    }
+
+    func test_select_allBlocked_returnsNil() {
+        let logits: [Float] = [1.0, 2.0, 3.0]
+        let id = ConstrainedSampler.selectToken(
+            logits: logits,
+            profile: plainProfile(count: 3),
+            admissibleTokenIDs: nil,
+            topK: 3,
+            blockedTokenIDs: [0, 1, 2]
+        )
+        XCTAssertNil(id) // Every candidate id is blocked, so nothing remains to select.
+    }
+
     // MARK: - averageLogProb
 
     func test_averageLogProb_uniformRow_matchesNegativeLogVocab() {
diff --git a/CotabbyTests/RepetitionGuardTests.swift b/CotabbyTests/RepetitionGuardTests.swift
new file mode 100644
index 0000000..10ef662
--- /dev/null
+++ b/CotabbyTests/RepetitionGuardTests.swift
@@ -0,0 +1,51 @@
+import XCTest
+@testable import Cotabby
+
+/// Pure tests for the no-repeat-ngram block set. The guard operates on token ids only, so each
+/// case is a small id sequence paired with the followers expected to be blocked.
+final class RepetitionGuardTests: XCTestCase {
+
+    func test_ngramSizeBelowTwo_blocksNothing() {
+        // A 1-gram block would forbid every token that ever appeared; the guard refuses that.
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 1, 2], ngramSize: 1), [])
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 1, 2], ngramSize: 0), [])
+    }
+
+    func test_historyShorterThanPrefix_blocksNothing() {
+        // n=3 needs a 2-token pending prefix; a single token cannot form it.
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [7], ngramSize: 3), [])
+    }
+
+    func test_noRepeatedPrefix_blocksNothing() {
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 2, 3], ngramSize: 3), [])
+    }
+
+    func test_repeatedPrefix_blocksItsFollower() {
+        // Pending prefix [1,2] occurred earlier at index 0, followed by 1, so emitting 1 would repeat
+        // the trigram [1,2,1]. Block 1.
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 2, 1, 2], ngramSize: 3), [1])
+    }
+
+    func test_singleTokenRun_blocksAfterThreeWithTrigram() {
+        // Three identical tokens are allowed; a fourth would repeat the trigram [5,5,5].
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [5, 5], ngramSize: 3), [])
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [5, 5, 5], ngramSize: 3), [5])
+    }
+
+    func test_multipleFollowers_allBlocked() {
+        // [1,2] occurs twice before the pending [1,2], followed by 9 then 8; both are blocked.
+        let blocked = RepetitionGuard.blockedTokens(history: [1, 2, 9, 1, 2, 8, 1, 2], ngramSize: 3)
+        XCTAssertEqual(blocked, [9, 8])
+    }
+
+    func test_bigramOrder_blocksRepeatedBigram() {
+        // n=2: the pending prefix is the last single token. [1] occurred at index 0 followed by 2,
+        // so emitting 2 would repeat the bigram [1,2].
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 2, 1], ngramSize: 2), [2])
+    }
+
+    func test_prefixPresentButNotPending_notBlocked() {
+        // [1,2] appears early but the pending prefix is [3,4]; nothing repeats, so nothing is blocked.
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 2, 9, 3, 4], ngramSize: 3), [])
+    }
+}