FuJacob · FuJacob · Jun 1, 2026 · Jun 1, 2026 · greptile-apps · Jun 1, 2026
diff --git a/Cotabby.xcodeproj/project.pbxproj b/Cotabby.xcodeproj/project.pbxproj
@@ -14,6 +14,7 @@
 		046C133967B32BBF9205EBB1 /* LLMIOFileHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8D610FCA3A97249DCCE7D0B8 /* LLMIOFileHandler.swift */; };
 		078FDE669437D756678E9AB7 /* SettingsRowLabel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 907549CB913B40C28B953A5D /* SettingsRowLabel.swift */; };
 		07D046D406411ED85AC5758A /* InputMonitorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = BAC01317B0B68E3C4125E421 /* InputMonitorTests.swift */; };
+		097B59F01FEC03651D5732A3 /* RepetitionGuard.swift in Sources */ = {isa = PBXBuildFile; fileRef = 04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */; };
 		0A2DDD946654076675AC0FC6 /* LanguageCatalog.swift in Sources */ = {isa = PBXBuildFile; fileRef = BF4BB93056F291FD24EFAD22 /* LanguageCatalog.swift */; };
 		0A3443AEE6540F11E5E6BF8F /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3E8E86A14090BC7BD13BA76 /* AppDelegate.swift */; };
 		0A658BF137DBD0898E40B87F /* AcknowledgementsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B7A28471B8526C2693FFF65 /* AcknowledgementsView.swift */; };
@@ -234,6 +235,7 @@
 		E9E4CC657771DF9F4C56183C /* VisualContextCoordinator.swift in Sources */ = {isa = PBXBuildFile; fileRef = A854CAFB1F557BC4CAED8819 /* VisualContextCoordinator.swift */; };
 		EB13A392BFA5349DD8A0DD25 /* EmojiUsageStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = FE35C7770405ED368AA02448 /* EmojiUsageStore.swift */; };
 		ED0843752B297D7E9DB2C468 /* EmojiTriggerStateMachineTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 723E1EFA85D2E61B6C5F33E8 /* EmojiTriggerStateMachineTests.swift */; };
+		ED642B8D6D0EAF52E3907DE5 /* RepetitionGuardTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */; };
 		ED9C51B0D7056F0753AADF2D /* GhostSuggestionLayout.swift in Sources */ = {isa = PBXBuildFile; fileRef = 043E8AA850F930222DD112C0 /* GhostSuggestionLayout.swift */; };
 		EE87886AC1BFC8BB3DE09762 /* HuggingFaceModelBrowserView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 78E49BDA7F3A42455C4C5350 /* HuggingFaceModelBrowserView.swift */; };
 		EF0DE5E045F328F1E912A02A /* AppsPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D9C1C921A1CDA2ADFC39EA01 /* AppsPaneView.swift */; };
@@ -271,6 +273,7 @@
 		043E8AA850F930222DD112C0 /* GhostSuggestionLayout.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GhostSuggestionLayout.swift; sourceTree = "<group>"; };
 		04D853218B0A77B0CE090828 /* BrowserAppDetectorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BrowserAppDetectorTests.swift; sourceTree = "<group>"; };
 		04E25414C307A20B6F9F20EC /* FocusSnapshotResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusSnapshotResolver.swift; sourceTree = "<group>"; };
+		04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RepetitionGuard.swift; sourceTree = "<group>"; };
 		050D929E13BE52E6282B64D2 /* VisualContextStartCoalescerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisualContextStartCoalescerTests.swift; sourceTree = "<group>"; };
 		06FF2B0A3094A952A8EBA9B5 /* ConfidenceSuppressionPolicyTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConfidenceSuppressionPolicyTests.swift; sourceTree = "<group>"; };
 		07480CE96ED0EBD94817C6B1 /* GeneralPaneView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeneralPaneView.swift; sourceTree = "<group>"; };
@@ -351,6 +354,7 @@
 		5C4E5869D103865486AAAEEC /* ModelFileValidator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelFileValidator.swift; sourceTree = "<group>"; };
 		5C9FDF029F7828CAF3FE8850 /* FocusTracker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusTracker.swift; sourceTree = "<group>"; };
 		5D0AEFF86F8210CBE7CFCBAD /* SettingsCategory.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsCategory.swift; sourceTree = "<group>"; };
+		5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RepetitionGuardTests.swift; sourceTree = "<group>"; };
 		5EED3CD2BC7B48DF35DEE562 /* OCRTextHygieneTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OCRTextHygieneTests.swift; sourceTree = "<group>"; };
 		5F2C764D29C8D50D0C854FF8 /* PermissionGuidanceController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionGuidanceController.swift; sourceTree = "<group>"; };
 		5F34AE24BB7C99D66E1F3904 /* InputModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputModels.swift; sourceTree = "<group>"; };
@@ -775,6 +779,7 @@
 				0D80CC2CCAAFE3F23FB8C37A /* PromptContextSanitizerTests.swift */,
 				4696A84D17890B154533A08F /* PromptPolicyTests.swift */,
 				E260C4D08C786CDBD527B329 /* PromptSectionBudgetTests.swift */,
+				5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */,
 				B2BFD19A159680A495EE02FD /* ScreenshotContextGeneratorTests.swift */,
 				2D7360A6D4261989A66658ED /* SentenceBoundaryClassifierTests.swift */,
 				2BC293F6125E2B14DCF05AD9 /* SettingsAttentionEvaluatorTests.swift */,
@@ -928,6 +933,7 @@
 				E6423D6CC8CC371D2DA899DE /* PermissionOverlayTracker.swift */,
 				FA4B45B91D4DEAC979C3113E /* PromptContextSanitizer.swift */,
 				AFCFCCCB69C29A86E726B10A /* PromptSectionBudget.swift */,
+				04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */,
 				6DC693E00430F46E41CB56E6 /* RequestID.swift */,
 				D4B56C250DDEF3E81F9DCBD7 /* SentenceBoundaryClassifier.swift */,
 				2A02336442BB735EE2E8D064 /* SettingsAttentionEvaluator.swift */,
@@ -1180,6 +1186,7 @@
 				39571AB31481959CD5C223AE /* PermissionsPaneView.swift in Sources */,
 				98E2E14A069384C1088CDB44 /* PromptContextSanitizer.swift in Sources */,
 				3C561CD717064F9250200667 /* PromptSectionBudget.swift in Sources */,
+				097B59F01FEC03651D5732A3 /* RepetitionGuard.swift in Sources */,
 				A5A6CE0EF01CA6A9AFA7A400 /* RequestID.swift in Sources */,
 				82D4ADEAF05337ABDE4C586C /* RuntimeBootstrapModel.swift in Sources */,
 				2C6159231472A849F15BD0AE /* ScreenFrameReader.swift in Sources */,
@@ -1291,6 +1298,7 @@
 				934885ACC2DEA20B27F10948 /* PromptContextSanitizerTests.swift in Sources */,
 				3CF1A4E39F24917DF0470A7D /* PromptPolicyTests.swift in Sources */,
 				7EB20783E0D36715D1230A5C /* PromptSectionBudgetTests.swift in Sources */,
+				ED642B8D6D0EAF52E3907DE5 /* RepetitionGuardTests.swift in Sources */,
 				1B3FFCB9A979F49BF86EAAD4 /* ScreenshotContextGeneratorTests.swift in Sources */,
 				1D1C6FF0B8F50AC14A1000F4 /* SentenceBoundaryClassifierTests.swift in Sources */,
 				C618C5595DA9C57C806A3E03 /* SettingsAttentionEvaluatorTests.swift in Sources */,

diff --git a/Cotabby/Services/Runtime/LlamaRuntimeCore.swift b/Cotabby/Services/Runtime/LlamaRuntimeCore.swift
@@ -205,6 +205,11 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
 
     // MARK: - Decoders
 
+    /// No-repeat-ngram order for the constrained decoder: forbid re-emitting any 3-gram already in the
+    /// output. 3 is the conventional choice — it breaks phrase loops ("I think that I think that") and
+    /// single-token runs after a few repeats, without blocking ordinary short repeats like "very very".
+    private static let noRepeatNgramSize = 3
+
     /// The shipping decoder: delegates token selection to the engine's built-in sampler
     /// (`sampleNext`), which applies temperature / top-k / top-p / min-p and commits each token.
     private func runEngineSampledDecode(sequenceID: Int32, options: LlamaGenerationOptions) -> String {
@@ -275,6 +280,9 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
         let topK = options.topK > 0 ? options.topK : vocabSize
 
         var generatedBytes: [UInt8] = []
+        // Token-id history feeds the no-repeat-ngram guard; tracked separately from bytes because the
+        // guard reasons over token ids, not decoded text.
+        var generatedTokenIDs: [Int] = []
         var tokensGenerated = 0
         var sumLogprob = 0.0
         var stopReason = "budget_exhausted"
@@ -294,11 +302,18 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
                 break
             }
 
+            // Block any token that would close an n-gram already emitted, so greedy argmax cannot fall
+            // into a repetition loop (the engine's repetition penalty does not reach this raw-logit path).
+            let blockedTokenIDs = RepetitionGuard.blockedTokens(
+                history: generatedTokenIDs,
+                ngramSize: Self.noRepeatNgramSize
+            )
             guard let tokenID = ConstrainedSampler.selectToken(
                 logits: logits,
                 profile: profile,
                 admissibleTokenIDs: nil,
-                topK: topK
+                topK: topK,
+                blockedTokenIDs: blockedTokenIDs
             ) else {
                 stopReason = "no_admissible_token"
                 break
@@ -321,6 +336,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
                 sumLogprob += logProb
             }
             generatedBytes.append(contentsOf: profile.bytes(for: tokenID))
+            generatedTokenIDs.append(tokenID)
             tokensGenerated += 1
 
             if engine.acceptToken(sequenceID, Int32(tokenID)) != .ok {

diff --git a/Cotabby/Support/ConstrainedSampler.swift b/Cotabby/Support/ConstrainedSampler.swift
@@ -28,11 +28,16 @@ enum ConstrainedSampler {
     ///
     /// Determinism note: ties on logit are broken by the lower token id, so equal-logit inputs still
     /// produce a single stable result.
+    ///
+    /// `blockedTokenIDs` is a per-step block-list (defaults to empty) layered on top of the static
+    /// profile exclusions: a blocked id is skipped exactly like a control token. The decoder uses it
+    /// for dynamic constraints such as no-repeat-ngram, which the static profile cannot express.
     static func selectToken(
         logits: [Float],
         profile: TokenProfile,
         admissibleTokenIDs: Set<Int>?,
-        topK: Int
+        topK: Int,
+        blockedTokenIDs: Set<Int> = []
     ) -> Int? {
         guard topK > 0, !logits.isEmpty else {
             return nil
@@ -50,6 +55,9 @@ enum ConstrainedSampler {
             if profile.isExcluded(id) {
                 continue
             }
+            if blockedTokenIDs.contains(id) {
+                continue
+            }
             if let admissible = admissibleTokenIDs, !admissible.contains(id) {
                 continue
             }

diff --git a/Cotabby/Support/RepetitionGuard.swift b/Cotabby/Support/RepetitionGuard.swift
@@ -0,0 +1,52 @@
+import Foundation
+
+/// File overview:
+/// Pure no-repeat-ngram logic for the deterministic constrained decoder. Given the tokens generated
+/// so far, it returns the token ids that must not be emitted next because doing so would repeat an
+/// n-gram that already appeared in the output.
+///
+/// Why this file exists:
+/// The constrained decoder selects each token by raw-logit argmax. Greedy argmax has no inherent
+/// resistance to repetition (the engine's `repetition_penalty` lives in its own sampler, which the
+/// constrained path bypasses), so a base model can fall into a loop — "I think that I think that …"
+/// or a single token emitted forever. A hard no-repeat-ngram block is the standard, deterministic
+/// remedy: it forbids closing any (n)-gram that the output already contains. Keeping it pure makes
+/// the rule exhaustively testable and keeps the decode loop a thin driver.
+enum RepetitionGuard {
+    /// The token ids that would, if emitted next, repeat an `ngramSize`-gram already present in
+    /// `history`. A token `t` is blocked when the last `ngramSize - 1` tokens of `history` (the
+    /// pending prefix) already occur earlier in `history` immediately followed by `t`; emitting `t`
+    /// would reproduce that whole n-gram a second time.
+    ///
+    /// Returns an empty set when `ngramSize < 2` (a 1-gram block would forbid every token that ever
+    /// appeared, killing normal repetition like "the … the") or when `history` is too short to hold a
+    /// full prefix. Operates on token ids, not text, so it is independent of detokenization and works
+    /// the same for any vocabulary.
+    static func blockedTokens(history: [Int], ngramSize: Int) -> Set<Int> {
+        let prefixLength = ngramSize - 1
+        guard ngramSize >= 2, history.count >= prefixLength else {
+            return []
+        }
+
+        // The pending prefix is the suffix of history that a next token would extend into an n-gram.
+        let prefix = Array(history.suffix(prefixLength))
+
+        var blocked: Set<Int> = []
+        // Every earlier position whose `prefixLength`-gram equals the pending prefix contributes the
+        // token that followed it: emitting that token now would repeat the n-gram.
+        var start = 0
+        let lastPrefixStart = history.count - prefixLength
+        while start < lastPrefixStart {
+            var matches = true
+            for offset in 0 ..< prefixLength where history[start + offset] != prefix[offset] {
+                matches = false
+                break
+            }
+            if matches {
+                blocked.insert(history[start + prefixLength])
+            }
+            start += 1
+        }
+        return blocked
+    }
+}
diff --git a/CotabbyTests/ConstrainedSamplerTests.swift b/CotabbyTests/ConstrainedSamplerTests.swift
@@ -162,6 +162,32 @@ final class ConstrainedSamplerTests: XCTestCase {
         XCTAssertEqual(first, 1)
     }
 
+    func test_select_skipsBlockedTokens() {
+        // Token 1 has the highest logit but is blocked (e.g. by the repetition guard), so the
+        // next-highest unblocked token wins.
+        let logits: [Float] = [0.1, 5.0, 2.0, 1.0]
+        let id = ConstrainedSampler.selectToken(
+            logits: logits,
+            profile: plainProfile(count: 4),
+            admissibleTokenIDs: nil,
+            topK: 4,
+            blockedTokenIDs: [1]
+        )
+        XCTAssertEqual(id, 2)
+    }
+
+    func test_select_allBlocked_returnsNil() {
+        let logits: [Float] = [1.0, 2.0, 3.0]
+        let id = ConstrainedSampler.selectToken(
+            logits: logits,
+            profile: plainProfile(count: 3),
+            admissibleTokenIDs: nil,
+            topK: 3,
+            blockedTokenIDs: [0, 1, 2]
+        )
+        XCTAssertNil(id)
+    }
+
     // MARK: - averageLogProb
 
     func test_averageLogProb_uniformRow_matchesNegativeLogVocab() {

diff --git a/CotabbyTests/RepetitionGuardTests.swift b/CotabbyTests/RepetitionGuardTests.swift
@@ -0,0 +1,51 @@
+import XCTest
+@testable import Cotabby
+
+/// Pure tests for the no-repeat-ngram block set. Operates on token ids only, so cases are written as
+/// small id sequences with the expected blocked followers.
+final class RepetitionGuardTests: XCTestCase {
+
+    func test_ngramSizeBelowTwo_blocksNothing() {
+        // A 1-gram block would forbid every token that ever appeared; the guard refuses that.
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 1, 2], ngramSize: 1), [])
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 1, 2], ngramSize: 0), [])
+    }
+
+    func test_historyShorterThanPrefix_blocksNothing() {
+        // n=3 needs a 2-token pending prefix; one token cannot form it.
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [7], ngramSize: 3), [])
+    }
+
+    func test_noRepeatedPrefix_blocksNothing() {
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 2, 3], ngramSize: 3), [])
+    }
+
+    func test_repeatedPrefix_blocksItsFollower() {
+        // Pending prefix [1,2] occurred earlier at index 0, followed by 1, so emitting 1 would repeat
+        // the trigram [1,2,1]. Block 1.
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 2, 1, 2], ngramSize: 3), [1])
+    }
+
+    func test_singleTokenRun_blocksAfterThreeWithTrigram() {
+        // Three identical tokens are allowed; the fourth would repeat the trigram [5,5,5].
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [5, 5], ngramSize: 3), [])
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [5, 5, 5], ngramSize: 3), [5])
+    }
+
+    func test_multipleFollowers_allBlocked() {
+        // [1,2] appears twice, followed by 9 then 8; both followers are blocked.
+        let blocked = RepetitionGuard.blockedTokens(history: [1, 2, 9, 1, 2, 8, 1, 2], ngramSize: 3)
+        XCTAssertEqual(blocked, [9, 8])
+    }
+
+    func test_bigramOrder_blocksRepeatedBigram() {
+        // n=2: pending prefix is the last single token. [1] occurred at index 0 followed by 2, so
+        // emitting 2 would repeat the bigram [1,2].
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 2, 1], ngramSize: 2), [2])
+    }
+
+    func test_prefixPresentButNotPending_notBlocked() {
+        // [1,2] appears early but the pending prefix is [3,4]; nothing repeats, so nothing is blocked.
+        XCTAssertEqual(RepetitionGuard.blockedTokens(history: [1, 2, 9, 3, 4], ngramSize: 3), [])
+    }
+}