Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cotabby.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
046C133967B32BBF9205EBB1 /* LLMIOFileHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8D610FCA3A97249DCCE7D0B8 /* LLMIOFileHandler.swift */; };
078FDE669437D756678E9AB7 /* SettingsRowLabel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 907549CB913B40C28B953A5D /* SettingsRowLabel.swift */; };
07D046D406411ED85AC5758A /* InputMonitorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = BAC01317B0B68E3C4125E421 /* InputMonitorTests.swift */; };
097B59F01FEC03651D5732A3 /* RepetitionGuard.swift in Sources */ = {isa = PBXBuildFile; fileRef = 04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */; };
0A2DDD946654076675AC0FC6 /* LanguageCatalog.swift in Sources */ = {isa = PBXBuildFile; fileRef = BF4BB93056F291FD24EFAD22 /* LanguageCatalog.swift */; };
0A3443AEE6540F11E5E6BF8F /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = A3E8E86A14090BC7BD13BA76 /* AppDelegate.swift */; };
0A658BF137DBD0898E40B87F /* AcknowledgementsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 2B7A28471B8526C2693FFF65 /* AcknowledgementsView.swift */; };
Expand Down Expand Up @@ -234,6 +235,7 @@
E9E4CC657771DF9F4C56183C /* VisualContextCoordinator.swift in Sources */ = {isa = PBXBuildFile; fileRef = A854CAFB1F557BC4CAED8819 /* VisualContextCoordinator.swift */; };
EB13A392BFA5349DD8A0DD25 /* EmojiUsageStore.swift in Sources */ = {isa = PBXBuildFile; fileRef = FE35C7770405ED368AA02448 /* EmojiUsageStore.swift */; };
ED0843752B297D7E9DB2C468 /* EmojiTriggerStateMachineTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 723E1EFA85D2E61B6C5F33E8 /* EmojiTriggerStateMachineTests.swift */; };
ED642B8D6D0EAF52E3907DE5 /* RepetitionGuardTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */; };
ED9C51B0D7056F0753AADF2D /* GhostSuggestionLayout.swift in Sources */ = {isa = PBXBuildFile; fileRef = 043E8AA850F930222DD112C0 /* GhostSuggestionLayout.swift */; };
EE87886AC1BFC8BB3DE09762 /* HuggingFaceModelBrowserView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 78E49BDA7F3A42455C4C5350 /* HuggingFaceModelBrowserView.swift */; };
EF0DE5E045F328F1E912A02A /* AppsPaneView.swift in Sources */ = {isa = PBXBuildFile; fileRef = D9C1C921A1CDA2ADFC39EA01 /* AppsPaneView.swift */; };
Expand Down Expand Up @@ -271,6 +273,7 @@
043E8AA850F930222DD112C0 /* GhostSuggestionLayout.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GhostSuggestionLayout.swift; sourceTree = "<group>"; };
04D853218B0A77B0CE090828 /* BrowserAppDetectorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BrowserAppDetectorTests.swift; sourceTree = "<group>"; };
04E25414C307A20B6F9F20EC /* FocusSnapshotResolver.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusSnapshotResolver.swift; sourceTree = "<group>"; };
04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RepetitionGuard.swift; sourceTree = "<group>"; };
050D929E13BE52E6282B64D2 /* VisualContextStartCoalescerTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VisualContextStartCoalescerTests.swift; sourceTree = "<group>"; };
06FF2B0A3094A952A8EBA9B5 /* ConfidenceSuppressionPolicyTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConfidenceSuppressionPolicyTests.swift; sourceTree = "<group>"; };
07480CE96ED0EBD94817C6B1 /* GeneralPaneView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GeneralPaneView.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -351,6 +354,7 @@
5C4E5869D103865486AAAEEC /* ModelFileValidator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelFileValidator.swift; sourceTree = "<group>"; };
5C9FDF029F7828CAF3FE8850 /* FocusTracker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FocusTracker.swift; sourceTree = "<group>"; };
5D0AEFF86F8210CBE7CFCBAD /* SettingsCategory.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SettingsCategory.swift; sourceTree = "<group>"; };
5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RepetitionGuardTests.swift; sourceTree = "<group>"; };
5EED3CD2BC7B48DF35DEE562 /* OCRTextHygieneTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OCRTextHygieneTests.swift; sourceTree = "<group>"; };
5F2C764D29C8D50D0C854FF8 /* PermissionGuidanceController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PermissionGuidanceController.swift; sourceTree = "<group>"; };
5F34AE24BB7C99D66E1F3904 /* InputModels.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InputModels.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -775,6 +779,7 @@
0D80CC2CCAAFE3F23FB8C37A /* PromptContextSanitizerTests.swift */,
4696A84D17890B154533A08F /* PromptPolicyTests.swift */,
E260C4D08C786CDBD527B329 /* PromptSectionBudgetTests.swift */,
5D957E76B6EA508DE3510F98 /* RepetitionGuardTests.swift */,
B2BFD19A159680A495EE02FD /* ScreenshotContextGeneratorTests.swift */,
2D7360A6D4261989A66658ED /* SentenceBoundaryClassifierTests.swift */,
2BC293F6125E2B14DCF05AD9 /* SettingsAttentionEvaluatorTests.swift */,
Expand Down Expand Up @@ -928,6 +933,7 @@
E6423D6CC8CC371D2DA899DE /* PermissionOverlayTracker.swift */,
FA4B45B91D4DEAC979C3113E /* PromptContextSanitizer.swift */,
AFCFCCCB69C29A86E726B10A /* PromptSectionBudget.swift */,
04FAB8DC9CC29F7A3EB8C91F /* RepetitionGuard.swift */,
6DC693E00430F46E41CB56E6 /* RequestID.swift */,
D4B56C250DDEF3E81F9DCBD7 /* SentenceBoundaryClassifier.swift */,
2A02336442BB735EE2E8D064 /* SettingsAttentionEvaluator.swift */,
Expand Down Expand Up @@ -1180,6 +1186,7 @@
39571AB31481959CD5C223AE /* PermissionsPaneView.swift in Sources */,
98E2E14A069384C1088CDB44 /* PromptContextSanitizer.swift in Sources */,
3C561CD717064F9250200667 /* PromptSectionBudget.swift in Sources */,
097B59F01FEC03651D5732A3 /* RepetitionGuard.swift in Sources */,
A5A6CE0EF01CA6A9AFA7A400 /* RequestID.swift in Sources */,
82D4ADEAF05337ABDE4C586C /* RuntimeBootstrapModel.swift in Sources */,
2C6159231472A849F15BD0AE /* ScreenFrameReader.swift in Sources */,
Expand Down Expand Up @@ -1291,6 +1298,7 @@
934885ACC2DEA20B27F10948 /* PromptContextSanitizerTests.swift in Sources */,
3CF1A4E39F24917DF0470A7D /* PromptPolicyTests.swift in Sources */,
7EB20783E0D36715D1230A5C /* PromptSectionBudgetTests.swift in Sources */,
ED642B8D6D0EAF52E3907DE5 /* RepetitionGuardTests.swift in Sources */,
1B3FFCB9A979F49BF86EAAD4 /* ScreenshotContextGeneratorTests.swift in Sources */,
1D1C6FF0B8F50AC14A1000F4 /* SentenceBoundaryClassifierTests.swift in Sources */,
C618C5595DA9C57C806A3E03 /* SettingsAttentionEvaluatorTests.swift in Sources */,
Expand Down
18 changes: 17 additions & 1 deletion Cotabby/Services/Runtime/LlamaRuntimeCore.swift
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,11 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {

// MARK: - Decoders

/// No-repeat-ngram order for the constrained decoder: forbid re-emitting any 3-gram already in the
/// output. 3 is the conventional choice — it breaks phrase loops ("I think that I think that") and
/// single-token runs after a few repeats, without blocking ordinary short repeats like "very very".
private static let noRepeatNgramSize = 3

/// The shipping decoder: delegates token selection to the engine's built-in sampler
/// (`sampleNext`), which applies temperature / top-k / top-p / min-p and commits each token.
private func runEngineSampledDecode(sequenceID: Int32, options: LlamaGenerationOptions) -> String {
Expand Down Expand Up @@ -275,6 +280,9 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
let topK = options.topK > 0 ? options.topK : vocabSize

var generatedBytes: [UInt8] = []
// Token-id history feeds the no-repeat-ngram guard; tracked separately from bytes because the
// guard reasons over token ids, not decoded text.
var generatedTokenIDs: [Int] = []
var tokensGenerated = 0
var sumLogprob = 0.0
var stopReason = "budget_exhausted"
Expand All @@ -294,11 +302,18 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
break
}

// Block any token that would close an n-gram already emitted, so greedy argmax cannot fall
// into a repetition loop (the engine's repetition penalty does not reach this raw-logit path).
let blockedTokenIDs = RepetitionGuard.blockedTokens(
history: generatedTokenIDs,
ngramSize: Self.noRepeatNgramSize
)
guard let tokenID = ConstrainedSampler.selectToken(
logits: logits,
profile: profile,
admissibleTokenIDs: nil,
topK: topK
topK: topK,
blockedTokenIDs: blockedTokenIDs
) else {
stopReason = "no_admissible_token"
break
Comment on lines 317 to 319
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Ambiguous stopReason when repetition guard exhausts all candidates

"no_admissible_token" is already emitted when the byte-prefix constraint returns an empty admissible set; now the same string is logged when the repetition guard blocks every surviving candidate. A post-hoc log search won't distinguish between the two cases. Consider a distinct value such as "repetition_guard_exhausted" so decode diagnostics can tell apart a structural constraint failure from a repetition block.

Fix in Codex Fix in Claude Code

Expand All @@ -321,6 +336,7 @@ nonisolated final class LlamaRuntimeCore: @unchecked Sendable {
sumLogprob += logProb
}
generatedBytes.append(contentsOf: profile.bytes(for: tokenID))
generatedTokenIDs.append(tokenID)
tokensGenerated += 1

if engine.acceptToken(sequenceID, Int32(tokenID)) != .ok {
Expand Down
10 changes: 9 additions & 1 deletion Cotabby/Support/ConstrainedSampler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,16 @@ enum ConstrainedSampler {
///
/// Determinism note: ties on logit are broken by the lower token id, so equal-logit inputs still
/// produce a single stable result.
///
/// `blockedTokenIDs` is a per-step block-list (defaults to empty) layered on top of the static
/// profile exclusions: a blocked id is skipped exactly like a control token. The decoder uses it
/// for dynamic constraints such as no-repeat-ngram, which the static profile cannot express.
static func selectToken(
logits: [Float],
profile: TokenProfile,
admissibleTokenIDs: Set<Int>?,
topK: Int
topK: Int,
blockedTokenIDs: Set<Int> = []
) -> Int? {
guard topK > 0, !logits.isEmpty else {
return nil
Expand All @@ -50,6 +55,9 @@ enum ConstrainedSampler {
if profile.isExcluded(id) {
continue
}
if blockedTokenIDs.contains(id) {
continue
}
if let admissible = admissibleTokenIDs, !admissible.contains(id) {
continue
}
Expand Down
52 changes: 52 additions & 0 deletions Cotabby/Support/RepetitionGuard.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import Foundation

/// File overview:
Comment on lines +1 to +3
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 import Foundation is not used in this file — all types (Array, Set, Int) are from the Swift standard library. Removing the import makes the file's dependencies clearer and avoids a small unnecessary overhead.

Suggested change
import Foundation
/// File overview:
/// File overview:

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!

Fix in Codex Fix in Claude Code

/// Pure no-repeat-ngram logic for the deterministic constrained decoder. Given the tokens generated
/// so far, it returns the token ids that must not be emitted next because doing so would repeat an
/// n-gram that already appeared in the output.
///
/// Why this file exists:
/// The constrained decoder selects each token by raw-logit argmax. Greedy argmax has no inherent
/// resistance to repetition (the engine's `repetition_penalty` lives in its own sampler, which the
/// constrained path bypasses), so a base model can fall into a loop — "I think that I think that …"
/// or a single token emitted forever. A hard no-repeat-ngram block is the standard, deterministic
/// remedy: it forbids closing any n-gram that the output already contains. Keeping it pure makes
/// the rule exhaustively testable and keeps the decode loop a thin driver.
enum RepetitionGuard {
    /// Returns the token ids that would, if emitted next, repeat an `ngramSize`-gram already present
    /// in `history`.
    ///
    /// A token `t` is blocked when the last `ngramSize - 1` tokens of `history` (the pending prefix)
    /// already occur earlier in `history` immediately followed by `t`; emitting `t` would reproduce
    /// that whole n-gram a second time.
    ///
    /// - Parameters:
    ///   - history: Token ids generated so far, oldest first.
    ///   - ngramSize: Order of the n-gram that must not repeat. Any value below 2 disables the guard
    ///     (a 1-gram block would forbid every token that ever appeared, killing normal repetition
    ///     like "the … the").
    /// - Returns: The set of blocked follower ids. Empty when `ngramSize < 2` or when `history` is
    ///   too short to hold a full pending prefix.
    ///
    /// Operates on token ids, not text, so it is independent of detokenization. Cost is
    /// proportional to `history.count * (ngramSize - 1)` per call.
    static func blockedTokens(history: [Int], ngramSize: Int) -> Set<Int> {
        // Validate the order *before* deriving the prefix length: `ngramSize - 1` traps on overflow
        // for `Int.min`, and a disabled guard must return an empty set rather than crash.
        guard ngramSize >= 2 else {
            return []
        }
        let prefixLength = ngramSize - 1
        guard history.count >= prefixLength else {
            return []
        }

        // The pending prefix is the suffix of history that a next token would extend into an n-gram.
        let pendingPrefix = history.suffix(prefixLength)
        // Window starts are limited so that every matched window still has a follower inside
        // `history`; this also excludes the pending prefix itself, which has no follower yet.
        let lastPrefixStart = history.count - prefixLength

        var blocked: Set<Int> = []
        for start in 0 ..< lastPrefixStart {
            let followerIndex = start + prefixLength
            // Every earlier window equal to the pending prefix contributes the token that followed
            // it: emitting that token now would repeat the n-gram.
            if history[start ..< followerIndex].elementsEqual(pendingPrefix) {
                blocked.insert(history[followerIndex])
            }
        }
        return blocked
    }
}
26 changes: 26 additions & 0 deletions CotabbyTests/ConstrainedSamplerTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,32 @@ final class ConstrainedSamplerTests: XCTestCase {
XCTAssertEqual(first, 1)
}

func test_select_skipsBlockedTokens() {
    // The argmax (id 1, logit 5.0) is on the per-step block-list, so selection must fall through
    // to the best remaining candidate: id 2 with logit 2.0.
    let blocked: Set<Int> = [1]
    let profile = plainProfile(count: 4)

    let selected = ConstrainedSampler.selectToken(
        logits: [0.1, 5.0, 2.0, 1.0],
        profile: profile,
        admissibleTokenIDs: nil,
        topK: 4,
        blockedTokenIDs: blocked
    )

    XCTAssertEqual(selected, 2)
}

func test_select_allBlocked_returnsNil() {
    // Every id in the three-entry logit row is blocked, so no candidate survives and the sampler
    // must report that by returning nil.
    let everyID: Set<Int> = [0, 1, 2]
    let profile = plainProfile(count: 3)

    let selected = ConstrainedSampler.selectToken(
        logits: [1.0, 2.0, 3.0],
        profile: profile,
        admissibleTokenIDs: nil,
        topK: 3,
        blockedTokenIDs: everyID
    )

    XCTAssertNil(selected)
}

// MARK: - averageLogProb

func test_averageLogProb_uniformRow_matchesNegativeLogVocab() {
Expand Down
51 changes: 51 additions & 0 deletions CotabbyTests/RepetitionGuardTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import XCTest
@testable import Cotabby

/// Unit tests for `RepetitionGuard.blockedTokens`. The guard works purely on token ids, so each case
/// is a short id sequence paired with the set of follower ids expected to be blocked.
final class RepetitionGuardTests: XCTestCase {

    /// Shorthand for the call under test.
    private func blocked(after history: [Int], n: Int) -> Set<Int> {
        RepetitionGuard.blockedTokens(history: history, ngramSize: n)
    }

    func test_ngramSizeBelowTwo_blocksNothing() {
        // Orders 1 and 0 disable the guard: a 1-gram block would forbid every token already seen.
        for order in [1, 0] {
            XCTAssertEqual(blocked(after: [1, 1, 2], n: order), [])
        }
    }

    func test_historyShorterThanPrefix_blocksNothing() {
        // A trigram guard needs two tokens of pending prefix; a single token is not enough.
        let result = blocked(after: [7], n: 3)
        XCTAssertEqual(result, [])
    }

    func test_noRepeatedPrefix_blocksNothing() {
        let result = blocked(after: [1, 2, 3], n: 3)
        XCTAssertEqual(result, [])
    }

    func test_repeatedPrefix_blocksItsFollower() {
        // The pending prefix [1,2] was seen at index 0 with follower 1; emitting 1 again would
        // duplicate the trigram [1,2,1], so 1 is blocked.
        let result = blocked(after: [1, 2, 1, 2], n: 3)
        XCTAssertEqual(result, [1])
    }

    func test_singleTokenRun_blocksAfterThreeWithTrigram() {
        // A run of three identical tokens is permitted; a fourth would duplicate [5,5,5].
        let afterTwo = blocked(after: [5, 5], n: 3)
        XCTAssertEqual(afterTwo, [])

        let afterThree = blocked(after: [5, 5, 5], n: 3)
        XCTAssertEqual(afterThree, [5])
    }

    func test_multipleFollowers_allBlocked() {
        // [1,2] occurs twice before the pending prefix, followed first by 9 and then by 8; both
        // followers end up in the block set.
        let expected: Set<Int> = [9, 8]
        XCTAssertEqual(blocked(after: [1, 2, 9, 1, 2, 8, 1, 2], n: 3), expected)
    }

    func test_bigramOrder_blocksRepeatedBigram() {
        // With n=2 the pending prefix is just the last token. [1] was seen at index 0 followed by
        // 2, so emitting 2 would duplicate the bigram [1,2].
        let result = blocked(after: [1, 2, 1], n: 2)
        XCTAssertEqual(result, [2])
    }

    func test_prefixPresentButNotPending_notBlocked() {
        // [1,2] is in the history, but the pending prefix is [3,4], which has no earlier
        // occurrence, so the block set stays empty.
        let result = blocked(after: [1, 2, 9, 3, 4], n: 3)
        XCTAssertEqual(result, [])
    }
}