chore(main): release 0.2.3 (#638)

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
fix: prevent infinite auto-compact loop for unknown 3P models (#635) (#636)
2026-04-13 02:06:34 +08:00 · 2026-04-13 02:03:02 +08:00 · 2026-04-13 01:03:57 +08:00 · 2026-04-13 01:00:33 +08:00 · 2026-04-13 01:00:07 +08:00
12 changed files with 175 additions and 22 deletions
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,9 +4,6 @@ on:
  push:
    branches:
      - main
  release:
    types:
      - published
 concurrency:
  group: auto-release-${{ github.ref }}
@@ -15,7 +12,6 @@ concurrency:
 jobs:
  release-please:
    name: Release Please
    if: ${{ github.event_name == 'push' }}
    runs-on: ubuntu-latest
    permissions:
      contents: write
@@ -34,7 +30,8 @@ jobs:
  publish-npm:
    name: Publish to npm
-    if: ${{ github.event_name == 'release' }}
+    needs: release-please
    if: ${{ needs.release-please.outputs.release_created == 'true' }}
    runs-on: ubuntu-latest
    environment: release
    permissions:
@@ -44,7 +41,7 @@ jobs:
      - name: Checkout release tag
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5
        with:
-          ref: ${{ github.event.release.tag_name }}
+          ref: ${{ needs.release-please.outputs.tag_name }}
          fetch-depth: 0
      - name: Set up Node.js
@@ -84,8 +81,8 @@ jobs:
      - name: Release summary
        run: |
          {
-            echo "## Released ${{ github.event.release.tag_name }}"
+            echo "## Released ${{ needs.release-please.outputs.tag_name }}"
            echo
            echo "- npm: https://www.npmjs.com/package/@gitlawb/openclaude"
-            echo "- GitHub: https://github.com/Gitlawb/openclaude/releases/tag/${{ github.event.release.tag_name }}"
+            echo "- GitHub: https://github.com/Gitlawb/openclaude/releases/tag/${{ needs.release-please.outputs.tag_name }}"
          } >> "$GITHUB_STEP_SUMMARY"
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.2.1"
+  ".": "0.2.3"
 }
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,19 @@
 # Changelog
 ## [0.2.3](https://github.com/Gitlawb/openclaude/compare/v0.2.2...v0.2.3) (2026-04-12)
 ### Bug Fixes
 * prevent infinite auto-compact loop for unknown 3P models ([#635](https://github.com/Gitlawb/openclaude/issues/635)) ([#636](https://github.com/Gitlawb/openclaude/issues/636)) ([aeaa658](https://github.com/Gitlawb/openclaude/commit/aeaa658f776fb8df95721e8b8962385f8b00f66a))
 ## [0.2.2](https://github.com/Gitlawb/openclaude/compare/v0.2.1...v0.2.2) (2026-04-12)
 ### Bug Fixes
 * **read/edit:** make compact line prefix unambiguous for tab-indented files ([#613](https://github.com/Gitlawb/openclaude/issues/613)) ([08cc6f3](https://github.com/Gitlawb/openclaude/commit/08cc6f328711cd93ce9fa53351266c29a0b0a341))
 ## [0.2.1](https://github.com/Gitlawb/openclaude/compare/v0.2.0...v0.2.1) (2026-04-12)
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "@gitlawb/openclaude",
-  "version": "0.2.1",
+  "version": "0.2.3",
  "description": "Claude Code opened to any LLM — OpenAI, Gemini, DeepSeek, Ollama, and 200+ models",
  "type": "module",
  "bin": {
--- a/src/services/compact/autoCompact.test.ts
+++ b/src/services/compact/autoCompact.test.ts
@@ -0,0 +1,46 @@
 import { describe, expect, test } from 'bun:test'
 import {
  getEffectiveContextWindowSize,
  getAutoCompactThreshold,
 } from './autoCompact.ts'
 // Regression tests for getEffectiveContextWindowSize: the effective window
 // must stay positive for both known models and unknown 3P models.
 describe('getEffectiveContextWindowSize', () => {
  test('returns positive value for known models with large context windows', () => {
    // claude-sonnet-4 has 200k context
    const effective = getEffectiveContextWindowSize('claude-sonnet-4')
    expect(effective).toBeGreaterThan(0)
  })
  test('never returns negative even for unknown 3P models (issue #635)', () => {
    // Previously, unknown 3P models got 8k context → effective context was
    // 8k minus 20k summary reservation = -12k, causing infinite auto-compact.
    // Now the fallback is 128k and there's a floor, so effective is always
    // at least reservedTokensForSummary + buffer.
    //
    // Remember any pre-existing value so cleanup restores the environment
    // exactly instead of unconditionally deleting the variable.
    const previousUseOpenAI = process.env.CLAUDE_CODE_USE_OPENAI
    process.env.CLAUDE_CODE_USE_OPENAI = '1'
    try {
      const effective = getEffectiveContextWindowSize('some-unknown-3p-model')
      expect(effective).toBeGreaterThan(0)
      // Must be at least summary reservation (20k) + buffer (13k) = 33k
      expect(effective).toBeGreaterThanOrEqual(33_000)
    } finally {
      if (previousUseOpenAI === undefined) {
        delete process.env.CLAUDE_CODE_USE_OPENAI
      } else {
        process.env.CLAUDE_CODE_USE_OPENAI = previousUseOpenAI
      }
    }
  })
 })
 // Regression tests for getAutoCompactThreshold: the threshold must stay
 // positive for both known models and unknown 3P models.
 describe('getAutoCompactThreshold', () => {
  test('returns positive threshold for known models', () => {
    const threshold = getAutoCompactThreshold('claude-sonnet-4')
    expect(threshold).toBeGreaterThan(0)
  })
  test('never returns negative threshold even for unknown 3P models (issue #635)', () => {
    // Remember any pre-existing value so cleanup restores the environment
    // exactly instead of unconditionally deleting the variable.
    const previousUseOpenAI = process.env.CLAUDE_CODE_USE_OPENAI
    process.env.CLAUDE_CODE_USE_OPENAI = '1'
    try {
      const threshold = getAutoCompactThreshold('some-unknown-3p-model')
      expect(threshold).toBeGreaterThan(0)
    } finally {
      if (previousUseOpenAI === undefined) {
        delete process.env.CLAUDE_CODE_USE_OPENAI
      } else {
        process.env.CLAUDE_CODE_USE_OPENAI = previousUseOpenAI
      }
    }
  })
 })
--- a/src/services/compact/autoCompact.ts
+++ b/src/services/compact/autoCompact.ts
@@ -45,7 +45,12 @@ export function getEffectiveContextWindowSize(model: string): number {
    }
  }
-  return contextWindow - reservedTokensForSummary
+  // Floor: effective context must be at least the summary reservation plus a
  // usable buffer. If it goes lower, the auto-compact threshold becomes
  // negative and fires on every message (issue #635).
  const autocompactBuffer = 13_000 // must match AUTOCOMPACT_BUFFER_TOKENS
  const effectiveContext = contextWindow - reservedTokensForSummary
  return Math.max(effectiveContext, reservedTokensForSummary + autocompactBuffer)
 }
 export type AutoCompactTrackingState = {
--- a/src/tools/FileEditTool/prompt.ts
+++ b/src/tools/FileEditTool/prompt.ts
@@ -11,7 +11,7 @@ export function getEditToolDescription(): string {
 function getDefaultEditDescription(): string {
  const prefixFormat = isCompactLinePrefixEnabled()
-    ? 'line number + tab'
+    ? 'line number + arrow'
    : 'spaces + line number + arrow'
  const minimalUniquenessHint =
    process.env.USER_TYPE === 'ant'
--- a/src/utils/context.test.ts
+++ b/src/utils/context.test.ts
@@ -107,9 +107,23 @@ test('MiniMax-M2.7 uses explicit provider-specific context and output caps', ()
  expect(getMaxOutputTokensForModel('MiniMax-M2.7')).toBe(131_072)
 })
-test('unknown openai-compatible models still use the conservative fallback window', () => {
+test('unknown openai-compatible models use the 128k fallback window (not 8k, see #635)', () => {
  process.env.CLAUDE_CODE_USE_OPENAI = '1'
  delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
-  expect(getContextWindowForModel('some-unknown-3p-model')).toBe(8_000)
+  expect(getContextWindowForModel('some-unknown-3p-model')).toBe(128_000)
 })
 test('MiniMax-M2.5 and M2.1 use explicit provider-specific context and output caps', () => {
  // Select the OpenAI-compatible provider and clear any output-token override.
  delete process.env.CLAUDE_CODE_MAX_OUTPUT_TOKENS
  process.env.CLAUDE_CODE_USE_OPENAI = '1'

  // Every listed M2.5 / M2.1 variant is expected to report a 204,800 window.
  const miniMaxVariants = [
    'MiniMax-M2.5',
    'MiniMax-M2.5-highspeed',
    'MiniMax-M2.1',
    'MiniMax-M2.1-highspeed',
  ]
  for (const variant of miniMaxVariants) {
    expect(getContextWindowForModel(variant)).toBe(204_800)
  }

  // Output cap: default and upper limit are both 131,072 for M2.5.
  const expectedOutputCaps = {
    default: 131_072,
    upperLimit: 131_072,
  }
  expect(getModelMaxOutputTokens('MiniMax-M2.5')).toEqual(expectedOutputCaps)
 })
--- a/src/utils/context.ts
+++ b/src/utils/context.ts
@@ -9,6 +9,11 @@ import { getOpenAIContextWindow, getOpenAIMaxOutputTokens } from './model/openai
 // Model context window size (200k tokens for all models right now)
 export const MODEL_CONTEXT_WINDOW_DEFAULT = 200_000
 // Fallback context window for unknown 3P models. Must be large enough that
 // the effective context (this minus output token reservation) stays positive,
 // otherwise auto-compact fires on every message (issue #635).
 export const OPENAI_FALLBACK_CONTEXT_WINDOW = 128_000
 // Maximum output tokens for compact operations
 export const COMPACT_MAX_OUTPUT_TOKENS = 20_000
@@ -73,8 +78,9 @@ export function getContextWindowForModel(
  }
  // OpenAI-compatible provider — use known context windows for the model.
-  // Unknown models get a conservative 8k default so auto-compact triggers
+  // Unknown models get a conservative 128k default. This was previously 8k,
-  // before hitting a hard context_window_exceeded error.
+  // but that caused auto-compact to fire on every turn because the effective
  // context (8k minus output reservation) became negative (issue #635).
  const isOpenAIProvider =
    isEnvTruthy(process.env.CLAUDE_CODE_USE_OPENAI) ||
    isEnvTruthy(process.env.CLAUDE_CODE_USE_GEMINI) ||
@@ -86,10 +92,10 @@ export function getContextWindowForModel(
      return openaiWindow
    }
    console.error(
-      `[context] Warning: model "${model}" not in context window table — using conservative 8k default. ` +
+      `[context] Warning: model "${model}" not in context window table — using conservative 128k default. ` +
      'Add it to src/utils/model/openaiContextWindows.ts for accurate compaction.',
    )
-    return 8_000
+    return OPENAI_FALLBACK_CONTEXT_WINDOW
  }
  const cap = getModelCapability(model)
--- a/src/utils/file.test.ts
+++ b/src/utils/file.test.ts
@@ -0,0 +1,51 @@
 import { afterEach, describe, expect, mock, test } from 'bun:test'
 /**
  * Stubs the growthbook analytics module so that
  * `getFeatureValue_CACHED_MAY_BE_STALE` returns `killswitchEnabled`, then
  * dynamically imports `./file.js` under a unique query string.
  *
  * NOTE(review): the unique `?ts=` suffix presumably forces a fresh module
  * evaluation so the stub takes effect — confirm against the test runner.
  *
  * @param killswitchEnabled value the stubbed feature lookup returns
  * @returns a promise for the imported `./file.js` module namespace
  */
 async function importFileModuleWithKillswitchEnabled(
  killswitchEnabled: boolean,
 ) {
  // Factory builds a new stub object on every invocation.
  const growthbookFactory = () => {
    return {
      getFeatureValue_CACHED_MAY_BE_STALE: () => killswitchEnabled,
    }
  }
  mock.module('../services/analytics/growthbook.js', growthbookFactory)

  const freshSpecifier = `./file.js?ts=${Date.now()}-${Math.random()}`
  return import(freshSpecifier)
 }
 afterEach(() => {
  mock.restore()
 })
 // Covers both prefix formats emitted by addLineNumbers.
 describe('addLineNumbers', () => {
  test('uses unambiguous arrow compact prefix and preserves leading tabs', async () => {
    // Killswitch stubbed to false for this import.
    const fileModule = await importFileModuleWithKillswitchEnabled(false)
    const tabIndented = '\tfirst\n\t\tsecond'
    const numbered = fileModule.addLineNumbers({
      content: tabIndented,
      startLine: 41,
    })
    expect(numbered).toBe('41→\tfirst\n42→\t\tsecond')
  })
  test('keeps padded arrow format when compact mode is disabled', async () => {
    // Killswitch stubbed to true for this import.
    const fileModule = await importFileModuleWithKillswitchEnabled(true)
    const plainLines = 'alpha\nbeta'
    const numbered = fileModule.addLineNumbers({
      content: plainLines,
      startLine: 1,
    })
    expect(numbered).toBe('     1→alpha\n     2→beta')
  })
 })
 // Covers the three prefix shapes stripLineNumberPrefix is expected to remove.
 describe('stripLineNumberPrefix', () => {
  test('strips compact arrow, padded arrow, and legacy tab prefixes', async () => {
    const fileModule = await importFileModuleWithKillswitchEnabled(false)
    const strip = fileModule.stripLineNumberPrefix

    // [prefixed input, expected remainder] — checked in this order.
    const cases: Array<[string, string]> = [
      ['41→\tfirst', '\tfirst'],
      ['     2→beta', 'beta'],
      ['7\t\tlegacy-tab', '\tlegacy-tab'],
    ]
    for (const [prefixed, remainder] of cases) {
      expect(strip(prefixed)).toBe(remainder)
    }
  })
 })
--- a/src/utils/file.ts
+++ b/src/utils/file.ts
@@ -267,7 +267,7 @@ export async function suggestPathUnderCwd(
 }
 /**
- * Whether to use the compact line-number prefix format (`N\t` instead of
+ * Whether to use the compact line-number prefix format (`N→` instead of
 * `     N→`). The padded-arrow format costs 9 bytes/line overhead; at
 * 1.35B Read calls × 132 lines avg this is 2.18% of fleet uncached input
 * (bq-queries/read_line_prefix_overhead_verify.sql).
@@ -303,7 +303,7 @@ export function addLineNumbers({
  if (isCompactLinePrefixEnabled()) {
    return lines
-      .map((line, index) => `${index + startLine}\t${line}`)
+      .map((line, index) => `${index + startLine}→${line}`)
      .join('\n')
  }
--- a/src/utils/model/openaiContextWindows.ts
+++ b/src/utils/model/openaiContextWindows.ts
@@ -104,9 +104,19 @@ const OPENAI_CONTEXT_WINDOWS: Record<string, number> = {
  'devstral-latest':          256_000,
  'ministral-3b-latest':      256_000,
-  // MiniMax
+  // MiniMax (all M2.x variants share 204,800 context, 131,072 max output)
  'MiniMax-M2.7':             204_800,
  'MiniMax-M2.7-highspeed':   204_800,
  'MiniMax-M2.5':             204_800,
  'MiniMax-M2.5-highspeed':   204_800,
  'MiniMax-M2.1':             204_800,
  'MiniMax-M2.1-highspeed':   204_800,
  'minimax-m2.7':             204_800,
  'minimax-m2.7-highspeed':   204_800,
  'minimax-m2.5':             204_800,
  'minimax-m2.5-highspeed':   204_800,
  'minimax-m2.1':             204_800,
  'minimax-m2.1-highspeed':   204_800,
  // Google (via OpenRouter)
  'google/gemini-2.0-flash':1_048_576,
@@ -223,9 +233,19 @@ const OPENAI_MAX_OUTPUT_TOKENS: Record<string, number> = {
  'mistral-large-latest':     32_768,
  'mistral-small-latest':     32_768,
-  // MiniMax
+  // MiniMax (all M2.x variants share 131,072 max output)
  'MiniMax-M2.7':            131_072,
  'MiniMax-M2.7-highspeed':  131_072,
  'MiniMax-M2.5':            131_072,
  'MiniMax-M2.5-highspeed':  131_072,
  'MiniMax-M2.1':            131_072,
  'MiniMax-M2.1-highspeed':  131_072,
  'minimax-m2.7':            131_072,
  'minimax-m2.7-highspeed':  131_072,
  'minimax-m2.5':            131_072,
  'minimax-m2.5-highspeed':  131_072,
  'minimax-m2.1':            131_072,
  'minimax-m2.1-highspeed':  131_072,
  // Google (via OpenRouter)
  'google/gemini-2.0-flash':   8_192,
Author	SHA1	Message	Date
github-actions[bot]	f6a4455ecf	chore(main): release 0.2.3 (#638 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2026-04-13 02:06:34 +08:00
Vasanth T	aeaa658f77	fix: prevent infinite auto-compact loop for unknown 3P models (#635 ) (#636 ) - Raise context window fallback from 8k to 128k for unknown OpenAI-compat models. The 8k fallback caused effective context (8k minus output reservation) to go negative, making auto-compact fire on every single message. - Add safety floor in getEffectiveContextWindowSize(): effective context is always at least reservedTokensForSummary + 13k buffer, ensuring the auto-compact threshold stays positive. - Add missing MiniMax model entries (M2.5, M2.5-highspeed, M2.1, M2.1-highspeed) all at 204,800 context / 131,072 max output per MiniMax docs. - Add tests for MiniMax variants, 128k fallback, and autoCompact floor. Fixes #635 Co-authored-by: root <root@vm7508.lumadock.com>	2026-04-13 02:03:02 +08:00
github-actions[bot]	d2a057c6f1	chore(main): release 0.2.2 (#631 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2026-04-13 01:03:57 +08:00
Jeevan Mohan Pawar	08cc6f3287	fix(read/edit): make compact line prefix unambiguous for tab-indented files (#613 )	2026-04-13 01:00:33 +08:00
Kevin Codex	84fcc7f7e0	ci: publish npm in release workflow (#630 )	2026-04-13 01:00:07 +08:00