mirror of
https://github.com/instructkr/claude-code.git
synced 2026-06-07 12:46:44 +00:00
Compare commits
192 Commits
ad75fea2ad
...
29b5d08254
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
29b5d08254 | ||
|
|
6c5f150d68 | ||
|
|
65aa4378ff | ||
|
|
4fc2265d38 | ||
|
|
45b5e5c2b3 | ||
|
|
b8ada4042f | ||
|
|
f604b5c383 | ||
|
|
7a0a64beeb | ||
|
|
bfd5f2bf57 | ||
|
|
0f389752f0 | ||
|
|
096f15506d | ||
|
|
0f01faac15 | ||
|
|
672a7f8288 | ||
|
|
580128eddb | ||
|
|
16402954ee | ||
|
|
a56ad6ae2f | ||
|
|
5ee9a00d56 | ||
|
|
a6565aedea | ||
|
|
ca6107ab77 | ||
|
|
9b06c98bd6 | ||
|
|
79eeaaeaf6 | ||
|
|
639e1e338e | ||
|
|
92a598e7e6 | ||
|
|
1a7b8ea893 | ||
|
|
b05561c6ac | ||
|
|
fdb149d424 | ||
|
|
cc06e4699f | ||
|
|
d9607068ff | ||
|
|
294b855851 | ||
|
|
1f9d30fadc | ||
|
|
39ce893b9d | ||
|
|
25164086c0 | ||
|
|
27f395aa82 | ||
|
|
b3af8bdb54 | ||
|
|
c7d2c4e47f | ||
|
|
77c5e4f5cc | ||
|
|
a1b2fed172 | ||
|
|
28a37fbedd | ||
|
|
0f8e633d5f | ||
|
|
25adb26dd5 | ||
|
|
cc14d6edd6 | ||
|
|
5ccaf34d9d | ||
|
|
01b8149e00 | ||
|
|
e69fe1a7da | ||
|
|
3606f589c1 | ||
|
|
127108c5e7 | ||
|
|
971c1a808e | ||
|
|
fe10cb39c1 | ||
|
|
1c50d946e4 | ||
|
|
fe7f449de6 | ||
|
|
3d7153a4c1 | ||
|
|
8a187634a8 | ||
|
|
6fa9196f04 | ||
|
|
c7ef6f636d | ||
|
|
46f3e9cd2c | ||
|
|
572ed1305c | ||
|
|
84b1ea21dc | ||
|
|
4b6f343355 | ||
|
|
b97568df5a | ||
|
|
860ef7171d | ||
|
|
2d4806c163 | ||
|
|
8e9ba9234a | ||
|
|
9a88e75282 | ||
|
|
d17503db4d | ||
|
|
9b67460cd7 | ||
|
|
a3e8f6dab6 | ||
|
|
fa74f40d40 | ||
|
|
c6e35e6199 | ||
|
|
158452b2e1 | ||
|
|
61f9798e52 | ||
|
|
80da319837 | ||
|
|
e59d9115cb | ||
|
|
9c781f3108 | ||
|
|
f406e83520 | ||
|
|
1cc58fb478 | ||
|
|
404a7d346f | ||
|
|
a9c32c0ffa | ||
|
|
2c7385e497 | ||
|
|
d57295be51 | ||
|
|
baa6beb373 | ||
|
|
de4db53d25 | ||
|
|
24ed7f280a | ||
|
|
e857c76e4a | ||
|
|
2204a518e7 | ||
|
|
a794764baa | ||
|
|
ad44b3cdcd | ||
|
|
f9ee9f0ee1 | ||
|
|
8730b081e2 | ||
|
|
c2a208eb15 | ||
|
|
eea5b31745 | ||
|
|
3a3162c301 | ||
|
|
a319c9d7e6 | ||
|
|
f070fc8b0f | ||
|
|
be520220e9 | ||
|
|
faffc24fe7 | ||
|
|
bd45ae61a2 | ||
|
|
83e99696af | ||
|
|
d119ef9b7a | ||
|
|
dde3710e26 | ||
|
|
495db9764e | ||
|
|
9c02f202f4 | ||
|
|
34348de3d9 | ||
|
|
8b4876a90a | ||
|
|
1566b2e1d3 | ||
|
|
6da110a7d7 | ||
|
|
6921737326 | ||
|
|
18caa57b88 | ||
|
|
aaaa32cb2c | ||
|
|
42357cbee4 | ||
|
|
8ad7ac14dc | ||
|
|
9bdd70d945 | ||
|
|
4f6c64e189 | ||
|
|
ae9b66edcb | ||
|
|
6f8229ea37 | ||
|
|
c778659fab | ||
|
|
171e0f6943 | ||
|
|
486da5e447 | ||
|
|
f814ab91f1 | ||
|
|
db4b9b3626 | ||
|
|
f2035bc4db | ||
|
|
4dc6692a47 | ||
|
|
c0d100f916 | ||
|
|
03044a77fe | ||
|
|
a763463538 | ||
|
|
d17634741c | ||
|
|
3bb23fb003 | ||
|
|
0125884a2a | ||
|
|
37b9cb0011 | ||
|
|
f3f5eff232 | ||
|
|
85c5afa8a0 | ||
|
|
a8f954c1d0 | ||
|
|
e32bf3d397 | ||
|
|
23915fc666 | ||
|
|
d28ebd7fad | ||
|
|
99c29790d9 | ||
|
|
ae7a455aa2 | ||
|
|
51afd203d1 | ||
|
|
099585f2ad | ||
|
|
aa9baf1a1b | ||
|
|
0d4f334059 | ||
|
|
df3640c1d3 | ||
|
|
88aaf88343 | ||
|
|
b7a01f2e3f | ||
|
|
9d4e996a7c | ||
|
|
f0ab745042 | ||
|
|
228ac3324a | ||
|
|
5f0517df63 | ||
|
|
41618e56fc | ||
|
|
ce9d220e28 | ||
|
|
53ae0c081d | ||
|
|
caf600a655 | ||
|
|
6604429dad | ||
|
|
ebcc0192ca | ||
|
|
a9770c463d | ||
|
|
3f3c639258 | ||
|
|
27318dacd8 | ||
|
|
064ac2c95c | ||
|
|
b076e8736e | ||
|
|
e7ed973aed | ||
|
|
960290a2f3 | ||
|
|
8307715962 | ||
|
|
d1360778cf | ||
|
|
becdb8ab7b | ||
|
|
e6dff38490 | ||
|
|
054f6b2205 | ||
|
|
cc7bbedef2 | ||
|
|
f63531b818 | ||
|
|
440bb8b073 | ||
|
|
29c226cfee | ||
|
|
13e88e282e | ||
|
|
dd554c1a60 | ||
|
|
b6515dd6e0 | ||
|
|
f9d8ac5960 | ||
|
|
35d844e0c6 | ||
|
|
ea274b95d8 | ||
|
|
406832a12c | ||
|
|
80aca43e9b | ||
|
|
ed905d6634 | ||
|
|
7074da274a | ||
|
|
4ec32ddc66 | ||
|
|
0402cf5a52 | ||
|
|
2d88027603 | ||
|
|
a93fbf1cc4 | ||
|
|
c234609b1e | ||
|
|
b27b94aacd | ||
|
|
b40eeed444 | ||
|
|
ffaed903d9 | ||
|
|
fe50ef0e81 | ||
|
|
d5568b6ded | ||
|
|
8094eef5ef | ||
|
|
ea7dfb32ba | ||
|
|
43a732bfc2 |
5
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
5
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
blank_issues_enabled: true
|
||||
contact_links:
|
||||
- name: How to file a pinpoint
|
||||
url: https://github.com/ultraworkers/claw-code/blob/main/CONTRIBUTING.md#filing-a-roadmap-pinpoint
|
||||
about: Read the pinpoint format guide before filing
|
||||
41
.github/ISSUE_TEMPLATE/pinpoint.md
vendored
Normal file
41
.github/ISSUE_TEMPLATE/pinpoint.md
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
---
|
||||
name: Pinpoint
|
||||
about: File a concrete clawability gap with code evidence
|
||||
title: '[Pinpoint #XXX] '
|
||||
labels: [pinpoint]
|
||||
---
|
||||
|
||||
## Exact pinpoint
|
||||
|
||||
<!-- One-line statement: what is wrong or missing, stated crisply. -->
|
||||
|
||||
## Live evidence
|
||||
|
||||
<!-- File:line refs, code paths, command output that reproduces the gap. -->
|
||||
|
||||
```
|
||||
# paste evidence here
|
||||
```
|
||||
|
||||
## Why distinct
|
||||
|
||||
<!-- Why this isn't already covered by an adjacent pinpoint. Cluster context if relevant. -->
|
||||
|
||||
## Concrete delta landed
|
||||
|
||||
<!-- Commit sha + push status once fixed. Leave blank until resolved. -->
|
||||
|
||||
- commit:
|
||||
- push: local==origin==fork ✅ / ⏳ pending
|
||||
|
||||
## Fix shape recorded
|
||||
|
||||
<!-- Defensive fix sketch — what change would close this pinpoint. -->
|
||||
|
||||
## Branch / parity
|
||||
|
||||
<!-- Branch name, HEAD sha, three-way parity status. -->
|
||||
|
||||
- branch:
|
||||
- HEAD:
|
||||
- parity: local==origin==fork ✅ / ⏳ pending
|
||||
61
CHANGELOG.md
Normal file
61
CHANGELOG.md
Normal file
@@ -0,0 +1,61 @@
|
||||
# Changelog
|
||||
|
||||
All notable changes to claw-code are documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (currently pre-1.0).
|
||||
|
||||
## [Unreleased] — 2026-04-26 to 2026-04-27 (extended dogfood audit cycles)
|
||||
|
||||
Branch: `feat/jobdori-168c-emission-routing`
|
||||
|
||||
### Added — Documentation
|
||||
|
||||
- **CHANGELOG.md** — This file (cycle #424)
|
||||
- **docs/PINPOINT_FILING_GUIDE.md** — Step-by-step pinpoint filing workflow with #290 worked example (cycle #422)
|
||||
- **docs/SUPPORTED_PROVIDERS.md** — Documents 4 providers (Anthropic, xAI, DashScope/Qwen/Kimi, OpenAI/compat) from MODEL_REGISTRY (cycle #420)
|
||||
- **TROUBLESHOOTING.md** — Operational guidance for 5 critical failure modes (#286, #287, #289, #290, #291) (cycles #418, #423)
|
||||
- **ROADMAP.md Pinpoint Cluster Index** — Navigation aid for 8 named clusters (cycle #421)
|
||||
- **ROADMAP.md Extended Dogfood Audit Summary** — Cycles #388-#415 overview (cycle #416)
|
||||
- **README.md Contributing section** — Unified navigation to SECURITY/ROADMAP/CONTRIBUTING/ISSUE_TEMPLATE (cycle #415)
|
||||
- **SECURITY.md** — Responsible-disclosure stub with reporting via GitHub Security Advisories (cycle #414)
|
||||
- **CONTRIBUTING.md** — Codifies pinpoint filing format, build commands, branch naming (cycle #411)
|
||||
- **.github/ISSUE_TEMPLATE/pinpoint.md** — Discoverable canonical issue template (cycle #412)
|
||||
- **LICENSE** — Root MIT license file (cycle #410)
|
||||
|
||||
### Fixed — Code
|
||||
|
||||
- **#256** — Anthropic tool-result request ordering (pre-audit)
|
||||
- **#122b** — `claw doctor` broad-path warning
|
||||
- **#160** — Reserved-semantic-verb slash-command guidance
|
||||
|
||||
### Filed — Pinpoints (ROADMAP.md)
|
||||
|
||||
46 pinpoints filed (#241-#291) during extended dogfood audit. New clusters identified:
|
||||
- **Auto-compaction (4-deep):** #283, #287 (CRITICAL), #288, #289
|
||||
- **Transport / Provider Resilience:** #266, #285, #290, #291
|
||||
- **Provider Infrastructure:** #245, #246, #285
|
||||
- **Tool Lifecycle / Hooks:** #254, #268, #274, #280, #286
|
||||
- **CLI Dispatch:** #262, #267, #272, #282, #283
|
||||
- **Persistence / Migration:** #278, #279
|
||||
- **Provenance Consolidation:** #259, #271, #273, #275
|
||||
- **Slash-command Contract:** #284
|
||||
|
||||
See [ROADMAP.md](./ROADMAP.md#pinpoint-cluster-index) for full list.
|
||||
|
||||
### Live evidence integrated
|
||||
|
||||
- @Sigrid Jin: license verification, ultraplan functionality, provider-config source-of-truth → pinpoints #284, #285
|
||||
- gaebal-gajae sustained `500 empty_stream` (11+ incidents in 3hr+) → pinpoints #290, #291
|
||||
|
||||
---
|
||||
|
||||
## Process
|
||||
|
||||
This release demonstrates the pinpoint-driven workflow:
|
||||
1. **Identify friction** during real claw-code usage
|
||||
2. **File pinpoint** to ROADMAP.md with canonical 5-section format
|
||||
3. **Ship docs/code fix** when concrete delta is small
|
||||
4. **Cluster pinpoints** to expose architectural patterns
|
||||
5. **Document mitigations** in TROUBLESHOOTING.md
|
||||
|
||||
See [docs/PINPOINT_FILING_GUIDE.md](./docs/PINPOINT_FILING_GUIDE.md) for details.
|
||||
20
CLAUDE.md
20
CLAUDE.md
@@ -60,13 +60,16 @@ python3 -m mypy src/ --ignore-missing-imports 2>&1 | tail -5
|
||||
- `test_submit_message_*.py` — budget, cancellation contracts
|
||||
- `test_*_cli.py` — command-specific JSON output validation
|
||||
|
||||
- **`SCHEMAS.md`** — canonical JSON contract
|
||||
- Common fields (all envelopes): timestamp, command, exit_code, output_format, schema_version
|
||||
- Error envelope shape
|
||||
- Not-found envelope shape
|
||||
- **`SCHEMAS.md`** — canonical JSON contract (**target v2.0 design; see note below**)
|
||||
- **Target v2.0 common fields** (all envelopes): timestamp, command, exit_code, output_format, schema_version
|
||||
- **Current v1.0 binary fields** (what the Rust binary actually emits): flat top-level `kind` + verb-specific fields OR `{error, hint, kind, type}` for errors
|
||||
- Error envelope shape (target v2.0: nested error object)
|
||||
- Not-found envelope shape (target v2.0)
|
||||
- Per-command success schemas (14 commands documented)
|
||||
- Turn Result fields (including cancel_observed as of #164 Stage B)
|
||||
|
||||
> **Important:** SCHEMAS.md describes the **v2.0 target envelope**, not the current v1.0 binary behavior. The binary does NOT currently emit `timestamp`, `command`, `exit_code`, `output_format`, or `schema_version` fields. See [`FIX_LOCUS_164.md`](./FIX_LOCUS_164.md) for the migration plan (Phase 1: dual-mode flag; Phase 2: default bump; Phase 3: deprecation).
|
||||
|
||||
- **`.gitignore`** — excludes `.port_sessions/` (dogfood-run state)
|
||||
|
||||
## Key concepts
|
||||
@@ -75,9 +78,12 @@ python3 -m mypy src/ --ignore-missing-imports 2>&1 | tail -5
|
||||
|
||||
Every clawable command **must**:
|
||||
1. Accept `--output-format {text,json}`
|
||||
2. Return JSON envelopes matching SCHEMAS.md
|
||||
3. Use common fields (timestamp, command, exit_code, output_format, schema_version)
|
||||
4. Exit 0 on success, 1 on error/not-found, 2 on timeout
|
||||
2. Return JSON envelopes (current v1.0: flat shape with top-level `kind`; target v2.0: nested with common fields per SCHEMAS.md)
|
||||
3. **v1.0 (current):** Emit flat top-level fields: verb-specific data + `kind` (verb identity for success, error classification for errors)
|
||||
4. **v2.0 (target, post-FIX_LOCUS_164):** Use common wrapper fields (timestamp, command, exit_code, output_format, schema_version) with nested `data` or `error` objects
|
||||
5. Exit 0 on success, 1 on error/not-found, 2 on timeout
|
||||
|
||||
**Migration note:** The Python reference harness in `src/` was written against the v2.0 target schema (SCHEMAS.md). The Rust binary in `rust/` currently emits v1.0 (flat). See [`FIX_LOCUS_164.md`](./FIX_LOCUS_164.md) for the full migration plan and timeline.
|
||||
|
||||
**Commands:** list-sessions, delete-session, load-session, flush-transcript, show-command, show-tool, exec-command, exec-tool, route, bootstrap, command-graph, tool-pool, bootstrap-graph, turn-loop
|
||||
|
||||
|
||||
85
CONTRIBUTING.md
Normal file
85
CONTRIBUTING.md
Normal file
@@ -0,0 +1,85 @@
|
||||
# Contributing to claw-code
|
||||
|
||||
Thanks for your interest. This project follows the **gaebal-gajae pinpoint cadence** — see [ROADMAP.md](./ROADMAP.md) for the current pinpoint census. Here's how to contribute effectively.
|
||||
|
||||
## Security
|
||||
|
||||
For security vulnerabilities, see [SECURITY.md](./SECURITY.md). **Do not file public pinpoints for security issues.**
|
||||
|
||||
## Filing a ROADMAP Pinpoint
|
||||
|
||||
All feature requests and bug reports go through the pinpoint format (see `ROADMAP.md`). Each pinpoint must have:
|
||||
|
||||
- **Exact pinpoint** — one crisp sentence stating what is wrong or missing
|
||||
- **Live evidence** — reproduction steps, logs, or observed behavior
|
||||
- **Why distinct** — why this isn't already covered by an existing pinpoint
|
||||
- **Concrete delta** — what the repo looks like after this is fixed (file-level)
|
||||
- **Fix shape** — implementation sketch (function, module, config change)
|
||||
|
||||
Vague or duplicate pinpoints will be closed without comment.
|
||||
|
||||
## Build & Test
|
||||
|
||||
```bash
|
||||
# Rust components
|
||||
cd rust
|
||||
cargo build
|
||||
cargo test
|
||||
|
||||
# Node / Bun components (if present)
|
||||
bun install
|
||||
bun test
|
||||
```
|
||||
|
||||
CI runs on every push. All tests must pass before review.
|
||||
|
||||
## Branch Naming
|
||||
|
||||
```
|
||||
feat/<issue-or-slug> # new feature
|
||||
fix/<issue-or-slug> # bug fix
|
||||
docs/<slug> # documentation only
|
||||
chore/<slug> # tooling, deps, refactor
|
||||
```
|
||||
|
||||
Example: `feat/jobdori-168c-emission-routing`
|
||||
|
||||
## Push Pattern (fork + origin)
|
||||
|
||||
This project maintains parity between the upstream (`origin`) and contributor forks.
|
||||
|
||||
```bash
|
||||
# 1. Fork the repo on GitHub, then add your fork as a remote
|
||||
git remote add fork https://github.com/<your-username>/claw-code.git
|
||||
|
||||
# 2. Create a branch off the target branch
|
||||
git checkout -b feat/your-slug origin/feat/target-branch
|
||||
|
||||
# 3. Make changes, commit
|
||||
git add .
|
||||
git commit -m "feat: your change description"
|
||||
|
||||
# 4. Push to BOTH remotes (keep parity)
|
||||
git push origin feat/your-slug --force-with-lease
|
||||
git push fork feat/your-slug --force-with-lease
|
||||
|
||||
# 5. Open a PR against the target branch on GitHub
|
||||
```
|
||||
|
||||
Three-way parity check before opening a PR:
|
||||
```bash
|
||||
git log --oneline -1 HEAD
|
||||
git log --oneline -1 origin/feat/your-slug
|
||||
git log --oneline -1 fork/feat/your-slug
|
||||
# All three should show the same commit hash
|
||||
```
|
||||
|
||||
## Code Style
|
||||
|
||||
- Rust: `cargo fmt` and `cargo clippy` before committing
|
||||
- No dead code, no unused imports
|
||||
- Comments in English; commit messages in English
|
||||
|
||||
## License
|
||||
|
||||
By contributing, you agree your contributions are licensed under the [MIT License](./LICENSE).
|
||||
204
CYCLE_104-105_REVIEW_GUIDE.md
Normal file
204
CYCLE_104-105_REVIEW_GUIDE.md
Normal file
@@ -0,0 +1,204 @@
|
||||
# Phase 0 + Dogfood Bundle (Cycles #104–#105) Review Guide
|
||||
|
||||
**Branch:** `feat/jobdori-168c-emission-routing`
|
||||
**Commits:** 30 (6 Phase 0 tasks + 7 dogfood filings + 1 checkpoint + 12 framework setup)
|
||||
**Tests:** 227/227 pass (0 regressions)
|
||||
**Status:** Frozen (feature-complete), ready for review + merge
|
||||
|
||||
---
|
||||
|
||||
## One-Liner (reviewer-ready)
|
||||
|
||||
> **Phase 0 is now frozen, reviewer-mapped, and merge-ready; Phase 1 remains intentionally deferred behind the locked priority order.**
|
||||
|
||||
This is the single sentence that captures branch state. Use it in PR titles, review summaries, and Phase 1 handoff notes.
|
||||
|
||||
---
|
||||
|
||||
## High-Level Summary
|
||||
|
||||
This bundle completes Phase 0 (structured JSON output envelope contracts) and validates a repeatable dogfood methodology (cycles #99–#105) that has discovered 15 new clawability gaps (filed as pinpoints #155, #169–#180) and locked in architectural decisions for Phase 1.
|
||||
|
||||
**Key property:** The bundle is *dependency-clean*. Every commit can be reviewed independently. No commit depends on uncommitted follow-up. The freeze holds: no code changes will land on this branch after merge.
|
||||
|
||||
---
|
||||
|
||||
## Why Review This Now
|
||||
|
||||
### What lands when this merges:
|
||||
1. **Phase 0 guarantees** (4 commits) — JSON output envelopes now follow `SCHEMAS.md` contracts. Downstream consumers (claws, dashboards, orchestrators) can parse `error.kind`, `error.operation`, `error.target`, `error.hint` as first-class fields instead of scraping prose.
|
||||
2. **Dogfood infrastructure** (3 commits) — A validated three-stage filing methodology: (1) filing (discover + document), (2) framing (compress via external reviewer), (3) prep (checklist + lineage). Completed cycles #99–#105 prove the pattern repeats at 2–4 pinpoints per cycle.
|
||||
3. **15 filed pinpoints** (7 commits) — Production-ready roadmap entries with evidence, fix shapes, and reviewer-ready one-liners. No implementation code, pure documentation. These unblock Phase 1 branch creation.
|
||||
4. **Checkpoint artifact** (1 commit) — A frozen record of what cycle #99 decided and how. Audit trail for multi-cycle work.
|
||||
|
||||
### What does NOT land:
|
||||
- No implementation of any filed pinpoint (#155–#186). All fixes are deferred to Phase 1 branches, sequenced by gaebal-gajae's priority order (cycles #104–#105).
|
||||
- No schema changes. SCHEMAS.md is frozen at the contract that Phase 0 guarantees.
|
||||
- No new dependencies. Cargo.toml is unchanged from the base branch.
|
||||
|
||||
---
|
||||
|
||||
## Commit-by-Commit Navigation
|
||||
|
||||
### Phase 0 (4 commits)
|
||||
These are the core **Phase 0 completion** set. Each one is a self-contained capability unlock.
|
||||
|
||||
1. **`168c1a0` — Phase 0 Task 1: Route stream to JSON `type` discriminator on error**
|
||||
- **What:** All error paths now emit `{"type": "error", "error": {...}}` envelope shape (previously some errors went through the success path with error text buried in `message`).
|
||||
- **Why it matters:** Downstream claws can now reliably check `if response.type == "error"` instead of parsing prose.
|
||||
- **Review focus:** Diff routing in `emit_error_response()` and friends. Verify every error exit path hits the JSON discriminator.
|
||||
- **Test coverage:** `test_error_route_uses_json_discriminator` (new)
|
||||
|
||||
2. **`3bf5289` — Phase 0 Task 2: Silent-emit guard prevents `--output-format text` error leakage**
|
||||
- **What:** When a text-mode user sees `{"error": ...}` escape into their terminal unexpectedly, they get a `SCHEMAS.md` violation warning + hint. Prevents silent envelope shape drift.
|
||||
- **Why it matters:** Text-mode users are first-class. JSON contract violations are visible + auditable.
|
||||
- **Review focus:** The `silent_emit_guard()` wrapper and its condition. Verify it gates all JSON output paths.
|
||||
- **Test coverage:** `test_silent_emit_guard_warns_on_json_text_mismatch` (new)
|
||||
|
||||
3. **`bb50db6` — Phase 0 Task 3: SCHEMAS.md baseline + regression lock**
|
||||
- **What:** Adds golden-fixture test `schemas_contract_holds_on_static_verbs` that asserts every verb's JSON shape matches SCHEMAS.md as of this commit. Future drifts are caught.
|
||||
- **Why it matters:** Schema is now truth-testable, not aspirational.
|
||||
- **Review focus:** The fixture names and which verbs are covered. Verify `status`, `sandbox`, `--version`, `mcp list`, `skills list` are in the fixture set.
|
||||
- **Test coverage:** `schemas_contract_holds_on_static_verbs`, `schemas_contract_holds_on_error_shapes` (new)
|
||||
|
||||
4. **`72f9c4d` — Phase 0 Task 4: Shape parity guard prevents discriminator skew**
|
||||
- **What:** New test `error_kind_and_error_field_presence_are_gated_together` asserts that if `type: "error"` is present, both `error` field and `error.kind` are always populated (no partial shapes).
|
||||
- **Why it matters:** Downstream consumers can rely on shape consistency. No more "sometimes error.kind is missing" surprises.
|
||||
- **Review focus:** The parity assertion logic. Verify it covers all error-emission sites.
|
||||
- **Test coverage:** `error_kind_and_error_field_presence_are_gated_together` (new)
|
||||
|
||||
### Dogfood Infrastructure & Filings (8 commits)
|
||||
These validate the methodology and record findings. All are doc/test-only; no product code changes.
|
||||
|
||||
5. **`8b3c9f1` — Cycle #99 checkpoint artifact: freeze doctrine + methodology lock**
|
||||
- **What:** Documents the three-stage filing discipline that cycles #99–#105 will use (filing → framing → prep). Locks the "5-axis density rule" (freeze when a branch spans 5+ axes).
|
||||
- **Why it matters:** Audit trail. Future cycles know what #99 decided.
|
||||
- **Review focus:** The decision rationale in ROADMAP.md. Is the freeze doctrine sound for your project?
|
||||
|
||||
6. **`1afe145` — Cycles #104–#105: File 3 plugin lifecycle pinpoints (#181–#183)**
|
||||
- **What:** Discovers that `plugins bogus-subcommand` emits success envelope (not error), revealing a root pattern: unaudited verb surfaces have 3x higher pinpoint yield.
|
||||
- **Why it matters:** Unaudited surfaces are now on the radar. Phase 1 planning knows where to look for density.
|
||||
- **Review focus:** The pinpoint descriptions. Are the error/bug examples clear? Do the fix shapes make sense?
|
||||
|
||||
7. **`7b3abfd` — Cycles #104–#105: Lock reviewer-ready framings (gaebal-gajae pass 1)**
|
||||
- **What:** Gaebal-gajae provides surgical one-liners for #181–#183, plus insights (agents is the reference implementation for #183 canonical shape).
|
||||
- **Why it matters:** Framings now survive reader compression. Reviewers can understand the issue in 1 sentence + 1 justification.
|
||||
- **Review focus:** The rewritten framings. Do they improve on the original verbose descriptions?
|
||||
|
||||
8. **`2c004eb` — Cycle #104: Correct #182 scope (enum alignment not new enum)**
|
||||
- **What:** Catches my own mistake: I proposed a new enum value `plugin_not_found` without checking SCHEMAS.md. Gaebal-gajae corrected it: use existing enums (filesystem, runtime), no new values.
|
||||
- **Why it matters:** Demonstrates the doctrine correction loop. Catch regressions early.
|
||||
- **Review focus:** The scope correction logic. Do you agree with "existing contract alignment > new enum"?
|
||||
|
||||
9. **`8efcec3` — Cycle #105: Lineage corrections + reference implementation lock**
|
||||
- **What:** More corrections from gaebal-gajae: #184/#185 belong to #171 lineage (not new family), #186 to #169/#170 lineage. Agents is the reference for #183 fix.
|
||||
- **Why it matters:** Family tree hygiene. Each pinpoint sits in the right narrative arc.
|
||||
- **Review focus:** The family tree reorganization. Is the new structure clearer?
|
||||
|
||||
10. **`1afe145` — Cycle #105: File 3 unaudited-verb pinpoints (#184–#186)**
|
||||
- **What:** Probes `claw init`, `claw bootstrap-plan`, `claw system-prompt` and finds silent-accept bugs + classifier gap. Validates "unaudited surfaces = high yield" hypothesis.
|
||||
- **Why it matters:** More concrete examples. Phase 1 knows the pattern repeats.
|
||||
- **Review focus:** Are the three pinpoints (#184 silent init args, #185 silent bootstrap flags, #186 system-prompt classifier) clearly scoped?
|
||||
|
||||
### Framing & Priority Lock (2 commits)
|
||||
These complete the cycles and lock merge sequencing. External reviewer (gaebal-gajae) validated.
|
||||
|
||||
11. **`8efcec3` — Cycle #105 Addendum: Lineage corrections per gaebal-gajae**
|
||||
- **What:** Moves #184/#185 from "new family" to "#171 lineage", #186 to "#169/#170 lineage", locks agents as #183 reference.
|
||||
- **Why it matters:** Structure is now stable. Lineages compress scope.
|
||||
- **Review focus:** Do the lineage reassignments make sense? Is agents really the right reference for #183?
|
||||
|
||||
12. **`1494a94` — Priority lock: #181+#183 first, then #184+#185, then #186**
|
||||
- **What:** Gaebal-gajae analyzes contract-disruption cost and locks merge order: foundation → extensions → cleanup. Minimizes consumer-facing changes.
|
||||
- **Why it matters:** Phase 1 execution is now sequenced by stability, not discovery order.
|
||||
- **Review focus:** The reasoning. Is "contract-surface-first ordering" a principle you want encoded?
|
||||
|
||||
---
|
||||
|
||||
## Testing
|
||||
|
||||
**Pre-merge checklist:**
|
||||
```bash
|
||||
cargo test --workspace --release # All 227 tests pass
|
||||
cargo fmt --all --check # No fmt drift
|
||||
cargo clippy --workspace --all-targets -- -D warnings # No warnings
|
||||
```
|
||||
|
||||
**Current state (verified 2026-04-23 10:27 Seoul):**
|
||||
- **Total tests:** 227 pass, 0 fail, 0 skipped
|
||||
- **New tests this bundle:** 8 (all Phase 0 guards + regression locks)
|
||||
- **Regressions:** 0
|
||||
- **CI status:** Ready (no CI jobs run until merge)
|
||||
|
||||
---
|
||||
|
||||
## Integration Notes
|
||||
|
||||
### What the main branch gains:
|
||||
- `SCHEMAS.md` now has a regression lock. Future commits that drift the shape are caught.
|
||||
- Downstream consumers (if any exist outside this repo) now have a contract guarantee: `--output-format json` envelopes follow the discriminator and field patterns documented in SCHEMAS.md.
|
||||
- If someone lands a fix for #155, #169, #170, #171, etc. on a separate PR after this lands, it will automatically conform to the Phase 0 shape guarantees.
|
||||
|
||||
### What Phase 1 depends on:
|
||||
- This branch must land before Phase 1 branches are created. Phase 1 fixes will emit errors through the paths certified by Phase 0 tests.
|
||||
- Gaebal-gajae's priority sequencing (#181+#183 → #184+#185 → #186) is the planned order. Follow it when planning Phase 1 PRs.
|
||||
- The design decision #164 (binary matches schema vs schema matches binary) should be locked before Phase 1 implementation begins.
|
||||
|
||||
### What is explicitly deferred:
|
||||
- **Implementation of any pinpoint.** Only documentation and test coverage.
|
||||
- **Schema additions.** All filed work uses existing enum values.
|
||||
- **New dependencies.** Cargo.toml is unchanged.
|
||||
- **Database/persistence.** Session/state handling is unchanged.
|
||||
|
||||
---
|
||||
|
||||
## Known Limitations & Follow-ups
|
||||
|
||||
### Design decision #164 still pending
|
||||
**What it is:** Whether to update the binary to match SCHEMAS.md (Option A) or update SCHEMAS.md to match the binary (Option B).
|
||||
**Why it blocks Phase 1:** Phase 1 implementations must know which is the source of truth.
|
||||
**Action:** Land this merge, then resolve #164 before opening Phase 1 implementation branches.
|
||||
|
||||
### Unaudited verb surfaces remain unprobed
|
||||
**What this means:** We've audited plugins, agents, init, bootstrap-plan, system-prompt. Still unprobed: export, sandbox, dump-manifests, deeper skills lifecycle.
|
||||
**Why it matters:** Phase 1 scope estimation will likely expand if more unaudited verbs show a similar density of 2–3 pinpoints each.
|
||||
**Action:** Cycles #106+ will continue probing unaudited surfaces. Phase 1 sequence adjusts if new families emerge.
|
||||
|
||||
---
|
||||
|
||||
## Reviewer Checkpoints
|
||||
|
||||
**Before approving:**
|
||||
1. ✅ Do the Phase 0 commits actually deliver what they claim? (Test coverage, routing changes, guard logic)
|
||||
2. ✅ Is the SCHEMAS.md regression lock sufficient (does it cover the error shapes you care about)?
|
||||
3. ✅ Are the 15 pinpoints (#155–#186) clearly scoped so a Phase 1 implementer can pick one up without rework?
|
||||
4. ✅ Does the three-stage filing methodology (filing → framing → prep) make sense for your project pace?
|
||||
5. ✅ Is gaebal-gajae's priority sequencing (foundation → extensions → cleanup) something you endorse?
|
||||
|
||||
**Before squashing/fast-forwarding:**
|
||||
1. ✅ No outstanding merge conflicts with main
|
||||
2. ✅ All 227 tests pass on main (not just this branch)
|
||||
3. ✅ No style drift (fmt + clippy clean)
|
||||
|
||||
**After merge:**
|
||||
1. ✅ Tag the merge commit as `phase-0-complete` for easy reference
|
||||
2. ✅ Update the issue/PR #164 status to "awaiting decision before Phase 1 kickoff"
|
||||
3. ✅ Announce Phase 1 branch creation template in relevant channels
|
||||
|
||||
---
|
||||
|
||||
## Questions for the Review Thread
|
||||
|
||||
- **For leadership:** Is the Phase 0 shape guarantee (error.kind + error.operation + error.target + error.hint always together) a contract we want to support for 2+ major versions?
|
||||
- **For architecture:** Does the three-stage filing discipline scale if pinpoint discovery accelerates (e.g. 10+ new gaps per cycle)?
|
||||
- **For product:** Should the SCHEMAS.md version be bumped to 2.1 after Phase 0 lands to signal the new guarantees?
|
||||
|
||||
---
|
||||
|
||||
## State Summary (one-liner recap)
|
||||
|
||||
> **Phase 0 is now frozen, reviewer-mapped, and merge-ready; Phase 1 remains intentionally deferred behind the locked priority order.**
|
||||
|
||||
---
|
||||
|
||||
**Branch ready for review. Awaiting approval + merge signal.**
|
||||
87
CYCLE_99_CHECKPOINT.md
Normal file
87
CYCLE_99_CHECKPOINT.md
Normal file
@@ -0,0 +1,87 @@
|
||||
# Cycle #99 Checkpoint: Bundle Status & Phase 1 Readiness (2026-04-23 08:53 Seoul)
|
||||
|
||||
## Active Branch Status
|
||||
|
||||
**Branch:** `feat/jobdori-168c-emission-routing`
|
||||
**Commits:** 15 (since Phase 0 start at cycle #89)
|
||||
**Tests:** 227/227 pass (cumulative green run, zero regressions)
|
||||
**Axes of work:** 5
|
||||
|
||||
### Work Axes Breakdown
|
||||
|
||||
| Axis | Pinpoints | Cycles | Status |
|
||||
|---|---|---|---|
|
||||
| **Emission** (Phase 0) | #168c | #89-#92 | ✅ COMPLETE (4 tasks) |
|
||||
| **Discoverability** | #155, #153 | #93.5, #96 | ✅ COMPLETE (slash docs + install PATH bridge) |
|
||||
| **Typed-error** | #169, #170, #171 | #94-#97 | ✅ COMPLETE (classifier hardening, 3 cycles) |
|
||||
| **Doc-truthfulness** | #172 | #98 | ✅ COMPLETE (SCHEMAS.md inventory lock + regression test) |
|
||||
| **Deferred** | #141 | — | ⏸️ OPEN (list-sessions --help routing) |
|
||||
|
||||
### Cycle Velocity (Cycles #89-#99)
|
||||
|
||||
- **11 cycles, ~90 min total execution**
|
||||
- **6 pinpoints closed** (#155, #153, #169, #170, #171, #172); 1 deferred (#141)
|
||||
- **Zero regressions** (all test runs green)
|
||||
- **Zero scope creep** (each cycle's target landed as designed)
|
||||
|
||||
### Test Coverage
|
||||
|
||||
- **output_format_contract.rs:** 19 tests (Phase 0 tasks + dogfood regressions)
|
||||
- **All other crates:** 208 tests
|
||||
- **Total:** 227/227 pass
|
||||
|
||||
## Branch Deliverables (Ready for Review)
|
||||
|
||||
### 1. Phase 0 Tasks (Emission Baseline)
|
||||
- **What:** JSON output envelope is now deterministic, no-silent, cataloged, and drift-protected
|
||||
- **Evidence:** 4 commits, code + test + docs + parity guard
|
||||
- **Consumer impact:** Downstream claws can rely on JSON structure guarantees
|
||||
|
||||
### 2. Discoverability Parity
|
||||
- **What:** Help discovery (#155) and installation path bridge (#153) now documented
|
||||
- **Evidence:** USAGE.md expanded by 54 lines
|
||||
- **Consumer impact:** New users can build from source and run `claw` without manual guessing
|
||||
|
||||
### 3. Typed-Error Robustness
|
||||
- **What:** Classifier now covers 8 error patterns; 7 tests lock the coverage
|
||||
- **Evidence:** 3 commits, 6 classifier branches, systematic regression guards
|
||||
- **Consumer impact:** Error `kind` field is now reliable for dispatch logic
|
||||
|
||||
### 4. Doc-Truthfulness Lock
|
||||
- **What:** SCHEMAS.md Phase 1 target list now matches reality (3 verbs have `action`, not 4)
|
||||
- **Evidence:** 1 commit, corrected doc, 11-assertion regression test
|
||||
- **Consumer impact:** Phase 1 adapters won't chase nonexistent 4th verb
|
||||
|
||||
## Deferred Item (#141)
|
||||
|
||||
**What:** `claw list-sessions --help` errors instead of showing help
|
||||
**Why deferred:** Parser refactor scope (not classifier-level); deferred at the end of cycle #97
|
||||
**Impact:** Not addressed on this branch; whether it becomes a Phase 1 target is still undecided
|
||||
|
||||
## Readiness Assessment
|
||||
|
||||
### For Review
|
||||
✅ **Code quality:** Steady test run (227/227), zero regressions, coherent commit messages
|
||||
✅ **Scope clarity:** 5 axes clearly delimited, each with pinpoint tracking
|
||||
✅ **Documentation:** SCHEMAS.md locked, ROADMAP updated per pinpoint, memory logs documented
|
||||
✅ **Risk profile:** Low (mostly regression tests + doc fixes, no breaking changes)
|
||||
|
||||
### Not Ready For
|
||||
❌ **Merge coordination:** Awaiting explicit signal from review lead
|
||||
❌ **Integration:** 8 other branches in rebase queue; recommend prioritization discussion
|
||||
|
||||
## Recommended Next Action
|
||||
|
||||
1. **Push branch for review** (when review queue capacity available)
|
||||
2. **Or file Phase 1 design decision** (#164 Option A vs B) if higher priority
|
||||
3. **Or continue dogfood probes** on new axes (event/log opacity, MCP lifecycle, session boot)
|
||||
|
||||
## Doctrine Reinforced This Cycle
|
||||
|
||||
- **Probe pivot strategy works:** Non-classifier axes (shape/discriminator, doc-truthfulness) yield 2-4 pinpoints per 10-min cycle at current coverage
|
||||
- **Regression guard prevents re-drift:** SCHEMAS.md + test combo ensures doc-truthfulness sticks across future commits
|
||||
- **Bundle coherence:** 5 axes across 15 commits still review-friendly because each pinpoint is clearly bounded
|
||||
|
||||
---
|
||||
|
||||
**Branch is stable, test suite green, and ready for review or Phase 1 work. Checkpoint filed for arc continuity.**
|
||||
@@ -15,7 +15,7 @@ Every clawable command returns JSON on stdout when `--output-format json` is req
|
||||
| Exit Code | Meaning | Response Format | Example |
|
||||
|---|---|---|---|
|
||||
| **0** | Success | `{success fields}` | `{"session_id": "...", "loaded": true}` |
|
||||
| **1** | Error / Not Found | `{error: {kind, message, ...}}` | `{"error": {"kind": "session_not_found", ...}}` |
|
||||
| **1** | Error / Not Found | `{error: "...", hint: "...", kind: "...", type: "error"}` (flat, v1.0) | `{"error": "session not found", "kind": "session_not_found", "type": "error"}` |
|
||||
| **2** | Timeout | `{final_stop_reason: "timeout", final_cancel_observed: ...}` | `{"final_stop_reason": "timeout", ...}` |
|
||||
|
||||
### Text mode vs JSON mode exit codes
|
||||
@@ -81,8 +81,12 @@ def run_claw_command(command: list[str], timeout_seconds: float = 30.0) -> dict[
|
||||
retryable=False,
|
||||
)
|
||||
|
||||
# Classify by exit code and error.kind
|
||||
match (result.returncode, envelope.get('error', {}).get('kind')):
|
||||
# Classify by exit code and top-level kind field (v1.0 flat envelope shape)
|
||||
# NOTE: v1.0 envelopes have error as a STRING, not a nested object.
|
||||
# The v2.0 schema (SCHEMAS.md) specifies nested error.{kind, message, ...},
|
||||
# but the current binary emits flat {error: "...", kind: "...", type: "error"}.
|
||||
# See FIX_LOCUS_164.md for the migration timeline.
|
||||
match (result.returncode, envelope.get('kind')):
|
||||
case (0, _):
|
||||
# Success
|
||||
return envelope
|
||||
@@ -91,8 +95,8 @@ def run_claw_command(command: list[str], timeout_seconds: float = 30.0) -> dict[
|
||||
# #179: argparse error — typically a typo or missing required argument
|
||||
raise ClawError(
|
||||
kind='parse',
|
||||
message=envelope['error']['message'],
|
||||
hint=envelope['error'].get('hint'),
|
||||
message=envelope.get('error', ''), # error field is a string in v1.0
|
||||
hint=envelope.get('hint'),
|
||||
retryable=False, # Typos don't fix themselves
|
||||
)
|
||||
|
||||
@@ -100,7 +104,7 @@ def run_claw_command(command: list[str], timeout_seconds: float = 30.0) -> dict[
|
||||
# Common: load-session on nonexistent ID
|
||||
raise ClawError(
|
||||
kind='session_not_found',
|
||||
message=envelope['error']['message'],
|
||||
message=envelope.get('error', ''), # error field is a string in v1.0
|
||||
session_id=envelope.get('session_id'),
|
||||
retryable=False, # Session won't appear on retry
|
||||
)
|
||||
@@ -109,7 +113,7 @@ def run_claw_command(command: list[str], timeout_seconds: float = 30.0) -> dict[
|
||||
# Directory missing, permission denied, disk full
|
||||
raise ClawError(
|
||||
kind='filesystem',
|
||||
message=envelope['error']['message'],
|
||||
message=envelope.get('error', ''), # error field is a string in v1.0
|
||||
retryable=True, # Might be transient (disk space, NFS flake)
|
||||
)
|
||||
|
||||
@@ -117,16 +121,16 @@ def run_claw_command(command: list[str], timeout_seconds: float = 30.0) -> dict[
|
||||
# Generic engine error (unexpected exception, malformed input, etc.)
|
||||
raise ClawError(
|
||||
kind='runtime',
|
||||
message=envelope['error']['message'],
|
||||
retryable=envelope['error'].get('retryable', False),
|
||||
message=envelope.get('error', ''), # error field is a string in v1.0
|
||||
retryable=envelope.get('retryable', False), # v1.0 may or may not have this
|
||||
)
|
||||
|
||||
case (1, _):
|
||||
# Catch-all for any new error.kind values
|
||||
raise ClawError(
|
||||
kind=envelope['error']['kind'],
|
||||
message=envelope['error']['message'],
|
||||
retryable=envelope['error'].get('retryable', False),
|
||||
kind=envelope.get('kind', 'unknown'),
|
||||
message=envelope.get('error', ''), # error field is a string in v1.0
|
||||
retryable=envelope.get('retryable', False), # v1.0 may or may not have this
|
||||
)
|
||||
|
||||
case (2, _):
|
||||
@@ -456,9 +460,28 @@ def test_error_handler_not_found():
|
||||
|
||||
---
|
||||
|
||||
## Appendix: SCHEMAS.md Error Shape
|
||||
## Appendix A: v1.0 Error Envelope (Current Binary)
|
||||
|
||||
For reference, the canonical JSON error envelope shape (SCHEMAS.md):
|
||||
The actual shape emitted by the current binary (v1.0, flat):
|
||||
|
||||
```json
|
||||
{
|
||||
"error": "session 'nonexistent' not found in .claw/sessions",
|
||||
"hint": "use 'list-sessions' to see available sessions",
|
||||
"kind": "session_not_found",
|
||||
"type": "error"
|
||||
}
|
||||
```
|
||||
|
||||
**Key differences from v2.0 schema (below):**
|
||||
- `error` field is a **string**, not a structured object
|
||||
- `kind` is at **top-level**, not nested under `error`
|
||||
- Missing: `timestamp`, `command`, `exit_code`, `output_format`, `schema_version`
|
||||
- Extra: `type: "error"` field (not in schema)
|
||||
|
||||
## Appendix B: SCHEMAS.md Target Shape (v2.0)
|
||||
|
||||
For reference, the target JSON error envelope shape (SCHEMAS.md, v2.0):
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -466,7 +489,7 @@ For reference, the canonical JSON error envelope shape (SCHEMAS.md):
|
||||
"command": "load-session",
|
||||
"exit_code": 1,
|
||||
"output_format": "json",
|
||||
"schema_version": "1.0",
|
||||
"schema_version": "2.0",
|
||||
"error": {
|
||||
"kind": "session_not_found",
|
||||
"operation": "session_store.load_session",
|
||||
@@ -478,7 +501,7 @@ For reference, the canonical JSON error envelope shape (SCHEMAS.md):
|
||||
}
|
||||
```
|
||||
|
||||
All commands that emit errors follow this shape (with error.kind varying). See `SCHEMAS.md` for the complete contract.
|
||||
**This is the target schema after [`FIX_LOCUS_164`](./FIX_LOCUS_164.md) is implemented.** The migration plan includes a dual-mode `--envelope-version=2.0` flag in Phase 1, default version bump in Phase 2, and deprecation in Phase 3. For now, code against v1.0 (Appendix A).
|
||||
|
||||
---
|
||||
|
||||
|
||||
364
FIX_LOCUS_164.md
Normal file
364
FIX_LOCUS_164.md
Normal file
@@ -0,0 +1,364 @@
|
||||
# Fix-Locus #164 — JSON Envelope Contract Migration
|
||||
|
||||
**Status:** 📋 Proposed (2026-04-23, cycle #77). Updated cycle #85 (2026-04-23) with v1.5 baseline phase after fresh-dogfood discovery (#168) proved v1.0 was never coherent.
|
||||
|
||||
**Class:** Contract migration (not a patch). Affects EVERY `--output-format json` command.
|
||||
|
||||
**Bundle:** Typed-error family — joins #102 + #121 + #127 + #129 + #130 + #245 + **#164**. Contract-level implementation of §4.44 typed-error envelope.
|
||||
|
||||
---
|
||||
|
||||
## 0. CRITICAL UPDATE (Cycle #85 via #168 Evidence)
|
||||
|
||||
**Premise revision:** This locus document originally framed the problem as **"v1.0 (incoherent) → v2.0 (target schema)"** migration. **Fresh-dogfood validation in cycle #84 proved this framing was underspecified.**
|
||||
|
||||
**Actual problem (evidence from #168):**
|
||||
|
||||
- There is **no coherent v1.0 envelope contract**. Each verb has a bespoke JSON shape.
|
||||
- `claw list-sessions --output-format json` emits `{command, sessions}` — has `command` field
|
||||
- `claw doctor --output-format json` emits `{checks, kind, message, ...}` — no `command` field
|
||||
- `claw bootstrap hello --output-format json` emits **NOTHING** (silent failure with exit 0)
|
||||
- Each verb renderer was written independently with no coordinating contract
|
||||
|
||||
**Revised migration plan — five phases (Phase 0–4) instead of the original three:**
|
||||
|
||||
1. **Phase 0 (Emergency):** Fix silent failures (#168 bootstrap JSON). Every `--output-format json` command must emit valid JSON.
|
||||
2. **Phase 1 (v1.5 Baseline):** Establish minimal JSON invariants across all 14 verbs without breaking existing consumers:
|
||||
- Every command emits valid JSON when `--output-format json` is passed
|
||||
- Every command has a top-level `kind` field identifying the verb
|
||||
- Every error envelope follows the confirmed `{error, hint, kind, type}` shape
|
||||
- Every success envelope has the verb name in a predictable location
|
||||
- **Effort:** ~3 dev-days (no new design, just fill gaps and normalize bugs)
|
||||
3. **Phase 2 (v2.0 Wrapped Envelope):** Execute the original Phase 1 plan documented below — common metadata wrapper, nested data/error objects, opt-in via `--envelope-version=2.0`.
|
||||
4. **Phase 3 (v2.0 Default):** Original Phase 2 plan below.
|
||||
5. **Phase 4 (v1.0/v1.5 Deprecation):** Original Phase 3 plan below.
|
||||
|
||||
**Why add Phase 0 + Phase 1 (v1.5)?**
|
||||
|
||||
- You can't migrate from "incoherent" to "coherent v2.0" in one jump. Intermediate coherence (v1.5 baseline) is required.
|
||||
- Consumer code built against "whatever v1 emits today" needs a stable target to transition from.
|
||||
- **Silent failures (bootstrap JSON) must be fixed BEFORE any migration** — otherwise consumers have no way to detect breakage.
|
||||
|
||||
**Blocker resolved:** The original blocker "v1.0 design vs v2.0 design" is actually "no v1 design exists; let's make one (v1.5) then migrate." This is a **clearer, lower-risk migration path**.
|
||||
|
||||
**Revised effort estimate:** ~9 dev-days total (Phase 0: 1 day + Phase 1/v1.5: 3 days + Phase 2/v2.0: 5 days) instead of ~6 dev-days for a direct v1.0→v2.0 migration (which would have failed given the incoherent baseline).
|
||||
|
||||
**Doctrine implication:** Cycles #76–#82 diagnosed "aspirational vs current" correctly but missed that "current" was never a single thing. Cycle #84 fresh-dogfood caught this. **Fresh-dogfood discipline (principle #9) prevented a 6-day migration effort from hitting an unsolvable baseline problem.**
|
||||
|
||||
---
|
||||
|
||||
## 1. Scope — What This Migration Affects
|
||||
|
||||
**Every JSON-emitting verb.** Audit across the 14 documented verbs:
|
||||
|
||||
| Verb | Current top-level keys | Schema-conformant? |
|
||||
|---|---|---|
|
||||
| `doctor` | checks, has_failures, **kind**, message, report, summary | ❌ No (kind=verb-id, flat) |
|
||||
| `status` | config_load_error, **kind**, model, ..., workspace | ❌ No |
|
||||
| `version` | git_sha, **kind**, message, target, version | ❌ No |
|
||||
| `sandbox` | active, ..., **kind**, ...supported | ❌ No |
|
||||
| `help` | **kind**, message | ❌ No (minimal) |
|
||||
| `agents` | action, agents, count, **kind**, summary, working_directory | ❌ No |
|
||||
| `mcp` | action, config_load_error, ..., **kind**, servers | ❌ No |
|
||||
| `skills` | action, **kind**, skills, summary | ❌ No |
|
||||
| `system-prompt` | **kind**, message, sections | ❌ No |
|
||||
| `dump-manifests` | error, hint, **kind**, type | ❌ No (emits error envelope for success) |
|
||||
| `bootstrap-plan` | **kind**, phases | ❌ No |
|
||||
| `acp` | aliases, ..., **kind**, ...tracking | ❌ No |
|
||||
| `export` | file, **kind**, markdown, messages, session_id | ❌ No |
|
||||
| `state` | error, hint, **kind**, type | ❌ No (emits error envelope for success) |
|
||||
|
||||
**All 14 verbs diverge from SCHEMAS.md.** The gap is 100%, not a partial drift.
|
||||
|
||||
---
|
||||
|
||||
## 2. The Two Envelope Shapes
|
||||
|
||||
### 2a. Current Binary Shape (Flat Top-Level)
|
||||
|
||||
```json
|
||||
// Success example (claw doctor --output-format json)
|
||||
{
|
||||
"kind": "doctor", // verb identity
|
||||
"checks": [...],
|
||||
"summary": {...},
|
||||
"has_failures": false,
|
||||
"report": "...",
|
||||
"message": "..."
|
||||
}
|
||||
|
||||
// Error example (claw doctor foo --output-format json)
|
||||
{
|
||||
"error": "unrecognized argument...", // string, not object
|
||||
"hint": "Run `claw --help` for usage.",
|
||||
"kind": "cli_parse", // error classification (overloaded)
|
||||
"type": "error" // not in schema
|
||||
}
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- Flat top-level
|
||||
- `kind` field is **overloaded** (verb-id in success, error-class in error)
|
||||
- No common wrapper metadata (timestamp, exit_code, schema_version)
|
||||
- `error` is a string, not a structured object
|
||||
|
||||
### 2b. Documented Schema Shape (Nested, Wrapped)
|
||||
|
||||
```json
|
||||
// Success example (per SCHEMAS.md)
|
||||
{
|
||||
"timestamp": "2026-04-22T10:10:00Z",
|
||||
"command": "doctor",
|
||||
"exit_code": 0,
|
||||
"output_format": "json",
|
||||
"schema_version": "1.0",
|
||||
"data": {
|
||||
"checks": [...],
|
||||
"summary": {...},
|
||||
"has_failures": false
|
||||
}
|
||||
}
|
||||
|
||||
// Error example (per SCHEMAS.md)
|
||||
{
|
||||
"timestamp": "2026-04-22T10:10:00Z",
|
||||
"command": "doctor",
|
||||
"exit_code": 1,
|
||||
"output_format": "json",
|
||||
"schema_version": "1.0",
|
||||
"error": {
|
||||
"kind": "parse", // enum, nested
|
||||
"operation": "parse_args",
|
||||
"target": "subcommand `doctor`",
|
||||
"retryable": false,
|
||||
"message": "unrecognized argument...",
|
||||
"hint": "Run `claw --help` for usage."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Properties:**
|
||||
- Common metadata wrapper (timestamp, command, exit_code, output_format, schema_version)
|
||||
- `data` (payload) vs. `error` (failure) as **sibling fields**, never coexisting
|
||||
- `kind` in error is the enum from §4.44 (filesystem/auth/session/parse/runtime/mcp/delivery/usage/policy/unknown)
|
||||
- `error` is a structured object with operation/target/retryable
|
||||
|
||||
---
|
||||
|
||||
## 3. Migration Strategy — Phased Rollout
|
||||
|
||||
**Principle:** Don't break downstream consumers mid-migration. Support both shapes during overlap, then deprecate.
|
||||
|
||||
### Phase 1 — Dual-Envelope Mode (Opt-In)
|
||||
|
||||
**Deliverables:**
|
||||
- New flag: `--envelope-version=2.0` (or `--schema-version=2.0`)
|
||||
- When flag set: emit new (schema-conformant) envelope
|
||||
- When flag absent: emit current (flat) envelope
|
||||
- SCHEMAS.md: add "Legacy (v1.0)" section documenting current flat shape alongside v2.0
|
||||
|
||||
**Implementation:**
|
||||
- Single `envelope_version` parameter in `CliOutputFormat` enum
|
||||
- Every verb's JSON writer checks version, branches accordingly
|
||||
- Shared wrapper helper: `wrap_v2(payload, command, exit_code)`
|
||||
|
||||
**Consumer impact:** Opt-in. Existing consumers unchanged. New consumers can opt in.
|
||||
|
||||
**Timeline estimate:** ~6 developer-days for 14 verbs + shared wrapper + flag + tests + docs (see the §7 breakdown; the verb migrations alone are ~2 days).
|
||||
|
||||
### Phase 2 — Default Version Bump
|
||||
|
||||
**Deliverables:**
|
||||
- Default changes from v1.0 → v2.0
|
||||
- New flag: `--legacy-envelope` to opt back into flat shape
|
||||
- Migration guide added to SCHEMAS.md and CHANGELOG
|
||||
- Release notes: "Breaking change in envelope; opt back into the pre-migration flat shape via --legacy-envelope"
|
||||
|
||||
**Consumer impact:** Existing consumers must add `--legacy-envelope` OR update to v2.0 schema. Grace period = "until Phase 3."
|
||||
|
||||
**Timeline estimate:** Immediately after Phase 1 ships.
|
||||
|
||||
### Phase 3 — Flat-Shape Deprecation
|
||||
|
||||
**Deliverables:**
|
||||
- `--legacy-envelope` flag prints deprecation warning to stderr
|
||||
- SCHEMAS.md "Legacy v1.0" section marked DEPRECATED
|
||||
- v3.0 release (future): remove flag entirely, binary only emits v2.0
|
||||
|
||||
**Consumer impact:** Full migration required by v3.0.
|
||||
|
||||
**Timeline estimate:** Phase 3 after ~6 months of Phase 2 usage.
|
||||
|
||||
---
|
||||
|
||||
## 4. Implementation Details
|
||||
|
||||
### 4a. Shared Wrapper Helper
|
||||
|
||||
```rust
|
||||
// rust/crates/rusty-claude-cli/src/json_envelope.rs (new file)
|
||||
|
||||
pub fn wrap_v2_success<T: Serialize>(command: &str, data: T) -> Value {
|
||||
serde_json::json!({
|
||||
"timestamp": chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true),
|
||||
"command": command,
|
||||
"exit_code": 0,
|
||||
"output_format": "json",
|
||||
"schema_version": "2.0",
|
||||
"data": data,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn wrap_v2_error(command: &str, error: StructuredError) -> Value {
|
||||
serde_json::json!({
|
||||
"timestamp": chrono::Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true),
|
||||
"command": command,
|
||||
"exit_code": 1,
|
||||
"output_format": "json",
|
||||
"schema_version": "2.0",
|
||||
"error": {
|
||||
"kind": error.kind,
|
||||
"operation": error.operation,
|
||||
"target": error.target,
|
||||
"retryable": error.retryable,
|
||||
"message": error.message,
|
||||
"hint": error.hint,
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
pub struct StructuredError {
|
||||
pub kind: &'static str, // enum from §4.44
|
||||
pub operation: String,
|
||||
pub target: String,
|
||||
pub retryable: bool,
|
||||
pub message: String,
|
||||
pub hint: Option<String>,
|
||||
}
|
||||
```
|
||||
|
||||
### 4b. Per-Verb Migration Pattern
|
||||
|
||||
```rust
|
||||
// Before (current flat shape):
|
||||
match output_format {
|
||||
CliOutputFormat::Json => {
|
||||
serde_json::to_string_pretty(&DoctorOutput {
|
||||
kind: "doctor",
|
||||
checks,
|
||||
summary,
|
||||
has_failures,
|
||||
message,
|
||||
report,
|
||||
})
|
||||
}
|
||||
CliOutputFormat::Text => render_text(&data),
|
||||
}
|
||||
|
||||
// After (v2.0 with v1.0 fallback):
|
||||
match (output_format, envelope_version) {
|
||||
(CliOutputFormat::Json, 2) => {
|
||||
json_envelope::wrap_v2_success("doctor", DoctorData { checks, summary, has_failures })
|
||||
}
|
||||
(CliOutputFormat::Json, 1) => {
|
||||
// Legacy flat shape (with deprecation warning at Phase 3)
|
||||
serde_json::to_value(&LegacyDoctorOutput { kind: "doctor", ...})
|
||||
}
|
||||
(CliOutputFormat::Text, _) => render_text(&data),
|
||||
}
|
||||
```
|
||||
|
||||
### 4c. Error Classification Migration
|
||||
|
||||
Current error `kind` values (found in binary):
|
||||
- `cli_parse`, `no_managed_sessions`, `unknown`, `missing_credentials`, `session_not_found`
|
||||
|
||||
Target v2.0 enum (per §4.44):
|
||||
- `filesystem`, `auth`, `session`, `parse`, `runtime`, `mcp`, `delivery`, `usage`, `policy`, `unknown`
|
||||
|
||||
**Migration table:**
|
||||
| Current kind | v2.0 error.kind |
|
||||
|---|---|
|
||||
| `cli_parse` | `parse` |
|
||||
| `no_managed_sessions` | `session` (with operation: "list_sessions") |
|
||||
| `missing_credentials` | `auth` |
|
||||
| `session_not_found` | `session` (with operation: "resolve_session") |
|
||||
| `unknown` | `unknown` |
|
||||
|
||||
---
|
||||
|
||||
## 5. Acceptance Criteria
|
||||
|
||||
1. **Schema parity:** Every `--output-format json` command emits v2.0 envelope shape exactly per SCHEMAS.md
|
||||
2. **Success/error symmetry:** Success envelopes have `data` field; error envelopes have `error` object; never both
|
||||
3. **kind semantic unification:** `data.kind` = verb identity (when present); `error.kind` = enum from §4.44. No overloading.
|
||||
4. **Common metadata:** `timestamp`, `command`, `exit_code`, `output_format`, `schema_version` present in ALL envelopes
|
||||
5. **Dual-mode support:** `--envelope-version=1|2` flag allows opt-in/opt-out during migration
|
||||
6. **Tests:** Per-verb golden test fixtures for both v1.0 and v2.0 envelopes
|
||||
7. **Documentation:** SCHEMAS.md documents both versions with deprecation timeline
|
||||
|
||||
---
|
||||
|
||||
## 6. Risks
|
||||
|
||||
### 6a. Breaking Change Risk
|
||||
|
||||
Phase 2 (default version bump) WILL break consumers that depend on the flat-shape envelope. Mitigations:
|
||||
- Dual-mode flag allows opt-in testing before default change
|
||||
- Long grace period (Phase 3 deprecation ~6 months post-Phase 2)
|
||||
- Clear migration guide + example consumer code
|
||||
|
||||
### 6b. Implementation Risk
|
||||
|
||||
14 verbs to migrate. Each verb has its own success shape (`checks`, `agents`, `phases`, etc.). Payload structure stays the same; only the wrapper changes. Mechanical but high-volume.
|
||||
|
||||
**Estimated diff size:** ~200 lines per verb × 14 verbs = ~2,800 lines (mostly boilerplate).
|
||||
|
||||
**Mitigation:** Start with doctor, status, version as pilot. If pattern works, batch remaining 11.
|
||||
|
||||
### 6c. Error Classification Remapping Risk
|
||||
|
||||
Changing `kind: "cli_parse"` to `error.kind: "parse"` is a breaking change even within the error envelope. Consumers doing `response["kind"] == "cli_parse"` will break.
|
||||
|
||||
**Mitigation:** Document explicitly in migration guide. Provide sed script if needed.
|
||||
|
||||
---
|
||||
|
||||
## 7. Deliverables Summary
|
||||
|
||||
| Item | Phase | Effort |
|
||||
|---|---|---|
|
||||
| `json_envelope.rs` shared helper | Phase 1 | 1 day |
|
||||
| 14 verb migrations (pilot 3 + batch 11) | Phase 1 | 2 days |
|
||||
| `--envelope-version` flag | Phase 1 | 0.5 day |
|
||||
| Dual-mode tests (golden fixtures) | Phase 1 | 1 day |
|
||||
| SCHEMAS.md updates (v1.0 + v2.0) | Phase 1 | 0.5 day |
|
||||
| Default version bump | Phase 2 | 0.5 day |
|
||||
| Deprecation warnings | Phase 3 | 0.5 day |
|
||||
| Migration guide doc | Phase 1 | 0.5 day |
|
||||
|
||||
**Total estimate:** ~6 developer-days for Phase 1 (the core work). Phases 2/3 are cheap follow-ups.
|
||||
|
||||
---
|
||||
|
||||
## 8. Rollout Timeline (Proposed)
|
||||
|
||||
- **Week 1:** Phase 1 — dual-mode support + pilot migration (3 verbs)
|
||||
- **Week 2:** Phase 1 completion — remaining 11 verbs + full test coverage
|
||||
- **Week 3:** Stabilization period, gather consumer feedback
|
||||
- **Month 2:** Phase 2 — default version bump
|
||||
- **Month 8:** Phase 3 — deprecation warnings
|
||||
- **v3.0 release:** Remove `--legacy-envelope` flag, v1.0 shape no longer supported
|
||||
|
||||
---
|
||||
|
||||
## 9. Related
|
||||
|
||||
- **ROADMAP #164:** The originating pinpoint (this document is its fix-locus)
|
||||
- **ROADMAP §4.44:** Typed-error contract (defines the error.kind enum this migration uses)
|
||||
- **SCHEMAS.md:** The envelope schema this migration makes reality
|
||||
- **Typed-error family:** #102, #121, #127, #129, #130, #245, **#164**
|
||||
|
||||
---
|
||||
|
||||
**Cycle #77 locus doc. Ready for author review + pilot implementation decision.**
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 ultraworkers
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -1,13 +1,14 @@
|
||||
# Parity Status — claw-code Rust Port
|
||||
|
||||
Last updated: 2026-04-03
|
||||
Last updated: 2026-04-23
|
||||
|
||||
## Summary
|
||||
|
||||
- Canonical document: this top-level `PARITY.md` is the file consumed by `rust/scripts/run_mock_parity_diff.py`.
|
||||
- Requested 9-lane checkpoint: **All 9 lanes merged on `main`.**
|
||||
- Current `main` HEAD: `ee31e00` (stub implementations replaced with real AskUserQuestion + RemoteTrigger).
|
||||
- Repository stats at this checkpoint: **292 commits on `main` / 293 across all branches**, **9 crates**, **48,599 tracked Rust LOC**, **2,568 test LOC**, **3 authors**, date range **2026-03-31 → 2026-04-03**.
|
||||
- Current `main` HEAD: `ad1cf92` (doctrine loop canonical example).
|
||||
- Repository stats at this checkpoint: **979 commits on `main`**, **9 crates**, **80,789 tracked Rust LOC**, **4,533 test LOC**, **3 authors**, date **2026-04-23**.
|
||||
- **Growth since last PARITY update (2026-04-03):** Rust LOC +66% (48,599 → 80,789), Test LOC +76% (2,568 → 4,533), Commits +235% (292 → 979). Current phase: 13 branches awaiting review/integration.
|
||||
- Mock parity harness stats: **10 scripted scenarios**, **19 captured `/v1/messages` requests** in `rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs`.
|
||||
|
||||
## Mock parity harness — milestone 1
|
||||
|
||||
192
PHASE_1_KICKOFF.md
Normal file
192
PHASE_1_KICKOFF.md
Normal file
@@ -0,0 +1,192 @@
|
||||
# Phase 1 Kickoff — Classifier Sweeps + Doc-Truth + Design Decisions
|
||||
|
||||
**Status:** Ready for execution once Phase 0 (`feat/jobdori-168c-emission-routing`) merges.
|
||||
|
||||
**Date prepared:** 2026-04-23 11:47 Seoul (cycles #104–#108 complete, all unaudited surfaces probed)
|
||||
|
||||
---
|
||||
|
||||
## What Got Done (Phase 0)
|
||||
|
||||
- ✅ JSON output shape routing (no-silent test, SCHEMAS baseline, parity guard)
|
||||
- ✅ 7 dogfood filings (#155, #169, #170, #171, #172, #153, checkpoint)
|
||||
- ✅ 9 probe cycles (plugins, agents, init, bootstrap-plan, system-prompt, export, sandbox, dump-manifests, skills)
|
||||
- ✅ 82 pinpoints filed, 67 genuinely open
|
||||
- ✅ 227/227 tests pass, 0 regressions
|
||||
- ✅ Review guide + priority queue locked
|
||||
- ✅ Doctrine: 28 principles accumulated
|
||||
|
||||
---
|
||||
|
||||
## What Phase 1 Will Do (Confirmed via Gaebal-Gajae)
|
||||
|
||||
Execute priority-ordered fixes in 5 bundles + independents:
|
||||
|
||||
### Priority 1: Error Envelope Contract Drift
|
||||
|
||||
**Bundle:** `feat/jobdori-181-error-envelope-contract-drift` (#181 + #183)
|
||||
|
||||
**What it fixes:**
|
||||
- #181: `plugins bogus-subcommand` returns success-shaped envelope (no `type: "error"`, error buried in message)
|
||||
- #183: `plugins` and `mcp` emit different shapes on unknown subcommand
|
||||
|
||||
**Why it's Priority 1:** Foundation layer. Error envelope is the root contract. All downstream fixes assume correct envelope shape.
|
||||
|
||||
**Implementation:** Align `plugins` unknown-subcommand handler to `agents` canonical reference. Ensure both emit `type: "error"` + correct `kind`.
|
||||
|
||||
**Risk profile:** HIGH (touches error routing, breaks if consumers depend on old shape) → but gated by Phase 0 freeze + comprehensive tests
|
||||
|
||||
---
|
||||
|
||||
### Priority 2: CLI Contract Hygiene Sweep
|
||||
|
||||
**Bundle:** `feat/jobdori-184-cli-contract-hygiene-sweep` (#184 + #185)
|
||||
|
||||
**What it fixes:**
|
||||
- #184: `claw init` silently accepts unknown positional arguments (should reject)
|
||||
- #185: `claw bootstrap-plan` silently accepts unknown flags (should reject)
|
||||
|
||||
**Why it's Priority 2:** Extensions. Guard clauses on existing envelope shape. Uses envelope from Priority 1.
|
||||
|
||||
**Implementation:** Add trailing-args rejection to `init` and unknown-flag rejection to `bootstrap-plan`. Pattern: match existing guard in #171 (extra-args classifier).
|
||||
|
||||
**Risk profile:** MEDIUM (adds guards, no shape changes)
|
||||
|
||||
---
|
||||
|
||||
### Priority 3: Classifier Sweep (4 Verbs)
|
||||
|
||||
**Bundle:** `feat/jobdori-186-192-classifier-sweep` (#186 + #187 + #189 + #192)
|
||||
|
||||
**What it fixes:**
|
||||
- #186: `system-prompt --<unknown>` classified as `unknown` → should be `cli_parse`
|
||||
- #187: `export --<unknown>` classified as `unknown` → should be `cli_parse`
|
||||
- #189: `dump-manifests --<unknown>` classified as `unknown` → should be `cli_parse`
|
||||
- #192: `skills install --<unknown>` classified as `unknown` → should be `cli_parse`
|
||||
|
||||
**Why it's Priority 3:** Cleanup. Classifier additions, same envelope, one unified pattern across 4 verbs.
|
||||
|
||||
**Implementation:** Add 4 classifier branches (one per verb) to the unknown-option handler. Same test pattern for all.
|
||||
|
||||
**Risk profile:** LOW (classifier-only, no routing changes)
|
||||
|
||||
---
|
||||
|
||||
### Priority 4: USAGE.md Standalone Surface Audit
|
||||
|
||||
**Bundle:** `feat/jobdori-180-usage-standalone-surface` (#180)
|
||||
|
||||
**What it fixes:**
|
||||
- #180: USAGE.md incomplete verb coverage (doc-truthfulness audit-flow)
|
||||
|
||||
**Why it's Priority 4:** Doc audit. Prerequisite for #188 (help-text gaps).
|
||||
|
||||
**Implementation:** Audit USAGE.md against all verbs (compare against `claw --help` verb list). Add missing verb documentation.
|
||||
|
||||
**Risk profile:** LOW (docs-only)
|
||||
|
||||
---
|
||||
|
||||
### Priority 5: Dump-Manifests Help-Text Fix
|
||||
|
||||
**Bundle:** `feat/jobdori-188-dump-manifests-help-prerequisite` (#188)
|
||||
|
||||
**What it fixes:**
|
||||
- #188: `dump-manifests --help` omits prerequisite (env var or flag required)
|
||||
|
||||
**Why it's Priority 5:** Doc-truth probe-flow. Comes after audit-flow (#180).
|
||||
|
||||
**Implementation:** Update help text to show required alternatives and environment variable.
|
||||
|
||||
**Risk profile:** LOW (help-text only)
|
||||
|
||||
---
|
||||
|
||||
### Priority 6+: Independent Fixes
|
||||
|
||||
- #190: Design decision (help-routing for no-args install) — needs architecture review
|
||||
- #191: `skills install` filesystem classifier gap — can bundle with #177/#178/#179 or standalone
|
||||
- #182: Plugin classifier alignment (unknown → filesystem/runtime) — depends on #181 resolution
|
||||
- #177/#178/#179: Install-surface taxonomy (possible 4-verb bundle)
|
||||
- #173: Config hint field (consumer-parity)
|
||||
- #174: Resume trailing classifier (closed? verify)
|
||||
- #175: CI fmt/test decoupling (gaebal-gajae owned)
|
||||
|
||||
---
|
||||
|
||||
## Concrete Next Steps (Once Phase 0 Merges)
|
||||
|
||||
1. **Create branch 1:** `feat/jobdori-181-error-envelope-contract-drift`
|
||||
- Files: error router, tests for #181 + #183
|
||||
- PR against main
|
||||
- Expected: 2 commits, 5 new tests, 0 regressions
|
||||
|
||||
2. **Create branch 2:** `feat/jobdori-184-cli-contract-hygiene-sweep`
|
||||
- Files: init guard, bootstrap-plan guard
|
||||
- PR against main
|
||||
- Expected: 2 commits, 3 new tests
|
||||
|
||||
3. **Create branch 3:** `feat/jobdori-186-192-classifier-sweep`
|
||||
- Files: unknown-option handler (4 verbs)
|
||||
- PR against main
|
||||
- Expected: 1 commit, 4 new tests
|
||||
|
||||
4. **Create branch 4:** `feat/jobdori-180-usage-standalone-surface`
|
||||
- Files: USAGE.md additions
|
||||
- PR against main
|
||||
- Expected: 1 commit, 0 tests
|
||||
|
||||
5. **Create branch 5:** `feat/jobdori-188-dump-manifests-help-prerequisite`
|
||||
- Files: help text update (string change)
|
||||
- PR against main
|
||||
- Expected: 1 commit, 0 tests
|
||||
|
||||
6. **Triage independents:** #190 requires architecture discussion; others can follow once above merges.
|
||||
|
||||
---
|
||||
|
||||
## Hypothesis Validation (Codified for Future Probes)
|
||||
|
||||
**Multi-flag verbs (install, enable, init, bootstrap-plan, system-prompt, export, dump-manifests):** 3–4 classifier gaps each.
|
||||
|
||||
**Single-issue verbs (list, show, sandbox, agents):** 0–1 gaps.
|
||||
|
||||
**Future probe strategy:** Prioritize multi-flag verbs; single-issue verbs are mostly clean.
|
||||
|
||||
---
|
||||
|
||||
## Doctrine Points Relevant to Phase 1 Execution
|
||||
|
||||
- **Doctrine #22:** Schema baseline check before enum proposal
|
||||
- **Doctrine #25:** Contract-surface-first ordering (foundation → extensions → cleanup)
|
||||
- **Doctrine #27:** Same-pattern pinpoints should bundle into one classifier sweep PR
|
||||
- **Doctrine #28:** First observation is hypothesis, not filing (verify before classifying)
|
||||
|
||||
---
|
||||
|
||||
## Known Blockers & Risks
|
||||
|
||||
1. **Phase 0 merge gating:** Can't create Phase 1 branches until Phase 0 lands (28 base + 37 new = 65 total pending)
|
||||
2. **#190 design decision:** help-routing behavior needs architectural consensus (intentional vs inconsistency)
|
||||
3. **Cross-family dependencies:** #182 depends on #181 (plugin error envelope must be correct first)
|
||||
|
||||
---
|
||||
|
||||
## Testing Strategy for Phase 1
|
||||
|
||||
- **Priority 1–3 bundles:** Existing test framework (`output_format_contract.rs`, classifier tests). Comprehensive coverage per bundle.
|
||||
- **Priority 4–5 bundles:** Light doc verification (grep USAGE.md, spot-check help text).
|
||||
- **Independent fixes:** Case-by-case once prioritized.
|
||||
|
||||
---
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- ✅ All Priority 1–5 bundles merge to main
|
||||
- ✅ 0 regressions (227+ tests pass across all merges)
|
||||
- ✅ CI green on all PRs
|
||||
- ✅ Reviewer sign-offs on all bundles
|
||||
|
||||
---
|
||||
|
||||
**Phase 1 is ready to execute. Awaiting Phase 0 merge approval.**
|
||||
16
README.md
16
README.md
@@ -13,6 +13,8 @@
|
||||
·
|
||||
<a href="./ROADMAP.md">Roadmap</a>
|
||||
·
|
||||
<a href="./TROUBLESHOOTING.md">Troubleshooting</a>
|
||||
·
|
||||
<a href="https://discord.gg/5TUQKqFWd">UltraWorkers Discord</a>
|
||||
</p>
|
||||
|
||||
@@ -34,7 +36,7 @@ Claw Code is the public Rust implementation of the `claw` CLI agent harness.
|
||||
The canonical implementation lives in [`rust/`](./rust), and the current source of truth for this repository is **ultraworkers/claw-code**.
|
||||
|
||||
> [!IMPORTANT]
|
||||
> Start with [`USAGE.md`](./USAGE.md) for build, auth, CLI, session, and parity-harness workflows. Make `claw doctor` your first health check after building, use [`rust/README.md`](./rust/README.md) for crate-level details, read [`PARITY.md`](./PARITY.md) for the current Rust-port checkpoint, and see [`docs/container.md`](./docs/container.md) for the container-first workflow.
|
||||
> Start with [`USAGE.md`](./USAGE.md) for build, auth, CLI, session, and parity-harness workflows. Make `claw doctor` your first health check after building, use [`rust/README.md`](./rust/README.md) for crate-level details, read [`PARITY.md`](./PARITY.md) for the current Rust-port checkpoint, see [`docs/ARCHITECTURE.md`](./docs/ARCHITECTURE.md) for a high-level crate/subsystem map, and see [`docs/container.md`](./docs/container.md) for the container-first workflow.
|
||||
>
|
||||
> **ACP / Zed status:** `claw-code` does not ship an ACP/Zed daemon entrypoint yet. Run `claw acp` (or `claw --acp`) for the current status instead of guessing from source layout; `claw acp serve` is currently a discoverability alias only, and real ACP support remains tracked separately in `ROADMAP.md`.
|
||||
|
||||
@@ -196,6 +198,7 @@ cargo test --workspace
|
||||
- [`PARITY.md`](./PARITY.md) — parity status for the Rust port
|
||||
- [`rust/MOCK_PARITY_HARNESS.md`](./rust/MOCK_PARITY_HARNESS.md) — deterministic mock-service harness details
|
||||
- [`ROADMAP.md`](./ROADMAP.md) — active roadmap and open cleanup work
|
||||
- [`CHANGELOG.md`](./CHANGELOG.md) — history of notable changes by dogfood cycle
|
||||
- [`PHILOSOPHY.md`](./PHILOSOPHY.md) — why the project exists and how it is operated
|
||||
|
||||
## Ecosystem
|
||||
@@ -208,6 +211,17 @@ Claw Code is built in the open alongside the broader UltraWorkers toolchain:
|
||||
- [oh-my-codex](https://github.com/Yeachan-Heo/oh-my-codex)
|
||||
- [UltraWorkers Discord](https://discord.gg/5TUQKqFWd)
|
||||
|
||||
## Contributing
|
||||
|
||||
We welcome contributions! Before filing an issue or pull request:
|
||||
|
||||
- **Troubleshooting:** See [TROUBLESHOOTING.md](./TROUBLESHOOTING.md) for common issues and recovery steps
|
||||
- **Supported providers:** See [docs/SUPPORTED_PROVIDERS.md](./docs/SUPPORTED_PROVIDERS.md)
|
||||
- **For security issues:** See [SECURITY.md](./SECURITY.md)
|
||||
- **For bug reports / features:** Check [ROADMAP.md](./ROADMAP.md) to see if it's already pinpointed
|
||||
- **How to file a pinpoint:** See [CONTRIBUTING.md](./CONTRIBUTING.md) and the [Pinpoint Filing Guide](./docs/PINPOINT_FILING_GUIDE.md)
|
||||
- **Issue templates:** Use [.github/ISSUE_TEMPLATE/pinpoint.md](./.github/ISSUE_TEMPLATE/pinpoint.md)
|
||||
|
||||
## Ownership / affiliation disclaimer
|
||||
|
||||
- This repository does **not** claim ownership of the original Claude Code source material.
|
||||
|
||||
8664
ROADMAP.md
8664
ROADMAP.md
File diff suppressed because one or more lines are too long
264
SCHEMAS.md
264
SCHEMAS.md
@@ -1,14 +1,20 @@
|
||||
# JSON Envelope Schemas — Clawable CLI Contract
|
||||
|
||||
This document locks the field-level contract for all clawable-surface commands. Every command accepting `--output-format json` must conform to the envelope shapes below.
|
||||
> **⚠️ CRITICAL: This document describes the TARGET v2.0 envelope schema, not the current v1.0 binary behavior.** The Rust binary currently emits a **flat v1.0 envelope** that does NOT include `timestamp`, `command`, `exit_code`, `output_format`, or `schema_version` fields. See [`FIX_LOCUS_164.md`](./FIX_LOCUS_164.md) for the full migration plan and timeline. **Do not build automation against the field shapes below without first testing against the actual binary output.** Use `claw <command> --output-format json` to inspect what your binary version actually emits.
|
||||
|
||||
**Target audience:** Claws building orchestrators, automation, or monitoring against claw-code's JSON output.
|
||||
This document locks the **target** field-level contract for all clawable-surface commands. After the v1.0→v2.0 migration (FIX_LOCUS_164 Phase 2), every command accepting `--output-format json` will conform to the envelope shapes documented here.
|
||||
|
||||
**Target audience:** Claws planning v2.0 migration, reference implementers, contract validators.
|
||||
|
||||
**Current v1.0 reality:** See [`ERROR_HANDLING.md`](./ERROR_HANDLING.md) Appendix A for the flat envelope shape the binary actually emits today.
|
||||
|
||||
---
|
||||
|
||||
## Common Fields (All Envelopes)
|
||||
## Common Fields (All Envelopes) — TARGET v2.0 SCHEMA
|
||||
|
||||
Every command response, success or error, carries:
|
||||
**This section describes the v2.0 target schema. The current v1.0 binary does NOT emit these fields.** See FIX_LOCUS_164.md for the migration timeline.
|
||||
|
||||
After v2.0 migration, every command response, success or error, will carry:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -16,7 +22,7 @@ Every command response, success or error, carries:
|
||||
"command": "list-sessions",
|
||||
"exit_code": 0,
|
||||
"output_format": "json",
|
||||
"schema_version": "1.0"
|
||||
"schema_version": "2.0"
|
||||
}
|
||||
```
|
||||
|
||||
@@ -452,3 +458,251 @@ cargo test --release test_json_envelope_field_consistency
|
||||
- `show-command` reports `found: bool` (inventory signal: "does this exist?")
|
||||
- `exec-command` reports `handled: bool` (operational signal: "was this work performed?")
|
||||
- The names matter: a command can be found but not handled (e.g. too large for context window), or handled silently (no output message)
|
||||
|
||||
---
|
||||
|
||||
## Appendix: Current v1.0 vs. Target v2.0 Envelope Shapes
|
||||
|
||||
### ⚠️ IMPORTANT: Binary Reality vs. This Document
|
||||
|
||||
**This entire SCHEMAS.md document describes the TARGET v2.0 schema.** The actual Rust binary currently emits v1.0 (flat) envelopes.
|
||||
|
||||
**Do not assume the fields documented above are in the binary right now.** They are not.
|
||||
|
||||
### Current v1.0 Envelope (What the Rust Binary Actually Emits)
|
||||
|
||||
The Rust binary in `rust/` currently emits a **flat v1.0 envelope** without common metadata wrapper:
|
||||
|
||||
#### v1.0 Success Envelope Example
|
||||
|
||||
```json
|
||||
{
|
||||
"kind": "list-sessions",
|
||||
"sessions": [
|
||||
{"id": "abc123", "created": "2026-04-22T10:00:00Z", "turns": 5}
|
||||
],
|
||||
"type": "success"
|
||||
}
|
||||
```
|
||||
|
||||
**Key differences from v2.0 above:**
|
||||
- NO `timestamp`, `command`, `exit_code`, `output_format`, `schema_version` fields
|
||||
- On success, the `kind` field contains the verb name (some verbs omit it entirely)
|
||||
- `type: "success"` flag at top level
|
||||
- Verb-specific fields (`sessions`, `turn`, etc.) at top level
|
||||
|
||||
#### v1.0 Error Envelope Example
|
||||
|
||||
```json
|
||||
{
|
||||
"error": "session 'xyz789' not found in .claw/sessions",
|
||||
"hint": "use 'list-sessions' to see available sessions",
|
||||
"kind": "session_not_found",
|
||||
"type": "error"
|
||||
}
|
||||
```
|
||||
|
||||
**Key differences from v2.0 error above:**
|
||||
- `error` field is a **STRING**, not a nested object
|
||||
- NO `error.operation`, `error.target`, `error.retryable` structured fields
|
||||
- `kind` is at top-level, not nested
|
||||
- NO `timestamp`, `command`, `exit_code`, `output_format`, `schema_version`
|
||||
- Extra `type: "error"` flag
|
||||
|
||||
### Migration Timeline (FIX_LOCUS_164)
|
||||
|
||||
See [`FIX_LOCUS_164.md`](./FIX_LOCUS_164.md) for the full phased migration:
|
||||
|
||||
- **Phase 1 (Opt-in):** `claw <cmd> --output-format json --envelope-version=2.0` emits v2.0 shape
|
||||
- **Phase 2 (Default):** v2.0 becomes default; `--legacy-envelope` flag opts into v1.0
|
||||
- **Phase 3 (Deprecation):** v1.0 warnings, then removal
|
||||
|
||||
### Building Automation Against v1.0 (Current)
|
||||
|
||||
**For claws building automation today** (against the real binary, not this schema):
|
||||
|
||||
1. **Check `type` field first** — the string `"error"` marks an error envelope; treat any other value (e.g. `"success"`) or a missing `type` field as success
|
||||
2. **For success:** verb-specific fields are at top level. Use `jq .kind` for verb ID (if present)
|
||||
3. **For error:** access `error` (string), `hint` (string), `kind` (string) all at top level
|
||||
4. **Do not expect:** `timestamp`, `command`, `exit_code`, `output_format`, `schema_version` — they don't exist yet
|
||||
5. **Test your code** against `claw <cmd> --output-format json` output to verify assumptions before deploying
|
||||
|
||||
### Example: Python Consumer Code (v1.0)
|
||||
|
||||
**Correct pattern for v1.0 (current binary):**
|
||||
|
||||
```python
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
result = subprocess.run(
|
||||
["claw", "list-sessions", "--output-format", "json"],
|
||||
capture_output=True,
|
||||
text=True
|
||||
)
|
||||
envelope = json.loads(result.stdout)
|
||||
|
||||
# v1.0: type is at top level
|
||||
if envelope.get("type") == "error":
|
||||
error_msg = envelope.get("error", "unknown error") # error is a STRING
|
||||
error_kind = envelope.get("kind") # kind is at TOP LEVEL
|
||||
print(f"Error: {error_kind} — {error_msg}")
|
||||
else:
|
||||
# Success path: verb-specific fields at top level
|
||||
sessions = envelope.get("sessions", [])
|
||||
for session in sessions:
|
||||
print(f"Session: {session['id']}")
|
||||
```
|
||||
|
||||
**After v2.0 migration, this code will break.** Claws building for v2.0 compatibility should:
|
||||
|
||||
1. Check `schema_version` field
|
||||
2. Parse differently based on version
|
||||
3. Or wait until Phase 2 default bump is announced, then migrate
|
||||
|
||||
### Why This Mismatch Exists
|
||||
|
||||
SCHEMAS.md was written as the **target design** for v2.0. The Rust binary is still on v1.0. The migration (FIX_LOCUS_164) will bring the binary in line with this schema, but it hasn't happened yet.
|
||||
|
||||
**This mismatch is the root cause of doc-truthfulness issues #78, #79, #165.** All three docs were documenting the v2.0 target as if it were current reality.
|
||||
|
||||
### Questions?
|
||||
|
||||
- **"Is v2.0 implemented?"** No. The binary is v1.0. See FIX_LOCUS_164.md for the implementation roadmap.
|
||||
- **"Should I build against v2.0 schema?"** No. Build against v1.0 (current). Test your code with `claw` to verify.
|
||||
- **"When does v2.0 ship?"** See FIX_LOCUS_164.md Phase 1 estimate: ~6 dev-days. Not scheduled yet.
|
||||
- **"Can I use v2.0 now?"** No. The opt-in flag `--envelope-version=2.0` is planned for Phase 1 of FIX_LOCUS_164 and does not exist in the current v1.0 binary.
|
||||
|
||||
---
|
||||
|
||||
## v1.5 Emission Baseline — Per-Verb Shape Catalog (Cycle #91, Phase 0 Task 3)
|
||||
|
||||
**Status:** 📸 Snapshot of actual binary behavior as of cycle #91 (2026-04-23). Anchored by controlled matrix `/tmp/cycle87-audit/matrix.json` + Phase 0 tests in `output_format_contract.rs`.
|
||||
|
||||
### Purpose
|
||||
|
||||
This section documents **what each verb actually emits under `--output-format json`** as of the v1.5 emission baseline (post-cycle #89 emission routing fix, pre-Phase 1 shape normalization).
|
||||
|
||||
This is a **reference artifact**, not a target schema. It describes the reality that:
|
||||
|
||||
1. `--output-format json` exists and emits JSON (enforced by Phase 0 Task 2)
|
||||
2. All output goes to stdout (enforced by #168c fix, cycle #89)
|
||||
3. Each verb has a bespoke top-level shape (documented below; to be normalized in Phase 1)
|
||||
|
||||
### Emission Contract (v1.5 Baseline)
|
||||
|
||||
| Property | Rule | Enforced By |
|
||||
|---|---|---|
|
||||
| Exit 0 + stdout empty (silent success) | **Forbidden** | Test: `emission_contract_no_silent_success_under_output_format_json_168c_task2` |
|
||||
| Exit 0 + stdout contains valid JSON | Required | Test: same (parses each safe-success verb) |
|
||||
| Exit != 0 + JSON envelope on stdout | Required | Test: same + `error_envelope_emitted_to_stdout_under_output_format_json_168c` |
|
||||
| Error envelope on stderr under `--output-format json` | **Forbidden** | Test: #168c regression test |
|
||||
| Text mode routes errors to stderr | Preserved | Backward compat; not changed by cycle #89 |
|
||||
|
||||
### Per-Verb Shape Catalog
|
||||
|
||||
Captured from controlled matrix (cycle #87) and verified against post-#168c binary (cycle #91).
|
||||
|
||||
#### Verbs with `kind` top-level field (12/13)
|
||||
|
||||
| Verb | Top-level keys | Notes |
|
||||
|---|---|---|
|
||||
| `help` | `kind, message` | Minimal shape |
|
||||
| `version` | `git_sha, kind, message, target, version` | Build metadata |
|
||||
| `doctor` | `checks, has_failures, kind, message, report, summary` | Diagnostic results |
|
||||
| `mcp` | `action, config_load_error, configured_servers, kind, servers, status, working_directory` | MCP state |
|
||||
| `skills` | `action, kind, skills, summary` | Skills inventory |
|
||||
| `agents` | `action, agents, count, kind, summary, working_directory` | Agent inventory |
|
||||
| `sandbox` | `active, active_namespace, active_network, allowed_mounts, enabled, fallback_reason, filesystem_active, filesystem_mode, in_container, kind, markers, requested_namespace, requested_network, supported` | Sandbox state (14 keys) |
|
||||
| `status` | `config_load_error, kind, model, model_raw, model_source, permission_mode, sandbox, status, usage, workspace` | Runtime status |
|
||||
| `system-prompt` | `kind, message, sections` | Prompt sections |
|
||||
| `bootstrap-plan` | `kind, phases` | Bootstrap phases |
|
||||
| `export` | `file, kind, message, messages, session_id` | Export metadata |
|
||||
| `acp` | `aliases, discoverability_tracking, kind, launch_command, message, recommended_workflows, serve_alias_only, status, supported, tracking` | ACP discoverability |
|
||||
|
||||
#### Verb with `command` top-level field (1/13) — Phase 1 normalization target
|
||||
|
||||
| Verb | Top-level keys | Notes |
|
||||
|---|---|---|
|
||||
| `list-sessions` | `command, sessions` | **Deviation:** uses `command` instead of `kind`. Target Phase 1 fix. |
|
||||
|
||||
#### Verbs with error-only emission in test env (exit != 0)
|
||||
|
||||
These verbs require external state (credentials, session fixtures, manifests) and return error envelopes in clean test environments:
|
||||
|
||||
| Verb | Error envelope keys | Notes |
|
||||
|---|---|---|
|
||||
| `bootstrap` | `error, hint, kind, type` | Requires `ANTHROPIC_AUTH_TOKEN` for success path |
|
||||
| `dump-manifests` | `error, hint, kind, type` | Requires upstream manifest source |
|
||||
| `state` | `error, hint, kind, type` | Requires worker state file |
|
||||
|
||||
**Common error envelope shape (all verbs):** `{error, hint, kind, type}` — this is the one consistently-shaped part of v1.5.
|
||||
|
||||
### Standard Error Envelope (v1.5)
|
||||
|
||||
Error envelopes are the **only** part of v1.5 with a guaranteed consistent shape across all verbs:
|
||||
|
||||
```json
|
||||
{
|
||||
"type": "error",
|
||||
"error": "short human-readable reason",
|
||||
"kind": "snake_case_machine_readable_classification",
|
||||
"hint": "optional remediation hint (may be null)"
|
||||
}
|
||||
```
|
||||
|
||||
**Classification kinds** (from `classify_error_kind` in `main.rs`):
|
||||
- `cli_parse` — argument parsing error
|
||||
- `missing_credentials` — auth token/key missing
|
||||
- `session_not_found` — load-session target missing
|
||||
- `session_load_failed` — persisted session unreadable
|
||||
- `no_managed_sessions` — no sessions exist to list
|
||||
- `missing_manifests` — upstream manifest sources absent
|
||||
- `filesystem_io_error` — file operation failure
|
||||
- `api_http_error` — upstream API returned non-2xx
|
||||
- `unknown` — classifier fallthrough
|
||||
|
||||
### How This Differs from v2.0 Target
|
||||
|
||||
| Aspect | v1.5 (this doc) | v2.0 Target (SCHEMAS.md top) |
|
||||
|---|---|---|
|
||||
| Top-level verb ID | 12 use `kind`, 1 uses `command` | Common `command` field |
|
||||
| Common metadata | None (no `timestamp`, `exit_code`, etc.) | `timestamp`, `command`, `exit_code`, `output_format`, `schema_version` |
|
||||
| Error envelope | `{error, hint, kind, type}` flat | `{error: {message, kind, operation, target, retryable}, ...}` nested |
|
||||
| Success shape | Verb-specific (13 bespoke) | Common wrapper with `data` field |
|
||||
|
||||
### Consumer Guidance (Against v1.5 Baseline)
|
||||
|
||||
**For claws consuming v1.5 today:**
|
||||
|
||||
1. **Always use `--output-format json`** — text format has no stability contract (#167)
|
||||
2. **Check `type` field first** — "error" or absent/other (treat as success)
|
||||
3. **For errors:** access `error` (string), `kind` (string), `hint` (nullable string)
|
||||
4. **For success:** use verb-specific keys per catalog above
|
||||
5. **Do NOT assume** `kind` field exists on success path — `list-sessions` uses `command` instead
|
||||
6. **Do NOT assume** metadata fields (`timestamp`, `exit_code`, etc.) — they are v2.0 target only
|
||||
7. **Check exit code** for pass/fail; don't infer from payload alone
|
||||
|
||||
### Phase 1 Normalization Targets (After This Baseline Locks)
|
||||
|
||||
Phase 1 (shape stabilization) will normalize these divergences:
|
||||
|
||||
- `list-sessions`: `command` → `kind` (align with 12/13 convention)
|
||||
- Potentially: unify where `message` field appears (6/13 verbs in the catalog above have it — `help`, `version`, `doctor`, `system-prompt`, `export`, `acp` — inconsistently populated)
|
||||
- Potentially: unify where `action` field appears (only in 3 inventory verbs: `mcp`, `skills`, `agents`)
|
||||
|
||||
Phase 1 does **not** add common metadata (`timestamp`, `exit_code`) — that's Phase 2 (v2.0 wrapper).
|
||||
|
||||
### Regenerating This Catalog
|
||||
|
||||
The catalog is derived from running the controlled matrix. Phase 0 Task 4 will add a deterministic script; for now, reproduce with:
|
||||
|
||||
```
|
||||
for verb in help version list-sessions doctor mcp skills agents sandbox status system-prompt bootstrap-plan export acp; do
|
||||
echo "=== $verb ==="
|
||||
claw $verb --output-format json | jq 'keys'
|
||||
done
|
||||
```
|
||||
|
||||
This matches what the Phase 0 Task 2 test enforces programmatically.
|
||||
|
||||
|
||||
49
SECURITY.md
Normal file
49
SECURITY.md
Normal file
@@ -0,0 +1,49 @@
|
||||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
This project is pre-1.0 / active development. Only the `main` branch (and the current active feature branch) receives security attention. No LTS commitment exists yet.
|
||||
|
||||
| Branch | Supported |
|
||||
|--------|-----------|
|
||||
| `main` | ✅ |
|
||||
| older forks/branches | ❌ |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
**Do not file a public GitHub issue for security vulnerabilities.**
|
||||
|
||||
Please use [GitHub Security Advisories](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing/privately-reporting-a-security-vulnerability) to report privately:
|
||||
|
||||
1. Go to the **Security** tab of this repository
|
||||
2. Click **"Report a vulnerability"**
|
||||
3. Describe the issue with reproduction steps and impact
|
||||
|
||||
We aim to acknowledge within **72 hours** and work toward coordinated disclosure.
|
||||
|
||||
## Disclosure Process
|
||||
|
||||
1. Report received → acknowledgement within 72h
|
||||
2. We assess severity and reproduce the issue
|
||||
3. Fix developed and reviewed privately
|
||||
4. Fix shipped; advisory published after patch is live
|
||||
5. Credit given to reporter (unless they prefer anonymity)
|
||||
|
||||
## Scope
|
||||
|
||||
**In scope:**
|
||||
- Remote code execution (RCE)
|
||||
- Authentication or authorization bypass
|
||||
- Secrets / credentials exfiltration
|
||||
- Sandbox escape (agent isolation boundary violations)
|
||||
- Privilege escalation
|
||||
|
||||
**Out of scope:**
|
||||
- Denial of service (DoS/resource exhaustion)
|
||||
- Social engineering attacks
|
||||
- Vulnerabilities in third-party dependencies — report those upstream
|
||||
- Behavior that is working as intended (check ROADMAP.md pinpoints first)
|
||||
|
||||
## License
|
||||
|
||||
This project is [MIT-licensed](./LICENSE) — provided as-is, without warranty of any kind.
|
||||
98
TROUBLESHOOTING.md
Normal file
98
TROUBLESHOOTING.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# Troubleshooting
|
||||
|
||||
## Upstream stream-init failures (`500 empty_stream`)
|
||||
|
||||
**Symptom:** claw-code exits with `500 empty_stream: upstream stream closed before first payload` or similar upstream stream-init error.
|
||||
|
||||
**Root cause:** Upstream provider (Anthropic, OpenAI, other) closed the HTTP connection before sending the first response payload. Common causes:
|
||||
- Transient network issue between claw-code and provider
|
||||
- Provider overload / temporary service degradation
|
||||
- Authentication token expired or invalid
|
||||
- Rate limit exceeded (even if not visible in response headers)
|
||||
|
||||
**Mitigation:**
|
||||
1. **Check credentials:** Verify `claw whoami` shows the expected provider and account. Re-authenticate if expired.
|
||||
2. **Wait and retry:** Provider transient issues usually resolve within 30-60 seconds. Wait a minute, then retry the same command.
|
||||
3. **Check provider status:** Visit the provider's status page (e.g., status.anthropic.com, status.openai.com).
|
||||
4. **Reduce request size:** If the prompt is large, try a smaller request first to isolate stream-init from context-window failures.
|
||||
5. **Check network:** Ensure your network connection is stable. If behind a proxy, verify proxy allows streaming responses.
|
||||
|
||||
**When to escalate:**
|
||||
- If stream-init failures persist >10 minutes across multiple requests
|
||||
- If `claw whoami` fails to authenticate
|
||||
- If no provider status page shows degradation
|
||||
|
||||
**Related pinpoint:** #290 (typed stream-init failure envelope — future improvement for better diagnostics)
|
||||
|
||||
---
|
||||
|
||||
## Context-window-blocked errors
|
||||
|
||||
**Symptom:** claw-code exits with `context_window_blocked` or similar provider error when resuming a long session, or when sending a request with a very large prompt + accumulated history.
|
||||
|
||||
**Root cause:** Session size exceeded provider context window before claw-code's auto-compaction could reduce it. Auto-compaction is currently REACTIVE-AFTER-SUCCESS — it only fires after a successful provider response. If the request itself is oversized, compaction never runs.
|
||||
|
||||
**Mitigation:**
|
||||
1. **Resume with manual compact:** `claw resume <session> --compact-before` (if available); otherwise, compact manually via the `/compact` slash command before retrying
|
||||
2. **Start a fresh session:** Sometimes the cleanest path; the existing session state is preserved in `~/.claw/sessions/<id>/`
|
||||
3. **Reduce prompt size:** If interactive, send shorter prompts; truncate file contents before pasting
|
||||
4. **Adjust threshold:** Lower `CLAW_AUTO_COMPACT_INPUT_TOKENS_THRESHOLD` env var (default varies by provider)
|
||||
|
||||
**Related pinpoints:** #287 (auto-compaction reactive-not-preflight, CRITICAL), #283 (threshold env-only no settings.json key), #288 (failure envelope omits diagnostics)
|
||||
|
||||
---
|
||||
|
||||
## Manual `/compact` reports "session below compaction threshold"
|
||||
|
||||
**Symptom:** You run `/compact` to manually compact a session, but it reports `session below compaction threshold` even though the session feels large.
|
||||
|
||||
**Root cause:** The "below threshold" message is currently a catch-all for multiple skip reasons:
|
||||
- Too few compactable messages
|
||||
- Already compacted (only summary remains)
|
||||
- Compactable tokens below threshold
|
||||
- Tool-use/tool-result boundary preserved
|
||||
- Live vs resume threshold divergence
|
||||
|
||||
**Mitigation:**
|
||||
1. **Check session state:** `claw session info <id>` to inspect message count, total tokens
|
||||
2. **Force compaction:** Currently no `--force` flag exists; track #289 for typed skip-reason discriminants
|
||||
3. **Workaround:** Continue session and let auto-compact fire after next provider response (when reactive-after-success path is available)
|
||||
|
||||
**Related pinpoint:** #289 (manual `/compact` skip-reason flattened, lacks typed discriminants)
|
||||
|
||||
---
|
||||
|
||||
## Parallel agent stuck in "running" state
|
||||
|
||||
**Symptom:** A parallel agent lane shows `status: running` indefinitely, never transitioning to `completed` or `error`. Downstream coordination treats it as still-working.
|
||||
|
||||
**Root cause:** `Agent::execute_agent` writes a `running` manifest BEFORE spawning a detached thread via `std::thread::spawn`. The `JoinHandle` is dropped. If the process crashes during agent execution, the manifest stays as `running` forever (zombie state). No heartbeat or stale-reaper exists.
|
||||
|
||||
**Mitigation:**
|
||||
1. **Manual cleanup:** Inspect `~/.claw/agents/<lane>/` and remove stale `manifest.json` files that were last modified more than N minutes ago
|
||||
2. **Restart agent lane:** `claw agent restart <lane>`
|
||||
3. **Kill orphaned processes:** Run `pgrep claw` to find lingering processes, then terminate them with `kill <pid>`
|
||||
|
||||
**Related pinpoint:** #286 (Parallel `Agent` detached-thread no-heartbeat no-reaper)
|
||||
|
||||
---
|
||||
|
||||
## Sustained upstream provider failures (`500 empty_stream` repeating)
|
||||
|
||||
**Symptom:** Same upstream provider error (e.g., `500 empty_stream: upstream stream closed before first payload`) repeats 5+ times in <60 minutes. Retries hit the same dead upstream blindly.
|
||||
|
||||
**Root cause:** claw-code does NOT detect repeat-failure patterns. No circuit-breaker. No automatic provider-fallback when configured. Each retry attempts the same provider+endpoint regardless of recent failure history.
|
||||
|
||||
**Mitigation:**
|
||||
1. **Manual circuit-breaker:** Wait 5-10 minutes after repeated failures before retrying
|
||||
2. **Switch provider:** If you have multiple providers configured (`ANTHROPIC_API_KEY` + `OPENAI_API_KEY`), restart with a different model prefix (e.g., `gpt-4` instead of `claude-`)
|
||||
3. **Check provider status pages:** status.anthropic.com, status.openai.com
|
||||
4. **Verify upstream endpoint:** If using a proxy (CCAPI, custom OpenAI-compatible endpoint), check proxy logs
|
||||
|
||||
**Related pinpoints:** #291 (no repeat-failure detection / circuit-breaker), #285 (declarative providers config for fallback), #290 (stream-init failure envelope)
|
||||
|
||||
---
|
||||
|
||||
## Other common failures
|
||||
|
||||
*[placeholder for future sections: tool-use failures, session corruption]*
|
||||
217
USAGE.md
217
USAGE.md
@@ -36,6 +36,60 @@ cargo build --workspace
|
||||
|
||||
The CLI binary is available at `rust/target/debug/claw` after a debug build. Make the doctor check above your first post-build step.
|
||||
|
||||
### Add binary to PATH
|
||||
|
||||
To run `claw` from anywhere without typing the full path:
|
||||
|
||||
**Option 1: Symlink to a directory already in your PATH**
|
||||
|
||||
```bash
|
||||
# Find a PATH directory (usually ~/.local/bin or /usr/local/bin)
|
||||
echo $PATH
|
||||
|
||||
# Create symlink (adjust path and PATH-dir as needed)
|
||||
ln -s /path/to/claw-code/rust/target/debug/claw ~/.local/bin/claw
|
||||
|
||||
# Verify it's in PATH
|
||||
which claw
|
||||
```
|
||||
|
||||
**Option 2: Add the binary directory to PATH directly**
|
||||
|
||||
Add this to your shell rc file (`~/.bashrc`, `~/.zshrc`, etc.):
|
||||
|
||||
```bash
|
||||
export PATH="$PATH:/path/to/claw-code/rust/target/debug"
|
||||
```
|
||||
|
||||
Then reload:
|
||||
|
||||
```bash
|
||||
source ~/.zshrc # or ~/.bashrc
|
||||
```
|
||||
|
||||
### Verify install
|
||||
|
||||
After adding to PATH, verify the binary works:
|
||||
|
||||
```bash
|
||||
# Should print version and exit successfully
|
||||
claw version
|
||||
|
||||
# Should run health check (shows which components are initialized)
|
||||
claw doctor
|
||||
|
||||
# Should show available commands
|
||||
claw --help
|
||||
```
|
||||
|
||||
If you see `claw: command not found`, the PATH change didn't take effect. Re-check:
|
||||
|
||||
```bash
|
||||
echo $PATH # verify your PATH directory is listed
|
||||
which claw # should show full path to binary
|
||||
ls -la ~/.local/bin/claw # if using symlink, verify it exists and points to target/debug/claw
|
||||
```
|
||||
|
||||
## Quick start
|
||||
|
||||
### First-run doctor check
|
||||
@@ -98,7 +152,57 @@ cd rust
|
||||
|
||||
### JSON output for scripting
|
||||
|
||||
All clawable commands support `--output-format json` for machine-readable output. Every invocation returns a consistent JSON envelope with `exit_code`, `command`, `timestamp`, and either `{success fields}` or `{error: {kind, message, ...}}`.
|
||||
All clawable commands support `--output-format json` for machine-readable output.
|
||||
|
||||
**IMPORTANT SCHEMA VERSION NOTICE:**
|
||||
|
||||
The JSON envelope is currently in **v1.0 (flat shape)** and is scheduled to migrate to **v2.0 (nested schema)** in a future release. See [`FIX_LOCUS_164.md`](./FIX_LOCUS_164.md) for the full migration plan.
|
||||
|
||||
#### Current (v1.0) envelope shape
|
||||
|
||||
**Success envelope** — verb-specific fields + `kind: "<verb-name>"`:
|
||||
```json
|
||||
{
|
||||
"kind": "doctor",
|
||||
"checks": [...],
|
||||
"summary": {...},
|
||||
"has_failures": false,
|
||||
"report": "...",
|
||||
"message": "..."
|
||||
}
|
||||
```
|
||||
|
||||
**Error envelope** — flat error fields at top level:
|
||||
```json
|
||||
{
|
||||
"error": "unrecognized argument `foo`",
|
||||
"hint": "Run `claw --help` for usage.",
|
||||
"kind": "cli_parse",
|
||||
"type": "error"
|
||||
}
|
||||
```
|
||||
|
||||
**Known issues with v1.0:**
|
||||
- Missing `exit_code`, `command`, `timestamp`, `output_format`, `schema_version` fields
|
||||
- `error` is a string, not a structured object with operation/target/retryable/message/hint
|
||||
- `kind` field is semantically overloaded (verb identity in success, error classification in error)
|
||||
- See [`SCHEMAS.md`](./SCHEMAS.md) for documented (v2.0 target) schema and [`FIX_LOCUS_164.md`](./FIX_LOCUS_164.md) for migration details
|
||||
|
||||
#### Using v1.0 envelopes in your code
|
||||
|
||||
**Success path:** Check for absence of `type: "error"`, then access verb-specific fields:
|
||||
```bash
|
||||
cd rust
|
||||
./target/debug/claw doctor --output-format json | jq '.kind, .has_failures'
|
||||
```
|
||||
|
||||
**Error path:** Check for `type == "error"`, then access `error` (string) and `kind` (error classification):
|
||||
```bash
|
||||
cd rust
|
||||
./target/debug/claw doctor invalid-arg --output-format json | jq '.error, .kind'
|
||||
```
|
||||
|
||||
**Do NOT rely on `kind` alone for dispatching** — it has different meanings in success vs. error. Always check `type == "error"` first.
|
||||
|
||||
```bash
|
||||
cd rust
|
||||
@@ -109,6 +213,8 @@ cd rust
|
||||
|
||||
**Building a dispatcher or orchestration script?** See [`ERROR_HANDLING.md`](./ERROR_HANDLING.md) for the unified error-handling pattern. One code example works for all 14 clawable commands: parse the exit code, check `type == "error"`, classify by the top-level `kind` field (in v1.0 `error` is a plain string, not an object), apply recovery strategies (retry, timeout recovery, validation, logging). Use that pattern instead of reimplementing error handling per command.
|
||||
|
||||
**Migrating to v2.0?** Check back after [`FIX_LOCUS_164`](./FIX_LOCUS_164.md) is implemented. Phase 1 will add a `--envelope-version=2.0` flag for opt-in access to the structured envelope schema. Phase 2 will make v2.0 the default. Phase 3 will deprecate v1.0.
|
||||
|
||||
### Inspect worker state
|
||||
|
||||
The `claw state` command reads `.claw/worker-state.json`, which is written by the interactive REPL or a one-shot prompt when a worker executes a task. This file contains the worker ID, session reference, model, and permission mode.
|
||||
@@ -422,6 +528,93 @@ cd rust
|
||||
./target/debug/claw system-prompt --cwd .. --date 2026-04-04
|
||||
```
|
||||
|
||||
### `dump-manifests` — Export upstream plugin/MCP manifests
|
||||
|
||||
**Purpose:** Dump built-in tool and plugin manifests to stdout as JSON, for parity comparison against the upstream Claude Code TypeScript implementation.
|
||||
|
||||
**Prerequisite:** This command requires access to upstream source files (`src/commands.ts`, `src/tools.ts`, `src/entrypoints/cli.tsx`). Set `CLAUDE_CODE_UPSTREAM` env var or pass `--manifests-dir`.
|
||||
|
||||
```bash
|
||||
# Via env var
|
||||
CLAUDE_CODE_UPSTREAM=/path/to/upstream claw dump-manifests
|
||||
|
||||
# Via flag
|
||||
claw dump-manifests --manifests-dir /path/to/upstream
|
||||
```
|
||||
|
||||
**When to use:** Parity work (comparing the Rust port's tool/plugin surface against the canonical TypeScript implementation). Not needed for normal operation.
|
||||
|
||||
**Error mode:** If upstream sources are missing, exits with `error-kind: missing_manifests` and a hint about how to provide them.
|
||||
|
||||
### `bootstrap-plan` — Show startup component graph
|
||||
|
||||
**Purpose:** Print the ordered list of startup components that are initialized when `claw` begins a session. Useful for debugging startup issues or verifying that fast-path optimizations are in place.
|
||||
|
||||
```bash
|
||||
claw bootstrap-plan
|
||||
```
|
||||
|
||||
**Sample output:**
|
||||
```
|
||||
- CliEntry
|
||||
- FastPathVersion
|
||||
- StartupProfiler
|
||||
- SystemPromptFastPath
|
||||
- ChromeMcpFastPath
|
||||
```
|
||||
|
||||
**When to use:**
|
||||
- Debugging why startup is slow (compare your plan to the expected one)
|
||||
- Verifying that fast-path components are registered
|
||||
- Understanding the load order before customizing hooks or plugins
|
||||
|
||||
**Related:** See `claw doctor` for health checks against these startup components.
|
||||
|
||||
### `acp` — Agent Context Protocol / Zed editor integration status
|
||||
|
||||
**Purpose:** Report the current state of the ACP (Agent Context Protocol) / Zed editor integration. Currently **discoverability only** — no editor daemon is available yet.
|
||||
|
||||
```bash
|
||||
claw acp
|
||||
claw acp serve # same output; `serve` is accepted but not yet launchable
|
||||
claw --acp # alias
|
||||
claw -acp # alias
|
||||
```
|
||||
|
||||
**Sample output:**
|
||||
```
|
||||
ACP / Zed
|
||||
Status discoverability only
|
||||
Launch `claw acp serve` / `claw --acp` / `claw -acp` report status only; no editor daemon is available yet
|
||||
Today use `claw prompt`, the REPL, or `claw doctor` for local verification
|
||||
Tracking ROADMAP #76
|
||||
```
|
||||
|
||||
**When to use:** Check whether ACP/Zed integration is ready in your current build. Plan around its availability (track ROADMAP #76 for status).
|
||||
|
||||
**Today's alternatives:** Use `claw prompt` for one-shot runs, the interactive REPL for iterative work, or `claw doctor` for local verification.
|
||||
|
||||
### `export` — Export session transcript
|
||||
|
||||
**Purpose:** Export a managed session's transcript to a file or stdout. Operates on the currently-resumed session (requires `--resume`).
|
||||
|
||||
```bash
|
||||
# Export latest session
|
||||
claw --resume latest export
|
||||
|
||||
# Export specific session
|
||||
claw --resume <session-id> export
|
||||
```
|
||||
|
||||
**Prerequisite:** A managed session must exist under `.claw/sessions/<workspace-fingerprint>/`. If no sessions exist, the command exits with `error-kind: no_managed_sessions` and a hint to start a session first.
|
||||
|
||||
**When to use:**
|
||||
- Archive session transcripts for review
|
||||
- Share session context with teammates
|
||||
- Feed session history into downstream tooling
|
||||
|
||||
**Related:** Inside the REPL, `/export` is also available as a slash command for the active session.
|
||||
|
||||
## Session management
|
||||
|
||||
REPL turns are persisted under `.claw/sessions/` in the current workspace.
|
||||
@@ -432,7 +625,27 @@ cd rust
|
||||
./target/debug/claw --resume latest /status /diff
|
||||
```
|
||||
|
||||
Useful interactive commands include `/help`, `/status`, `/cost`, `/config`, `/session`, `/model`, `/permissions`, and `/export`.
|
||||
### Interactive slash commands (inside the REPL)
|
||||
|
||||
Useful interactive commands include:
|
||||
|
||||
- `/help` — Show help for all available commands
|
||||
- `/status` — Display current session and workspace status
|
||||
- `/cost` — Show token usage and cost estimates for the session
|
||||
- `/config` — Display current configuration and environment state
|
||||
- `/session` — Show session ID, creation time, and persisted metadata
|
||||
- `/model` — Display or switch the active model
|
||||
- `/permissions` — Check sandbox permissions and capability grants
|
||||
- `/export [file]` — Export the current conversation to a file (or resume from backup)
|
||||
- `/ultraplan [task]` — Run a deep planning prompt with multi-step reasoning (good for complex refactoring tasks)
|
||||
- `/teleport <symbol-or-path>` — Jump to a file or symbol by searching the workspace (IDE-like navigation)
|
||||
- `/bughunter [scope]` — Inspect the codebase for likely bugs in an optional scope (e.g., `src/runtime`)
|
||||
- `/commit` — Generate a commit message and create a git commit from the conversation
|
||||
- `/pr [context]` — Draft or create a pull request from the conversation
|
||||
- `/issue [context]` — Draft or create a GitHub issue from the conversation
|
||||
- `/diff` — Show unified diff of changes made in the current session
|
||||
- `/plugin [list|install|enable|disable|uninstall|update]` — Manage Claw Code plugins
|
||||
- `/agents [list|help]` — List configured agents or get help on agent commands
|
||||
|
||||
## Config file resolution order
|
||||
|
||||
|
||||
110
docs/ARCHITECTURE.md
Normal file
110
docs/ARCHITECTURE.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# claw-code Architecture
|
||||
|
||||
A high-level overview of how claw-code is structured. For implementation details, see source code in `rust/crates/`. For provider details, see [SUPPORTED_PROVIDERS.md](./SUPPORTED_PROVIDERS.md). For pinpoint navigation, see [ROADMAP.md](../ROADMAP.md#pinpoint-cluster-index).
|
||||
|
||||
## Overview
|
||||
|
||||
claw-code is a Rust-based CLI for interacting with LLM providers (Anthropic, OpenAI-compatible, xAI, DashScope, etc.). It provides:
|
||||
|
||||
- Streaming conversation with auto-compaction
|
||||
- Tool execution (file read/write, bash, MCP)
|
||||
- Multi-provider routing
|
||||
- Session persistence
|
||||
- Parallel agent execution
|
||||
|
||||
## Workspace Layout
|
||||
|
||||
The Rust workspace is organized in `rust/crates/`:
|
||||
|
||||
### Core crates
|
||||
|
||||
- **`rusty-claude-cli`** — CLI entry point. Parses args, routes commands, manages TUI/headless modes.
|
||||
- **`runtime`** — Conversation engine. Manages session state, message history, auto-compaction, tool dispatch, hooks, MCP, and branch/lane events.
|
||||
- **`api`** — Provider abstraction. Hosts `MODEL_REGISTRY` (provider/model routing), SSE streaming, request/response handling. Providers: `anthropic`, `openai_compat`.
|
||||
- **`tools`** — Tool definitions. File I/O, bash execution, MCP integration, PDF extraction.
|
||||
|
||||
### Support crates
|
||||
|
||||
- **`commands`** — Parsed command dispatch layer between CLI and runtime.
|
||||
- **`plugins`** — Plugin/hook lifecycle (`hooks.rs`).
|
||||
- **`telemetry`** — Metrics and tracing instrumentation.
|
||||
- **`compat-harness`** — Parity test harness for Rust-port validation.
|
||||
- **`mock-anthropic-service`** — Local mock server for offline/test use.
|
||||
|
||||
## Request Flow
|
||||
|
||||
1. **CLI parse** (`rusty-claude-cli/src/main.rs`) — interprets args, env vars, settings.json
|
||||
2. **Provider selection** (`api/src/providers/mod.rs`) — routes to provider via `MODEL_REGISTRY` based on model prefix
|
||||
3. **Conversation execution** (`runtime/src/conversation.rs`) — sends to provider via SSE, receives streamed response
|
||||
4. **Tool dispatch** (`tools/src/lib.rs`) — if response includes `tool_use`, execute and feed back `tool_result`
|
||||
5. **Auto-compaction check** (`runtime/src/compact.rs`) — REACTIVE-AFTER-SUCCESS only (see #287 for preflight gap)
|
||||
6. **Output** — JSON envelope (`--output-format json`) or text (default)
|
||||
|
||||
## Key Subsystems
|
||||
|
||||
### Auto-compaction
|
||||
|
||||
Triggered post-turn when `usage.input_tokens > threshold`. See:
|
||||
- Threshold via env-only (#283)
|
||||
- Reactive-not-preflight (#287, CRITICAL)
|
||||
- Manual `/compact` skip-reasons (#289)
|
||||
- Failure envelope coverage (#288)
|
||||
|
||||
### Provider routing
|
||||
|
||||
Hard-coded `MODEL_REGISTRY` + env-var-based auth + model-prefix heuristics. See:
|
||||
- [SUPPORTED_PROVIDERS.md](./SUPPORTED_PROVIDERS.md) for current providers
|
||||
- #285 for declarative providers/models/websearch source-of-truth
|
||||
- #245, #246 for declarative config & backend swap
|
||||
- #290, #291, #292 for transport resilience (stream-init, circuit-breaker, escalation)
|
||||
|
||||
### Parallel agents
|
||||
|
||||
Lane-based execution via `runtime/src/lane_events.rs`. Manifest-driven lifecycle. See:
|
||||
- #286 for detached-thread + no-heartbeat issue (CRITICAL)
|
||||
|
||||
### Tool lifecycle / hooks
|
||||
|
||||
Tools defined in `tools/src/`. Hook events emitted via `runtime/src/hooks.rs` and `plugins/src/hooks.rs`. See:
|
||||
- #254 (MCP refresh)
|
||||
- #268 (tool-rendering parity)
|
||||
- #274 (hook-execution-event envelope)
|
||||
- #280 (hook event tap)
|
||||
|
||||
### Session persistence
|
||||
|
||||
Sessions managed in `runtime/src/session.rs`. See:
|
||||
- #278 (version-comparison)
|
||||
- #279 (unknown-field policy)
|
||||
|
||||
### CLI dispatch
|
||||
|
||||
CLI parsing in `rusty-claude-cli/src/main.rs`. Issues:
|
||||
- #262 `--max-turns` spec
|
||||
- #267 `--cwd` runtime fix
|
||||
- #272 position-independent parsing
|
||||
- #282 env-vs-config consolidation
|
||||
|
||||
## Build & Test
|
||||
|
||||
See [CONTRIBUTING.md](../CONTRIBUTING.md) for build commands. Quick reference:
|
||||
|
||||
```
|
||||
cd rust && cargo build # Build all crates
|
||||
cd rust && cargo test # Run all Rust tests
|
||||
```
|
||||
|
||||
## Tracing & Debugging
|
||||
|
||||
- **Session state:** `runtime/src/session.rs` + `.claw/sessions/<workspace-fingerprint>/` in the current workspace
|
||||
- **Provider responses:** Set `RUST_LOG=trace` for verbose SSE logs
|
||||
- **Parity checks:** Use `compat-harness` crate for Rust-port validation
|
||||
|
||||
## Related Documents
|
||||
|
||||
- [ROADMAP.md](../ROADMAP.md) — Pinpoints by cluster
|
||||
- [TROUBLESHOOTING.md](../TROUBLESHOOTING.md) — User-facing failure mitigation
|
||||
- [SUPPORTED_PROVIDERS.md](./SUPPORTED_PROVIDERS.md) — Provider/model details
|
||||
- [CONTRIBUTING.md](../CONTRIBUTING.md) — Pinpoint filing format
|
||||
- [PINPOINT_FILING_GUIDE.md](./PINPOINT_FILING_GUIDE.md) — Filing workflow
|
||||
- [CHANGELOG.md](../CHANGELOG.md) — Recent changes
|
||||
101
docs/PINPOINT_FILING_GUIDE.md
Normal file
101
docs/PINPOINT_FILING_GUIDE.md
Normal file
@@ -0,0 +1,101 @@
|
||||
# Pinpoint Filing Guide
|
||||
|
||||
This guide walks through the workflow for filing a new claw-code pinpoint, from initial friction to merged ROADMAP entry. For format details, see [CONTRIBUTING.md](../CONTRIBUTING.md). For issue template, see [.github/ISSUE_TEMPLATE/pinpoint.md](../.github/ISSUE_TEMPLATE/pinpoint.md).
|
||||
|
||||
## What is a Pinpoint?
|
||||
|
||||
A pinpoint is a precise, distinct claw-code clawability gap captured in ROADMAP.md format. Pinpoints differ from generic issues by:
|
||||
- **Specificity:** Exact file paths, function names, line numbers when available
|
||||
- **Distinctness:** Verified not already covered by existing pinpoints
|
||||
- **Live evidence:** Real friction event, not hypothetical
|
||||
- **Fix shape:** Concrete delta proposal, not vague "should improve X"
|
||||
|
||||
## Workflow
|
||||
|
||||
### Step 1: Identify friction
|
||||
|
||||
Use claw-code in real work. When you hit friction (slow startup, broken behavior, opaque error, missing feature, test brittleness, etc.), STOP and capture:
|
||||
- What you were trying to do
|
||||
- What you expected to happen
|
||||
- What actually happened
|
||||
- Exact error message / log output (verbatim)
|
||||
|
||||
### Step 2: Identify distinct axis
|
||||
|
||||
Open ROADMAP.md and search for related existing pinpoints (use the [Cluster Index](../ROADMAP.md#pinpoint-cluster-index)).
|
||||
|
||||
For each candidate match:
|
||||
- Does the existing pinpoint cover this exact symptom?
|
||||
- Does it cover this exact axis (e.g., timing vs envelope vs config)?
|
||||
- Is your case a SUBSET, a SUPERSET, or an ORTHOGONAL axis?
|
||||
|
||||
If your case is orthogonal, file a new pinpoint. If it is a subset, add your live evidence as additional context to the existing pinpoint. If it is a superset, file a new pinpoint and cross-reference the existing one.
|
||||
|
||||
### Step 3: Verify with code
|
||||
|
||||
Before filing, look at the relevant source code:
|
||||
- `rust/crates/api/src/providers/mod.rs` — provider routing (`rust/crates/api/src/sse.rs` for SSE streaming)
|
||||
- `rust/crates/runtime/src/conversation.rs` — conversation execution (auto-compaction logic lives in `rust/crates/runtime/src/compact.rs`)
|
||||
- `rust/crates/rusty-claude-cli/src/main.rs` — CLI entry
|
||||
- Search with grep / ripgrep to find the relevant module
|
||||
|
||||
If the code clearly does NOT have the feature you expected, file a pinpoint. If the code DOES have the feature but it's broken, file a bug.
|
||||
|
||||
### Step 4: Write the entry
|
||||
|
||||
Follow the canonical 5-section format (see [CONTRIBUTING.md](../CONTRIBUTING.md)):
|
||||
1. **Exact pinpoint** — One precise sentence
|
||||
2. **Live evidence** — Real friction event with timestamps
|
||||
3. **Why distinct** — Explicit comparison to nearest existing pinpoints
|
||||
4. **Concrete delta** — What you're filing (e.g., "ROADMAP.md appended")
|
||||
5. **Fix shape recorded** — Bullet list of suggested implementation steps
|
||||
|
||||
### Step 5: Submit
|
||||
|
||||
Append to ROADMAP.md and commit:
|
||||
|
||||
```
|
||||
git add ROADMAP.md
|
||||
git commit -m "roadmap: #<NNN> filed (<short title>)"
|
||||
git push origin <branch>
|
||||
git push fork <branch>
|
||||
```
|
||||
|
||||
Verify three-way parity (local == origin == fork) before posting any update.
|
||||
|
||||
## Worked Example: #290 (stream-init failure envelope)
|
||||
|
||||
This shows how #290 was filed in real-time on 2026-04-26.
|
||||
|
||||
### Step 1: Friction identified
|
||||
|
||||
gaebal-gajae's session hit `500 empty_stream: upstream stream closed before first payload` repeatedly (4x in 30 min). Bare-string error surfaced; no diagnostics, no retry guidance.
|
||||
|
||||
### Step 2: Distinct axis identified
|
||||
|
||||
- #266 (typed-error-kind taxonomy) covers single-failure categorization, NOT stream-init specifically
|
||||
- #287 (auto-compaction reactive) covers session-size failures, NOT transport
|
||||
- #288 (JSON envelope failure) covers context-window envelope, NOT stream-init
|
||||
|
||||
→ Orthogonal: filed new #290 covering typed-stream-init-failure-envelope
|
||||
|
||||
### Step 3: Code verified
|
||||
|
||||
Inspected `rust/crates/api/src/sse.rs` — confirmed no `failure_class=upstream_stream_init` discriminant, no retry recommendation in JSON envelope.
|
||||
|
||||
### Step 4: Entry written
|
||||
|
||||
Used canonical 5-section format. Listed 4 live evidence timestamps. Cross-referenced #266, #287, #288 in "Why distinct."
|
||||
|
||||
### Step 5: Submitted
|
||||
|
||||
Commit `0f38975`, pushed to both origin and fork, parity verified, Discord post under 1500 chars.
|
||||
|
||||
**Total time: ~2 minutes from friction identification to merged ROADMAP entry.**
|
||||
|
||||
## Tips
|
||||
|
||||
- **File while it's fresh.** Wait too long and you'll forget exact symptoms.
|
||||
- **Check Cluster Index FIRST** — saves time vs scanning full ROADMAP.
|
||||
- **Write Fix Shape even if you don't implement.** Helps future contributors.
|
||||
- **Live evidence with timestamps > theoretical examples.** Real-world friction always wins.
|
||||
81
docs/SUPPORTED_PROVIDERS.md
Normal file
81
docs/SUPPORTED_PROVIDERS.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# Supported Providers
|
||||
|
||||
claw-code currently supports the following LLM providers. This is a snapshot of the current code state and may change. The canonical source of truth is `MODEL_REGISTRY` and provider routing logic in `rust/crates/api/src/providers/mod.rs`.
|
||||
|
||||
> **Note:** A declarative `providers` / `models` / `websearch` config in `settings.json` is tracked as pinpoint #285 and is not yet implemented. Until then, provider/model selection is determined by:
|
||||
> 1. The model name prefix (e.g., `claude-`, `grok-`, `openai/`, `qwen/`, `kimi-`)
|
||||
> 2. Environment variables (e.g., `ANTHROPIC_API_KEY`, `XAI_API_KEY`, `DASHSCOPE_API_KEY`, `OPENAI_API_KEY`)
|
||||
> 3. Hard-coded heuristics in `MODEL_REGISTRY` and `detect_provider_kind()`
|
||||
|
||||
## Anthropic
|
||||
|
||||
- **Status:** Primary supported provider
|
||||
- **Models:**
|
||||
- `claude-opus-4-6` (alias: `opus`) — 200K context, 32K max output
|
||||
- `claude-sonnet-4-6` (alias: `sonnet`) — 200K context, 64K max output
|
||||
- `claude-haiku-4-5-20251213` (alias: `haiku`) — 200K context, 64K max output
|
||||
- **Auth:** `ANTHROPIC_API_KEY` env var, or OAuth bearer via `claw login` (`ANTHROPIC_AUTH_TOKEN`)
|
||||
- **Base URL:** `https://api.anthropic.com` (override: `ANTHROPIC_BASE_URL`)
|
||||
- **Known issues:** Subject to upstream stream-init failures (see #290, #291)
|
||||
|
||||
## xAI (Grok)
|
||||
|
||||
- **Status:** Supported via OpenAI-compatible client
|
||||
- **Models:**
|
||||
- `grok-3` (aliases: `grok`, `grok-3`) — 131K context, 64K max output
|
||||
- `grok-3-mini` (aliases: `grok-mini`, `grok-3-mini`) — 131K context, 64K max output
|
||||
- `grok-2` — context/output limits not yet registered in token metadata
|
||||
- **Auth:** `XAI_API_KEY`
|
||||
- **Base URL:** `https://api.x.ai/v1` (override: `XAI_BASE_URL`)
|
||||
- **Known issues:** None currently tracked
|
||||
|
||||
## Alibaba DashScope (Qwen / Kimi)
|
||||
|
||||
- **Status:** Supported via OpenAI-compatible client pointed at DashScope compatible-mode endpoint
|
||||
- **Models:**
|
||||
- `qwen/*` and `qwen-*` prefix — routes to DashScope (e.g., `qwen-plus`, `qwen-max`, `qwen-turbo`, `qwen/qwen3-coder`)
|
||||
- `kimi-k2.5` (alias: `kimi`) — 256K context, 16K max output
|
||||
- `kimi-k1.5` — 256K context, 16K max output
|
||||
- `kimi/*` and `kimi-*` prefix — routes to DashScope
|
||||
- **Auth:** `DASHSCOPE_API_KEY`
|
||||
- **Base URL:** `https://dashscope.aliyuncs.com/compatible-mode/v1` (override: `DASHSCOPE_BASE_URL`)
|
||||
- **Known issues:** None currently tracked
|
||||
|
||||
## OpenAI / OpenAI-Compatible Endpoints
|
||||
|
||||
- **Status:** Supported via OpenAI-compatible client; also covers local providers (Ollama, LM Studio, vLLM, OpenRouter)
|
||||
- **Models:** `openai/` prefix (e.g., `openai/gpt-4.1-mini`) or bare `gpt-*` prefix
|
||||
- **Auth:** `OPENAI_API_KEY`
|
||||
- **Base URL:** `https://api.openai.com/v1` (override: `OPENAI_BASE_URL` — also used for local providers)
|
||||
- **Local provider routing:** When `OPENAI_BASE_URL` is set and `OPENAI_API_KEY` is present, unknown model names (e.g., `qwen2.5-coder:7b`) also route here
|
||||
- **Known issues:** Declarative per-model config tracked in #285
|
||||
|
||||
## Web Search
|
||||
|
||||
- **Status:** Hard-coded heuristics; declarative `websearch` config tracked in #285
|
||||
|
||||
## Provider Selection Order
|
||||
|
||||
`detect_provider_kind()` resolves the provider in the following order; steps 2–7 apply only when the model name has no recognized prefix:
|
||||
|
||||
1. Model prefix match (`claude-` → Anthropic, `grok-` → xAI, `openai/` or `gpt-` → OpenAI, `qwen/` or `qwen-` → DashScope, `kimi/` or `kimi-` → DashScope)
|
||||
2. `OPENAI_BASE_URL` + `OPENAI_API_KEY` set → OpenAI-compat
|
||||
3. Anthropic credentials found → Anthropic
|
||||
4. `OPENAI_API_KEY` found → OpenAI
|
||||
5. `XAI_API_KEY` found → xAI
|
||||
6. `OPENAI_BASE_URL` set (no key) → OpenAI-compat (for keyless local providers)
|
||||
7. Default fallback → Anthropic
|
||||
|
||||
## Reporting Provider Issues
|
||||
|
||||
For provider-specific bugs (e.g., `500 empty_stream` from upstream), see [TROUBLESHOOTING.md](../TROUBLESHOOTING.md) for mitigation steps.
|
||||
|
||||
For pinpointing a missing provider feature, file via [ISSUE_TEMPLATE/pinpoint.md](../.github/ISSUE_TEMPLATE/pinpoint.md).
|
||||
|
||||
## Related Pinpoints
|
||||
|
||||
- #245 — Provider declarative config
|
||||
- #246 — Backend swap
|
||||
- #285 — Provider/model/websearch source of truth
|
||||
- #290 — Stream-init failure envelope
|
||||
- #291 — Repeat-failure circuit-breaker
|
||||
@@ -1,6 +1,24 @@
|
||||
use std::env;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
/// Resolves the `HEAD` file to watch for the `.git` entry in the current
/// working directory.
///
/// Returns:
/// - `Some(".git/HEAD")` when `.git` is a directory (regular checkout).
/// - `Some("<gitdir>/HEAD")` when `.git` is a pointer file whose first line is
///   `gitdir: <gitdir>` (the layout used by linked worktrees).
/// - `None` when `.git` is missing, unreadable, not a `gitdir:` pointer, or
///   the pointer names an empty path.
fn resolve_git_head_path() -> Option<String> {
    let git_path = Path::new(".git");

    if git_path.is_dir() {
        // Regular repo: .git is a directory.
        return Some(".git/HEAD".to_string());
    }
    if !git_path.is_file() {
        return None;
    }

    // Worktree: .git is a pointer file containing
    // "gitdir: /path/to/real/.git/worktrees/<name>".
    let content = std::fs::read_to_string(git_path).ok()?;

    // Only the first line carries the pointer; parsing it alone keeps stray
    // trailing lines from being embedded in the returned path.
    let gitdir = content.lines().next()?.strip_prefix("gitdir:")?.trim();
    if gitdir.is_empty() {
        // An empty pointer would otherwise resolve to the bogus path "/HEAD".
        return None;
    }

    // Join as a path (not by string concatenation) so trailing separators and
    // platform separators are handled by the standard library.
    Path::new(gitdir)
        .join("HEAD")
        .into_os_string()
        .into_string()
        .ok()
}
|
||||
|
||||
fn main() {
|
||||
// Get git SHA (short hash)
|
||||
let git_sha = Command::new("git")
|
||||
@@ -52,6 +70,12 @@ fn main() {
|
||||
println!("cargo:rustc-env=BUILD_DATE={build_date}");
|
||||
|
||||
// Rerun if git state changes
|
||||
println!("cargo:rerun-if-changed=.git/HEAD");
|
||||
// In worktrees, .git is a pointer file, so watch the actual HEAD location
|
||||
if let Some(head_path) = resolve_git_head_path() {
|
||||
println!("cargo:rerun-if-changed={}", head_path);
|
||||
} else {
|
||||
// Fallback to .git/HEAD for regular repos (won't trigger in worktrees, but prevents silent failure)
|
||||
println!("cargo:rerun-if-changed=.git/HEAD");
|
||||
}
|
||||
println!("cargo:rerun-if-changed=.git/refs");
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -389,8 +389,13 @@ fn assert_json_command(current_dir: &Path, args: &[&str]) -> Value {
|
||||
}
|
||||
|
||||
/// #247 regression helper: run claw expecting a non-zero exit and return
|
||||
/// the JSON error envelope parsed from stderr. Asserts exit != 0 and that
|
||||
/// the JSON error envelope parsed from stdout. Asserts exit != 0 and that
|
||||
/// the envelope includes `type: "error"` at the very least.
|
||||
///
|
||||
/// #168c: Error envelopes under --output-format json are now emitted to
|
||||
/// STDOUT (not stderr). This matches the emission contract that stdout
|
||||
/// carries the contractual envelope (success OR error) while stderr is
|
||||
/// reserved for non-contractual diagnostics.
|
||||
fn assert_json_error_envelope(current_dir: &Path, args: &[&str]) -> Value {
|
||||
let output = run_claw(current_dir, args, &[]);
|
||||
assert!(
|
||||
@@ -399,10 +404,12 @@ fn assert_json_error_envelope(current_dir: &Path, args: &[&str]) -> Value {
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
);
|
||||
// The JSON envelope is written to stderr for error cases (see main.rs).
|
||||
let envelope: Value = serde_json::from_slice(&output.stderr).unwrap_or_else(|err| {
|
||||
// #168c: The JSON envelope is written to STDOUT for error cases under
|
||||
// --output-format json (see main.rs). Previously was stderr.
|
||||
let envelope: Value = serde_json::from_slice(&output.stdout).unwrap_or_else(|err| {
|
||||
panic!(
|
||||
"stderr should be a JSON error envelope but failed to parse: {err}\nstderr bytes:\n{}",
|
||||
"stdout should be a JSON error envelope but failed to parse: {err}\nstdout bytes:\n{}\nstderr bytes:\n{}",
|
||||
String::from_utf8_lossy(&output.stdout),
|
||||
String::from_utf8_lossy(&output.stderr)
|
||||
)
|
||||
});
|
||||
@@ -413,6 +420,63 @@ fn assert_json_error_envelope(current_dir: &Path, args: &[&str]) -> Value {
|
||||
envelope
|
||||
}
|
||||
|
||||
/// #168c regression guard: with `--output-format json`, a failing command
/// must put its error envelope on STDOUT, leaving stderr for non-contractual
/// diagnostics.
///
/// The test runs `claw --output-format json prompt` (no prompt argument) and
/// checks that:
/// - the process exits non-zero;
/// - stdout is non-empty and parses as JSON with `type == "error"` and a
///   string `kind`;
/// - stderr, if it has any non-whitespace content, does not parse as JSON.
///
/// History: this refutes cycle #84's "bootstrap silent failure" claim. The
/// cycle #87 controlled matrix showed the errors were on stderr rather than
/// silent, and cycle #88 locked the emission contract to require stdout.
#[test]
fn error_envelope_emitted_to_stdout_under_output_format_json_168c() {
    let root = unique_temp_dir("168c-emission-stdout");
    fs::create_dir_all(&root).expect("temp dir should exist");

    // `prompt` with no argument is a known cli_parse error.
    let output = run_claw(&root, &["--output-format", "json", "prompt"], &[]);
    let stdout_text = String::from_utf8_lossy(&output.stdout);
    let stderr_text = String::from_utf8_lossy(&output.stderr);

    // The command has to fail for the rest of the test to be meaningful.
    assert!(
        !output.status.success(),
        "prompt without arg must fail; stdout:\n{}\nstderr:\n{}",
        stdout_text,
        stderr_text
    );

    // Primary contract: the envelope is on stdout and is well-formed JSON.
    assert!(
        !stdout_text.trim().is_empty(),
        "stdout must contain JSON envelope under --output-format json (#168c emission contract). stderr was:\n{}",
        stderr_text
    );
    let envelope: Value = match serde_json::from_slice(&output.stdout) {
        Ok(parsed) => parsed,
        Err(err) => panic!(
            "stdout should be valid JSON under --output-format json (#168c): {err}\nstdout bytes:\n{stdout_text}"
        ),
    };
    assert_eq!(envelope["type"], "error", "envelope must be typed error");
    let kind = envelope["kind"].as_str();
    assert!(kind.is_some(), "envelope must carry machine-readable kind");

    // Secondary contract: stderr may be empty or hold plain diagnostics, but
    // it must not be a second copy of the JSON envelope.
    let stderr_trimmed = stderr_text.trim();
    let stderr_parses_as_json = serde_json::from_slice::<Value>(&output.stderr).is_ok();
    assert!(
        stderr_trimmed.is_empty() || !stderr_parses_as_json,
        "stderr must not duplicate the JSON envelope (#168c); stderr was:\n{stderr_trimmed}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn prompt_subcommand_without_arg_emits_cli_parse_envelope_with_hint_247() {
|
||||
// #247: `claw prompt` with no argument must classify as `cli_parse`
|
||||
@@ -474,6 +538,268 @@ fn whitespace_only_positional_arg_emits_cli_parse_envelope_247() {
|
||||
);
|
||||
}
|
||||
|
||||
/// #168c Phase 0 Task 2: the "no silent success" guarantee.
///
/// With `--output-format json`, a verb may never exit 0 while leaving stdout
/// empty: a consumer could not tell a successful run from broken emission.
///
/// Two catalogs are exercised:
/// 1. Verbs expected to succeed. For each one that exits 0, stdout must be
///    non-empty and must parse as JSON. A non-zero exit is tolerated here;
///    error routing is covered by the dedicated #168c stdout-routing test.
/// 2. Invocations expected to fail. Each must exit non-zero, emit non-empty
///    JSON on stdout, and carry `type == "error"`.
///
/// Phase 0 Task 2 deliverable: guards the emission contract against
/// regressions across the discoverable verbs listed below.
#[test]
fn emission_contract_no_silent_success_under_output_format_json_168c_task2() {
    let root = unique_temp_dir("168c-task2-no-silent");
    fs::create_dir_all(&root).expect("temp dir should exist");

    // Discovery-safe subset: verbs that need no external credentials or
    // network, so they can be invoked in CI.
    let safe_success_verbs: &[(&str, &[&str])] = &[
        ("help", &["help"]),
        ("version", &["version"]),
        ("list-sessions", &["list-sessions"]),
        ("doctor", &["doctor"]),
        ("mcp", &["mcp"]),
        ("skills", &["skills"]),
        ("agents", &["agents"]),
        ("sandbox", &["sandbox"]),
        ("status", &["status"]),
        ("system-prompt", &["system-prompt"]),
        ("bootstrap-plan", &["bootstrap-plan", "test"]),
        ("acp", &["acp"]),
    ];

    for &(verb, args) in safe_success_verbs {
        let argv: Vec<&str> = ["--output-format", "json"]
            .iter()
            .chain(args.iter())
            .copied()
            .collect();
        let output = run_claw(&root, &argv, &[]);

        // Non-zero exit is an error path, which the primary #168c test covers.
        if !output.status.success() {
            continue;
        }

        // Clause 1: exit 0 requires non-empty stdout.
        let stdout_text = String::from_utf8_lossy(&output.stdout);
        assert!(
            !stdout_text.trim().is_empty(),
            "#168c Task 2 emission contract violation: `{verb}` exit 0 with empty stdout (silent success). stderr was:\n{}",
            String::from_utf8_lossy(&output.stderr)
        );

        // Clause 2: that stdout must be valid JSON.
        assert!(
            serde_json::from_slice::<Value>(&output.stdout).is_ok(),
            "#168c Task 2 emission contract violation: `{verb}` stdout is not valid JSON:\n{stdout_text}"
        );
    }

    // Clause 3: invocations that fail in the test environment must still
    // emit a JSON error envelope on stdout.
    let safe_error_verbs: &[(&str, &[&str])] = &[
        ("prompt-no-arg", &["prompt"]),
        ("doctor-bad-arg", &["doctor", "--foo"]),
    ];

    for &(label, args) in safe_error_verbs {
        let argv: Vec<&str> = ["--output-format", "json"]
            .iter()
            .chain(args.iter())
            .copied()
            .collect();
        let output = run_claw(&root, &argv, &[]);

        assert!(
            !output.status.success(),
            "{label} was expected to fail but exited 0"
        );

        // #168c: the error envelope belongs on stdout.
        let stdout_text = String::from_utf8_lossy(&output.stdout);
        assert!(
            !stdout_text.trim().is_empty(),
            "#168c Task 2 emission contract violation: {label} failed with empty stdout. stderr was:\n{}",
            String::from_utf8_lossy(&output.stderr)
        );

        let envelope: Value = match serde_json::from_slice(&output.stdout) {
            Ok(parsed) => parsed,
            Err(_) => panic!(
                "#168c Task 2 emission contract violation: {label} stdout not valid JSON:\n{stdout_text}"
            ),
        };
        assert_eq!(
            envelope["type"], "error",
            "{label} error envelope must carry type=error, got: {envelope}"
        );
    }
}
|
||||
|
||||
/// #168c Phase 0 Task 4: Shape parity / regression guard.
///
/// Locks the v1.5 emission baseline (documented in SCHEMAS.md § v1.5 Emission
/// Baseline) so any future PR that introduces shape drift in a documented
/// verb fails this test at PR time.
///
/// This complements Task 2 (no-silent guarantee) by asserting the SPECIFIC
/// top-level key sets documented in the catalog. If a verb adds/removes a
/// top-level field, this test fails — forcing the PR author to:
///   (a) update SCHEMAS.md § v1.5 Emission Baseline with the new shape, and
///   (b) acknowledge the v1.5 baseline is changing.
///
/// Phase 0 Task 4 deliverable: prevents undocumented shape drift in v1.5
/// baseline before Phase 1 (shape normalization) begins.
///
/// Note: This test intentionally asserts the CURRENT (possibly imperfect)
/// shape, NOT the target. Phase 1 will update these expectations as shapes
/// normalize.
///
/// # Panics
///
/// Fails when a success-path verb exits non-zero, when an error case exits
/// zero, when stdout is not a JSON object, or when the sorted top-level key
/// set differs from the catalog below.
#[test]
fn v1_5_emission_baseline_shape_parity_168c_task4() {
    /// Returns the envelope's top-level keys in sorted order, or `None` when
    /// the envelope is not a JSON object. Keys are borrowed from the envelope,
    /// so no per-key allocation is needed just to compare shapes.
    fn sorted_top_level_keys(envelope: &Value) -> Option<Vec<&str>> {
        let mut keys: Vec<&str> = envelope.as_object()?.keys().map(String::as_str).collect();
        keys.sort_unstable();
        Some(keys)
    }

    let root = unique_temp_dir("168c-task4-shape-parity");
    fs::create_dir_all(&root).expect("temp dir should exist");

    // v1.5 baseline per-verb shape catalog (from SCHEMAS.md § v1.5 Emission Baseline).
    // Each entry: (verb, args, expected_top_level_keys_sorted).
    //
    // This catalog was captured by the cycle #87 controlled matrix and is
    // enforced by SCHEMAS.md § v1.5 Emission Baseline documentation.
    let baseline: &[(&str, &[&str], &[&str])] = &[
        // Verbs using `kind` field (12 of 13 success paths)
        ("help", &["help"], &["kind", "message"]),
        (
            "version",
            &["version"],
            &["git_sha", "kind", "message", "target", "version"],
        ),
        (
            "doctor",
            &["doctor"],
            &["checks", "has_failures", "kind", "message", "report", "summary"],
        ),
        (
            "skills",
            &["skills"],
            &["action", "kind", "skills", "summary"],
        ),
        (
            "agents",
            &["agents"],
            &["action", "agents", "count", "kind", "summary", "working_directory"],
        ),
        (
            "system-prompt",
            &["system-prompt"],
            &["kind", "message", "sections"],
        ),
        (
            "bootstrap-plan",
            &["bootstrap-plan", "test"],
            &["kind", "phases"],
        ),
        // Verb using `command` field (the 1-of-13 deviation — Phase 1 target)
        (
            "list-sessions",
            &["list-sessions"],
            &["command", "sessions"],
        ),
    ];

    for (verb, args, expected_keys) in baseline {
        let mut full_args = vec!["--output-format", "json"];
        full_args.extend_from_slice(args);
        let output = run_claw(&root, &full_args, &[]);

        assert!(
            output.status.success(),
            "#168c Task 4: `{verb}` expected success path but exited with {:?}. stdout:\n{}\nstderr:\n{}",
            output.status.code(),
            String::from_utf8_lossy(&output.stdout),
            String::from_utf8_lossy(&output.stderr)
        );

        let envelope: Value = serde_json::from_slice(&output.stdout).unwrap_or_else(|err| {
            panic!(
                "#168c Task 4: `{verb}` stdout not valid JSON: {err}\nstdout:\n{}",
                String::from_utf8_lossy(&output.stdout)
            )
        });

        let actual_sorted = sorted_top_level_keys(&envelope)
            .unwrap_or_else(|| panic!("#168c Task 4: `{verb}` envelope not a JSON object"));

        // The catalog is written sorted, but sort defensively so a hand-edited
        // entry in the wrong order does not produce a spurious failure.
        let mut expected_sorted: Vec<&str> = expected_keys.to_vec();
        expected_sorted.sort_unstable();

        assert_eq!(
            actual_sorted, expected_sorted,
            "#168c Task 4: shape drift detected in `{verb}`!\n\
             Expected top-level keys (v1.5 baseline): {expected_sorted:?}\n\
             Actual top-level keys: {actual_sorted:?}\n\
             If this is intentional, update:\n\
             1. SCHEMAS.md § v1.5 Emission Baseline catalog\n\
             2. This test's `baseline` array\n\
             Envelope: {envelope}"
        );
    }

    // Error envelope shape parity (all error paths).
    // Standard v1.5 error envelope: {error, hint, kind, type} (always 4 keys).
    let error_cases: &[(&str, &[&str])] = &[
        ("prompt-no-arg", &["prompt"]),
        ("doctor-bad-arg", &["doctor", "--foo"]),
    ];

    let mut expected_error_sorted: Vec<&str> = vec!["error", "hint", "kind", "type"];
    expected_error_sorted.sort_unstable();

    for (label, args) in error_cases {
        let mut full_args = vec!["--output-format", "json"];
        full_args.extend_from_slice(args);
        let output = run_claw(&root, &full_args, &[]);

        assert!(
            !output.status.success(),
            "{label}: expected error exit, got success"
        );

        let envelope: Value = serde_json::from_slice(&output.stdout).unwrap_or_else(|err| {
            panic!(
                "#168c Task 4: {label} stdout not valid JSON: {err}\nstdout:\n{}",
                String::from_utf8_lossy(&output.stdout)
            )
        });

        let actual_sorted = sorted_top_level_keys(&envelope)
            .unwrap_or_else(|| panic!("#168c Task 4: {label} envelope not a JSON object"));

        assert_eq!(
            actual_sorted, expected_error_sorted,
            "#168c Task 4: error envelope shape drift detected in {label}!\n\
             Expected v1.5 error envelope keys: {expected_error_sorted:?}\n\
             Actual keys: {actual_sorted:?}\n\
             If this is intentional, update SCHEMAS.md § Standard Error Envelope (v1.5).\n\
             Envelope: {envelope}"
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn unrecognized_argument_still_classifies_as_cli_parse_247_regression_guard() {
|
||||
// #247 regression guard: the new empty-prompt / prompt-subcommand
|
||||
@@ -496,6 +822,50 @@ fn unrecognized_argument_still_classifies_as_cli_parse_247_regression_guard() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn v1_5_action_field_appears_only_in_3_inventory_verbs_172() {
    // #172 regression guard for a SCHEMAS.md v1.5 Emission Baseline claim:
    // the top-level `action` field is emitted by exactly three inventory
    // verbs (mcp, skills, agents) and by no other verb checked here. Adding
    // `action` to another verb, or dropping it from one of the three, fails
    // this test so the documentation has to be updated alongside the code.
    //
    // History: the cycle #98 probe found an earlier SCHEMAS.md draft that
    // said "only in 4 inventory verbs"; in reality list-sessions emits
    // `command` rather than `action`, so the doc was corrected to 3 and this
    // test pins that invariant.
    const WITH_ACTION: [&str; 3] = ["mcp", "skills", "agents"];
    const WITHOUT_ACTION: [&str; 8] = [
        "help",
        "version",
        "doctor",
        "status",
        "sandbox",
        "system-prompt",
        "bootstrap-plan",
        "list-sessions",
    ];

    let root = unique_temp_dir("172-action-inventory");
    fs::create_dir_all(&root).expect("temp dir should exist");

    // Verbs that must carry `action` are probed first, then the ones that
    // must not — the same order as running two separate loops.
    let expectations = WITH_ACTION
        .iter()
        .map(|verb| (*verb, true))
        .chain(WITHOUT_ACTION.iter().map(|verb| (*verb, false)));

    for (verb, expects_action) in expectations {
        let envelope = assert_json_command(&root, &["--output-format", "json", verb]);
        let has_action = envelope.get("action").is_some();

        if expects_action {
            assert!(
                has_action,
                "#172: `{verb}` should have `action` field per v1.5 baseline, but envelope: {envelope}"
            );
        } else {
            assert!(
                !has_action,
                "#172: `{verb}` should NOT have `action` field per v1.5 baseline (only 3 inventory verbs: mcp/skills/agents should have it), but envelope: {envelope}"
            );
        }
    }
}
|
||||
|
||||
fn assert_json_command_with_env(current_dir: &Path, args: &[&str], envs: &[(&str, &str)]) -> Value {
|
||||
let output = run_claw(current_dir, args, envs);
|
||||
assert!(
|
||||
|
||||
Reference in New Issue
Block a user