mirror of
https://github.com/instructkr/claude-code.git
synced 2026-05-25 06:56:45 +00:00
Compare commits
68 Commits
docs/roadm
...
fix/roadma
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e5d904edaf | ||
|
|
f2a90228fb | ||
|
|
0581894b7e | ||
|
|
5b79413e87 | ||
|
|
85e736c73f | ||
|
|
b64df99134 | ||
|
|
c345ce6d02 | ||
|
|
91a0681ae9 | ||
|
|
c613e8e676 | ||
|
|
1003510a75 | ||
|
|
63a5a87471 | ||
|
|
da7924d079 | ||
|
|
bb2a9238d9 | ||
|
|
8806e62a9f | ||
|
|
78a0ff615a | ||
|
|
706ac0f8e1 | ||
|
|
bd8a27b100 | ||
|
|
60108dfbf6 | ||
|
|
bd9102f851 | ||
|
|
e7d5d08892 | ||
|
|
f003a108e3 | ||
|
|
a76dda2b19 | ||
|
|
013694476e | ||
|
|
3d02baf567 | ||
|
|
6f5465aeaf | ||
|
|
fdbc789694 | ||
|
|
779cf1c234 | ||
|
|
1f330c6737 | ||
|
|
3489ec51d5 | ||
|
|
0423321cb1 | ||
|
|
06c126ab6b | ||
|
|
1f572ff8de | ||
|
|
03bd461984 | ||
|
|
ba941f7f69 | ||
|
|
bf7bae82ae | ||
|
|
495e7a015c | ||
|
|
3364dc4bee | ||
|
|
499125c9a3 | ||
|
|
c32288bd6b | ||
|
|
c8b44878c5 | ||
|
|
ae30bf4f04 | ||
|
|
a4efdc43d7 | ||
|
|
52572d5883 | ||
|
|
b43a6f2d29 | ||
|
|
f1a55a211e | ||
|
|
0975252976 | ||
|
|
cef45efc16 | ||
|
|
bc1b3c837a | ||
|
|
88f79bb2a5 | ||
|
|
7149bbc3d9 | ||
|
|
aefa5b0f19 | ||
|
|
96ddecab81 | ||
|
|
271283cd03 | ||
|
|
5fb2ed9464 | ||
|
|
f967df7f01 | ||
|
|
5a9550d388 | ||
|
|
b071fac2cf | ||
|
|
fdcb05b2c4 | ||
|
|
fc26e16ce2 | ||
|
|
1c62116e25 | ||
|
|
739488f613 | ||
|
|
f72681f998 | ||
|
|
a61d023583 | ||
|
|
c881069ff8 | ||
|
|
5200d1a476 | ||
|
|
fa8eecaf8f | ||
|
|
2033c90921 | ||
|
|
8cada12c48 |
17
.dockerignore
Normal file
17
.dockerignore
Normal file
@@ -0,0 +1,17 @@
|
||||
# Keep docker build context small (Windows-friendly).
|
||||
.git
|
||||
.github
|
||||
**/target
|
||||
**/.claw-rag
|
||||
**/.claw
|
||||
**/.claude
|
||||
**/.cursor
|
||||
**/node_modules
|
||||
**/dist
|
||||
**/build
|
||||
**/*.log
|
||||
**/*.tmp
|
||||
**/*.sqlite
|
||||
**/*.sqlite-wal
|
||||
**/*.sqlite-shm
|
||||
**/.DS_Store
|
||||
20
.github/hooks/pre-push
vendored
Executable file
20
.github/hooks/pre-push
vendored
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
|
||||
# Claw Code local pre-push safety gate.
|
||||
#
|
||||
# Install with:
|
||||
# git config core.hooksPath .github/hooks
|
||||
#
|
||||
# This intentionally mirrors the CI build gate so stale field/enum references are
|
||||
# caught before pushing to main or PR branches.
|
||||
set -euo pipefail
|
||||
|
||||
repo_root="$(git rev-parse --show-toplevel 2>/dev/null)"
|
||||
cd "$repo_root"
|
||||
|
||||
if [[ ! -f rust/Cargo.toml ]]; then
|
||||
echo "pre-push: rust/Cargo.toml not found; skipping cargo workspace build" >&2
|
||||
exit 0
|
||||
fi
|
||||
|
||||
echo "pre-push: cargo build --manifest-path rust/Cargo.toml --workspace" >&2
|
||||
cargo build --manifest-path rust/Cargo.toml --workspace
|
||||
25
.github/workflows/rust.yml
vendored
Normal file
25
.github/workflows/rust.yml
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
name: Rust
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ "main" ]
|
||||
pull_request:
|
||||
branches: [ "main" ]
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
|
||||
defaults:
|
||||
run:
|
||||
working-directory: rust
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build
|
||||
run: cargo build --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --verbose
|
||||
@@ -3,11 +3,11 @@
|
||||
"duplicate_roadmap_heading_lines": [],
|
||||
"roadmap_actions_mapped": 542,
|
||||
"roadmap_actions_total": 542,
|
||||
"roadmap_headings_mapped": 124,
|
||||
"roadmap_headings_total": 124,
|
||||
"roadmap_headings_mapped": 127,
|
||||
"roadmap_headings_total": 127,
|
||||
"unmapped_roadmap_heading_lines": []
|
||||
},
|
||||
"generated_at": "2026-05-14T08:13:45+00:00",
|
||||
"generated_at": "2026-05-25T04:30:33+00:00",
|
||||
"generation_policy": {
|
||||
"release_buckets": [
|
||||
"2.x_intake",
|
||||
@@ -14823,6 +14823,69 @@
|
||||
"status": "context",
|
||||
"title": "Parity source metadata: openai/codex",
|
||||
"verification_required": "none_context_only"
|
||||
},
|
||||
{
|
||||
"category": "boot",
|
||||
"deferral_rationale": "",
|
||||
"dependencies": [
|
||||
"stream_0_governance"
|
||||
],
|
||||
"id": "CC2-RM-H0125-pinpoint-693-claw-analog-bootstrap-plan",
|
||||
"lifecycle_status": "done_verify",
|
||||
"owner_lane": "stream_1_worker_boot_session_control",
|
||||
"release_bucket": "alpha_blocker",
|
||||
"source_anchor": "ROADMAP.md:L7528",
|
||||
"source_context": "Clawable Coding Harness Roadmap > Pinpoint follow-up intake",
|
||||
"source_level": 2,
|
||||
"source_line": 7528,
|
||||
"source_ordinal": null,
|
||||
"source_path": "ROADMAP.md",
|
||||
"source_type": "roadmap_heading",
|
||||
"status": "done_verify",
|
||||
"title": "Pinpoint #693. `claw-analog` bootstrap-plan phase parser silently falls back to `\"unknown\"` \u2014 `lib.rs:1114` uses `.unwrap_or(\"unknown\")` for phase field; unrecognized phases emit opaque kind instead of typed error",
|
||||
"verification_required": "targeted_regression_or_acceptance_test_required"
|
||||
},
|
||||
{
|
||||
"category": "branch_recovery",
|
||||
"deferral_rationale": "",
|
||||
"dependencies": [
|
||||
"stream_0_governance"
|
||||
],
|
||||
"id": "CC2-RM-H0126-pinpoint-694-no-pre-push-cargo-build-gat",
|
||||
"lifecycle_status": "done_verify",
|
||||
"owner_lane": "stream_3_branch_test_recovery",
|
||||
"release_bucket": "alpha_blocker",
|
||||
"source_anchor": "ROADMAP.md:L7538",
|
||||
"source_context": "Clawable Coding Harness Roadmap > Pinpoint follow-up intake",
|
||||
"source_level": 2,
|
||||
"source_line": 7538,
|
||||
"source_ordinal": null,
|
||||
"source_path": "ROADMAP.md",
|
||||
"source_type": "roadmap_heading",
|
||||
"status": "done_verify",
|
||||
"title": "Pinpoint #694. No pre-push `cargo build` gate \u2014 stale field refs (`retry_after`, `Team` variant, `config_load_error_kind`) broke main build undetected until CI",
|
||||
"verification_required": "git_fixture_or_recovery_recipe_test"
|
||||
},
|
||||
{
|
||||
"category": "boot",
|
||||
"deferral_rationale": "",
|
||||
"dependencies": [
|
||||
"stream_0_governance"
|
||||
],
|
||||
"id": "CC2-RM-H0127-pinpoint-695-agent-starts-in-stale-wrong",
|
||||
"lifecycle_status": "done_verify",
|
||||
"owner_lane": "stream_1_worker_boot_session_control",
|
||||
"release_bucket": "alpha_blocker",
|
||||
"source_anchor": "ROADMAP.md:L7548",
|
||||
"source_context": "Clawable Coding Harness Roadmap > Pinpoint follow-up intake",
|
||||
"source_level": 2,
|
||||
"source_line": 7548,
|
||||
"source_ordinal": null,
|
||||
"source_path": "ROADMAP.md",
|
||||
"source_type": "roadmap_heading",
|
||||
"status": "done_verify",
|
||||
"title": "Pinpoint #695. Agent starts in stale/wrong worktree and burns a full turn before noticing \u2014 no pre-flight check for \"file exists on current branch\" or \"this .git is writable from sandbox\"",
|
||||
"verification_required": "worker_boot_state_machine_or_cli_json_contract_test"
|
||||
}
|
||||
],
|
||||
"schema_version": "cc2.board.v1",
|
||||
@@ -14839,7 +14902,7 @@
|
||||
"root": "/Users/bellman/Documents/Workspace/claw-code/.omx/research"
|
||||
},
|
||||
"roadmap": {
|
||||
"heading_count": 124,
|
||||
"heading_count": 127,
|
||||
"ordered_action_count": 542,
|
||||
"path": "ROADMAP.md",
|
||||
"sha256_prefix": "2aba3315e52f3079"
|
||||
@@ -14850,15 +14913,15 @@
|
||||
"adoption_overlay": 357,
|
||||
"parity_overlay": 20,
|
||||
"stream_0_governance": 221,
|
||||
"stream_1_worker_boot_session_control": 15,
|
||||
"stream_1_worker_boot_session_control": 17,
|
||||
"stream_2_event_reporting_contracts": 73,
|
||||
"stream_3_branch_test_recovery": 16,
|
||||
"stream_3_branch_test_recovery": 17,
|
||||
"stream_4_claws_first_execution": 5,
|
||||
"stream_5_plugin_mcp_lifecycle": 22
|
||||
},
|
||||
"by_release_bucket": {
|
||||
"2.x_intake": 30,
|
||||
"alpha_blocker": 240,
|
||||
"alpha_blocker": 243,
|
||||
"beta_adoption": 417,
|
||||
"context": 15,
|
||||
"ga_ecosystem": 22,
|
||||
@@ -14870,13 +14933,13 @@
|
||||
"latest_open_issue": 30,
|
||||
"parity_repo_context": 2,
|
||||
"roadmap_action": 542,
|
||||
"roadmap_heading": 124
|
||||
"roadmap_heading": 127
|
||||
},
|
||||
"by_status": {
|
||||
"active": 73,
|
||||
"context": 15,
|
||||
"deferred_with_rationale": 9,
|
||||
"done_verify": 313,
|
||||
"done_verify": 316,
|
||||
"open": 285,
|
||||
"rejected_not_claw": 2,
|
||||
"stale_done": 31,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Claw Code 2.0 Canonical Board
|
||||
|
||||
Generated from board schema: `2026-05-14T08:13:45+00:00`
|
||||
Generated from board schema: `2026-05-25T04:30:33+00:00`
|
||||
Schema version: `cc2.board.v1`
|
||||
Ultragoal mutation policy: `.omx/ultragoal` is leader-owned and was not modified by this rendering task.
|
||||
|
||||
@@ -8,7 +8,7 @@ Ultragoal mutation policy: `.omx/ultragoal` is leader-owned and was not modified
|
||||
|
||||
| Source | Frozen evidence |
|
||||
| --- | --- |
|
||||
| Roadmap | `ROADMAP.md` sha256 prefix `2aba3315e52f3079`; 124 headings; 542 ordered actions |
|
||||
| Roadmap | `ROADMAP.md` sha256 prefix `2aba3315e52f3079`; 127 headings; 542 ordered actions |
|
||||
| Approved plan | `.omx/plans/claw-code-2-0-adaptive-plan.md` sha256 prefix `e7ef6faf23bfc16b` |
|
||||
| Research bundle | root `/Users/bellman/Documents/Workspace/claw-code/.omx/research`; latest open issues 30; issue corpus 1000; codex/opencode clone metadata included |
|
||||
|
||||
@@ -16,11 +16,11 @@ Ultragoal mutation policy: `.omx/ultragoal` is leader-owned and was not modified
|
||||
|
||||
| Coverage gate | Mapped | Total | Status |
|
||||
| --- | --- | --- | --- |
|
||||
| ROADMAP headings | 124 | 124 | PASS |
|
||||
| ROADMAP headings | 127 | 127 | PASS |
|
||||
| ROADMAP ordered actions | 542 | 542 | PASS |
|
||||
| Duplicate heading lines | 0 | 0 | PASS |
|
||||
|
||||
Total canonical board items: **729**
|
||||
Total canonical board items: **732**
|
||||
|
||||
## Lifecycle Enum Reference
|
||||
|
||||
@@ -29,7 +29,7 @@ Total canonical board items: **729**
|
||||
| `active` | 73 | Current Claw Code 2.0 implementation surface that should remain visible on the board. |
|
||||
| `context` | 15 | Context-only heading or evidence anchor; not an implementation work item. |
|
||||
| `deferred_with_rationale` | 9 | Intentionally deferred; rationale must be present in the board item. |
|
||||
| `done_verify` | 313 | Marked as done upstream but retained for verification against current CC2 behavior. |
|
||||
| `done_verify` | 316 | Marked as done upstream but retained for verification against current CC2 behavior. |
|
||||
| `open` | 285 | Actionable unresolved work that needs implementation or acceptance evidence. |
|
||||
| `rejected_not_claw` | 2 | Excluded because it is not Claw Code product work. |
|
||||
| `stale_done` | 31 | Historically completed or merged work that may be stale and needs freshness checks before relying on it. |
|
||||
@@ -40,7 +40,7 @@ Total canonical board items: **729**
|
||||
| Bucket | Count | Meaning |
|
||||
| --- | --- | --- |
|
||||
| `2.x_intake` | 30 | Post-2.0 intake or follow-up candidate retained for sequencing. |
|
||||
| `alpha_blocker` | 240 | Must be resolved before alpha-quality autonomous coding lanes are dependable. |
|
||||
| `alpha_blocker` | 243 | Must be resolved before alpha-quality autonomous coding lanes are dependable. |
|
||||
| `beta_adoption` | 417 | Important for broader dogfood/adoption once alpha blockers are controlled. |
|
||||
| `context` | 15 | Non-actionable roadmap context. |
|
||||
| `ga_ecosystem` | 22 | Required for mature plugin/MCP/provider ecosystem behavior. |
|
||||
@@ -54,9 +54,9 @@ Total canonical board items: **729**
|
||||
| Adoption overlay — user-visible parity and release polish | 357 | 329 | `deferred_with_rationale` 3, `done_verify` 237, `open` 92, `rejected_not_claw` 2, `stale_done` 23 |
|
||||
| Parity overlay — opencode/codex comparison context | 20 | 16 | `context` 2, `deferred_with_rationale` 1, `done_verify` 5, `open` 11, `stale_done` 1 |
|
||||
| Stream 0 — Governance, intake, and cross-cutting roadmap triage | 221 | 198 | `active` 6, `context` 13, `deferred_with_rationale` 4, `done_verify` 45, `open` 147, `stale_done` 5, `superseded` 1 |
|
||||
| Stream 1 — Worker boot and session control | 15 | 14 | `active` 8, `deferred_with_rationale` 1, `open` 6 |
|
||||
| Stream 1 — Worker boot and session control | 17 | 16 | `active` 8, `deferred_with_rationale` 1, `done_verify` 2, `open` 6 |
|
||||
| Stream 2 — Event/reporting contracts | 73 | 73 | `active` 45, `done_verify` 20, `open` 8 |
|
||||
| Stream 3 — Branch/test recovery | 16 | 14 | `active` 6, `done_verify` 1, `open` 7, `stale_done` 2 |
|
||||
| Stream 3 — Branch/test recovery | 17 | 15 | `active` 6, `done_verify` 2, `open` 7, `stale_done` 2 |
|
||||
| Stream 4 — Claws-first task execution | 5 | 5 | `active` 4, `done_verify` 1 |
|
||||
| Stream 5 — Plugin/MCP lifecycle | 22 | 22 | `active` 4, `done_verify` 4, `open` 14 |
|
||||
|
||||
@@ -68,7 +68,7 @@ Total canonical board items: **729**
|
||||
| `latest_open_issue` | 30 |
|
||||
| `parity_repo_context` | 2 |
|
||||
| `roadmap_action` | 542 |
|
||||
| `roadmap_heading` | 124 |
|
||||
| `roadmap_heading` | 127 |
|
||||
|
||||
## Board Items by Stream
|
||||
|
||||
@@ -704,6 +704,8 @@ Total canonical board items: **729**
|
||||
| `CC2-RM-A0363-surface-inconsistency-cluster-of-3-after` | **Surface inconsistency (cluster of 3)**: after #143 Phase 1, the behavior matrix is: | `ROADMAP.md:L5515` / `roadmap_action` | `alpha_blocker` | `open` | `plugin_mcp_lifecycle_contract_test` | `stream_1_worker_boot_session_control` | — |
|
||||
| `CC2-RM-A0391-remove-the-error-prefix-from-format-unkn` | Remove the "error:" prefix from format_unknown_verb_option (already added by top-level handler) | `ROADMAP.md:L5916` / `roadmap_action` | `alpha_blocker` | `open` | `worker_boot_state_machine_or_cli_json_contract_test` | none | — |
|
||||
| `CC2-RM-A0512-system-prompt-output-format-json-exposes` | **`system-prompt --output-format json` exposes `"__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__"` as a literal element in the `sections` array — an internal split delimiter leaked into the public structured output** — dogfooded 2026-04-30 by Jobdori on `e939777f`. Running `claw system-prompt --output-format json` returns `{"kind":"system-prompt","message":"<full prose>","sections":["You are an interactive agent...", "# System\n...", "# Doing tasks\n...", "# Executing actions with care\n...", "__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__", "# Environment context\n...", "# Project context\n...", "# Claude instructions\n...", "# Runtime config\n..."]}`. The `sections` array has 9 elements; element index 4 is the raw string `"__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__"`. This internal sentinel marks the boundary between the static and dynamic sections of the compiled system prompt, used during assembly to split the prompt at injection time. It appears in the public JSON output verbatim as a first-class section, indistinguishable from real sections by type alone. Automation that iterates `sections[]` must special-case this sentinel or it will process an internal implementation string as if it were a real system prompt section. 
**Required fix shape:** (a) strip `"__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__"` and any similar internal delimiters from the `sections` array before serializing to JSON; (b) if the static/dynamic boundary is semantically meaningful for callers, expose it as a structured metadata field such as `boundary_index:4` or as a `section_type:"static"\|"dynamic"` field on each section entry, not as a raw sentinel string in the array; (c) rename the `sections` type from `string[]` to `[{id, type, content}]` to enable this without breaking the boundary signal; (d) add regression coverage proving the `system-prompt --output-format json` output's `sections` array contains no elements whose value equals `"__SYSTEM_PROMPT_DYNAMIC_BOUNDARY__"` or matches `/__[A-Z_]+__/`. **Why this matters:** internal sentinel strings in public JSON are a contract liability — they couple the wire format to internal implementation details. Any refactor that renames or removes the sentinel breaks callers that don't special-case it, and automation that doesn't know to filter it will miscount, misparse, or misrender the system prompt. Source: Jobdori live dogfood, `e939777f`, 2026-04-30. | `ROADMAP.md:L6333` / `roadmap_action` | `beta_adoption` | `open` | `worker_boot_state_machine_or_cli_json_contract_test` | none | — |
|
||||
| `CC2-RM-H0125-pinpoint-693-claw-analog-bootstrap-plan` | Pinpoint #693. `claw-analog` bootstrap-plan phase parser silently falls back to `"unknown"` — `lib.rs:1114` uses `.unwrap_or("unknown")` for phase field; unrecognized phases emit opaque kind instead of typed error | `ROADMAP.md:L7528` / `roadmap_heading` | `alpha_blocker` | `done_verify` | `targeted_regression_or_acceptance_test_required` | `stream_0_governance` | — |
|
||||
| `CC2-RM-H0127-pinpoint-695-agent-starts-in-stale-wrong` | Pinpoint #695. Agent starts in stale/wrong worktree and burns a full turn before noticing — no pre-flight check for "file exists on current branch" or "this .git is writable from sandbox" | `ROADMAP.md:L7548` / `roadmap_heading` | `alpha_blocker` | `done_verify` | `worker_boot_state_machine_or_cli_json_contract_test` | `stream_0_governance` | — |
|
||||
|
||||
### Stream 2 — Event/reporting contracts
|
||||
|
||||
@@ -803,6 +805,7 @@ Total canonical board items: **729**
|
||||
| `CC2-RM-A0410-remediation-registry-a-function-remediat` | **Remediation registry:** A function `remediation_for(kind: &str, operation: &str) -> Remediation` that maps `(error_kind, operation_context)` pairs to stable remediation structs: | `ROADMAP.md:L6041` / `roadmap_action` | `alpha_blocker` | `open` | `targeted_regression_or_acceptance_test_required` | `stream_2_event_reporting_contracts` | — |
|
||||
| `CC2-RM-A0411-stable-hint-outputs-per-class-each-error` | **Stable hint outputs per class:** Each `error_kind` maps to exactly one remediation shape. No more prose splitting. | `ROADMAP.md:L6049` / `roadmap_action` | `alpha_blocker` | `open` | `targeted_regression_or_acceptance_test_required` | `stream_2_event_reporting_contracts` | — |
|
||||
| `CC2-RM-A0412-golden-fixture-tests-test-each-kind-oper` | **Golden fixture tests:** Test each `(kind, operation)` pair against expected remediation output as golden fixtures instead of the current `split_error_hint()` string hacks. | `ROADMAP.md:L6050` / `roadmap_action` | `alpha_blocker` | `open` | `targeted_regression_or_acceptance_test_required` | `stream_2_event_reporting_contracts` | — |
|
||||
| `CC2-RM-H0126-pinpoint-694-no-pre-push-cargo-build-gat` | Pinpoint #694. No pre-push `cargo build` gate — stale field refs (`retry_after`, `Team` variant, `config_load_error_kind`) broke main build undetected until CI | `ROADMAP.md:L7538` / `roadmap_heading` | `alpha_blocker` | `done_verify` | `git_fixture_or_recovery_recipe_test` | `stream_0_governance` | — |
|
||||
|
||||
### Stream 4 — Claws-first task execution
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"version": 1,
|
||||
"createdAt": "2026-05-14T07:53:46.061Z",
|
||||
"updatedAt": "2026-05-15T04:38:54.887Z",
|
||||
"updatedAt": "2026-05-25T04:18:52.711Z",
|
||||
"briefPath": ".omx/ultragoal/brief.md",
|
||||
"goalsPath": ".omx/ultragoal/goals.json",
|
||||
"ledgerPath": ".omx/ultragoal/ledger.jsonl",
|
||||
@@ -148,7 +148,19 @@
|
||||
"updatedAt": "2026-05-15T04:38:54.887Z",
|
||||
"evidence": "G012-final-gate complete: team g012-final-gate-ultra-e61d2271 8/8 tasks complete; final gate log /tmp/g012-final-quality-gate-pass4.log; commit 04c2abb pushed; docs/pr-triage-g012-final-gate.json docs/pr-issue-resolution-gate.md docs/g012-final-release-readiness-report.md; .omx/ultragoal/goals.json and ledger.jsonl updated; aiSlopCleaner and codeReview evidence included in quality gate JSON.",
|
||||
"completedAt": "2026-05-15T04:38:54.887Z"
|
||||
},
|
||||
{
|
||||
"id": "G013-implement-roadmap-pinpoints-693-695",
|
||||
"title": "Implement ROADMAP pinpoints #693-#695",
|
||||
"objective": "Map and implement the newly appended ROADMAP.md pinpoints #693, #694, and #695 after reset to origin/main: typed claw-analog bootstrap phase errors, a local pre-push cargo build gate, and startup/worktree preflight diagnostics; update CC2 board/coverage and verify with targeted and workspace checks.",
|
||||
"status": "in_progress",
|
||||
"attempt": 1,
|
||||
"createdAt": "2026-05-25T04:18:43.420Z",
|
||||
"updatedAt": "2026-05-25T04:18:52.711Z",
|
||||
"evidence": "Current-head verification after reset: python3 scripts/validate_cc2_board.py --board .omx/cc2/board.json failed with unmapped ROADMAP headings [7528,7538,7548], corresponding to Pinpoints #693-#695.",
|
||||
"startedAt": "2026-05-25T04:18:52.711Z"
|
||||
}
|
||||
],
|
||||
"codexObjective": "Complete the approved Claw Code 2.0 ultragoal delivery: implement all classified ROADMAP.md backlog work through execution-sized stream goals G001-G012, using .omx/ultragoal/ledger.jsonl as the durable audit trail and .omx/plans/claw-code-2-0-adaptive-plan.md as the source plan."
|
||||
"codexObjective": "Complete the approved Claw Code 2.0 ultragoal delivery: implement all classified ROADMAP.md backlog work through execution-sized stream goals G001-G012, using .omx/ultragoal/ledger.jsonl as the durable audit trail and .omx/plans/claw-code-2-0-adaptive-plan.md as the source plan.",
|
||||
"activeGoalId": "G013-implement-roadmap-pinpoints-693-695"
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
1089
ROADMAP.md
1089
ROADMAP.md
File diff suppressed because one or more lines are too long
21
USAGE.md
21
USAGE.md
@@ -474,6 +474,27 @@ cd rust
|
||||
./target/debug/claw system-prompt --cwd .. --date 2026-04-04
|
||||
```
|
||||
|
||||
## Install an external skill
|
||||
|
||||
`claw skills install <path>` accepts either a local skill directory that contains
|
||||
`SKILL.md` or a standalone markdown file. This is useful when a companion
|
||||
repository ships a skill prompt that should be available through `/skills`.
|
||||
|
||||
For example, install TweetClaw as an X/Twitter automation skill:
|
||||
|
||||
```bash
|
||||
# From a parent directory that contains claw-code
|
||||
git clone https://github.com/Xquik-dev/tweetclaw
|
||||
cd claw-code/rust
|
||||
./target/debug/claw skills install ../../tweetclaw/skills/tweetclaw
|
||||
./target/debug/claw skills show tweetclaw
|
||||
```
|
||||
|
||||
TweetClaw gives `claw` users a local skill guide for OpenClaw/Xquik workflows
|
||||
such as tweet search, reply search, follower export, monitors, webhooks, and
|
||||
approval-gated posting. Configure any Xquik credentials outside the prompt and
|
||||
avoid pasting API keys into chat.
|
||||
|
||||
## Session management
|
||||
|
||||
REPL turns are persisted under `.claw/sessions/` in the current workspace.
|
||||
|
||||
125
concept.md
Normal file
125
concept.md
Normal file
@@ -0,0 +1,125 @@
|
||||
# Концепция проекта Claw Code
|
||||
|
||||
Документ фиксирует **цели**, **архитектуру** и **принципы** репозитория **Claw Code** — публичной Rust-реализации CLI-агента **`claw`** и сопутствующих инструментов. Источник правды по кодовой базе: workspace в каталоге [`rust/`](rust/README.md); операционные сценарии — [`USAGE.md`](USAGE.md), [`how_to_run.md`](how_to_run.md) (claw-analog), бэклог идей — [`futute.md`](futute.md).
|
||||
|
||||
Отдельная продуктовая линия «из CLI → в личного помощника» (каналы/память/инструменты/проактивность/сессии) описана в [`docs/personal-assistant-roadmap.md`](docs/personal-assistant-roadmap.md).
|
||||
|
||||
---
|
||||
|
||||
## 1. Назначение продукта
|
||||
|
||||
**Claw Code** — это:
|
||||
|
||||
1. **Основной CLI `claw`** (`rusty-claude-cli`): полнофункциональный агент с REPL, OAuth, расширенным набором инструментов (включая bash, MCP, плагины и др.), стримингом и интеграцией с провайдерами **Anthropic**, **OpenAI-совместимыми** API и **xAI**.
|
||||
2. **`claw-analog`** — облегчённая оболочка на **том же слое API** (`api` crate): узкий, предсказуемый набор инструментов только для работы с файловой системой воркспейса, явные режимы прав, пригодность для **CI**, **скриптов** и **внешних агентов** (NDJSON).
|
||||
3. **`claw-rag-service`** — отдельный процесс: **индексация** репозитория (чанки + эмбеддинги в SQLite), **HTTP API** для семантического поиска и минимальный **веб-UI** для ручной проверки индекса.
|
||||
|
||||
Общая идея: дать **безопасный**, **аудируемый** и **воспроизводимый** способ вызова LLM над кодом и документацией, с путём эволюции от минимального harness до полного `claw`.
|
||||
|
||||
---
|
||||
|
||||
## 2. Целевая аудитория и сценарии
|
||||
|
||||
| Сегмент | Задача |
|
||||
|---------|--------|
|
||||
| Разработчик | Ежедневная работа с кодовой базой через полный `claw`: REPL, инструменты, сессии. |
|
||||
| Автор автоматизации | Одноразовые промпты, пайплайны с `--output-format json`, встроенные агенты без bash. |
|
||||
| Сопровождение / аудит | `claw-analog` в **read-only** + пресет **audit**; явные лимиты и политика. |
|
||||
| Порт и parity | Сравнение поведения с эталоном (`PARITY.md`, mock-harness). |
|
||||
| RAG над монорепо | Отдельный `ingest` + `serve`; агент подключает контекст через **`retrieve_context`** при заданном `RAG_BASE_URL`. |
|
||||
|
||||
---
|
||||
|
||||
## 3. Архитектура (логическая)
|
||||
|
||||
```text
|
||||
┌─────────────────────────────────────┐
|
||||
│ Провайдеры (Anthropic / OpenAI / …) │
|
||||
└─────────────────┬───────────────────┘
|
||||
│
|
||||
┌──────────────────────────────┼──────────────────────────────┐
|
||||
│ │ │
|
||||
▼ ▼ ▼
|
||||
┌──────────────┐ ┌──────────────┐ ┌──────────────────┐
|
||||
│ rusty- │ │ claw-analog │ │ claw-rag-service │
|
||||
│ claude-cli │ │ (lean loop) │ │ HTTP + SQLite │
|
||||
│ («claw») │ │ │ │ ingest / query │
|
||||
└──────┬───────┘ └──────┬───────┘ └────────┬─────────┘
|
||||
│ │ │
|
||||
│ crates/api │ retrieve_context │
|
||||
│ runtime, tools, … │ (POST /v1/query) │
|
||||
└──────────────┬───────────────┴───────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
Файловая система / workspace (-w)
|
||||
```
|
||||
|
||||
**Принцип разделения:** тяжёлая индексация и хранение эмбеддингов **не** зашиваются в `claw-analog`, а живут в **`claw-rag-service`**. Агент только вызывает retrieval по HTTP — проще масштабировать, менять векторное хранилище и секреты эмбеддингов.
|
||||
|
||||
---
|
||||
|
||||
## 4. Принципы проектирования
|
||||
|
||||
1. **Безопасность по умолчанию** — относительные пути, запрет `..`, проверка выхода за canonical workspace; режимы `PermissionMode` согласованы с полным CLI; в неинтерактивном режиме опасные режимы блокируются без явного флага.
|
||||
2. **Явные лимиты** — размер чтения, число ходов, glob/grep caps, таймауты RAG; сбои предсказуемы, а не «OOM или вечный цикл».
|
||||
3. **Наблюдаемость для агентов** — NDJSON с `schema` и `format_version` на `run_start`, структурированные `tool_result`.
|
||||
4. **Модульность** — общий `api` для провайдеров; `claw-analog` не дублирует стек ключей RAG, только HTTP-клиент к сервису.
|
||||
5. **Паритет и тесты** — mock Anthropic, сценарии harness, отдельные job’ы CI для критичных crate’ов.
|
||||
6. **Документация рядом с кодом** — `how_to_run.md`, `docs/rag-web-ui.md`, `docs/container.md` и т.д.
|
||||
|
||||
---
|
||||
|
||||
## 5. Компоненты workspace (кратко)
|
||||
|
||||
- **`rusty-claude-cli`** — основной бинарь **`claw`**: пользовательский продукт полной мощности.
|
||||
- **`api`** — клиенты провайдеров, стриминг, типы запросов/ответов.
|
||||
- **`runtime`** — сессии, конфиг, **PermissionPolicy** / **PermissionEnforcer**, промпты, MCP и др.
|
||||
- **`tools`** — встроенные инструменты полного CLI.
|
||||
- **`claw-analog`** — минимальный цикл: инструменты чтения/поиска/записи (по режиму), стриминг и JSON, TOML-конфиг, сессии, doctor, config validate, **retrieve_context** при наличии `RAG_BASE_URL` / `rag_base_url`.
|
||||
- **`claw-rag-service`** — `ingest`, `serve`, маршруты `/`, `/health`, `/v1/stats`, `/v1/query`; SQLite + OpenAI-совместимые эмбеддинги (или mock для тестов).
|
||||
- **`mock-anthropic-service`**, **`compat-harness`** и др. — воспроизводимость и миграция.
|
||||
|
||||
Подробная раскладка: [`rust/README.md`](rust/README.md).
|
||||
|
||||
---
|
||||
|
||||
## 6. Claw-analog: роль и границы
|
||||
|
||||
**Задача:** дать «агента с инструментами» без разрастания поверхности атаки (нет произвольного shell в базовом сценарии).
|
||||
|
||||
**Инструменты (концептуально):** чтение и обход дерева (`read_file`, `list_dir`, `glob_workspace`), литеральный поиск (`grep_workspace` / `grep_search`), опционально `write_file`, опционально **`retrieve_context`** к RAG-сервису.
|
||||
|
||||
**Не входит в минимальный дизайн:** MCP, плагины, bash — это зона **полного `claw`**.
|
||||
|
||||
---
|
||||
|
||||
## 7. RAG-сервис: роль и эволюция
|
||||
|
||||
**Сейчас (MVP):** полная переиндексация при `ingest`, векторы в SQLite, поиск — линейный перебор всех чанков по косинусной близости; подходит для умеренных объёмов кода.
|
||||
|
||||
**Направления роста (концепция):** инкрементальная индексация, ANN (sqlite-vec, Qdrant/Chroma в Docker), rate limits на эмбеддинги. Веб-UI на `GET /` — вспомогательный; продвинутый UI и авторизация — по мере необходимости.
|
||||
|
||||
Детали: [`docs/rag-web-ui.md`](docs/rag-web-ui.md).
|
||||
|
||||
---
|
||||
|
||||
## 8. Репозиторий вне основного runtime
|
||||
|
||||
- **`src/`**, **`tests/`** (Python и прочее) — вспомогательные/экспериментальные артефакты; **канонический runtime** — **`rust/`**.
|
||||
- Документы **PHILOSOPHY.md**, **ROADMAP.md**, **PARITY.md** дополняют концепцию процессом и намерениями сообщества/мейнтейнеров.
|
||||
|
||||
---
|
||||
|
||||
## 9. Связанные концепции (не ядро Claw Code)
|
||||
|
||||
В **`docs/`** могут находиться переносимые заметки для **других** продуктов (например, локальный vision для NestJS-приложений) — они **не** определяют обязательное поведение `claw`, но отражают смежные интересы контрибьюторов.
|
||||
|
||||
---
|
||||
|
||||
## 10. Итоговая формулировка
|
||||
|
||||
**Claw Code** — это экосистема **Rust** вокруг агента **`claw`**: полный CLI для разработчиков, **`claw-analog`** как управляемый минимальный агент для автоматизации и **отдельный RAG-сервис** для семантического поиска по коду. Проект опирается на **явные права**, **лимиты**, **тестируемость** и **чёткие HTTP-границы** между агентом и тяжёлой индексацией.
|
||||
|
||||
---
|
||||
|
||||
*Обновляйте этот файл при смене ключевых продуктовых решений; детальный чеклист фич и backlog — в [`futute.md`](futute.md).*
|
||||
50
docker-compose.yml
Normal file
50
docker-compose.yml
Normal file
@@ -0,0 +1,50 @@
|
||||
services:
|
||||
qdrant:
|
||||
image: qdrant/qdrant:latest
|
||||
ports:
|
||||
- "6333:6333"
|
||||
- "6334:6334"
|
||||
environment:
|
||||
QDRANT__SERVICE__GRPC_PORT: "6334"
|
||||
volumes:
|
||||
- qdrant-storage:/qdrant/storage
|
||||
|
||||
rag-serve:
|
||||
build:
|
||||
context: ./rust
|
||||
dockerfile: crates/claw-rag-service/Dockerfile
|
||||
command: ["serve", "--db", "/data/index.sqlite"]
|
||||
environment:
|
||||
# Use mock embeddings by default for local dev; override in your shell for real providers.
|
||||
CLAW_RAG_MOCK_PROVIDERS: "1"
|
||||
CLAW_RAG_DB: "/data/index.sqlite"
|
||||
CLAW_RAG_HOST: "0.0.0.0"
|
||||
CLAW_RAG_QDRANT_URL: "http://qdrant:6334"
|
||||
CLAW_RAG_QDRANT_COLLECTION: "claw_rag_chunks"
|
||||
ports:
|
||||
- "8787:8787"
|
||||
depends_on:
|
||||
- qdrant
|
||||
volumes:
|
||||
- rag-data:/data
|
||||
|
||||
rag-ingest:
|
||||
build:
|
||||
context: ./rust
|
||||
dockerfile: crates/claw-rag-service/Dockerfile
|
||||
command: ["ingest", "--db", "/data/index.sqlite"]
|
||||
environment:
|
||||
CLAW_RAG_MOCK_PROVIDERS: "1"
|
||||
CLAW_RAG_DB: "/data/index.sqlite"
|
||||
CLAW_RAG_QDRANT_URL: "http://qdrant:6334"
|
||||
CLAW_RAG_QDRANT_COLLECTION: "claw_rag_chunks"
|
||||
depends_on:
|
||||
- qdrant
|
||||
volumes:
|
||||
- rag-data:/data
|
||||
# Mount example workspace roots under /workspaces
|
||||
- ./:/workspaces/main:ro
|
||||
|
||||
volumes:
|
||||
qdrant-storage:
|
||||
rag-data:
|
||||
47
docs/g013-roadmap-pinpoints-693-695-verification-map.md
Normal file
47
docs/g013-roadmap-pinpoints-693-695-verification-map.md
Normal file
@@ -0,0 +1,47 @@
|
||||
# G013 ROADMAP pinpoints #693-#695 verification map
|
||||
|
||||
This map records the current-head follow-up that was discovered after resetting
|
||||
`main` to `origin/main`: ROADMAP.md contained three new Pinpoint headings not
|
||||
covered by the Claw Code 2.0 board.
|
||||
|
||||
## Pinpoint #693 — typed phase error instead of silent `unknown`
|
||||
|
||||
- Code: `rust/crates/claw-analog/src/lib.rs`
|
||||
- Behavior: `format_rag_query_json_for_model` now rejects missing, empty, or
|
||||
literal `"unknown"` phase values with a structured error envelope containing
|
||||
`kind:"unknown_bootstrap_phase"`, `field:"phase"`, and `received_value`.
|
||||
- Regression tests: `rag_response_missing_phase_returns_typed_error` and
|
||||
`rag_response_unknown_phase_returns_typed_error`.
|
||||
|
||||
## Pinpoint #694 — local pre-push build gate
|
||||
|
||||
- Hook: `.github/hooks/pre-push`
|
||||
- Install command: `git config core.hooksPath .github/hooks`
|
||||
- Gate: `cargo build --manifest-path rust/Cargo.toml --workspace`
|
||||
- Purpose: mirror the CI build job locally so stale field/variant references are
|
||||
caught before push.
|
||||
|
||||
## Pinpoint #695 — startup/worktree preflight diagnostics
|
||||
|
||||
- Code: `rust/crates/runtime/src/worker_boot.rs`
|
||||
- Behavior: `startup_preflight_warnings` and
|
||||
`WorkerRegistry::observe_startup_preflight` emit structured warnings before
|
||||
the first model turn when a task mentions a path not tracked on the current
|
||||
branch (`file_absent_on_branch`) or git metadata is not writable
|
||||
(`git_metadata_not_writable`).
|
||||
- Regression tests:
|
||||
- `startup_preflight_warns_when_task_file_is_absent_on_branch`
|
||||
- `startup_preflight_records_structured_warning_event`
|
||||
|
||||
## Verification commands
|
||||
|
||||
```bash
|
||||
python3 scripts/generate_cc2_board.py
|
||||
python3 scripts/validate_cc2_board.py --board .omx/cc2/board.json
|
||||
python3 .omx/cc2/validate_issue_parity_intake.py .omx/cc2/issue-parity-intake.json
|
||||
bash -n .github/hooks/pre-push
|
||||
cargo fmt --manifest-path rust/Cargo.toml --all -- --check
|
||||
cargo test --manifest-path rust/Cargo.toml -p claw-analog rag_response_ -- --nocapture
|
||||
cargo test --manifest-path rust/Cargo.toml -p runtime startup_preflight -- --nocapture
|
||||
cargo build --manifest-path rust/Cargo.toml --workspace
|
||||
```
|
||||
131
docs/personal-assistant-roadmap.md
Normal file
131
docs/personal-assistant-roadmap.md
Normal file
@@ -0,0 +1,131 @@
|
||||
# From Claw Code to a Personal AI Assistant (Life OS)
|
||||
|
||||
This document turns the current “developer CLI agent” direction into a concrete path toward a **personal AI assistant**: a multi-channel interface (chat/voice), personal memory (RAG for life), tool/action integrations (MCP + plugins), proactivity (OmX-style loops), and long-lived identity (sessions + profile).
|
||||
|
||||
It is intentionally pragmatic: each section has **MVP scope**, **next step**, and **evolution**.
|
||||
|
||||
---
|
||||
|
||||
## 1) Interface: out of the terminal
|
||||
|
||||
### Goal
|
||||
Make `claw` usable without opening an IDE or terminal — from a phone, from chat, and eventually by voice.
|
||||
|
||||
### MVP
|
||||
- **Chat bridge**: a small service that relays messages from **Discord** (primary) or Telegram to `claw` / `claw-analog`.
|
||||
- Treat the chat thread as the “front-end”, and `claw` as the execution runtime.
|
||||
- Map a channel/thread to a **session id** (resume/append).
|
||||
- **Basic UX**: slash-like commands in chat:
|
||||
- `/prompt …`, `/resume latest`, `/status`, `/cost`, `/help`
|
||||
- “safe mode” defaults (read-only) unless elevated explicitly.
|
||||
|
||||
### Next step
|
||||
- **Voice**:
|
||||
- Speech-to-text input (e.g. Whisper-class STT) into the same chat bridge.
|
||||
- Text-to-speech output for hands-free feedback.
|
||||
|
||||
### Evolution
|
||||
- Multi-modal: attachments (images/PDF) routed into ingest/personal memory.
|
||||
- Presence and notifications: summaries pushed back into chat.
|
||||
|
||||
---
|
||||
|
||||
## 2) Memory: from “RAG for code” to “RAG for life”
|
||||
|
||||
### Goal
|
||||
Let the assistant answer personal questions and make decisions using *your* long-term context, not only the current repo.
|
||||
|
||||
### MVP
|
||||
- Extend ingestion inputs beyond git workspaces:
|
||||
- Notes (Markdown), exported chats, simple text logs.
|
||||
- PDFs (initially text extraction outside Rust is OK; later: built-in pipeline).
|
||||
- Keep a clear separation:
|
||||
- **Work RAG** (code/workspaces)
|
||||
- **Personal RAG** (notes, plans, history)
|
||||
|
||||
### Next step
|
||||
- Evolve `retrieve_context` into a **multi-source retrieval tool**:
|
||||
- “where to search” selector (work/personal/both)
|
||||
- metadata filters (source, date ranges, tags)
|
||||
|
||||
### Evolution
|
||||
- Incremental ingestion + event-based updates (watch folders, chat events).
|
||||
- Better stores (ANN/Qdrant/etc) when scale demands it.
|
||||
|
||||
---
|
||||
|
||||
## 3) Hands: tools, MCP, plugins
|
||||
|
||||
### Goal
|
||||
The assistant is valuable because it can **do** things, not only talk.
|
||||
|
||||
### MVP
|
||||
- Wire in external systems via **MCP servers**:
|
||||
- Calendar, notes (Notion), email, task trackers, smart home (as available).
|
||||
- Establish a convention for “personal skills”:
|
||||
- a dedicated directory (e.g. `.claw/skills/`) for user-specific automations
|
||||
- small, composable tools (digest, budgeting, reminders) rather than monoliths
|
||||
|
||||
### Next step
|
||||
- “Tool discovery” UX: list available MCP/tools/skills directly from chat.
|
||||
- Permission boundaries per tool category (read vs write, destructive actions require explicit confirmation).
|
||||
|
||||
### Evolution
|
||||
- Plugin marketplace flows for reusing “skills”.
|
||||
- Audit logging and replay of actions.
|
||||
|
||||
---
|
||||
|
||||
## 4) Proactivity: OmX-style loops
|
||||
|
||||
### Goal
|
||||
Move from reactive “answer me” to proactive “notice + prepare + propose + execute”.
|
||||
|
||||
### MVP
|
||||
- A scheduled runner that periodically:
|
||||
- checks inbox/notifications
|
||||
- extracts actionable tasks
|
||||
- drafts responses
|
||||
- posts a short digest to chat
|
||||
|
||||
### Next step
|
||||
- Multi-agent patterns (Architect/Executor/Reviewer) for higher reliability:
|
||||
- executor proposes actions
|
||||
- reviewer validates safety and correctness
|
||||
- only then does the bridge run the write/action tool
|
||||
|
||||
### Evolution
|
||||
- Event-driven triggers (webhooks) instead of only cron.
|
||||
- “Autopilot” modes with bounded scopes (time, tools, spend limits).
|
||||
|
||||
---
|
||||
|
||||
## 5) Long-lived identity: sessions + profile
|
||||
|
||||
### Goal
|
||||
Make the assistant feel continuous and personalized across days/weeks.
|
||||
|
||||
### MVP
|
||||
- Default to resuming the latest session (`--resume latest`-style behavior).
|
||||
- Use a short, user-owned profile/system-prompt for tone and preferences.
|
||||
|
||||
### Next step
|
||||
- Separate:
|
||||
- “personality” (style, preferences)
|
||||
- “memory” (facts, history)
|
||||
- “policies” (permissions, safety rules)
|
||||
|
||||
### Evolution
|
||||
- Multiple personas (work/personal) with explicit switching.
|
||||
- Transparent memory controls (“forget this”, “store this”).
|
||||
|
||||
---
|
||||
|
||||
## Suggested milestone sequence
|
||||
|
||||
1. **Discord bridge + session mapping** (no new AI capabilities; just distribution).
|
||||
2. **Personal ingest source #1** (notes folder) + retrieval selector (personal/work).
|
||||
3. **One MCP integration** (calendar or notes) + a single “daily digest” skill.
|
||||
4. **Scheduled digest loop** (cron) with bounded permissions.
|
||||
5. **Voice input/output** on top of the same bridge.
|
||||
|
||||
78
docs/rag-web-ui.md
Normal file
78
docs/rag-web-ui.md
Normal file
@@ -0,0 +1,78 @@
|
||||
# RAG и веб‑UI: архитектура и фазы
|
||||
|
||||
Цель: **не** раздувать `claw-analog` и основной `claw` — вынести индексацию и (позже) UI в отдельные процессы с явными HTTP/MCP контрактами.
|
||||
|
||||
## Принципы
|
||||
|
||||
1. **RAG как сервис** — отдельный бинарь (сейчас `claw-rag-service`), свой жизненный цикл, свои секреты (embedding API), своё хранилище.
|
||||
2. **Агент только вызывает retrieval** — в **`claw-analog`** инструмент **`retrieve_context`** → HTTP `POST {RAG_BASE_URL}/v1/query` (база без суффикса `/v1`); лимиты **`rag_timeout_secs`**, **`rag_top_k_max`** в `.claw-analog.toml`; ответ для модели — фрагменты с `path` + `snippet` + `score`.
|
||||
3. **Веб‑UI** — минимальная страница **`GET /`** в `claw-rag-service` (stats + форма `POST /v1/query`); чат с моделью и «переиндексировать» из браузера — при необходимости позже.
|
||||
|
||||
## Компоненты (целевая картина)
|
||||
|
||||
```text
|
||||
┌─────────────────┐ POST /v1/query ┌──────────────────────┐
|
||||
│ claw-analog │ ──────────────────────►│ claw-rag-service │
|
||||
│ (+ tool) │◄──────────────────────│ (embed + vector DB) │
|
||||
└─────────────────┘ JSON hits └──────────┬───────────┘
|
||||
│
|
||||
ingest (watch / CLI)
|
||||
▼
|
||||
workspace files / git tree
|
||||
```
|
||||
|
||||
- **Индексация**: отдельная команда или воркер (chunking, хеш файла, инкремент). Хранилище: на старте SQLite + `sqlite-vec` / файловый эмбеддинг-кэш; при росте — Qdrant/Chroma в Docker.
|
||||
- **Эмбеддинги**: HTTP к OpenAI/Anthropic-совместимому embedding endpoint или локальная модель (отдельное решение по лицензии и размеру).
|
||||
- **Веб‑UI**: авторизация (минимум: токен + reverse proxy), SSE или WebSocket для стрима ответа модели; UI **не** владеет секретами провайдера — если продукт так решит, запросы к провайдеру проксируются через бэкенд.
|
||||
|
||||
## Текущая реализация
|
||||
|
||||
Крейт **`rust/crates/claw-rag-service`** (из каталога `rust/`):
|
||||
|
||||
### HTTP
|
||||
|
||||
- `GET /` — одностраничный UI (встроенный `static/index.html`): счётчики из `/v1/stats`, поиск через `/v1/query`.
|
||||
- `GET /health` — `ok`.
|
||||
- `GET /v1/stats` — `{ "chunks": N, "phase": "1-sqlite" }` (если БД ещё нет: `chunks: 0`, `phase`: `1-sqlite-no-db`).
|
||||
- `POST /v1/query` — тело `{"query":"...", "top_k":8}`; ответ `{"hits":[{"path","snippet","score"}], "phase":"1-sqlite"|"1-sqlite-empty"|"1-sqlite-no-db"}`.
|
||||
|
||||
Поиск: **линейный обход** всех векторов в SQLite (MVP; для больших репозиториев планировать Qdrant/sqlite-vec или батчевый ANN).
|
||||
|
||||
### Индексация (фаза 1)
|
||||
|
||||
```powershell
|
||||
cd D:\path\to\claw-code-main\rust
|
||||
$env:OPENAI_API_KEY = "sk-..."
|
||||
cargo run -p claw-rag-service -- ingest -w D:\path\to\repo --db D:\path\to\index.sqlite
|
||||
cargo run -p claw-analog -- ... # при RAG_BASE_URL или rag_base_url в TOML — инструмент retrieve_context
|
||||
```
|
||||
|
||||
Переменные окружения:
|
||||
|
||||
- **`OPENAI_API_KEY`** или **`CLAW_RAG_OPENAI_API_KEY`** — для вызова `POST …/embeddings`.
|
||||
- **`CLAW_RAG_EMBEDDING_BASE_URL`** — по умолчанию `https://api.openai.com/v1`.
|
||||
- **`CLAW_RAG_EMBEDDING_MODEL`** — по умолчанию `text-embedding-3-small`.
|
||||
- **`CLAW_RAG_DB`** — путь к SQLite (используется в `ingest` и `serve`; у `serve` есть значение по умолчанию `.claw-rag/index.sqlite`).
|
||||
- **`CLAW_RAG_PORT`** — порт HTTP (по умолчанию `8787`).
|
||||
- **`CLAW_RAG_MOCK_PROVIDERS=1`** — детерминированные вектора без сети (для тестов CI).
|
||||
|
||||
Запуск сервера: `cargo run -p claw-rag-service` или `cargo run -p claw-rag-service -- serve --db path\to\index.sqlite`.
|
||||
|
||||
### Дальше по фазам
|
||||
|
||||
| Фаза | Содержание |
|
||||
|------|------------|
|
||||
| 1 | ~~Ingest + SQLite + embeddings~~ (базово сделано; улучшения: инкремент, ANN, Docker-векторка). |
|
||||
| 2 | ~~Инструмент `retrieve_context`~~: `RAG_BASE_URL` / `rag_base_url`, `rag_timeout_secs`, `rag_top_k_max` в `.claw-analog.toml`. |
|
||||
| 3 | ~~Минимальный UI~~: `GET /` + те же `/v1/*` (дальше: чат, кнопка re-index из UI). |
|
||||
|
||||
## Риски и ограничения
|
||||
|
||||
- Секреты и PII в индексе; размер индекса и стоимость эмбеддингов.
|
||||
- Согласованность с symlink/jail как в `claw-analog` — retrieval не должен «утекать» за пределы workspace.
|
||||
- Локаль на UI: i18n отдельно от `AnalogLanguage` в CLI.
|
||||
|
||||
## Связанные документы
|
||||
|
||||
- Локальный запуск контейнеров (если поднимете векторку): [`container.md`](container.md).
|
||||
- Обзор `claw-analog`: [`how_to_run.md`](../how_to_run.md).
|
||||
389
how_to_run.md
Normal file
389
how_to_run.md
Normal file
@@ -0,0 +1,389 @@
|
||||
# claw-analog — как запускать и как это устроено
|
||||
|
||||
Минимальный агент поверх того же стека API, что и основной CLI [`claw`](rust/README.md): провайдеры Anthropic / OpenAI‑совместимые / xAI выбираются по модели и переменным окружения (см. [USAGE.md](USAGE.md)).
|
||||
|
||||
Дальше в примерах **рабочий каталог** — папка **`claw-code-main\rust`** (внутри клона репозитория). Если приглашение PowerShell уже `…\claw-code-main\rust>`, **не** выполняйте второй раз `cd rust` (иначе будет `rust\rust` и ошибка пути).
|
||||
|
||||
## Требования
|
||||
|
||||
- Установленный **Rust** и **cargo** (в PATH: обычно `%USERPROFILE%\.cargo\bin` на Windows).
|
||||
- Ключ API для выбранного провайдера (например `ANTHROPIC_API_KEY`).
|
||||
|
||||
## Сборка и справка
|
||||
|
||||
```powershell
|
||||
cd D:\path\to\claw-code-main\rust
|
||||
cargo build -p claw-analog
|
||||
cargo run -p claw-analog -- --help
|
||||
```
|
||||
|
||||
### Диагностика (`doctor`)
|
||||
|
||||
Подкоманда **`claw-analog doctor`** (у неё свой `--help`, отдельно от основного режима):
|
||||
|
||||
- **превью конфигурации** — итог после слияния **`.claw-analog.toml`** (путь `<workspace>/.claw-analog.toml` или **`--config`**) и **тех же флагов**, что у основного run: **`--model`**, **`--permission`**, **`--preset`**, **`--output-format`**, **`--stream`**, **`--no-stream`**, **`--no-runtime-enforcer`**, **`--accept-danger-non-interactive`**, плюс **`--profile`** для отображения пути к профилю. Печатаются контракт NDJSON (`schema`, `format_version`), эффективные поля и строки **provenance** (что победило: CLI, TOML или default);
|
||||
- статус типовых переменных (**без** значений: только `set` / `unset` и длина строки);
|
||||
- поиск workspace вверх от cwd (или **`--manifest-dir`**) и по умолчанию **`cargo check -p claw-analog`** (только компиляция, **не** перезаписывает `target\debug\claw-analog.exe` — иначе на Windows при `cargo run … doctor` часто «Отказано в доступе» при вложенном `cargo build`);
|
||||
- **`--release-build`** — **`cargo build --release -p claw-analog`** (бинарь в `target\release\`, не конфликтует с запущенным debug‑exe);
|
||||
- **`--no-build`** — пропустить cargo;
|
||||
- **`--tcp-ping`** (алиас **`--mock`**) — TCP **`connect`** к хосту:порту из **`ANTHROPIC_BASE_URL`** (или к дефолтному `https://api.anthropic.com`); не проверяет HTTP/TLS и тело ответа.
|
||||
|
||||
Примеры (из каталога `…\claw-code-main\rust`):
|
||||
|
||||
```powershell
|
||||
cargo run -p claw-analog -- doctor
|
||||
cargo run -p claw-analog -- doctor --no-build
|
||||
cargo run -p claw-analog -- doctor --tcp-ping
|
||||
cargo run -p claw-analog -- doctor -w D:\path\to\repo --preset implement
|
||||
cargo run -p claw-analog -- doctor --release-build
|
||||
```
|
||||
|
||||
### Проверка конфигурации без API (`config validate`)
|
||||
|
||||
Подкоманда **`claw-analog config validate`**:
|
||||
|
||||
- парсит **`.claw-analog.toml`** (по умолчанию `<workspace>/.claw-analog.toml`, переопределение **`--config`**) и выводит краткий **merge preview** (как у `doctor`, но **только TOML + defaults**, без флагов основного run);
|
||||
- проверяет **`profile.toml`**: тот же порядок, что у run (`--profile`, поле `profile` в TOML, иначе дефолтный `~/.claw-analog/profile.toml` при наличии файла);
|
||||
- **никаких** запросов к LLM и сети API.
|
||||
|
||||
**`--strict`** — ошибка (код выхода 1), если файла конфигурации нет или профиль не читается.
|
||||
|
||||
```powershell
|
||||
cargo run -p claw-analog -- config validate -w D:\path\to\repo
|
||||
cargo run -p claw-analog -- config validate --strict -w .
|
||||
```
|
||||
|
||||
### Дополнение оболочки (`complete`)
|
||||
|
||||
Скрипт автодополнения в **stdout** (перенаправьте в файл из документации вашей оболочки):
|
||||
|
||||
```powershell
|
||||
cargo run -p claw-analog -- complete powershell >> $PROFILE
|
||||
# bash / zsh / fish — см. вывод `complete --help`
|
||||
```
|
||||
|
||||
Доступные значения: **`bash`**, **`zsh`**, **`fish`**, **`powershell`** (алиас **`pwsh`**).
|
||||
|
||||
## Основные команды
|
||||
|
||||
Одна задача в аргументе (или текст с **stdin**):
|
||||
|
||||
```powershell
|
||||
# из ...\claw-code-main\rust
|
||||
cargo run -p claw-analog -- -w D:\path\to\repo "Кратко опиши структуру rust/crates"
|
||||
```
|
||||
|
||||
С **живым выводом** (SSE через `stream_message`):
|
||||
|
||||
```powershell
|
||||
cargo run -p claw-analog -- --stream -w . "Объясни claw-analog в двух предложениях"
|
||||
```
|
||||
|
||||
Разрешить **запись файлов** в workspace:
|
||||
|
||||
```powershell
|
||||
cargo run -p claw-analog -- --permission workspace-write -w . "Добавь комментарий в начало crates/claw-analog/Cargo.toml"
|
||||
```
|
||||
|
||||
Отключить проверку через **`runtime::PermissionEnforcer`** (только своя тюрьма путей; не рекомендуется):
|
||||
|
||||
```powershell
|
||||
cargo run -p claw-analog -- --no-runtime-enforcer -w . "…"
|
||||
```
|
||||
|
||||
Полезные лимиты (CLI **перекрывает** значения из `.claw-analog.toml`, см. ниже):
|
||||
|
||||
| Флаг | Значение по умолчанию | Назначение |
|
||||
|------|------------------------|------------|
|
||||
| `--max-read-bytes` | 262144 | Максимум байт для `read_file` / `grep_workspace` / `git_diff` / `git_log` |
|
||||
| `--max-turns` | 24 | Максимум раундов «модель → инструменты → модель» |
|
||||
| `--max-list-entries` | 500 | Лимит строк `list_dir` |
|
||||
| `--grep-max-lines` | 200 | Верхняя граница **суммарных** строк совпадений в `grep_workspace` (в т.ч. по нескольким файлам; в одном файле можно задать меньше через `max_lines`) |
|
||||
| `--glob-max-paths` | 2000 | Максимум путей, возвращаемых `glob_workspace` и при расширении `glob` внутри `grep_workspace` |
|
||||
| `--glob-max-depth` | 32 | Глубина обхода каталогов для glob (через `walkdir`), без бесконечной рекурсии |
|
||||
| `--output-format` | `rich` | `json` — NDJSON на stdout для скриптов и агентов |
|
||||
| `--print-tools` | — | Список эффективных инструментов для итоговых `permission` / enforcer, затем выход (**без** промпта и API) |
|
||||
| `--lang` | `en` | Подсказка в system: `en` или `ru` (язык ответов; **не** меняет id модели в API) |
|
||||
| `--preset` | — | `none` \| `audit` \| `explain` \| `implement` — см. раздел ниже |
|
||||
| `--session` | — | Путь к JSON-сессии (относительно `-w`, если не абсолютный): сохранение истории и resume |
|
||||
| `--save-session` | — | Дополнительный путь: тот же снимок сессии пишется сюда при каждом сохранении (можно **без** `--session`, чтобы только экспортировать JSON после прогона) |
|
||||
| `--profile` | — | TOML с полем `line` (подмешивается в system). Без флага: пробуется `%USERPROFILE%\.claw-analog\profile.toml` (Windows) / `~/.claw-analog/profile.toml` |
|
||||
| `--permission` | `read-only` | см. ниже: `read-only`, `workspace-write`, `prompt`, `danger-full-access`, `allow` |
|
||||
| `--accept-danger-non-interactive` | — | Разрешить `danger-full-access` / `allow`, когда stdin **не** TTY (CI; осознанный риск). В TOML: `accept_danger_non_interactive = true` |
|
||||
|
||||
Конфиг по умолчанию читается из **`<workspace>/.claw-analog.toml`**, если файл существует. Другой путь: **`--config PATH`**. Неизвестные ключи в TOML — ошибка парсинга (строгая схема).
|
||||
|
||||
Пример `.claw-analog.toml`:
|
||||
|
||||
```toml
|
||||
model = "sonnet"
|
||||
stream = true
|
||||
output_format = "rich"
|
||||
permission = "read-only"
|
||||
language = "en"
|
||||
preset = "audit"
|
||||
session = ".claw-analog.session.json"
|
||||
profile = "~/.claw-analog/profile.toml"
|
||||
no_runtime_enforcer = false
|
||||
accept_danger_non_interactive = false
|
||||
max_read_bytes = 262144
|
||||
max_turns = 24
|
||||
max_list_entries = 500
|
||||
grep_max_lines = 200
|
||||
glob_max_paths = 2000
|
||||
glob_max_depth = 32
|
||||
# Опционально: RAG (`claw-rag-service`) — см. раздел про RAG ниже
|
||||
# rag_base_url = "http://127.0.0.1:8787"
|
||||
# rag_timeout_secs = 30
|
||||
# rag_top_k_max = 32
|
||||
```
|
||||
|
||||
**RAG (`retrieve_context`):** если задана переменная окружения **`RAG_BASE_URL`** или непустой **`rag_base_url`** в `.claw-analog.toml`, в набор инструментов добавляется **`retrieve_context`** (семантический поиск по уже проиндексированному воркспейсу). Значение — корневой URL HTTP‑сервиса, без суффикса `/v1` (запрос идёт на `{base}/v1/query`). Таймаут и верхняя граница **`top_k`** задаются **`rag_timeout_secs`** и **`rag_top_k_max`** (по умолчанию 30 с и 32; «жёсткий» потолок 256). Индексация по-прежнему выполняется отдельной командой **`claw-rag-service`**, см. [`docs/rag-web-ui.md`](docs/rag-web-ui.md).
|
||||
|
||||
**`permission`** (как у полного `claw`, те же строки в TOML):
|
||||
|
||||
| Значение | Инструмент `write_file` | Неинтерактив (stdin не TTY) |
|
||||
|----------|-------------------------|------------------------------|
|
||||
| `read-only` | нет | OK |
|
||||
| `workspace-write` | да (в пределах `-w`) | OK |
|
||||
| `prompt` | нет (в этом harness Enforcer не даёт писать без подтверждений) | предупреждение в stderr; для автозаписи используйте `workspace-write` |
|
||||
| `danger-full-access`, `allow` | да | **запрещено**, пока не задан `--accept-danger-non-interactive` или `accept_danger_non_interactive = true` в TOML |
|
||||
|
||||
**`--stream`** в командной строке включает стриминг; **`--no-stream`** явно выключает (полезно поверх `stream = true` в файле).
|
||||
|
||||
**`language`** в TOML: `en` или `ru` (те же значения, что у **`--lang`**); CLI имеет приоритет.
|
||||
|
||||
### Сессия (`--session`)
|
||||
|
||||
Файл JSON (версия `1`): метаданные `workspace`, `model`, опционально `preset`, массив `messages` в формате API (`role` + `content`). При запуске с существующим файлом история **догружается**, текущий текст запроса (аргумент или stdin) добавляется как **новое** пользовательское сообщение. Состояние сохраняется после каждого полного раунда с инструментами и при завершении без `tool_use`.
|
||||
|
||||
**`--save-session`** — тот же формат файла, что и у `--session`: при каждом шаге, где обновлялся бы файл сессии, запись дублируется (если путь совпадает с `--session`, вторая запись не выполняется). Без **`--session`** можно собрать историю одного прогона в JSON для скриптов или последующего **`--session`** без ручной сборки `messages`.
|
||||
|
||||
**Риски:** в файле могут оказаться **секреты** (вывод `read_file`, ключи из логов), файл не шифруется; длинная история **дороже** по токенам API. В stderr печатается напоминание при **`--session`** или **`--save-session`**. Несовпадение `workspace` / `model` / `preset` с текущим запуском даёт **предупреждение**, но прогон продолжается.
|
||||
|
||||
### Пресеты (`--preset`)
|
||||
|
||||
Добавляют краткий абзац к system prompt (аудит / обучение / правки). Набор инструментов по-прежнему задаётся **permission**: для **`implement`**, если ни CLI, ни файл не задали `permission`, по умолчанию подставляется **workspace-write** (чтобы был `write_file`). Явный `permission = "read-only"` в файле или `--permission read-only` в CLI имеет приоритет.
|
||||
|
||||
### Профиль (`profile.toml`)
|
||||
|
||||
Мини-файл:
|
||||
|
||||
```toml
|
||||
line = "Короткая подсказка стиля (одна строка в system)."
|
||||
```
|
||||
|
||||
Ограничения: размер файла не больше **2048** байт; длина строки после trim — не больше **512** символов Unicode (иначе усечение с предупреждением). Содержимое добавляется в system одной строкой: `Learner hint: …`.
|
||||
|
||||
## Инструменты (без произвольного shell)
|
||||
|
||||
| Имя | Режим | Описание |
|
||||
|-----|--------|----------|
|
||||
| `read_file` | read-only+ | Чтение UTF‑8 файла под `-w` |
|
||||
| `list_dir` | read-only+ | Список каталога (не рекурсивно) |
|
||||
| `glob_workspace` | read-only+ | Список **путей файлов** под `-w`: аргументы `pattern` (glob относительно `root`, слэши `/`), опционально `root` (по умолчанию `.`), `max_paths` (урезается лимитом CLI). В шаблоне нельзя `..`. |
|
||||
| `grep_workspace` | read-only+ | Та же **литеральная** подстрока по строкам, что и раньше; ровно один из селекторов: `path`, массив `paths` или `glob` (+ опционально `glob_root`). Общий бюджет строк — `max_lines` и `--grep-max-lines`. В нескольких файлах формат строк: `относительный/путь:номер_строки:содержимое`. |
|
||||
| `grep_search` | read-only+ | Тот же обработчик, что у `grep_workspace` (совместимость промптов с полным `claw`). |
|
||||
| `git_diff` | read-only+ | `git diff` (без цвета) внутри репозитория в `-w`. Опционально `cached` (staged), `rev_range`, `context_lines`, `paths`. Вывод ограничен `--max-read-bytes`. |
|
||||
| `git_log` | read-only+ | `git log` (без цвета) внутри репозитория в `-w`. Опционально `max_count` (по умолчанию 20), `rev_range`, `paths`. Вывод ограничен `--max-read-bytes`. |
|
||||
| `retrieve_context` | read-only+ | Только если задан **`RAG_BASE_URL`** или **`rag_base_url`** в TOML: HTTP **`POST {base}/v1/query`** к `claw-rag-service`, ответ — пути и сниппеты чанков (лимиты см. выше). |
|
||||
| `write_file` | `workspace-write`, `danger-full-access` или `allow` | Запись файла; родительские каталоги создаются при необходимости (`prompt` не даёт записать через Enforcer) |
|
||||
|
||||
## Принципы работы
|
||||
|
||||
1. **Корень workspace** (`-w`) приводится к каноническому пути; все пути в инструментах **относительные**, без `..` и без абсолютных сегментов.
|
||||
2. Перед доступом к файлу проверяется, что реальный путь остаётся **внутри** корня (symlink/`canonicalize`).
|
||||
3. **Политика прав** (если не отключена флагом `--no-runtime-enforcer`): те же сущности, что у основного CLI — `PermissionPolicy` + `PermissionEnforcer::check` для инструмента и `check_file_write` для записи.
|
||||
4. **Цикл агента**: запрос к провайдеру → если `stop_reason == tool_use`, выполняются вызовы, результаты уходят в историю как `tool_result` → следующий раунд.
|
||||
5. **Стриминг**: при `--stream` текст ассистента печатается по мере прихода дельт; история для следующего раунда собирается из SSE так же, как в полном пайплайне (индексы блоков + JSON tool input). Отключить стриминг при настройке из файла можно флагом **`--no-stream`**.
|
||||
|
||||
Логи вида `[claw-analog] ...` пишутся в **stderr**. В режиме **rich** ответ модели — обычный текст в **stdout**; в режиме **json** в **stdout** идёт только **NDJSON** (см. ниже).
|
||||
|
||||
## Вывод JSON (CI и внешние агенты)
|
||||
|
||||
Флаг **`--output-format json`** переключает stdout на **поток строк JSON** (один объект = одна строка). Поля стабильны по смыслу, но набор может расширяться.
|
||||
|
||||
Основные `type`:
|
||||
|
||||
| `type` | Когда |
|
||||
|--------|--------|
|
||||
| `run_start` | Старт прогона: **`schema`** (`claw-analog-ndjson`), **`format_version`**, далее `workspace`, `model`, `stream`, `permission`, опционально `preset`, `session`, опционально `session_save`, булево **`rag_enabled`** (есть ли база для `retrieve_context`) |
|
||||
| `turn_start` | Начало раунда с моделью (`turn`) |
|
||||
| `assistant_text_delta` | Только при `--stream`: фрагмент текста ассистента |
|
||||
| `assistant_turn` | Итог раунда: `stop_reason`, `usage`, полный `text`, массив `tool_calls` |
|
||||
| `tool_result` | После выполнения инструмента: `name`, `tool_use_id`, `is_error`, `output` (может быть усечён), `truncated`, `output_len_chars` |
|
||||
| `run_end` | Успешное завершение (`ok: true`) |
|
||||
| `error` | Ошибка (печатается отдельной строкой при падении или пустом промпте) |
|
||||
|
||||
Пример (PowerShell): поток удобно разбирать построчно с помощью **`jq`** или любого JSON‑парсера.
|
||||
|
||||
```powershell
|
||||
# из ...\claw-code-main\rust
|
||||
$env:ANTHROPIC_API_KEY = "sk-ant-..."
|
||||
cargo run -p claw-analog -- --output-format json -w . "Summarize rust/README.md" 2>$null | ForEach-Object { $_ | ConvertFrom-Json | Select-Object -ExpandProperty type }
|
||||
```
|
||||
|
||||
С **`--stream`** в stdout сначала идут события `assistant_text_delta`, затем для того же раунда — одна строка `assistant_turn` с полным собранным `text` (удобно для воспроизводимых логов).
|
||||
|
||||
### Ограничения и риски для агентов
|
||||
|
||||
- В **`tool_result.output`** большие файлы обрезаются (~32 KiB UTF‑8), поле **`truncated`: true**.
|
||||
- **Секреты**: не перенаправляйте stderr сырьём в публичные логи без фильтра; в `output` теоретически может попасть содержимое прочитанных файлов.
|
||||
- Контракт для оркестраторов: NDJSON из stdout, диагностика из stderr; код возврата ≠ 0 при ошибке. На первой строке **`run_start`** имеет смысл сверять **`schema`** и **`format_version`**; **`run_start`** также раскрывает путь workspace и модель — учитывайте при шаринге логов.
|
||||
|
||||
## Автотесты без реальной сети
|
||||
|
||||
Юнит‑тесты и интеграция с локальным **mock-anthropic-service**:
|
||||
|
||||
```powershell
|
||||
# из ...\claw-code-main\rust
|
||||
cargo test -p claw-analog
|
||||
```
|
||||
|
||||
В **GitHub Actions** отдельный job **`claw-analog (test + clippy -p)`** гоняет `cargo test -p claw-analog` и `cargo clippy -p claw-analog --no-deps` (в дополнение к полному `cargo test` / `clippy` по workspace).
|
||||
|
||||
При параллельном запуске тестов переменные окружения Anthropic изолированы **mutex**‑ом только для mock‑сценария; при сбоях можно запустить `cargo test -p claw-analog -- --test-threads=1`.
|
||||
|
||||
## Отдельно: `claw-rag-service` (RAG)
|
||||
|
||||
Индексация воркспейса и HTTP API живут в **`cargo run -p claw-rag-service`** (`ingest` + `serve`). После `serve` откройте **`http://127.0.0.1:8787/`** — лёгкий UI (stats + поиск). К `claw-analog` подключается через **`RAG_BASE_URL`** / `retrieve_context`. Подробности и env: [`docs/rag-web-ui.md`](docs/rag-web-ui.md).
|
||||
|
||||
### Ingest (один или несколько репозиториев)
|
||||
|
||||
`ingest` принимает **повторяемый** `--workspace` — это позволяет сделать **cross-repo RAG** (несколько репозиториев в одну БД/коллекцию).
|
||||
|
||||
```powershell
|
||||
# из ...\claw-code-main\rust
|
||||
|
||||
# один workspace
|
||||
cargo run -p claw-rag-service -- ingest --workspace "D:\v\kria\s6"
|
||||
|
||||
# несколько workspace (cross-repo)
|
||||
cargo run -p claw-rag-service -- ingest --workspace "D:\repo1" --workspace "D:\repo2"
|
||||
```
|
||||
|
||||
В ответах `path` будет вида `repoId:relative/path` (чтобы не было коллизий одинаковых путей между репозиториями).
|
||||
|
||||
### Mock embeddings (без ключей / без сети)
|
||||
|
||||
Для локальных прогонов/тестов можно включить mock-эмбеддинги:
|
||||
|
||||
```powershell
|
||||
$env:CLAW_RAG_MOCK_PROVIDERS = "1"
|
||||
cargo run -p claw-rag-service -- ingest --workspace "D:\v\kria\s6"
|
||||
```
|
||||
|
||||
### Qdrant (рекомендуемый локальный вариант) через Docker
|
||||
|
||||
Для больших репозиториев лучше поднять локальный Qdrant: он избавляет от линейного сканирования векторов в SQLite и ускоряет запросы.
|
||||
|
||||
Запуск Qdrant (gRPC на 6334):
|
||||
|
||||
```powershell
|
||||
docker run --rm -p 6333:6333 -p 6334:6334 -e QDRANT__SERVICE__GRPC_PORT=6334 qdrant/qdrant
|
||||
```
|
||||
|
||||
#### Qdrant с persist volume (чтобы индекс сохранялся)
|
||||
|
||||
Вариант через именованный volume Docker:
|
||||
|
||||
```powershell
|
||||
docker volume create claw-qdrant-data
|
||||
docker run --rm -p 6333:6333 -p 6334:6334 `
|
||||
-e QDRANT__SERVICE__GRPC_PORT=6334 `
|
||||
-v claw-qdrant-data:/qdrant/storage `
|
||||
qdrant/qdrant
|
||||
```
|
||||
|
||||
Вариант через bind-mount (путь на хосте):
|
||||
|
||||
```powershell
|
||||
mkdir .claw-qdrant | Out-Null
|
||||
docker run --rm -p 6333:6333 -p 6334:6334 `
|
||||
-e QDRANT__SERVICE__GRPC_PORT=6334 `
|
||||
-v "${PWD}/.claw-qdrant:/qdrant/storage" `
|
||||
qdrant/qdrant
|
||||
```
|
||||
|
||||
Затем задайте переменные окружения и запускайте ingest с фичей `qdrant-index`:
|
||||
|
||||
```powershell
|
||||
$env:CLAW_RAG_QDRANT_URL = "http://127.0.0.1:6334"
|
||||
$env:CLAW_RAG_QDRANT_COLLECTION = "claw_rag_chunks"
|
||||
|
||||
# (опционально) без реального API для эмбеддингов
|
||||
$env:CLAW_RAG_MOCK_PROVIDERS = "1"
|
||||
|
||||
cargo run -p claw-rag-service --features qdrant-index -- ingest --workspace "D:\v\kria\s6"
|
||||
```
|
||||
|
||||
`ingest` сам создаст коллекцию, если её ещё нет (по размерности эмбеддингов).
|
||||
|
||||
### Запуск через Docker (Qdrant + claw-rag-service)
|
||||
|
||||
Если хочется поднимать всё одной командой, удобнее использовать `docker compose`.
|
||||
|
||||
1) Запуск сервисов:
|
||||
|
||||
```powershell
|
||||
cd D:\path\to\claw-code-main
|
||||
docker compose up --build
|
||||
```
|
||||
|
||||
Примечание: образ `rag-serve`/`rag-ingest` собирается на достаточно свежем Rust (см. `rust/crates/claw-rag-service/Dockerfile`), потому что `qdrant-client` может требовать более новую версию Rust, чем старые pinned-теги.
|
||||
|
||||
Если сборка Docker падает и вы видите строки вроде `transferring context: 21.02GB`, проверьте, что:
|
||||
|
||||
- вы запускаете compose из корня репозитория (где лежит `docker-compose.yml`)
|
||||
- используется `.dockerignore` (уменьшает build-context, особенно если есть `target/` и локальные индексы)
|
||||
|
||||
Если сборка падает сразу с `EOF` на шаге `load local bake definitions`, попробуйте:
|
||||
|
||||
```powershell
|
||||
$env:COMPOSE_BAKE = "0"
|
||||
$env:DOCKER_BUILDKIT = "0"
|
||||
docker compose up --build
|
||||
```
|
||||
|
||||
2) Ingest (запускать отдельно, т.к. это batch job). Пример для одного workspace:
|
||||
|
||||
```powershell
|
||||
docker compose run --rm rag-ingest ingest --workspace "/workspaces/main"
|
||||
```
|
||||
|
||||
По умолчанию `rag-ingest` пишет индекс в общий volume, так что `rag-serve` сразу увидит чанки.
|
||||
|
||||
### Подключение к `claw-analog`
|
||||
|
||||
```powershell
|
||||
$env:RAG_BASE_URL = "http://127.0.0.1:8787"
|
||||
cargo run -p claw-analog -- -w "D:\v\kria\s6" "Найди где реализован ingest в RAG сервисе"
|
||||
```
|
||||
|
||||
## Auto‑TDD (автопроверки после `write_file`/`edit_file`)
|
||||
|
||||
В полном `claw` (и в других потребителях `runtime`) можно включить автозапуск линтера/тестов после успешных write-инструментов через `.claw/settings.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"autoTdd": {
|
||||
"enabled": true,
|
||||
"tools": ["write_file", "edit_file"],
|
||||
"commands": [
|
||||
"cd rust && cargo fmt",
|
||||
"cd rust && cargo clippy --workspace --all-targets -- -D warnings",
|
||||
"cd rust && cargo test --workspace"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Отличия от полного `claw`
|
||||
|
||||
- Узкий набор инструментов (нет bash/MCP/плагинов).
|
||||
- Проще аудировать и ограничивать по `--permission` и лимитам.
|
||||
- Основной продукт по-прежнему `cargo run -p rusty-claude-cli` → бинарь `claw`.
|
||||
|
||||
## Дальнейшая разработка
|
||||
|
||||
План и чеклист идей (в т.ч. заимствованных из продуктового слоя вроде DeepTutor): [`futute.md`](futute.md) в корне репозитория.
|
||||
15
rust/.dockerignore
Normal file
15
rust/.dockerignore
Normal file
@@ -0,0 +1,15 @@
|
||||
# This .dockerignore applies to docker-compose build context: ./rust
|
||||
target
|
||||
**/target
|
||||
.claw
|
||||
.claw-rag
|
||||
.claude
|
||||
node_modules
|
||||
dist
|
||||
build
|
||||
*.log
|
||||
*.tmp
|
||||
*.sqlite
|
||||
*.sqlite-wal
|
||||
*.sqlite-shm
|
||||
.DS_Store
|
||||
1121
rust/Cargo.lock
generated
Normal file → Executable file
1121
rust/Cargo.lock
generated
Normal file → Executable file
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@ members = ["crates/*"]
|
||||
resolver = "2"
|
||||
|
||||
[workspace.package]
|
||||
version = "0.1.0"
|
||||
version = "0.1.3"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
publish = false
|
||||
|
||||
@@ -71,7 +71,12 @@ pub fn build_http_client() -> Result<reqwest::Client, ApiError> {
|
||||
/// first outbound request instead of at construction time.
|
||||
#[must_use]
|
||||
pub fn build_http_client_or_default() -> reqwest::Client {
|
||||
build_http_client().unwrap_or_else(|_| reqwest::Client::new())
|
||||
build_http_client().unwrap_or_else(|_| {
|
||||
reqwest::Client::builder()
|
||||
.user_agent("clawd-rust-tools/0.1")
|
||||
.build()
|
||||
.expect("default client with user_agent should always succeed")
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a `reqwest::Client` from an explicit [`ProxyConfig`]. Used by tests
|
||||
@@ -81,7 +86,9 @@ pub fn build_http_client_or_default() -> reqwest::Client {
|
||||
/// and `https_proxy` fields and is registered as both an HTTP and HTTPS
|
||||
/// proxy so a single value can route every outbound request.
|
||||
pub fn build_http_client_with(config: &ProxyConfig) -> Result<reqwest::Client, ApiError> {
|
||||
let mut builder = reqwest::Client::builder().no_proxy();
|
||||
let mut builder = reqwest::Client::builder()
|
||||
.no_proxy()
|
||||
.user_agent("clawd-rust-tools/0.1");
|
||||
|
||||
let no_proxy = config
|
||||
.no_proxy
|
||||
|
||||
@@ -234,7 +234,7 @@ pub fn resolve_model_alias(model: &str) -> String {
|
||||
#[must_use]
|
||||
pub fn metadata_for_model(model: &str) -> Option<ProviderMetadata> {
|
||||
let canonical = resolve_model_alias(model);
|
||||
if canonical.starts_with("claude") {
|
||||
if canonical.starts_with("claude") || canonical.starts_with("anthropic/") {
|
||||
return Some(ProviderMetadata {
|
||||
provider: ProviderKind::Anthropic,
|
||||
auth_env: "ANTHROPIC_API_KEY",
|
||||
@@ -640,6 +640,14 @@ pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
|
||||
max_output_tokens: 16_384,
|
||||
context_window_tokens: 256_000,
|
||||
}),
|
||||
"qwen-max" => Some(ModelTokenLimit {
|
||||
max_output_tokens: 8_192,
|
||||
context_window_tokens: 131_072,
|
||||
}),
|
||||
"qwen-plus" => Some(ModelTokenLimit {
|
||||
max_output_tokens: 8_192,
|
||||
context_window_tokens: 131_072,
|
||||
}),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -505,10 +505,16 @@ impl StreamState {
|
||||
}
|
||||
|
||||
for choice in chunk.choices {
|
||||
// Handle reasoning/thinking from various provider fields
|
||||
if let Some(reasoning) = choice
|
||||
.delta
|
||||
.reasoning_content
|
||||
.filter(|value| !value.is_empty())
|
||||
.or(choice
|
||||
.delta
|
||||
.thinking
|
||||
.and_then(|t| t.content)
|
||||
.filter(|value| !value.is_empty()))
|
||||
{
|
||||
if !self.thinking_started {
|
||||
self.thinking_started = true;
|
||||
@@ -736,6 +742,7 @@ impl ToolCallState {
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ChatCompletionResponse {
|
||||
#[serde(default)]
|
||||
id: String,
|
||||
model: String,
|
||||
choices: Vec<ChatChoice>,
|
||||
@@ -806,6 +813,7 @@ impl OpenAiUsage {
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ChatCompletionChunk {
|
||||
#[serde(default)]
|
||||
id: String,
|
||||
#[serde(default)]
|
||||
model: Option<String>,
|
||||
@@ -817,6 +825,7 @@ struct ChatCompletionChunk {
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct ChunkChoice {
|
||||
#[serde(default)]
|
||||
delta: ChunkDelta,
|
||||
#[serde(default)]
|
||||
finish_reason: Option<String>,
|
||||
@@ -826,12 +835,21 @@ struct ChunkChoice {
|
||||
struct ChunkDelta {
|
||||
#[serde(default)]
|
||||
content: Option<String>,
|
||||
/// Some providers (GLM, DeepSeek) emit reasoning in `reasoning_content`
|
||||
#[serde(default)]
|
||||
reasoning_content: Option<String>,
|
||||
#[serde(default)]
|
||||
thinking: Option<ThinkingDelta>,
|
||||
#[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
|
||||
tool_calls: Vec<DeltaToolCall>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Deserialize)]
|
||||
struct ThinkingDelta {
|
||||
#[serde(default)]
|
||||
content: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct DeltaToolCall {
|
||||
#[serde(default)]
|
||||
@@ -928,13 +946,17 @@ fn wire_model_for_base_url<'a>(
|
||||
if lowered_prefix == "openai" {
|
||||
let trimmed_base_url = base_url.trim_end_matches('/');
|
||||
let default_openai = DEFAULT_OPENAI_BASE_URL.trim_end_matches('/');
|
||||
if matches!(
|
||||
lowered_prefix.as_str(),
|
||||
"xai" | "grok" | "kimi" | "gemini" | "gemma"
|
||||
) {
|
||||
return Cow::Borrowed(&model[pos + 1..]);
|
||||
}
|
||||
if config.provider_name == "OpenAI" && trimmed_base_url != default_openai {
|
||||
// OpenAI-compatible gateways such as OpenRouter commonly use
|
||||
// slash-containing model slugs (for example `openai/gpt-4.1-mini`).
|
||||
// Preserve the slug when the user configured a non-default OpenAI
|
||||
// base URL; the prefix still routed to the OpenAI-compatible client,
|
||||
// but the gateway owns the final model namespace.
|
||||
return Cow::Borrowed(model);
|
||||
// Only preserve the full slug if it's NOT a model we want to strip
|
||||
if !model.contains("gemini") && !model.contains("gemma") {
|
||||
return Cow::Borrowed(model);
|
||||
}
|
||||
}
|
||||
return Cow::Borrowed(&model[pos + 1..]);
|
||||
}
|
||||
@@ -1454,7 +1476,50 @@ fn parse_sse_frame(
|
||||
data_lines.push(data.trim_start());
|
||||
}
|
||||
}
|
||||
// If no SSE data lines found, check if the entire frame is raw JSON (error or otherwise)
|
||||
if data_lines.is_empty() {
|
||||
// Detect raw JSON error response (not SSE-framed)
|
||||
if let Ok(raw) = serde_json::from_str::<serde_json::Value>(trimmed) {
|
||||
if let Some(err_obj) = raw.get("error") {
|
||||
let msg = err_obj
|
||||
.get("message")
|
||||
.and_then(|m| m.as_str())
|
||||
.unwrap_or("provider returned an error")
|
||||
.to_string();
|
||||
let code = err_obj
|
||||
.get("code")
|
||||
.and_then(serde_json::Value::as_u64)
|
||||
.map(|c| c as u16);
|
||||
let status = reqwest::StatusCode::from_u16(code.unwrap_or(500))
|
||||
.unwrap_or(reqwest::StatusCode::INTERNAL_SERVER_ERROR);
|
||||
return Err(ApiError::Api {
|
||||
status,
|
||||
error_type: err_obj
|
||||
.get("type")
|
||||
.and_then(|t| t.as_str())
|
||||
.map(str::to_owned),
|
||||
message: Some(msg),
|
||||
request_id: None,
|
||||
body: trimmed.chars().take(500).collect(),
|
||||
retryable: false,
|
||||
suggested_action: suggested_action_for_status(status),
|
||||
});
|
||||
}
|
||||
}
|
||||
// Detect HTML responses
|
||||
if trimmed.starts_with('<') || trimmed.starts_with("<!") {
|
||||
return Err(ApiError::Api {
|
||||
status: reqwest::StatusCode::BAD_REQUEST,
|
||||
error_type: Some("invalid_response".to_string()),
|
||||
message: Some(
|
||||
"provider returned HTML instead of JSON (check endpoint URL)".to_string(),
|
||||
),
|
||||
request_id: None,
|
||||
body: trimmed.chars().take(200).collect(),
|
||||
retryable: false,
|
||||
suggested_action: Some("verify the API endpoint URL is correct".to_string()),
|
||||
});
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
let payload = data_lines.join("\n");
|
||||
@@ -1491,6 +1556,21 @@ fn parse_sse_frame(
|
||||
});
|
||||
}
|
||||
}
|
||||
// Detect HTML or other non-JSON responses early for better error messages
|
||||
let trimmed_payload = payload.trim();
|
||||
if trimmed_payload.starts_with('<') || trimmed_payload.starts_with("<!") {
|
||||
return Err(ApiError::Api {
|
||||
status: reqwest::StatusCode::BAD_REQUEST,
|
||||
error_type: Some("invalid_response".to_string()),
|
||||
message: Some(
|
||||
"provider returned HTML instead of JSON (check endpoint URL)".to_string(),
|
||||
),
|
||||
request_id: None,
|
||||
body: payload.chars().take(200).collect(),
|
||||
retryable: false,
|
||||
suggested_action: Some("verify the API endpoint URL is correct".to_string()),
|
||||
});
|
||||
}
|
||||
serde_json::from_str::<ChatCompletionChunk>(&payload)
|
||||
.map(Some)
|
||||
.map_err(|error| ApiError::json_deserialize(provider, model, &payload, error))
|
||||
@@ -1777,6 +1857,7 @@ mod tests {
|
||||
delta: super::ChunkDelta {
|
||||
content: None,
|
||||
reasoning_content: Some("think".to_string()),
|
||||
thinking: None,
|
||||
tool_calls: Vec::new(),
|
||||
},
|
||||
finish_reason: None,
|
||||
@@ -1793,6 +1874,7 @@ mod tests {
|
||||
delta: super::ChunkDelta {
|
||||
content: Some(" answer".to_string()),
|
||||
reasoning_content: None,
|
||||
thinking: None,
|
||||
tool_calls: Vec::new(),
|
||||
},
|
||||
finish_reason: Some("stop".to_string()),
|
||||
|
||||
@@ -82,7 +82,7 @@ async fn send_message_posts_json_and_parses_response() {
|
||||
);
|
||||
assert_eq!(
|
||||
request.headers.get("user-agent").map(String::as_str),
|
||||
Some("claude-code/0.1.0")
|
||||
Some("claude-code/0.1.3")
|
||||
);
|
||||
assert_eq!(
|
||||
request.headers.get("anthropic-beta").map(String::as_str),
|
||||
|
||||
33
rust/crates/claw-analog/Cargo.toml
Normal file
33
rust/crates/claw-analog/Cargo.toml
Normal file
@@ -0,0 +1,33 @@
|
||||
[package]
|
||||
name = "claw-analog"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
publish.workspace = true
|
||||
description = "Minimal agent harness: tool loop with explicit permissions and workspace jail."
|
||||
|
||||
[lib]
|
||||
name = "claw_analog"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "claw-analog"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
api = { path = "../api" }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
clap_complete = "4"
|
||||
globset = "0.4"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
runtime = { path = "../runtime" }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json.workspace = true
|
||||
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
|
||||
toml = "0.8"
|
||||
walkdir = "2"
|
||||
ignore = "0.4"
|
||||
|
||||
[dev-dependencies]
|
||||
mock-anthropic-service = { path = "../mock-anthropic-service" }
|
||||
tempfile = "3"
|
||||
489
rust/crates/claw-analog/src/agents.rs
Normal file
489
rust/crates/claw-analog/src/agents.rs
Normal file
@@ -0,0 +1,489 @@
|
||||
//! `claw-analog agents` — run multiple specialized sub-agents sequentially.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use api::InputMessage;
|
||||
use clap::{Parser, ValueEnum};
|
||||
use claw_analog::{
|
||||
enforce_non_interactive_permission_rules, load_analog_toml, resolve_analog_options,
|
||||
resolve_analog_profile_path, resolve_rag_base_url, AnalogConfig, AnalogDoctorOverrides,
|
||||
AnalogFileConfig, OutputFormat, PermissionMode, Preset, StreamOverride,
|
||||
};
|
||||
|
||||
// Fallback limits applied when `.claw-analog.toml` leaves the matching key unset.

/// Default for `max_read_bytes`: 256 KiB.
const DEF_MAX_READ: u64 = 1 << 18;
/// Default for `max_turns`.
const DEF_MAX_TURNS: u32 = 24;
/// Default for `max_list_entries`.
const DEF_MAX_LIST: usize = 500;
/// Default for `grep_max_lines`.
const DEF_GREP_MAX: usize = 200;
/// Default for `glob_max_paths`.
const DEF_GLOB_PATHS: usize = 2_000;
/// Default for `glob_max_depth`.
const DEF_GLOB_DEPTH: usize = 32;
/// Default for `rag_timeout_secs`, in seconds.
const DEF_RAG_TIMEOUT_SECS: u64 = 30;
/// Default for `rag_top_k_max`.
const DEF_RAG_TOP_K_MAX: u32 = 32;
/// Upper clamp for `rag_top_k_max`, whatever the config file asks for.
const RAG_TOP_K_ABS_CAP: u32 = 256;
|
||||
|
||||
#[derive(Copy, Clone, Debug, ValueEnum)]
|
||||
pub enum AgentsPresetArg {
|
||||
Audit,
|
||||
Explain,
|
||||
Implement,
|
||||
}
|
||||
|
||||
impl From<AgentsPresetArg> for Preset {
|
||||
fn from(p: AgentsPresetArg) -> Self {
|
||||
match p {
|
||||
AgentsPresetArg::Audit => Preset::Audit,
|
||||
AgentsPresetArg::Explain => Preset::Explain,
|
||||
AgentsPresetArg::Implement => Preset::Implement,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, ValueEnum)]
|
||||
pub enum AgentsPermissionArg {
|
||||
ReadOnly,
|
||||
WorkspaceWrite,
|
||||
Prompt,
|
||||
#[value(name = "danger-full-access")]
|
||||
DangerFullAccess,
|
||||
Allow,
|
||||
}
|
||||
|
||||
impl From<AgentsPermissionArg> for PermissionMode {
|
||||
fn from(p: AgentsPermissionArg) -> Self {
|
||||
match p {
|
||||
AgentsPermissionArg::ReadOnly => PermissionMode::ReadOnly,
|
||||
AgentsPermissionArg::WorkspaceWrite => PermissionMode::WorkspaceWrite,
|
||||
AgentsPermissionArg::Prompt => PermissionMode::Prompt,
|
||||
AgentsPermissionArg::DangerFullAccess => PermissionMode::DangerFullAccess,
|
||||
AgentsPermissionArg::Allow => PermissionMode::Allow,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AgentSpec {
|
||||
pub name: String,
|
||||
pub preset: Preset,
|
||||
pub permission: PermissionMode,
|
||||
pub model: Option<String>,
|
||||
pub prompt: Option<String>,
|
||||
}
|
||||
|
||||
fn default_permission_for_preset(p: Preset) -> PermissionMode {
|
||||
match p {
|
||||
Preset::Audit | Preset::Explain => PermissionMode::ReadOnly,
|
||||
Preset::Implement => PermissionMode::WorkspaceWrite,
|
||||
Preset::None => PermissionMode::ReadOnly,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_agent_spec(s: &str) -> Result<AgentSpec, String> {
|
||||
// Allowed forms:
|
||||
// - "audit" | "explain" | "implement"
|
||||
// - "name=audit,preset=audit,permission=read-only,model=...,prompt=..."
|
||||
let raw = s.trim();
|
||||
if raw.is_empty() {
|
||||
return Err("empty --agent spec".to_string());
|
||||
}
|
||||
|
||||
if !raw.contains('=') {
|
||||
let preset = match raw.to_ascii_lowercase().as_str() {
|
||||
"audit" => Preset::Audit,
|
||||
"explain" => Preset::Explain,
|
||||
"implement" | "fix" => Preset::Implement,
|
||||
other => return Err(format!("unknown agent shorthand: {other}")),
|
||||
};
|
||||
return Ok(AgentSpec {
|
||||
name: raw.to_string(),
|
||||
preset,
|
||||
permission: default_permission_for_preset(preset),
|
||||
model: None,
|
||||
prompt: None,
|
||||
});
|
||||
}
|
||||
|
||||
let mut name: Option<String> = None;
|
||||
let mut preset: Option<Preset> = None;
|
||||
let mut permission: Option<PermissionMode> = None;
|
||||
let mut model: Option<String> = None;
|
||||
let mut prompt: Option<String> = None;
|
||||
|
||||
for part in raw.split(',') {
|
||||
let (k, v) = part
|
||||
.split_once('=')
|
||||
.ok_or_else(|| format!("invalid agent spec part {part:?} (expected k=v)"))?;
|
||||
let k = k.trim().to_ascii_lowercase();
|
||||
let v = v.trim();
|
||||
if v.is_empty() {
|
||||
continue;
|
||||
}
|
||||
match k.as_str() {
|
||||
"name" => name = Some(v.to_string()),
|
||||
"preset" => {
|
||||
let p = match v.to_ascii_lowercase().as_str() {
|
||||
"audit" => Preset::Audit,
|
||||
"explain" => Preset::Explain,
|
||||
"implement" | "fix" => Preset::Implement,
|
||||
"none" => Preset::None,
|
||||
other => return Err(format!("unknown preset {other:?}")),
|
||||
};
|
||||
preset = Some(p);
|
||||
}
|
||||
"permission" => {
|
||||
let pm = match v.to_ascii_lowercase().replace('_', "-").as_str() {
|
||||
"read-only" | "readonly" => PermissionMode::ReadOnly,
|
||||
"workspace-write" | "write" => PermissionMode::WorkspaceWrite,
|
||||
"prompt" => PermissionMode::Prompt,
|
||||
"danger-full-access" | "danger" => PermissionMode::DangerFullAccess,
|
||||
"allow" => PermissionMode::Allow,
|
||||
other => return Err(format!("unknown permission {other:?}")),
|
||||
};
|
||||
permission = Some(pm);
|
||||
}
|
||||
"model" => model = Some(v.to_string()),
|
||||
"prompt" => prompt = Some(v.to_string()),
|
||||
other => return Err(format!("unknown agent spec key {other:?}")),
|
||||
}
|
||||
}
|
||||
|
||||
let preset = preset.unwrap_or(Preset::Audit);
|
||||
let permission = permission.unwrap_or_else(|| default_permission_for_preset(preset));
|
||||
let name = name.unwrap_or_else(|| preset.label().unwrap_or("agent").to_string());
|
||||
|
||||
Ok(AgentSpec {
|
||||
name,
|
||||
preset,
|
||||
permission,
|
||||
model,
|
||||
prompt,
|
||||
})
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
pub struct AgentsCli {
|
||||
/// Workspace root.
|
||||
#[arg(short = 'w', long, default_value = ".", value_name = "DIR")]
|
||||
pub workspace: PathBuf,
|
||||
|
||||
/// Config path (default: `<workspace>/.claw-analog.toml`).
|
||||
#[arg(long, value_name = "PATH")]
|
||||
pub config: Option<PathBuf>,
|
||||
|
||||
/// Base session path. If missing, it will be created from the base prompt.
|
||||
#[arg(long, value_name = "PATH")]
|
||||
pub base_session: PathBuf,
|
||||
|
||||
/// Base prompt. If omitted, reads from stdin.
|
||||
#[arg(long)]
|
||||
pub prompt: Option<String>,
|
||||
|
||||
/// Repeatable agent specs, e.g. `--agent audit` or `--agent name=fix,preset=implement,permission=workspace-write`.
|
||||
#[arg(long, required = true)]
|
||||
pub agent: Vec<String>,
|
||||
|
||||
/// If set, each agent writes its own session file next to base session.
|
||||
#[arg(long, default_value_t = true)]
|
||||
pub split_sessions: bool,
|
||||
}
|
||||
|
||||
fn load_file_config(path: &Path) -> AnalogFileConfig {
|
||||
if !path.is_file() {
|
||||
return AnalogFileConfig::default();
|
||||
}
|
||||
load_analog_toml(path).unwrap_or_default()
|
||||
}
|
||||
|
||||
fn config_path(args: &AgentsCli) -> PathBuf {
|
||||
args.config
|
||||
.clone()
|
||||
.unwrap_or_else(|| args.workspace.join(".claw-analog.toml"))
|
||||
}
|
||||
|
||||
/// Builds the per-agent session path `<base>.agent-<name>.json`.
///
/// The suffix is appended to the full base path (existing extension included),
/// so the result is always a sibling of `base`. Characters in `agent_name` that
/// are path separators, reserved in Windows file names, or control characters
/// are replaced with `_`; without that, a name such as `a/b` would point into a
/// different (possibly non-existent) directory. The base path is extended as
/// an `OsString`, so non-UTF-8 base paths are preserved instead of being
/// lossily re-encoded.
fn derive_agent_session_path(base: &Path, agent_name: &str) -> PathBuf {
    let safe_name: String = agent_name
        .chars()
        .map(|c| {
            let unsafe_in_file_name = c.is_control()
                || matches!(c, '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|');
            if unsafe_in_file_name {
                '_'
            } else {
                c
            }
        })
        .collect();

    let mut full = base.as_os_str().to_os_string();
    full.push(format!(".agent-{safe_name}.json"));
    PathBuf::from(full)
}
|
||||
|
||||
/// Reads all of stdin and returns it trimmed.
///
/// # Errors
/// Returns the I/O error text if stdin cannot be read, or a fixed message when
/// the input is empty after trimming.
fn read_stdin_prompt() -> Result<String, String> {
    use std::io::Read;

    let mut raw = String::new();
    if let Err(err) = std::io::stdin().read_to_string(&mut raw) {
        return Err(err.to_string());
    }
    match raw.trim() {
        "" => Err("empty prompt (pass --prompt or stdin)".to_string()),
        text => Ok(text.to_string()),
    }
}
|
||||
|
||||
fn ensure_base_session(base_session: &Path, workspace: &Path, prompt: &str) -> Result<(), String> {
|
||||
if base_session.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
let ws_s = workspace.display().to_string();
|
||||
let model = "base".to_string();
|
||||
let messages = if prompt.trim().is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
vec![InputMessage::user_text(prompt.to_string())]
|
||||
};
|
||||
claw_analog::session_save(base_session, &ws_s, &model, Preset::None, &messages)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn run_agents(args: AgentsCli) -> Result<(), String> {
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build()
|
||||
.map_err(|e| e.to_string())?;
|
||||
rt.block_on(async { run_agents_async(args).await })
|
||||
}
|
||||
|
||||
pub async fn run_agents_async(args: AgentsCli) -> Result<(), String> {
|
||||
run_agents_inner(args, |cfg, out| {
|
||||
Box::pin(async move {
|
||||
claw_analog::run(cfg, out)
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(())
|
||||
})
|
||||
})
|
||||
.await
|
||||
}
|
||||
|
||||
type RunFuture<'a> = std::pin::Pin<Box<dyn std::future::Future<Output = Result<(), String>> + 'a>>;
|
||||
|
||||
async fn run_agents_inner<F>(args: AgentsCli, mut run_one: F) -> Result<(), String>
|
||||
where
|
||||
for<'a> F: FnMut(AnalogConfig, &'a mut Vec<u8>) -> RunFuture<'a>,
|
||||
{
|
||||
let workspace = if args.workspace.is_absolute() {
|
||||
args.workspace.clone()
|
||||
} else {
|
||||
std::env::current_dir()
|
||||
.map_err(|e| e.to_string())?
|
||||
.join(&args.workspace)
|
||||
};
|
||||
let cfg_path = config_path(&args);
|
||||
let file_cfg = load_file_config(&cfg_path);
|
||||
|
||||
let base_prompt = match args.prompt.clone() {
|
||||
Some(p) => p,
|
||||
None => read_stdin_prompt()?,
|
||||
};
|
||||
ensure_base_session(&args.base_session, &workspace, base_prompt.as_str())?;
|
||||
|
||||
let mut specs = Vec::new();
|
||||
for a in &args.agent {
|
||||
specs.push(parse_agent_spec(a)?);
|
||||
}
|
||||
|
||||
println!("claw-analog agents (sequential)\n");
|
||||
println!(" workspace: {}", workspace.display());
|
||||
println!(" base_session: {}", args.base_session.display());
|
||||
println!(" agents: {}", specs.len());
|
||||
println!();
|
||||
|
||||
for (i, spec) in specs.into_iter().enumerate() {
|
||||
println!(
|
||||
"== Agent {} / {}: {} ==",
|
||||
i + 1,
|
||||
args.agent.len(),
|
||||
spec.name
|
||||
);
|
||||
println!(" preset: {}", spec.preset.label().unwrap_or("none"));
|
||||
println!(" permission: {}", spec.permission.as_str());
|
||||
if let Some(m) = &spec.model {
|
||||
println!(" model: {m}");
|
||||
}
|
||||
|
||||
enforce_non_interactive_permission_rules(spec.permission, false)?;
|
||||
|
||||
let agent_session = if args.split_sessions {
|
||||
derive_agent_session_path(&args.base_session, spec.name.as_str())
|
||||
} else {
|
||||
args.base_session.clone()
|
||||
};
|
||||
if args.split_sessions {
|
||||
std::fs::copy(&args.base_session, &agent_session).map_err(|e| e.to_string())?;
|
||||
}
|
||||
|
||||
let overrides = AnalogDoctorOverrides {
|
||||
model: spec.model.clone(),
|
||||
permission: Some(spec.permission),
|
||||
preset: Some(spec.preset),
|
||||
output_format: Some(OutputFormat::Rich),
|
||||
stream: StreamOverride::ForceOff,
|
||||
..Default::default()
|
||||
};
|
||||
let resolved = resolve_analog_options(&file_cfg, &overrides);
|
||||
|
||||
let profile_path =
|
||||
resolve_analog_profile_path(&workspace, None, file_cfg.profile.as_deref());
|
||||
let profile_hint = if let Some(ref p) = profile_path {
|
||||
claw_analog::load_profile_hint(p).unwrap_or(None)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let rag_base_url = resolve_rag_base_url(&file_cfg);
|
||||
|
||||
let agent_prompt = spec.prompt.unwrap_or_else(|| {
|
||||
format!(
|
||||
"Agent {}: run preset {}",
|
||||
spec.name,
|
||||
resolved.preset.label().unwrap_or("none")
|
||||
)
|
||||
});
|
||||
|
||||
let cfg = AnalogConfig {
|
||||
model: resolved.model,
|
||||
workspace: workspace.clone(),
|
||||
permission_mode: resolved.permission_mode,
|
||||
accept_danger_non_interactive: false,
|
||||
use_stream: false,
|
||||
output_format: resolved.output_format,
|
||||
use_runtime_enforcer: resolved.use_runtime_enforcer,
|
||||
max_read_bytes: file_cfg.max_read_bytes.unwrap_or(DEF_MAX_READ),
|
||||
max_turns: file_cfg.max_turns.unwrap_or(DEF_MAX_TURNS),
|
||||
max_list_entries: file_cfg.max_list_entries.unwrap_or(DEF_MAX_LIST),
|
||||
grep_max_lines: file_cfg.grep_max_lines.unwrap_or(DEF_GREP_MAX),
|
||||
glob_max_paths: file_cfg.glob_max_paths.unwrap_or(DEF_GLOB_PATHS),
|
||||
glob_max_depth: file_cfg.glob_max_depth.unwrap_or(DEF_GLOB_DEPTH),
|
||||
preset: resolved.preset,
|
||||
language: file_cfg
|
||||
.language
|
||||
.as_deref()
|
||||
.and_then(claw_analog::AnalogLanguage::from_toml_str)
|
||||
.unwrap_or_default(),
|
||||
session_path: Some(agent_session.clone()),
|
||||
session_save_path: None,
|
||||
profile_hint,
|
||||
prompt: agent_prompt,
|
||||
rag_base_url,
|
||||
rag_http_timeout: std::time::Duration::from_secs(
|
||||
file_cfg.rag_timeout_secs.unwrap_or(DEF_RAG_TIMEOUT_SECS),
|
||||
),
|
||||
rag_top_k_max: file_cfg
|
||||
.rag_top_k_max
|
||||
.unwrap_or(DEF_RAG_TOP_K_MAX)
|
||||
.clamp(1, RAG_TOP_K_ABS_CAP),
|
||||
};
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
let run_res = run_one(cfg, &mut buf).await;
|
||||
match run_res {
|
||||
Ok(()) => {
|
||||
let text = String::from_utf8_lossy(&buf);
|
||||
let summary = tail_chars(text.as_ref(), 1600);
|
||||
println!(" result: OK");
|
||||
if args.split_sessions {
|
||||
println!(" session: {}", agent_session.display());
|
||||
}
|
||||
println!(" summary_tail:\n{}\n", indent_lines(&summary, 4));
|
||||
}
|
||||
Err(e) => {
|
||||
println!(" result: FAIL — {e}\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the last `n` characters (Unicode scalar values, not bytes) of `s`,
/// or all of `s` when it has `n` characters or fewer.
fn tail_chars(s: &str, n: usize) -> String {
    if n == 0 {
        return String::new();
    }
    // Walk backwards to the n-th character from the end; its byte offset is
    // where the tail starts. If the string is shorter than that, keep it whole.
    match s.char_indices().rev().nth(n - 1) {
        Some((start, _)) => s[start..].to_string(),
        None => s.to_string(),
    }
}
|
||||
|
||||
/// Prefixes every line of `s` with `spaces` space characters.
///
/// Lines are taken from `str::lines` and re-joined with `\n`, so a trailing
/// newline in the input is not kept and an empty input gives an empty string.
fn indent_lines(s: &str, spaces: usize) -> String {
    let pad = " ".repeat(spaces);
    let mut out = String::with_capacity(s.len());
    for (idx, line) in s.lines().enumerate() {
        if idx > 0 {
            out.push('\n');
        }
        out.push_str(&pad);
        out.push_str(line);
    }
    out
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, OnceLock};

    /// Process-wide lock used to serialize tests; a poisoned lock is recovered
    /// instead of propagating the panic.
    fn mock_env_lock() -> std::sync::MutexGuard<'static, ()> {
        static LOCK: OnceLock<Mutex<()>> = OnceLock::new();
        match LOCK.get_or_init(|| Mutex::new(())).lock() {
            Ok(guard) => guard,
            Err(poisoned) => poisoned.into_inner(),
        }
    }

    /// The bare `audit` shorthand selects the audit preset with read-only access.
    #[test]
    fn parses_agent_shorthand() {
        let spec = parse_agent_spec("audit").unwrap();
        assert_eq!(spec.preset, Preset::Audit);
        assert_eq!(spec.permission, PermissionMode::ReadOnly);
    }

    /// The key/value form carries name, preset and permission through.
    #[test]
    fn parses_agent_kv() {
        let spec =
            parse_agent_spec("name=fix,preset=implement,permission=workspace-write").unwrap();
        assert_eq!(spec.name, "fix");
        assert_eq!(spec.preset, Preset::Implement);
        assert_eq!(spec.permission, PermissionMode::WorkspaceWrite);
    }

    /// Two agents are run through a stub runner; each must be invoked once and
    /// leave behind its own copy of the base session.
    #[test]
    fn runs_two_agents_sequentially_with_stub_runner() {
        use std::sync::atomic::{AtomicUsize, Ordering};
        use std::sync::Arc;

        let _guard = mock_env_lock();
        let tmp = tempfile::tempdir().unwrap();
        let workspace = tmp.path().canonicalize().unwrap();
        std::fs::write(workspace.join("fixture.txt"), "hello parity fixture\n").unwrap();

        // Pre-create the base session so the run never has to build one.
        let base_session = workspace.join(".claw").join("agents-base.json");
        std::fs::create_dir_all(base_session.parent().unwrap()).unwrap();
        let session_json = format!(
            "{{\n \"version\": 1,\n \"workspace\": \"{}\",\n \"model\": \"base\",\n \"messages\": []\n}}\n",
            workspace.display()
        );
        std::fs::write(&base_session, session_json).unwrap();

        let args = AgentsCli {
            workspace: workspace.clone(),
            config: None,
            base_session: base_session.clone(),
            prompt: Some(String::new()),
            agent: vec![
                "name=audit,preset=audit,permission=read-only,prompt=check 1".to_string(),
                "name=explain,preset=explain,permission=read-only,prompt=check 2".to_string(),
            ],
            split_sessions: true,
        };

        let calls = Arc::new(AtomicUsize::new(0));
        let calls_for_runner = calls.clone();
        let rt = tokio::runtime::Builder::new_multi_thread()
            .worker_threads(1)
            .enable_all()
            .build()
            .expect("runtime");
        rt.block_on(async {
            run_agents_inner(args, move |_cfg, out| {
                let counter = calls_for_runner.clone();
                Box::pin(async move {
                    counter.fetch_add(1, Ordering::Relaxed);
                    out.extend_from_slice(b"stub ok");
                    Ok(())
                })
            })
            .await
            .expect("agents should run");
        });

        assert_eq!(calls.load(Ordering::Relaxed), 2);
        for agent in ["audit", "explain"] {
            assert!(derive_agent_session_path(&base_session, agent).is_file());
        }
    }
}
|
||||
144
rust/crates/claw-analog/src/config_cmd.rs
Normal file
144
rust/crates/claw-analog/src/config_cmd.rs
Normal file
@@ -0,0 +1,144 @@
|
||||
//! `claw-analog config validate` — parse TOML and profile without calling the API.
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::Parser;
|
||||
use claw_analog::{
|
||||
load_analog_toml, load_profile_hint, resolve_analog_options, resolve_analog_profile_path,
|
||||
AnalogDoctorOverrides, AnalogFileConfig, AnalogLanguage, OutputFormat,
|
||||
};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
pub struct ValidateCli {
|
||||
#[arg(short = 'w', long, default_value = ".", value_name = "DIR")]
|
||||
pub workspace: PathBuf,
|
||||
#[arg(long, value_name = "PATH")]
|
||||
pub config: Option<PathBuf>,
|
||||
/// Require `<workspace>/.claw-analog.toml` (or `--config`) to exist and parse.
|
||||
#[arg(long, default_value_t = false, action = clap::ArgAction::SetTrue)]
|
||||
pub strict: bool,
|
||||
#[arg(long, value_name = "PATH")]
|
||||
pub profile: Option<PathBuf>,
|
||||
}
|
||||
|
||||
pub fn run_validate(cli: ValidateCli) -> i32 {
|
||||
let cfg_path = cli
|
||||
.config
|
||||
.clone()
|
||||
.unwrap_or_else(|| cli.workspace.join(".claw-analog.toml"));
|
||||
|
||||
let file_cfg = if cfg_path.is_file() {
|
||||
match load_analog_toml(&cfg_path) {
|
||||
Ok(c) => {
|
||||
println!("OK: {} parses", cfg_path.display());
|
||||
c
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("ERROR: {}: {e}", cfg_path.display());
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
} else if cli.strict {
|
||||
eprintln!(
|
||||
"ERROR: --strict: config file missing: {}",
|
||||
cfg_path.display()
|
||||
);
|
||||
return 1;
|
||||
} else {
|
||||
println!(
|
||||
"Note: {} absent — using empty TOML defaults for preview",
|
||||
cfg_path.display()
|
||||
);
|
||||
AnalogFileConfig::default()
|
||||
};
|
||||
|
||||
let prof_path = resolve_analog_profile_path(
|
||||
&cli.workspace,
|
||||
cli.profile.clone(),
|
||||
file_cfg.profile.as_deref(),
|
||||
);
|
||||
let mut ok = true;
|
||||
match &prof_path {
|
||||
None => println!(
|
||||
"Profile: (none — no CLI/TOML path and no default ~/.claw-analog/profile.toml)"
|
||||
),
|
||||
Some(p) => match load_profile_hint(p) {
|
||||
Ok(Some(line)) => println!(
|
||||
"OK: profile {} (line: {} chars)",
|
||||
p.display(),
|
||||
line.chars().count()
|
||||
),
|
||||
Ok(None) => println!("OK: profile {} (empty `line`)", p.display()),
|
||||
Err(e) => {
|
||||
eprintln!("ERROR: profile {}: {e}", p.display());
|
||||
ok = false;
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
let lang = file_cfg
|
||||
.language
|
||||
.as_deref()
|
||||
.and_then(AnalogLanguage::from_toml_str)
|
||||
.unwrap_or_default();
|
||||
|
||||
let r = resolve_analog_options(&file_cfg, &AnalogDoctorOverrides::default());
|
||||
println!("\nMerge preview (TOML + defaults only; main-run CLI flags not applied):");
|
||||
println!(" language (TOML): {}", lang.as_str());
|
||||
println!(" model: {}", r.model);
|
||||
println!(" permission: {}", r.permission_mode.as_str());
|
||||
println!(" preset: {}", r.preset.label().unwrap_or("none"));
|
||||
println!(
|
||||
" output_format: {}",
|
||||
match r.output_format {
|
||||
OutputFormat::Rich => "rich",
|
||||
OutputFormat::Json => "json",
|
||||
}
|
||||
);
|
||||
println!(" stream: {}", r.use_stream);
|
||||
println!(
|
||||
" runtime_enforcer: {}",
|
||||
if r.use_runtime_enforcer { "on" } else { "off" }
|
||||
);
|
||||
println!(
|
||||
" accept_danger_non_interactive: {}",
|
||||
r.accept_danger_non_interactive
|
||||
);
|
||||
println!(" Provenance:");
|
||||
for line in &r.provenance {
|
||||
println!(" - {line}");
|
||||
}
|
||||
|
||||
i32::from(!ok)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Strict-mode arguments for `workspace` with no explicit config or profile.
    fn strict_cli(workspace: &std::path::Path) -> ValidateCli {
        ValidateCli {
            workspace: workspace.to_path_buf(),
            config: None,
            strict: true,
            profile: None,
        }
    }

    /// `--strict` must fail when the workspace has no config file.
    #[test]
    fn strict_fails_when_config_missing() {
        let tmp = tempfile::tempdir().unwrap();
        assert_eq!(run_validate(strict_cli(tmp.path())), 1);
    }

    /// A minimal valid config in the workspace validates successfully.
    #[test]
    fn parses_when_config_present() {
        let tmp = tempfile::tempdir().unwrap();
        let cfg_file = tmp.path().join(".claw-analog.toml");
        std::fs::write(&cfg_file, r#"model = "sonnet""#).unwrap();
        assert_eq!(run_validate(strict_cli(tmp.path())), 0);
    }
}
|
||||
733
rust/crates/claw-analog/src/doctor.rs
Normal file
733
rust/crates/claw-analog/src/doctor.rs
Normal file
@@ -0,0 +1,733 @@
|
||||
//! `claw-analog doctor` — environment and Cargo sanity checks.
|
||||
|
||||
use std::net::{TcpStream, ToSocketAddrs};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::time::Duration;
|
||||
|
||||
use clap::ValueEnum;
|
||||
use claw_analog::{
|
||||
load_analog_toml, load_profile_hint, resolve_analog_options, AnalogDoctorOverrides,
|
||||
AnalogFileConfig, OutputFormat, PermissionMode, Preset, StreamOverride, NDJSON_FORMAT_VERSION,
|
||||
NDJSON_SCHEMA,
|
||||
};
|
||||
use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
|
||||
|
||||
/// Names of the environment variables that `check_env` reports on.
/// Each one is passed to `mask_env_line`, which prints only whether the
/// variable is set and how long it is, never its value.
const ENV_CHECK: &[&str] = &[
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL",
    "OPENAI_API_KEY",
    "OPENAI_BASE_URL",
    "XAI_API_KEY",
    "RAG_BASE_URL",
];
|
||||
|
||||
#[derive(Copy, Clone, Debug, ValueEnum)]
|
||||
pub enum DoctorPermissionArg {
|
||||
ReadOnly,
|
||||
WorkspaceWrite,
|
||||
Prompt,
|
||||
#[value(name = "danger-full-access")]
|
||||
DangerFullAccess,
|
||||
Allow,
|
||||
}
|
||||
|
||||
impl From<DoctorPermissionArg> for PermissionMode {
|
||||
fn from(p: DoctorPermissionArg) -> Self {
|
||||
match p {
|
||||
DoctorPermissionArg::ReadOnly => PermissionMode::ReadOnly,
|
||||
DoctorPermissionArg::WorkspaceWrite => PermissionMode::WorkspaceWrite,
|
||||
DoctorPermissionArg::Prompt => PermissionMode::Prompt,
|
||||
DoctorPermissionArg::DangerFullAccess => PermissionMode::DangerFullAccess,
|
||||
DoctorPermissionArg::Allow => PermissionMode::Allow,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, ValueEnum)]
|
||||
pub enum DoctorOutputArg {
|
||||
Rich,
|
||||
Json,
|
||||
}
|
||||
|
||||
impl From<DoctorOutputArg> for OutputFormat {
|
||||
fn from(o: DoctorOutputArg) -> Self {
|
||||
match o {
|
||||
DoctorOutputArg::Rich => OutputFormat::Rich,
|
||||
DoctorOutputArg::Json => OutputFormat::Json,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, ValueEnum)]
|
||||
pub enum DoctorPresetCli {
|
||||
None,
|
||||
Audit,
|
||||
Explain,
|
||||
Implement,
|
||||
}
|
||||
|
||||
impl From<DoctorPresetCli> for Preset {
|
||||
fn from(p: DoctorPresetCli) -> Self {
|
||||
match p {
|
||||
DoctorPresetCli::None => Preset::None,
|
||||
DoctorPresetCli::Audit => Preset::Audit,
|
||||
DoctorPresetCli::Explain => Preset::Explain,
|
||||
DoctorPresetCli::Implement => Preset::Implement,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Arguments of the `doctor` subcommand, consumed by `run_doctor`.
//
// The existing `///` comments are read by clap as `--help` text, so they are
// program output and are left untouched; reviewer notes use plain `//`.
#[derive(Debug, clap::Args)]
pub struct DoctorCli {
    /// Workspace root (same as `claw-analog -w`; config defaults to `<workspace>/.claw-analog.toml`).
    #[arg(short = 'w', long, default_value = ".", value_name = "DIR")]
    pub workspace: PathBuf,
    /// Config path (default: `<workspace>/.claw-analog.toml`).
    #[arg(long, value_name = "PATH")]
    pub config: Option<PathBuf>,
    /// Override model (same precedence as main CLI).
    #[arg(long)]
    pub model: Option<String>,
    // The next three options are forwarded to `resolve_analog_options` through
    // `AnalogDoctorOverrides`; `None` means the flag was not given.
    #[arg(long, value_enum)]
    pub permission: Option<DoctorPermissionArg>,
    #[arg(long, value_enum)]
    pub preset: Option<DoctorPresetCli>,
    #[arg(long, value_enum)]
    pub output_format: Option<DoctorOutputArg>,
    // `--stream` / `--no-stream` are mutually exclusive. `run_doctor` maps them
    // to `StreamOverride::ForceOn` / `ForceOff`, and to `FromFile` when neither is set.
    #[arg(long, default_value_t = false, conflicts_with = "no_stream")]
    pub stream: bool,
    #[arg(long, default_value_t = false, conflicts_with = "stream")]
    pub no_stream: bool,
    /// Disable `runtime::PermissionEnforcer` (same as main CLI).
    #[arg(
        long = "no-runtime-enforcer",
        default_value_t = false,
        action = clap::ArgAction::SetTrue
    )]
    pub no_runtime_enforcer: bool,
    // Passed through to the merge preview unchanged; `run_doctor` only prints the resolved value.
    #[arg(
        long = "accept-danger-non-interactive",
        default_value_t = false,
        action = clap::ArgAction::SetTrue
    )]
    pub accept_danger_non_interactive: bool,
    /// Profile TOML path (optional; if omitted, uses TOML `profile` or default `~/.claw-analog/profile.toml`).
    #[arg(long, value_name = "PATH")]
    pub profile: Option<PathBuf>,
    /// TCP connect to host:port from `ANTHROPIC_BASE_URL` (or default API URL); not a full HTTP check.
    #[arg(long, visible_alias = "mock")]
    pub tcp_ping: bool,
    /// Skip HTTPS/TLS + auth + quota header checks against configured providers.
    #[arg(long, default_value_t = false)]
    pub no_http_check: bool,
    /// Also probe the embeddings endpoint for OpenAI-compatible providers (may incur minimal cost).
    #[arg(long, default_value_t = false)]
    pub embeddings_check: bool,
    /// Skip compile check (`cargo check` / `build --release`).
    #[arg(long)]
    pub no_build: bool,
    /// Run `cargo build --release -p claw-analog` (writes `target/release/…`, safe while `cargo run` holds `target/debug/…` on Windows).
    #[arg(long, conflicts_with = "no_build")]
    pub release_build: bool,
    /// Directory containing the repo workspace `Cargo.toml` (default: search upward from cwd).
    #[arg(long, value_name = "DIR")]
    pub manifest_dir: Option<PathBuf>,
}
|
||||
|
||||
pub fn run_doctor(args: DoctorCli) -> i32 {
|
||||
println!("claw-analog doctor — environment and build checks\n");
|
||||
|
||||
let workspace = args.workspace.clone();
|
||||
let canon_ws = std::fs::canonicalize(&workspace).unwrap_or_else(|_| workspace.clone());
|
||||
let cfg_path = args
|
||||
.config
|
||||
.clone()
|
||||
.unwrap_or_else(|| workspace.join(".claw-analog.toml"));
|
||||
let (file_cfg, cfg_note) = if cfg_path.is_file() {
|
||||
match load_analog_toml(&cfg_path) {
|
||||
Ok(c) => (c, "loaded"),
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"[claw-analog] doctor: failed to parse {}: {e} (using empty TOML defaults)",
|
||||
cfg_path.display()
|
||||
);
|
||||
(AnalogFileConfig::default(), "parse error (defaults)")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
(AnalogFileConfig::default(), "file missing (defaults only)")
|
||||
};
|
||||
|
||||
let stream_ov = if args.no_stream {
|
||||
StreamOverride::ForceOff
|
||||
} else if args.stream {
|
||||
StreamOverride::ForceOn
|
||||
} else {
|
||||
StreamOverride::FromFile
|
||||
};
|
||||
let overrides = AnalogDoctorOverrides {
|
||||
model: args.model.clone(),
|
||||
permission: args.permission.map(Into::into),
|
||||
preset: args.preset.map(Into::into),
|
||||
output_format: args.output_format.map(Into::into),
|
||||
stream: stream_ov,
|
||||
no_runtime_enforcer: args.no_runtime_enforcer,
|
||||
accept_danger_non_interactive: args.accept_danger_non_interactive,
|
||||
};
|
||||
let resolved = resolve_analog_options(&file_cfg, &overrides);
|
||||
|
||||
println!("NDJSON contract (for `--output-format json` runs):");
|
||||
println!(" schema: {NDJSON_SCHEMA}");
|
||||
println!(" format_version: {NDJSON_FORMAT_VERSION}\n");
|
||||
|
||||
println!("Effective config (merge of `.claw-analog.toml` + flags below):");
|
||||
println!(" workspace: {}", canon_ws.display());
|
||||
println!(" config: {} ({cfg_note})", cfg_path.display());
|
||||
println!(" model: {}", resolved.model);
|
||||
println!(" permission: {}", resolved.permission_mode.as_str());
|
||||
println!(" preset: {}", resolved.preset.label().unwrap_or("none"));
|
||||
println!(
|
||||
" output_format: {}",
|
||||
match resolved.output_format {
|
||||
OutputFormat::Rich => "rich",
|
||||
OutputFormat::Json => "json",
|
||||
}
|
||||
);
|
||||
println!(" stream: {}", resolved.use_stream);
|
||||
println!(
|
||||
" runtime_enforcer: {}",
|
||||
if resolved.use_runtime_enforcer {
|
||||
"on"
|
||||
} else {
|
||||
"off"
|
||||
}
|
||||
);
|
||||
println!(
|
||||
" accept_danger_non_interactive: {}",
|
||||
resolved.accept_danger_non_interactive
|
||||
);
|
||||
println!(" Provenance (which side won src ← …):");
|
||||
for line in &resolved.provenance {
|
||||
println!(" - {line}");
|
||||
}
|
||||
println!();
|
||||
|
||||
let prof = resolve_profile_path_doctor(
|
||||
args.profile.as_ref(),
|
||||
file_cfg.profile.as_deref(),
|
||||
&workspace,
|
||||
);
|
||||
print_profile_hint_section(&prof);
|
||||
println!();
|
||||
|
||||
check_env();
|
||||
println!();
|
||||
let build_ok = if args.no_build {
|
||||
println!("cargo: skipped (--no-build)");
|
||||
true
|
||||
} else if args.release_build {
|
||||
run_cargo_release_build(args.manifest_dir.as_deref())
|
||||
} else {
|
||||
run_cargo_check(args.manifest_dir.as_deref())
|
||||
};
|
||||
println!();
|
||||
if args.tcp_ping {
|
||||
ping_print();
|
||||
println!();
|
||||
}
|
||||
if !args.no_http_check {
|
||||
http_checks_print(args.embeddings_check);
|
||||
println!();
|
||||
}
|
||||
if build_ok {
|
||||
0
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the user's home directory as named by the environment:
/// `USERPROFILE` on Windows, `HOME` elsewhere. `None` when the variable is unset.
fn home_dir() -> Option<PathBuf> {
    let variable = if cfg!(windows) { "USERPROFILE" } else { "HOME" };
    std::env::var_os(variable).map(PathBuf::from)
}
|
||||
|
||||
fn expand_user_path(raw: &str) -> PathBuf {
|
||||
if let Some(rest) = raw.strip_prefix("~/") {
|
||||
home_dir()
|
||||
.map(|h| h.join(rest))
|
||||
.unwrap_or_else(|| PathBuf::from(raw))
|
||||
} else {
|
||||
PathBuf::from(raw)
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_profile_path_doctor(
|
||||
cli: Option<&PathBuf>,
|
||||
file: Option<&str>,
|
||||
workspace: &Path,
|
||||
) -> Option<PathBuf> {
|
||||
if let Some(p) = cli {
|
||||
return Some(if p.is_absolute() {
|
||||
p.clone()
|
||||
} else {
|
||||
workspace.join(p)
|
||||
});
|
||||
}
|
||||
if let Some(s) = file {
|
||||
let p = expand_user_path(s.trim());
|
||||
return Some(if p.is_absolute() {
|
||||
p
|
||||
} else {
|
||||
workspace.join(p)
|
||||
});
|
||||
}
|
||||
let def = home_dir()?.join(".claw-analog").join("profile.toml");
|
||||
if def.is_file() {
|
||||
Some(def)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn print_profile_hint_section(path: &Option<PathBuf>) {
|
||||
println!("Profile (system prompt snippet):");
|
||||
match path {
|
||||
None => println!(" (none — no --profile, no `profile` in TOML, default file absent)"),
|
||||
Some(p) => {
|
||||
print!(" path: {}", p.display());
|
||||
match load_profile_hint(p) {
|
||||
Ok(Some(h)) => println!(" — loaded, {} chars", h.chars().count()),
|
||||
Ok(None) => println!(" — file ok, empty `line`"),
|
||||
Err(e) => println!(" — error: {e}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Prints one status line for environment variable `name` without revealing
/// its value: the character count when set, or "set but empty" / "unset".
/// A value that is not valid Unicode makes `std::env::var` fail and is
/// therefore reported as "unset".
fn mask_env_line(name: &str) {
    let status = match std::env::var(name) {
        Err(_) => "unset".to_string(),
        Ok(v) if v.trim().is_empty() => "set but empty".to_string(),
        Ok(v) => format!("set ({} chars)", v.chars().count()),
    };
    println!(" {name}: {status}");
}
|
||||
|
||||
fn check_env() {
|
||||
println!("Environment (values are not printed):");
|
||||
for name in ENV_CHECK {
|
||||
mask_env_line(name);
|
||||
}
|
||||
let anthro_ok = std::env::var("ANTHROPIC_API_KEY")
|
||||
.map(|s| !s.trim().is_empty())
|
||||
.unwrap_or(false)
|
||||
|| std::env::var("ANTHROPIC_AUTH_TOKEN")
|
||||
.map(|s| !s.trim().is_empty())
|
||||
.unwrap_or(false);
|
||||
let openai_ok = std::env::var("OPENAI_API_KEY")
|
||||
.map(|s| !s.trim().is_empty())
|
||||
.unwrap_or(false);
|
||||
println!();
|
||||
if anthro_ok {
|
||||
println!("Anthropic credentials: OK (API key and/or auth token).");
|
||||
} else {
|
||||
println!("Anthropic credentials: not set — needed for default Claude/Anthropic models.");
|
||||
}
|
||||
if openai_ok {
|
||||
println!("OpenAI API key: set — use `openai/...` model prefix for that provider.");
|
||||
} else {
|
||||
println!("OpenAI API key: unset — only relevant for `openai/` models.");
|
||||
}
|
||||
if !anthro_ok && !openai_ok {
|
||||
println!("\nNote: neither Anthropic nor OpenAI keys are set; live runs will fail until you export credentials (see USAGE.md).");
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk upward from `start` for a `Cargo.toml` that defines `[workspace]`.
///
/// `start` itself is checked first, then each ancestor, up to 32 directories
/// in total. A relative `start` is anchored on the current working directory
/// before walking; without that step `Path::parent` runs out of components at
/// the cwd (`"."` → `""` → `None`) and the search never reaches the real
/// ancestors. If the cwd cannot be determined the path is used as given.
///
/// Returns the directory containing the matching manifest, or `None`.
/// The returned path is absolute whenever `start` could be anchored.
///
/// The workspace test is a plain substring match on `[workspace]`, so a
/// manifest that only mentions it in a comment also matches.
pub fn discover_cargo_workspace(start: &Path) -> Option<PathBuf> {
    let anchored = if start.is_absolute() {
        start.to_path_buf()
    } else {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(start),
            Err(_) => start.to_path_buf(),
        }
    };
    // Re-collecting the components drops interior `.` segments left by the join.
    let anchored: PathBuf = anchored.components().collect();

    anchored
        .ancestors()
        .take(32)
        .find(|dir| {
            let manifest = dir.join("Cargo.toml");
            manifest.is_file()
                && std::fs::read_to_string(&manifest)
                    .map_or(false, |txt| txt.contains("[workspace]"))
        })
        .map(Path::to_path_buf)
}
|
||||
|
||||
fn workspace_root_or_eprint(manifest_dir: Option<&Path>) -> Option<PathBuf> {
|
||||
let start = manifest_dir
|
||||
.map(Path::to_path_buf)
|
||||
.or_else(|| std::env::current_dir().ok())
|
||||
.unwrap_or_else(|| PathBuf::from("."));
|
||||
discover_cargo_workspace(&start).or_else(|| {
|
||||
eprintln!(
|
||||
"cargo: could not find a [workspace] Cargo.toml above {}.\n Pass --manifest-dir pointing at the `rust` folder of claw-code.",
|
||||
start.display()
|
||||
);
|
||||
None
|
||||
})
|
||||
}
|
||||
|
||||
/// `cargo check` does not replace `target/debug/claw-analog.exe`, so `cargo run … doctor` works on Windows.
|
||||
fn run_cargo_check(manifest_dir: Option<&Path>) -> bool {
|
||||
let Some(root) = workspace_root_or_eprint(manifest_dir) else {
|
||||
return false;
|
||||
};
|
||||
println!("cargo check -p claw-analog (workspace {})", root.display());
|
||||
println!(" (compile-only; avoids “access denied” replacing the running debug exe on Windows)");
|
||||
let status = Command::new("cargo")
|
||||
.args(["check", "-p", "claw-analog"])
|
||||
.current_dir(&root)
|
||||
.status();
|
||||
match status {
|
||||
Ok(s) if s.success() => {
|
||||
println!("cargo check: OK");
|
||||
true
|
||||
}
|
||||
Ok(s) => {
|
||||
eprintln!("cargo check: failed ({s})");
|
||||
false
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("cargo check: could not run `cargo` ({e}). Is Rust/Cargo on PATH?");
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn run_cargo_release_build(manifest_dir: Option<&Path>) -> bool {
|
||||
let Some(root) = workspace_root_or_eprint(manifest_dir) else {
|
||||
return false;
|
||||
};
|
||||
println!(
|
||||
"cargo build --release -p claw-analog (workspace {})",
|
||||
root.display()
|
||||
);
|
||||
println!(" (output in target/release/; does not overwrite a running target/debug/ binary)");
|
||||
let status = Command::new("cargo")
|
||||
.args(["build", "--release", "-p", "claw-analog"])
|
||||
.current_dir(&root)
|
||||
.status();
|
||||
match status {
|
||||
Ok(s) if s.success() => {
|
||||
println!("cargo build --release: OK");
|
||||
true
|
||||
}
|
||||
Ok(s) => {
|
||||
eprintln!("cargo build --release: failed ({s})");
|
||||
false
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("cargo build --release: could not run `cargo` ({e}). Is Rust/Cargo on PATH?");
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the Anthropic base URL used by the TCP reachability check:
/// `ANTHROPIC_BASE_URL` when it holds non-whitespace text, otherwise
/// `https://api.anthropic.com`.
///
/// A variable that is set but blank is treated like an unset one, in line
/// with how `check_env` judges credentials. Previously a blank value was
/// returned verbatim and then failed URL parsing.
fn default_anthropic_base() -> String {
    std::env::var("ANTHROPIC_BASE_URL")
        .ok()
        .filter(|configured| !configured.trim().is_empty())
        .unwrap_or_else(|| "https://api.anthropic.com".into())
}
|
||||
|
||||
/// Extracts `(host, port)` from an `http://` or `https://` URL.
///
/// * The port defaults to 443 for `https` and 80 for `http` when absent or empty.
/// * Path, query and fragment are ignored; a `user:pass@` prefix is dropped.
/// * Bracketed IPv6 literals (`[::1]`, `[::1]:8080`) are returned without brackets.
///
/// # Errors
/// Returns a message when the scheme is not http(s), the host is missing,
/// an IPv6 literal is malformed or unbracketed, or the port is not a valid `u16`.
/// An invalid port used to be folded into the host name silently.
fn parse_host_port(url: &str) -> Result<(String, u16), String> {
    let url = url.trim();
    let (default_port, rest) = if let Some(r) = url.strip_prefix("https://") {
        (443, r)
    } else if let Some(r) = url.strip_prefix("http://") {
        (80, r)
    } else {
        return Err("URL must start with http:// or https://".into());
    };

    // The authority ends at the first path, query or fragment delimiter.
    let authority = rest
        .split(|c| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or("");
    // Credentials, if any, precede the last '@'.
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, after)| after);
    if host_port.is_empty() {
        return Err("missing host".to_string());
    }

    let (host, port_text) = if let Some(inner) = host_port.strip_prefix('[') {
        let (literal, trailer) = inner
            .split_once(']')
            .ok_or_else(|| "unterminated IPv6 literal (missing ']')".to_string())?;
        match trailer.strip_prefix(':') {
            Some(port) => (literal, Some(port)),
            None if trailer.is_empty() => (literal, None),
            None => return Err(format!("unexpected text after IPv6 literal: {trailer}")),
        }
    } else {
        match host_port.rsplit_once(':') {
            Some((name, port)) => (name, Some(port)),
            None => (host_port, None),
        }
    };

    if host.is_empty() {
        return Err("missing host".to_string());
    }
    if !host_port.starts_with('[') && host.contains(':') {
        return Err("IPv6 literals must be enclosed in [brackets]".to_string());
    }

    let port = match port_text {
        None | Some("") => default_port,
        Some(text) => text
            .parse::<u16>()
            .map_err(|_| format!("invalid port: {text}"))?,
    };
    Ok((host.to_string(), port))
}
|
||||
|
||||
fn ping_print() {
|
||||
let url = default_anthropic_base();
|
||||
println!("TCP check for ANTHROPIC_BASE_URL (default if unset): {url}");
|
||||
match parse_host_port(&url) {
|
||||
Ok((host, port)) => match tcp_ping(&host, port) {
|
||||
Ok(()) => println!(" reachability: OK ({host}:{port})"),
|
||||
Err(e) => println!(" reachability: FAIL ({host}:{port}) — {e}"),
|
||||
},
|
||||
Err(e) => println!(" could not parse URL: {e}"),
|
||||
}
|
||||
println!(" (HTTP/TLS application data is not validated; this is connect() only.)");
|
||||
}
|
||||
|
||||
/// Attempts a plain TCP connection to `host:port`.
///
/// Every address the name resolves to is tried in order, each with a
/// 3-second timeout, and the first successful connect wins. Trying only the
/// first address reported a failure for dual-stack hosts whose first record
/// was unreachable even though another one worked.
///
/// # Errors
/// Returns the resolver error, the error of the last failed connection
/// attempt, or `"no resolved addresses"` when resolution yields nothing.
fn tcp_ping(host: &str, port: u16) -> Result<(), String> {
    let candidates = (host, port)
        .to_socket_addrs()
        .map_err(|e| e.to_string())?;

    let mut last_failure: Option<String> = None;
    for addr in candidates {
        match TcpStream::connect_timeout(&addr, Duration::from_secs(3)) {
            // The stream is dropped immediately; only reachability matters.
            Ok(_stream) => return Ok(()),
            Err(e) => last_failure = Some(e.to_string()),
        }
    }
    Err(last_failure.unwrap_or_else(|| "no resolved addresses".to_string()))
}
|
||||
|
||||
fn http_checks_print(embeddings_check: bool) {
|
||||
println!("HTTP/TLS checks (auth + TLS validation + quota headers when available):");
|
||||
|
||||
let rt = tokio::runtime::Builder::new_current_thread()
|
||||
.enable_all()
|
||||
.build();
|
||||
let Ok(rt) = rt else {
|
||||
println!(" runtime: FAIL (could not build tokio runtime)");
|
||||
return;
|
||||
};
|
||||
|
||||
rt.block_on(async {
|
||||
// OpenAI-compatible providers (OPENAI_BASE_URL, OPENAI_API_KEY)
|
||||
if let Ok(key) = std::env::var("OPENAI_API_KEY") {
|
||||
if !key.trim().is_empty() {
|
||||
let base = std::env::var("OPENAI_BASE_URL")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "https://api.openai.com/v1".to_string());
|
||||
let url = openai_models_url(base.as_str());
|
||||
let mut headers = HeaderMap::new();
|
||||
if let Ok(v) = HeaderValue::from_str(format!("Bearer {}", key.trim()).as_str()) {
|
||||
headers.insert(reqwest::header::AUTHORIZATION, v);
|
||||
}
|
||||
let _ = http_check_and_print("openai", url.as_str(), headers).await;
|
||||
|
||||
if embeddings_check {
|
||||
let model = std::env::var("OPENAI_EMBEDDING_MODEL")
|
||||
.ok()
|
||||
.or_else(|| std::env::var("CLAW_RAG_EMBEDDING_MODEL").ok())
|
||||
.unwrap_or_else(|| "text-embedding-3-small".to_string());
|
||||
let eurl = openai_embeddings_url(base.as_str());
|
||||
let mut eheaders = HeaderMap::new();
|
||||
if let Ok(v) = HeaderValue::from_str(format!("Bearer {}", key.trim()).as_str())
|
||||
{
|
||||
eheaders.insert(reqwest::header::AUTHORIZATION, v);
|
||||
}
|
||||
let _ = openai_embeddings_probe(
|
||||
"openai embeddings",
|
||||
eurl.as_str(),
|
||||
&model,
|
||||
eheaders,
|
||||
)
|
||||
.await;
|
||||
} else {
|
||||
println!(" openai embeddings: skipped (pass --embeddings-check to enable)");
|
||||
}
|
||||
} else {
|
||||
println!(" openai: skipped (OPENAI_API_KEY empty)");
|
||||
}
|
||||
} else {
|
||||
println!(" openai: skipped (OPENAI_API_KEY unset)");
|
||||
}
|
||||
|
||||
// Anthropic (ANTHROPIC_BASE_URL, ANTHROPIC_API_KEY/AUTH_TOKEN)
|
||||
let a_key = std::env::var("ANTHROPIC_API_KEY").ok();
|
||||
let a_tok = std::env::var("ANTHROPIC_AUTH_TOKEN").ok();
|
||||
let a_base = std::env::var("ANTHROPIC_BASE_URL")
|
||||
.ok()
|
||||
.unwrap_or_else(|| "https://api.anthropic.com".to_string());
|
||||
if a_key.as_deref().is_some_and(|s| !s.trim().is_empty())
|
||||
|| a_tok.as_deref().is_some_and(|s| !s.trim().is_empty())
|
||||
{
|
||||
let url = anthropic_models_url(a_base.as_str());
|
||||
let mut headers = HeaderMap::new();
|
||||
headers.insert(
|
||||
HeaderName::from_static("anthropic-version"),
|
||||
HeaderValue::from_static("2023-06-01"),
|
||||
);
|
||||
if let Some(k) = a_key.as_deref().map(str::trim).filter(|s| !s.is_empty()) {
|
||||
if let Ok(v) = HeaderValue::from_str(k) {
|
||||
headers.insert(HeaderName::from_static("x-api-key"), v);
|
||||
}
|
||||
} else if let Some(t) = a_tok.as_deref().map(str::trim).filter(|s| !s.is_empty()) {
|
||||
if let Ok(v) = HeaderValue::from_str(format!("Bearer {t}").as_str()) {
|
||||
headers.insert(reqwest::header::AUTHORIZATION, v);
|
||||
}
|
||||
}
|
||||
let _ = http_check_and_print("anthropic", url.as_str(), headers).await;
|
||||
} else {
|
||||
println!(" anthropic: skipped (no API key/token)");
|
||||
}
|
||||
|
||||
// RAG service (RAG_BASE_URL) — just basic health + stats.
|
||||
if let Ok(base) = std::env::var("RAG_BASE_URL") {
|
||||
let base = base.trim().trim_end_matches('/');
|
||||
if !base.is_empty() {
|
||||
let headers = HeaderMap::new();
|
||||
let _ =
|
||||
http_check_and_print("rag health", &format!("{base}/health"), headers.clone())
|
||||
.await;
|
||||
let _ =
|
||||
http_check_and_print("rag stats", &format!("{base}/v1/stats"), headers).await;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
println!(" (TLS validation is performed by the HTTP client; certificate errors surface as request failures.)");
|
||||
}
|
||||
|
||||
/// Builds the models-listing URL for an OpenAI-compatible base URL.
/// Surrounding whitespace and trailing slashes are removed first, and `/v1`
/// is inserted only when the base does not already end with it.
fn openai_models_url(base: &str) -> String {
    let root = base.trim().trim_end_matches('/');
    match root.ends_with("/v1") {
        true => format!("{root}/models"),
        false => format!("{root}/v1/models"),
    }
}
|
||||
|
||||
/// Builds the embeddings URL for an OpenAI-compatible base URL.
/// Surrounding whitespace and trailing slashes are removed first, and `/v1`
/// is inserted only when the base does not already end with it.
fn openai_embeddings_url(base: &str) -> String {
    let root = base.trim().trim_end_matches('/');
    match root.ends_with("/v1") {
        true => format!("{root}/embeddings"),
        false => format!("{root}/v1/embeddings"),
    }
}
|
||||
|
||||
/// Builds the Anthropic models-listing URL (limited to one entry) from `base`,
/// after removing surrounding whitespace and trailing slashes.
fn anthropic_models_url(base: &str) -> String {
    let root = base.trim().trim_end_matches('/');
    let mut url = String::from(root);
    url.push_str("/v1/models?limit=1");
    url
}
|
||||
|
||||
async fn http_check_and_print(label: &str, url: &str, headers: HeaderMap) -> Result<(), ()> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(8))
|
||||
.build();
|
||||
let Ok(client) = client else {
|
||||
println!(" {label}: FAIL (client build)");
|
||||
return Err(());
|
||||
};
|
||||
|
||||
let resp = client.get(url).headers(headers).send().await;
|
||||
match resp {
|
||||
Ok(r) => {
|
||||
let status = r.status();
|
||||
println!(" {label}: {status} ({url})");
|
||||
print_quota_headers(r.headers());
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
let msg = e.to_string();
|
||||
if msg.to_ascii_lowercase().contains("certificate")
|
||||
|| msg.to_ascii_lowercase().contains("tls")
|
||||
{
|
||||
println!(" {label}: FAIL (TLS/cert) ({url}) — {msg}");
|
||||
} else {
|
||||
println!(" {label}: FAIL ({url}) — {msg}");
|
||||
}
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn print_quota_headers(headers: &HeaderMap) {
|
||||
let mut out: Vec<(String, String)> = Vec::new();
|
||||
for (k, v) in headers.iter() {
|
||||
let name = k.as_str().to_ascii_lowercase();
|
||||
if name.contains("ratelimit") || name.contains("quota") {
|
||||
if let Ok(s) = v.to_str() {
|
||||
out.push((k.as_str().to_string(), s.to_string()));
|
||||
}
|
||||
}
|
||||
// OpenAI-compatible common headers:
|
||||
if name.starts_with("x-ratelimit-") {
|
||||
if let Ok(s) = v.to_str() {
|
||||
out.push((k.as_str().to_string(), s.to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
out.sort();
|
||||
out.dedup();
|
||||
for (k, v) in out {
|
||||
println!(" {k}: {v}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn openai_embeddings_probe(
|
||||
label: &str,
|
||||
url: &str,
|
||||
model: &str,
|
||||
headers: HeaderMap,
|
||||
) -> Result<(), ()> {
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(12))
|
||||
.build();
|
||||
let Ok(client) = client else {
|
||||
println!(" {label}: FAIL (client build)");
|
||||
return Err(());
|
||||
};
|
||||
|
||||
// Minimal request: one short string. We don't parse the embedding content.
|
||||
let body = serde_json::json!({
|
||||
"model": model,
|
||||
"input": ["ping"]
|
||||
});
|
||||
|
||||
let resp = client.post(url).headers(headers).json(&body).send().await;
|
||||
match resp {
|
||||
Ok(r) => {
|
||||
let status = r.status();
|
||||
println!(" {label}: {status} ({url}) model={model}");
|
||||
print_quota_headers(r.headers());
|
||||
if !status.is_success() {
|
||||
let t = r.text().await.unwrap_or_default();
|
||||
if !t.trim().is_empty() {
|
||||
println!(" body: {}", t.chars().take(400).collect::<String>());
|
||||
}
|
||||
return Err(());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => {
|
||||
let msg = e.to_string();
|
||||
if msg.to_ascii_lowercase().contains("certificate")
|
||||
|| msg.to_ascii_lowercase().contains("tls")
|
||||
{
|
||||
println!(" {label}: FAIL (TLS/cert) ({url}) — {msg}");
|
||||
} else {
|
||||
println!(" {label}: FAIL ({url}) — {msg}");
|
||||
}
|
||||
Err(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `parse_host_port` keeps an explicit port and otherwise falls back to
    /// the scheme default.
    #[test]
    fn parses_base_url_host_port() {
        let cases = [
            ("http://127.0.0.1:8080/v1", "127.0.0.1", 8080_u16),
            ("https://api.anthropic.com", "api.anthropic.com", 443),
        ];
        for (url, host, port) in cases {
            assert_eq!(parse_host_port(url).unwrap(), (host.to_string(), port));
        }
    }
}
|
||||
2926
rust/crates/claw-analog/src/lib.rs
Normal file
2926
rust/crates/claw-analog/src/lib.rs
Normal file
File diff suppressed because it is too large
Load Diff
522
rust/crates/claw-analog/src/main.rs
Normal file
522
rust/crates/claw-analog/src/main.rs
Normal file
@@ -0,0 +1,522 @@
|
||||
//! Binary wrapper for `claw_analog::run` — see `how_to_run.md` in repo root.
|
||||
|
||||
mod agents;
|
||||
mod config_cmd;
|
||||
mod doctor;
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
|
||||
use clap::{CommandFactory, Parser, Subcommand, ValueEnum};
|
||||
use clap_complete::{generate, Shell};
|
||||
use claw_analog::{
|
||||
load_analog_toml, load_profile_hint, permission_mode_from_toml_str, print_tools_dry_run,
|
||||
resolve_analog_profile_path, resolve_rag_base_url, AnalogConfig, AnalogFileConfig,
|
||||
AnalogLanguage, OutputFormat, PermissionMode, Preset, ANALOG_DEFAULT_MODEL,
|
||||
};
|
||||
|
||||
// Values accepted by the main CLI's `--permission` option.
// Reviewer notes use `//` because clap turns `///` on `ValueEnum` variants
// into help text; the existing `///` line below is such help text.
#[derive(Copy, Clone, Debug, ValueEnum)]
enum PermissionArg {
    ReadOnly,
    WorkspaceWrite,
    Prompt,
    // Spelled out explicitly so the CLI value is `danger-full-access`.
    #[value(name = "danger-full-access")]
    DangerFullAccess,
    /// Same unrestricted posture as danger-full-access for this narrow tool set.
    Allow,
}
|
||||
|
||||
// Values accepted by the main CLI's `--output-format` option.
// (`//` rather than `///` so clap's generated help stays unchanged.)
#[derive(Copy, Clone, Debug, ValueEnum)]
enum OutputFormatArg {
    Rich,
    Json,
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, ValueEnum)]
|
||||
enum LangArg {
|
||||
En,
|
||||
Ru,
|
||||
}
|
||||
|
||||
impl From<LangArg> for AnalogLanguage {
|
||||
fn from(a: LangArg) -> Self {
|
||||
match a {
|
||||
LangArg::En => AnalogLanguage::En,
|
||||
LangArg::Ru => AnalogLanguage::Ru,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, Debug, ValueEnum)]
|
||||
enum PresetCli {
|
||||
None,
|
||||
/// Automatically infer a preset from the initial prompt.
|
||||
Auto,
|
||||
Audit,
|
||||
Explain,
|
||||
Implement,
|
||||
}
|
||||
|
||||
impl From<PresetCli> for Preset {
|
||||
fn from(p: PresetCli) -> Self {
|
||||
match p {
|
||||
PresetCli::None => Preset::None,
|
||||
PresetCli::Auto => Preset::None,
|
||||
PresetCli::Audit => Preset::Audit,
|
||||
PresetCli::Explain => Preset::Explain,
|
||||
PresetCli::Implement => Preset::Implement,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Top-level command line: an optional subcommand plus the flattened options
// of the default run mode. `args_conflicts_with_subcommands` makes clap reject
// run-mode arguments combined with a subcommand.
// (`//` rather than `///` so the explicit `about` text is not supplemented.)
#[derive(Parser, Debug)]
#[command(
    name = "claw-analog",
    version,
    about = "Lean tool-agent loop (read/list/grep/write) on claw-code `api` providers"
)]
#[command(args_conflicts_with_subcommands = true)]
struct RootCli {
    // `None` when no subcommand was given.
    #[command(subcommand)]
    command: Option<Commands>,
    // Options of the default run mode, inlined at the top level.
    #[command(flatten)]
    run: RunCli,
}
|
||||
|
||||
// Subcommands of `claw-analog`. The `///` lines are clap help text and are
// left exactly as written; reviewer notes use `//`.
#[derive(Subcommand, Debug)]
enum Commands {
    /// Verify credentials, `cargo check -p claw-analog` (or `--release-build`), config merge preview, optional `--tcp-ping`.
    Doctor(doctor::DoctorCli),
    // Parent of the nested `config` subcommands (see `ConfigSub`); it has no help text of its own.
    Config {
        #[command(subcommand)]
        command: ConfigSub,
    },
    /// Print shell completion script for this binary (redirect to a file or `source` it).
    Complete(CompleteCli),
    /// Run multiple specialized sub-agents sequentially (shared base session).
    Agents(agents::AgentsCli),
}
|
||||
|
||||
// Subcommands nested under `claw-analog config`.
// (`//` rather than `///` so clap's generated help stays unchanged.)
#[derive(Subcommand, Debug)]
enum ConfigSub {
    /// Parse `.claw-analog.toml` and profile; print a merge preview (no API calls).
    Validate(config_cmd::ValidateCli),
}
|
||||
|
||||
// Arguments of the `complete` subcommand.
// (`//` rather than `///` so clap's generated help stays unchanged.)
#[derive(Parser, Debug)]
struct CompleteCli {
    // Target shell; there is no `long`/`short`, so clap treats it as a positional value.
    #[arg(value_enum)]
    shell: ShellKind,
}
|
||||
|
||||
// Shells for which a completion script can be requested.
// (`//` rather than `///` so clap's generated help stays unchanged.)
#[derive(Copy, Clone, Debug, ValueEnum)]
enum ShellKind {
    Bash,
    Zsh,
    Fish,
    // Accepted on the command line as `powershell` or its alias `pwsh`.
    #[value(name = "powershell", alias = "pwsh")]
    Powershell,
}
|
||||
|
||||
// Options of the default run mode, flattened into `RootCli`.
//
// The existing `///` comments are clap help text, so they are program output
// and stay untouched; reviewer notes use plain `//`. Every `Option` field is
// `None` when the flag is absent.
#[derive(Parser, Debug)]
struct RunCli {
    /// Config file (default: `<workspace>/.claw-analog.toml` if that path exists).
    #[arg(long, value_name = "PATH")]
    config: Option<PathBuf>,
    // Model override (`-m` / `--model`).
    #[arg(short, long)]
    model: Option<String>,
    // Workspace root; `config_file_path` joins the default config name onto it.
    #[arg(short = 'w', long, default_value = ".")]
    workspace: PathBuf,
    #[arg(long, value_enum)]
    permission: Option<PermissionArg>,
    // `auto` is resolved from the prompt by `merge_preset`.
    #[arg(long, value_enum)]
    preset: Option<PresetCli>,
    /// Reply language hint for the assistant (`en` or `ru` in system prompt; not the API model id).
    #[arg(long, value_enum)]
    lang: Option<LangArg>,
    /// Print effective tools for merged `permission` / enforcer, then exit (no prompt, no API).
    #[arg(long, default_value_t = false, action = clap::ArgAction::SetTrue)]
    print_tools: bool,
    /// Persist message history for resume (JSON). See `how_to_run.md` for risks.
    #[arg(long, value_name = "PATH")]
    session: Option<PathBuf>,
    /// Write session JSON to this path on each snapshot (export without `--session`, or an extra copy).
    #[arg(long, value_name = "PATH")]
    save_session: Option<PathBuf>,
    /// Profile snippet TOML (`line = "..."`). Default: `~/.claw-analog/profile.toml` if it exists.
    #[arg(long, value_name = "PATH")]
    profile: Option<PathBuf>,
    /// Stream assistant text to stdout as tokens arrive (uses `stream_message`).
    #[arg(long, default_value_t = false, conflicts_with = "no_stream")]
    stream: bool,
    /// Turn streaming off (overrides `stream` in config).
    #[arg(long, default_value_t = false, conflicts_with = "stream")]
    no_stream: bool,
    /// Newline-delimited JSON events on stdout (for agents / CI). Diagnostics stay on stderr.
    #[arg(long, value_enum)]
    output_format: Option<OutputFormatArg>,
    /// Disable `runtime::PermissionEnforcer` (paths are still jailed; policy checks are weakened).
    #[arg(long = "no-runtime-enforcer", default_value_t = false, action = clap::ArgAction::SetTrue)]
    no_runtime_enforcer: bool,
    /// Allow `danger-full-access` / `allow` when stdin is not a TTY (CI/automation; use with care).
    #[arg(long = "accept-danger-non-interactive", default_value_t = false, action = clap::ArgAction::SetTrue)]
    accept_danger_non_interactive: bool,
    // Optional numeric limits. NOTE(review): presumably each falls back to the
    // matching `DEF_*` constant defined below when absent — confirm at the use site.
    #[arg(long)]
    max_read_bytes: Option<u64>,
    #[arg(long)]
    max_turns: Option<u32>,
    #[arg(long)]
    max_list_entries: Option<usize>,
    #[arg(long)]
    grep_max_lines: Option<usize>,
    #[arg(long)]
    glob_max_paths: Option<usize>,
    #[arg(long)]
    glob_max_depth: Option<usize>,
    // No `#[arg]` attribute: clap derive treats such a field as an optional positional argument.
    prompt: Option<String>,
}
|
||||
|
||||
// Built-in numeric defaults and caps.
// NOTE(review): the names suggest fallbacks for the corresponding `--max-*` /
// `--grep-*` / `--glob-*` options and for RAG settings; their use is outside
// this excerpt, so confirm units and semantics at the use site.

// 256 KiB.
const DEF_MAX_READ: u64 = 256 * 1024;
const DEF_MAX_TURNS: u32 = 24;
const DEF_MAX_LIST: usize = 500;
const DEF_GREP_MAX: usize = 200;
const DEF_GLOB_PATHS: usize = 2000;
const DEF_GLOB_DEPTH: usize = 32;
// Seconds, per the name.
const DEF_RAG_TIMEOUT_SECS: u64 = 30;
const DEF_RAG_TOP_K_MAX: u32 = 32;
// Presumably an absolute upper bound on the RAG `top_k` limit — TODO confirm.
const RAG_TOP_K_ABS_CAP: u32 = 256;
|
||||
|
||||
fn config_file_path(cli: &RunCli) -> PathBuf {
|
||||
cli.config
|
||||
.clone()
|
||||
.unwrap_or_else(|| cli.workspace.join(".claw-analog.toml"))
|
||||
}
|
||||
|
||||
fn load_file_config(path: &Path) -> AnalogFileConfig {
|
||||
if !path.is_file() {
|
||||
return AnalogFileConfig::default();
|
||||
}
|
||||
match load_analog_toml(path) {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
eprintln!(
|
||||
"[claw-analog] warning: failed to read {}: {e}",
|
||||
path.display()
|
||||
);
|
||||
AnalogFileConfig::default()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn output_format_from_toml(s: &str) -> Option<OutputFormat> {
|
||||
match s.to_ascii_lowercase().as_str() {
|
||||
"json" => Some(OutputFormat::Json),
|
||||
"rich" => Some(OutputFormat::Rich),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves the session file location.
///
/// The CLI value takes precedence over the config-file value; `None` is
/// returned when neither is present. A relative path is interpreted relative
/// to `workspace`, an absolute path is returned untouched.
fn resolve_session_path(
    cli: Option<PathBuf>,
    file: Option<&str>,
    workspace: &Path,
) -> Option<PathBuf> {
    let chosen = match cli {
        Some(from_cli) => from_cli,
        None => PathBuf::from(file?),
    };
    if chosen.is_absolute() {
        Some(chosen)
    } else {
        Some(workspace.join(chosen))
    }
}
|
||||
|
||||
fn merge_language(cli: Option<LangArg>, file: Option<&str>) -> AnalogLanguage {
|
||||
if let Some(l) = cli {
|
||||
return l.into();
|
||||
}
|
||||
file.and_then(AnalogLanguage::from_toml_str)
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn merge_preset(cli: Option<PresetCli>, file: Option<&str>, prompt: &str) -> Preset {
|
||||
if let Some(p) = cli {
|
||||
return match p {
|
||||
PresetCli::Auto => claw_analog::infer_preset_from_prompt(prompt),
|
||||
other => Preset::from(other),
|
||||
};
|
||||
}
|
||||
if file.is_some_and(|s| s.trim().eq_ignore_ascii_case("auto")) {
|
||||
return claw_analog::infer_preset_from_prompt(prompt);
|
||||
}
|
||||
if let Some(s) = file.and_then(Preset::from_toml_str) {
|
||||
return s;
|
||||
}
|
||||
claw_analog::infer_preset_from_prompt(prompt)
|
||||
}
|
||||
|
||||
fn merge_permission(
|
||||
cli: Option<PermissionArg>,
|
||||
file_perm: Option<String>,
|
||||
preset: Preset,
|
||||
) -> PermissionMode {
|
||||
if let Some(p) = cli {
|
||||
return match p {
|
||||
PermissionArg::ReadOnly => PermissionMode::ReadOnly,
|
||||
PermissionArg::WorkspaceWrite => PermissionMode::WorkspaceWrite,
|
||||
PermissionArg::Prompt => PermissionMode::Prompt,
|
||||
PermissionArg::DangerFullAccess => PermissionMode::DangerFullAccess,
|
||||
PermissionArg::Allow => PermissionMode::Allow,
|
||||
};
|
||||
}
|
||||
if let Some(s) = file_perm.as_deref().and_then(permission_mode_from_toml_str) {
|
||||
return s;
|
||||
}
|
||||
match preset {
|
||||
Preset::Implement => PermissionMode::WorkspaceWrite,
|
||||
_ => PermissionMode::ReadOnly,
|
||||
}
|
||||
}
|
||||
|
||||
fn build_config(
|
||||
cli: &RunCli,
|
||||
file: &AnalogFileConfig,
|
||||
prompt: String,
|
||||
profile_hint: Option<String>,
|
||||
session_path: Option<PathBuf>,
|
||||
preset: Preset,
|
||||
permission_mode: PermissionMode,
|
||||
) -> AnalogConfig {
|
||||
let model = cli
|
||||
.model
|
||||
.clone()
|
||||
.or_else(|| file.model.clone())
|
||||
.unwrap_or_else(|| ANALOG_DEFAULT_MODEL.into());
|
||||
|
||||
let output_format = cli
|
||||
.output_format
|
||||
.map(|o| match o {
|
||||
OutputFormatArg::Rich => OutputFormat::Rich,
|
||||
OutputFormatArg::Json => OutputFormat::Json,
|
||||
})
|
||||
.or_else(|| {
|
||||
file.output_format
|
||||
.as_deref()
|
||||
.and_then(output_format_from_toml)
|
||||
})
|
||||
.unwrap_or(OutputFormat::Rich);
|
||||
|
||||
let use_stream = if cli.no_stream {
|
||||
false
|
||||
} else if cli.stream {
|
||||
true
|
||||
} else {
|
||||
file.stream.unwrap_or(false)
|
||||
};
|
||||
|
||||
let use_runtime_enforcer =
|
||||
!cli.no_runtime_enforcer && !file.no_runtime_enforcer.unwrap_or(false);
|
||||
|
||||
let accept_danger_non_interactive =
|
||||
cli.accept_danger_non_interactive || file.accept_danger_non_interactive.unwrap_or(false);
|
||||
|
||||
let max_read_bytes = cli
|
||||
.max_read_bytes
|
||||
.or(file.max_read_bytes)
|
||||
.unwrap_or(DEF_MAX_READ);
|
||||
let max_turns = cli.max_turns.or(file.max_turns).unwrap_or(DEF_MAX_TURNS);
|
||||
let max_list_entries = cli
|
||||
.max_list_entries
|
||||
.or(file.max_list_entries)
|
||||
.unwrap_or(DEF_MAX_LIST);
|
||||
let grep_max_lines = cli
|
||||
.grep_max_lines
|
||||
.or(file.grep_max_lines)
|
||||
.unwrap_or(DEF_GREP_MAX);
|
||||
let glob_max_paths = cli
|
||||
.glob_max_paths
|
||||
.or(file.glob_max_paths)
|
||||
.unwrap_or(DEF_GLOB_PATHS);
|
||||
let glob_max_depth = cli
|
||||
.glob_max_depth
|
||||
.or(file.glob_max_depth)
|
||||
.unwrap_or(DEF_GLOB_DEPTH);
|
||||
|
||||
let rag_base_url = resolve_rag_base_url(file);
|
||||
let rag_http_timeout =
|
||||
Duration::from_secs(file.rag_timeout_secs.unwrap_or(DEF_RAG_TIMEOUT_SECS).max(1));
|
||||
let rag_top_k_max = file
|
||||
.rag_top_k_max
|
||||
.unwrap_or(DEF_RAG_TOP_K_MAX)
|
||||
.clamp(1, RAG_TOP_K_ABS_CAP);
|
||||
|
||||
let session_save_path = cli.save_session.as_ref().map(|p| {
|
||||
if p.is_absolute() {
|
||||
p.clone()
|
||||
} else {
|
||||
cli.workspace.join(p)
|
||||
}
|
||||
});
|
||||
|
||||
let language = merge_language(cli.lang, file.language.as_deref());
|
||||
|
||||
AnalogConfig {
|
||||
model,
|
||||
workspace: cli.workspace.clone(),
|
||||
permission_mode,
|
||||
accept_danger_non_interactive,
|
||||
use_stream,
|
||||
output_format,
|
||||
use_runtime_enforcer,
|
||||
max_read_bytes,
|
||||
max_turns,
|
||||
max_list_entries,
|
||||
grep_max_lines,
|
||||
glob_max_paths,
|
||||
glob_max_depth,
|
||||
preset,
|
||||
language,
|
||||
session_path,
|
||||
session_save_path,
|
||||
profile_hint,
|
||||
prompt,
|
||||
rag_base_url,
|
||||
rag_http_timeout,
|
||||
rag_top_k_max,
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let root = RootCli::parse();
|
||||
match root.command {
|
||||
Some(Commands::Doctor(d)) => {
|
||||
let code = doctor::run_doctor(d);
|
||||
std::process::exit(code);
|
||||
}
|
||||
Some(Commands::Agents(a)) => {
|
||||
let code = match agents::run_agents(a) {
|
||||
Ok(()) => 0,
|
||||
Err(e) => {
|
||||
eprintln!("agents: {e}");
|
||||
1
|
||||
}
|
||||
};
|
||||
std::process::exit(code);
|
||||
}
|
||||
Some(Commands::Config { command }) => {
|
||||
let code = match command {
|
||||
ConfigSub::Validate(v) => config_cmd::run_validate(v),
|
||||
};
|
||||
std::process::exit(code);
|
||||
}
|
||||
Some(Commands::Complete(co)) => {
|
||||
let shell = match co.shell {
|
||||
ShellKind::Bash => Shell::Bash,
|
||||
ShellKind::Zsh => Shell::Zsh,
|
||||
ShellKind::Fish => Shell::Fish,
|
||||
ShellKind::Powershell => Shell::PowerShell,
|
||||
};
|
||||
let mut cmd = RootCli::command();
|
||||
generate(shell, &mut cmd, "claw-analog", &mut std::io::stdout());
|
||||
return Ok(());
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
let cli = root.run;
|
||||
let cfg_path = config_file_path(&cli);
|
||||
let file_cfg = load_file_config(&cfg_path);
|
||||
|
||||
if cli.print_tools {
|
||||
let preset = merge_preset(
|
||||
cli.preset,
|
||||
file_cfg.preset.as_deref(),
|
||||
&cli.prompt.clone().unwrap_or_default(),
|
||||
);
|
||||
let permission_mode = merge_permission(cli.permission, file_cfg.permission.clone(), preset);
|
||||
let use_runtime_enforcer =
|
||||
!cli.no_runtime_enforcer && !file_cfg.no_runtime_enforcer.unwrap_or(false);
|
||||
let rag_url = resolve_rag_base_url(&file_cfg);
|
||||
print_tools_dry_run(
|
||||
permission_mode,
|
||||
use_runtime_enforcer,
|
||||
rag_url.as_deref(),
|
||||
&mut std::io::stdout(),
|
||||
)?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let pre_output_format = cli
|
||||
.output_format
|
||||
.map(|o| match o {
|
||||
OutputFormatArg::Rich => OutputFormat::Rich,
|
||||
OutputFormatArg::Json => OutputFormat::Json,
|
||||
})
|
||||
.or_else(|| {
|
||||
file_cfg
|
||||
.output_format
|
||||
.as_deref()
|
||||
.and_then(output_format_from_toml)
|
||||
})
|
||||
.unwrap_or(OutputFormat::Rich);
|
||||
|
||||
let prompt = if let Some(p) = cli.prompt.clone() {
|
||||
p
|
||||
} else {
|
||||
use std::io::Read;
|
||||
let mut buf = String::new();
|
||||
std::io::stdin().read_to_string(&mut buf)?;
|
||||
if buf.trim().is_empty() {
|
||||
if matches!(pre_output_format, OutputFormat::Json) {
|
||||
println!(
|
||||
"{}",
|
||||
serde_json::json!({"type": "error", "message": "empty prompt (pass as arg or stdin)"})
|
||||
);
|
||||
}
|
||||
return Err("empty prompt (pass as arg or stdin)".into());
|
||||
}
|
||||
buf
|
||||
};
|
||||
|
||||
let preset = merge_preset(cli.preset, file_cfg.preset.as_deref(), &prompt);
|
||||
let permission_mode = merge_permission(cli.permission, file_cfg.permission.clone(), preset);
|
||||
|
||||
let session_path = resolve_session_path(
|
||||
cli.session.clone(),
|
||||
file_cfg.session.as_deref(),
|
||||
&cli.workspace,
|
||||
);
|
||||
|
||||
let profile_path = resolve_analog_profile_path(
|
||||
&cli.workspace,
|
||||
cli.profile.clone(),
|
||||
file_cfg.profile.as_deref(),
|
||||
);
|
||||
|
||||
let profile_hint = if let Some(ref p) = profile_path {
|
||||
load_profile_hint(p)?
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let config = build_config(
|
||||
&cli,
|
||||
&file_cfg,
|
||||
prompt,
|
||||
profile_hint,
|
||||
session_path,
|
||||
preset,
|
||||
permission_mode,
|
||||
);
|
||||
let output_format = config.output_format;
|
||||
|
||||
let mut out = std::io::stdout();
|
||||
if let Err(e) = claw_analog::run(config, &mut out).await {
|
||||
if matches!(output_format, OutputFormat::Json) {
|
||||
println!(
|
||||
"{}",
|
||||
serde_json::json!({"type": "error", "message": e.to_string()})
|
||||
);
|
||||
}
|
||||
return Err(e);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
30
rust/crates/claw-rag-service/Cargo.toml
Normal file
30
rust/crates/claw-rag-service/Cargo.toml
Normal file
@@ -0,0 +1,30 @@
|
||||
[package]
|
||||
name = "claw-rag-service"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
publish.workspace = true
|
||||
description = "Workspace RAG service: SQLite index, OpenAI-compatible embeddings, query API."
|
||||
|
||||
[dependencies]
|
||||
axum = "0.8"
|
||||
clap = { version = "4", features = ["derive", "env"] }
|
||||
dotenvy = "0.15"
|
||||
reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
|
||||
rusqlite = { version = "0.32", features = ["bundled"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json.workspace = true
|
||||
tokio = { version = "1", features = ["macros", "net", "rt-multi-thread", "signal"] }
|
||||
walkdir = "2"
|
||||
qdrant-client = { version = "1.17", optional = true }
|
||||
blake3 = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
qdrant-index = ["dep:qdrant-client"]
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
20
rust/crates/claw-rag-service/Dockerfile
Normal file
20
rust/crates/claw-rag-service/Dockerfile
Normal file
@@ -0,0 +1,20 @@
|
||||
# qdrant-client currently requires a fairly recent stable Rust.
|
||||
# Keep this pinned to avoid surprise breaks from `rust:latest`.
|
||||
FROM rust:1.91-bookworm AS builder
|
||||
|
||||
WORKDIR /repo
|
||||
COPY . /repo/rust/
|
||||
|
||||
WORKDIR /repo/rust
|
||||
# Sanity check toolchain version (helps debug CI/Docker Desktop issues).
|
||||
RUN rustc --version && cargo --version
|
||||
# Build the service with qdrant support enabled (works even if you don't use qdrant).
|
||||
RUN cargo build -p claw-rag-service --release --features qdrant-index
|
||||
|
||||
FROM debian:bookworm-slim
|
||||
|
||||
WORKDIR /app
|
||||
COPY --from=builder /repo/rust/target/release/claw-rag-service /app/claw-rag-service
|
||||
|
||||
EXPOSE 8787
|
||||
ENTRYPOINT ["/app/claw-rag-service"]
|
||||
41
rust/crates/claw-rag-service/src/chunk.rs
Normal file
41
rust/crates/claw-rag-service/src/chunk.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
//! Split file text into overlapping windows (character-based UTF-8).
|
||||
|
||||
/// Cuts `text` into windows of at most `max_chars` characters, where each
/// window starts `max_chars - overlap` characters after the previous one.
///
/// Lengths are counted in `char`s, not bytes. `overlap` is capped at
/// `max_chars - 1` so the window always advances by at least one character.
/// Windows consisting only of whitespace are dropped. `max_chars == 0` or an
/// empty `text` yields no windows.
#[must_use]
pub fn chunk_text(text: &str, max_chars: usize, overlap: usize) -> Vec<String> {
    if max_chars == 0 {
        return Vec::new();
    }

    let chars: Vec<char> = text.chars().collect();
    let total = chars.len();
    // `max_chars >= 1` here, so the capped overlap leaves a stride of at least 1.
    let stride = (max_chars - overlap.min(max_chars - 1)).max(1);

    let mut windows = Vec::new();
    for start in (0..total).step_by(stride) {
        let end = start.saturating_add(max_chars).min(total);
        let window: String = chars[start..end].iter().collect();
        if !window.trim().is_empty() {
            windows.push(window);
        }
        // The window that reaches the end of the text is the last one.
        if end == total {
            break;
        }
    }
    windows
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// With 5-char windows and an overlap of 2 the stride is 3.
    #[test]
    fn chunks_overlap_by_requested_amount() {
        let c = chunk_text("hello world test", 5, 2);
        assert_eq!(c, ["hello", "lo wo", "world", "ld te", "test"]);
    }

    #[test]
    fn degenerate_inputs_yield_no_chunks() {
        assert!(chunk_text("anything", 0, 0).is_empty());
        assert!(chunk_text("", 5, 2).is_empty());
        assert!(chunk_text("   ", 2, 0).is_empty());
    }

    /// An overlap at least as large as the window must not stall the loop.
    #[test]
    fn oversized_overlap_still_advances() {
        assert_eq!(chunk_text("abcd", 2, 5), ["ab", "bc", "cd"]);
    }

    #[test]
    fn whitespace_only_windows_are_dropped() {
        assert_eq!(chunk_text("ab   cd", 2, 0), ["ab", " c", "d"]);
    }

    /// Windows are measured in characters, so multi-byte text is never split
    /// inside a code point.
    #[test]
    fn counts_characters_not_bytes() {
        assert_eq!(chunk_text("héllo", 3, 0), ["hél", "lo"]);
    }
}
|
||||
210
rust/crates/claw-rag-service/src/db.rs
Normal file
210
rust/crates/claw-rag-service/src/db.rs
Normal file
@@ -0,0 +1,210 @@
|
||||
//! `SQLite` storage for chunks and embedding vectors.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use rusqlite::{params, Connection};
|
||||
|
||||
const SCHEMA: &str = r"
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
path TEXT NOT NULL,
|
||||
ordinal INTEGER NOT NULL,
|
||||
text TEXT NOT NULL,
|
||||
UNIQUE(path, ordinal)
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS embeddings (
|
||||
chunk_id INTEGER PRIMARY KEY,
|
||||
dim INTEGER NOT NULL,
|
||||
vec BLOB NOT NULL,
|
||||
FOREIGN KEY (chunk_id) REFERENCES chunks(id) ON DELETE CASCADE
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
path TEXT PRIMARY KEY,
|
||||
content_hash TEXT NOT NULL,
|
||||
size_bytes INTEGER NOT NULL,
|
||||
mtime_ms INTEGER NOT NULL,
|
||||
indexed_at_ms INTEGER NOT NULL
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path);
|
||||
";
|
||||
|
||||
pub fn open_db(path: &Path) -> Result<Connection, String> {
|
||||
if let Some(parent) = path.parent() {
|
||||
if !parent.as_os_str().is_empty() {
|
||||
std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
|
||||
}
|
||||
}
|
||||
|
||||
let conn = Connection::open(path).map_err(|e| e.to_string())?;
|
||||
conn.execute_batch(
|
||||
r"
|
||||
PRAGMA foreign_keys = ON;
|
||||
PRAGMA journal_mode = WAL;
|
||||
",
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
conn.execute_batch(SCHEMA).map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(conn)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn truncate_index(conn: &Connection) -> Result<(), String> {
|
||||
conn.execute_batch("DELETE FROM embeddings; DELETE FROM chunks; DELETE FROM files;")
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn file_is_unchanged(
|
||||
conn: &Connection,
|
||||
path: &str,
|
||||
content_hash: &str,
|
||||
size_bytes: i64,
|
||||
mtime_ms: i64,
|
||||
) -> Result<bool, String> {
|
||||
let mut stmt = conn
|
||||
.prepare("SELECT content_hash, size_bytes, mtime_ms FROM files WHERE path=?1 LIMIT 1")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let mut rows = stmt.query(params![path]).map_err(|e| e.to_string())?;
|
||||
if let Some(r) = rows.next().map_err(|e| e.to_string())? {
|
||||
let h: String = r.get(0).map_err(|e| e.to_string())?;
|
||||
let sz: i64 = r.get(1).map_err(|e| e.to_string())?;
|
||||
let mt: i64 = r.get(2).map_err(|e| e.to_string())?;
|
||||
return Ok(h == content_hash && sz == size_bytes && mt == mtime_ms);
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
|
||||
pub fn upsert_file_meta(
|
||||
conn: &Connection,
|
||||
path: &str,
|
||||
content_hash: &str,
|
||||
size_bytes: i64,
|
||||
mtime_ms: i64,
|
||||
indexed_at_ms: i64,
|
||||
) -> Result<(), String> {
|
||||
conn.execute(
|
||||
r"
|
||||
INSERT INTO files(path, content_hash, size_bytes, mtime_ms, indexed_at_ms)
|
||||
VALUES (?1, ?2, ?3, ?4, ?5)
|
||||
ON CONFLICT(path) DO UPDATE SET
|
||||
content_hash=excluded.content_hash,
|
||||
size_bytes=excluded.size_bytes,
|
||||
mtime_ms=excluded.mtime_ms,
|
||||
indexed_at_ms=excluded.indexed_at_ms
|
||||
",
|
||||
params![path, content_hash, size_bytes, mtime_ms, indexed_at_ms],
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn delete_file_and_chunks(conn: &Connection, path: &str) -> Result<(), String> {
|
||||
// Delete chunks first (embeddings cascade); then remove file meta.
|
||||
conn.execute("DELETE FROM chunks WHERE path=?1", params![path])
|
||||
.map_err(|e| e.to_string())?;
|
||||
conn.execute("DELETE FROM files WHERE path=?1", params![path])
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn list_all_files(conn: &Connection) -> Result<Vec<String>, String> {
|
||||
let mut stmt = conn
|
||||
.prepare("SELECT path FROM files")
|
||||
.map_err(|e| e.to_string())?;
|
||||
let rows = stmt
|
||||
.query_map([], |r| r.get::<_, String>(0))
|
||||
.map_err(|e| e.to_string())?;
|
||||
let mut out = Vec::new();
|
||||
for r in rows {
|
||||
out.push(r.map_err(|e| e.to_string())?);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
pub fn insert_chunk(
|
||||
conn: &Connection,
|
||||
path: &str,
|
||||
ordinal: i32,
|
||||
text: &str,
|
||||
) -> Result<i64, String> {
|
||||
conn.execute(
|
||||
"INSERT INTO chunks (path, ordinal, text) VALUES (?1, ?2, ?3)",
|
||||
params![path, ordinal, text],
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(conn.last_insert_rowid())
|
||||
}
|
||||
|
||||
pub fn insert_embedding(
|
||||
conn: &Connection,
|
||||
chunk_id: i64,
|
||||
dim: usize,
|
||||
vec: &[f32],
|
||||
) -> Result<(), String> {
|
||||
let bytes = f32_slice_to_blob(vec);
|
||||
let dim_i64 = i64::try_from(dim).map_err(|_| "embedding dim too large".to_string())?;
|
||||
conn.execute(
|
||||
"INSERT INTO embeddings (chunk_id, dim, vec) VALUES (?1, ?2, ?3)",
|
||||
params![chunk_id, dim_i64, bytes],
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Serialises the values back to back, four little-endian bytes per `f32`.
pub(crate) fn f32_slice_to_blob(v: &[f32]) -> Vec<u8> {
    v.iter().flat_map(|value| value.to_le_bytes()).collect()
}
|
||||
|
||||
/// Decodes a blob of little-endian `f32` values.
///
/// Returns `None` unless the blob is exactly `dim * 4` bytes long. The length
/// is computed with overflow checking because `dim` is read from the database:
/// an absurdly large value must be rejected, not wrap around and be mistaken
/// for a matching length.
pub fn blob_to_f32_vec(blob: &[u8], dim: usize) -> Option<Vec<f32>> {
    let expected_len = dim.checked_mul(std::mem::size_of::<f32>())?;
    if blob.len() != expected_len {
        return None;
    }
    Some(
        blob.chunks_exact(4)
            .map(|b| f32::from_le_bytes([b[0], b[1], b[2], b[3]]))
            .collect(),
    )
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ChunkRow {
|
||||
pub path: String,
|
||||
pub text: String,
|
||||
pub vec: Vec<f32>,
|
||||
}
|
||||
|
||||
pub fn load_all_indexed(conn: &Connection) -> Result<Vec<ChunkRow>, String> {
|
||||
let mut stmt = conn
|
||||
.prepare(
|
||||
"SELECT c.path, c.text, e.dim, e.vec FROM chunks c
|
||||
INNER JOIN embeddings e ON e.chunk_id = c.id",
|
||||
)
|
||||
.map_err(|e| e.to_string())?;
|
||||
let mut rows = stmt.query([]).map_err(|e| e.to_string())?;
|
||||
let mut out = Vec::new();
|
||||
while let Some(r) = rows.next().map_err(|e| e.to_string())? {
|
||||
let path: String = r.get(0).map_err(|e| e.to_string())?;
|
||||
let text: String = r.get(1).map_err(|e| e.to_string())?;
|
||||
let dim: i64 = r.get(2).map_err(|e| e.to_string())?;
|
||||
let blob: Vec<u8> = r.get(3).map_err(|e| e.to_string())?;
|
||||
let dim = usize::try_from(dim).map_err(|_| "invalid embedding dim in db".to_string())?;
|
||||
let Some(vec) = blob_to_f32_vec(&blob, dim) else {
|
||||
continue;
|
||||
};
|
||||
out.push(ChunkRow { path, text, vec });
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
pub fn chunk_count(conn: &Connection) -> Result<i64, String> {
|
||||
let n: i64 = conn
|
||||
.query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0))
|
||||
.map_err(|e| e.to_string())?;
|
||||
Ok(n)
|
||||
}
|
||||
129
rust/crates/claw-rag-service/src/embed.rs
Normal file
129
rust/crates/claw-rag-service/src/embed.rs
Normal file
@@ -0,0 +1,129 @@
|
||||
//! OpenAI-compatible embeddings HTTP client.
|
||||
|
||||
use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Connection settings for the embeddings endpoint.
#[derive(Clone, Debug)]
pub struct EmbedConfig {
    /// Bearer token sent with every request.
    pub api_key: String,
    /// Endpoint base URL without a trailing slash; `/embeddings` is appended.
    pub base_url: String,
    /// Model name placed in the request body.
    pub model: String,
}

impl EmbedConfig {
    /// Builds the configuration from environment variables.
    ///
    /// * API key: `CLAW_RAG_OPENAI_API_KEY`, falling back to `OPENAI_API_KEY`.
    /// * Base URL: `CLAW_RAG_EMBEDDING_BASE_URL`, default
    ///   `https://api.openai.com/v1`; trailing slashes are removed.
    /// * Model: `CLAW_RAG_EMBEDDING_MODEL`, default `text-embedding-3-small`.
    ///
    /// A variable that is set but empty (or whitespace only) is treated as
    /// unset, so a blank value neither hides the fallback key nor replaces a
    /// default with an unusable empty string.
    ///
    /// # Errors
    /// Returns an error when no usable API key is found.
    pub fn from_env() -> Result<Self, String> {
        /// Reads `name`, treating unset, non-Unicode and blank values alike.
        fn env_non_empty(name: &str) -> Option<String> {
            std::env::var(name).ok().filter(|v| !v.trim().is_empty())
        }

        let api_key = env_non_empty("CLAW_RAG_OPENAI_API_KEY")
            .or_else(|| env_non_empty("OPENAI_API_KEY"))
            .ok_or_else(|| {
                "set CLAW_RAG_OPENAI_API_KEY or OPENAI_API_KEY for embeddings".to_string()
            })?;
        let base_url = env_non_empty("CLAW_RAG_EMBEDDING_BASE_URL")
            .unwrap_or_else(|| "https://api.openai.com/v1".into());
        let model = env_non_empty("CLAW_RAG_EMBEDDING_MODEL")
            .unwrap_or_else(|| "text-embedding-3-small".into());
        Ok(Self {
            api_key,
            base_url: base_url.trim_end_matches('/').to_string(),
            model,
        })
    }

    /// Returns a mock configuration when `CLAW_RAG_MOCK_PROVIDERS` is exactly
    /// `1`, and `None` otherwise.
    ///
    /// The mock uses the base URL `mock://`, intended for tests and dry runs
    /// that must not call a real embeddings provider.
    #[must_use]
    pub fn mock_from_env() -> Option<Self> {
        if std::env::var("CLAW_RAG_MOCK_PROVIDERS").ok().as_deref() != Some("1") {
            return None;
        }
        Some(Self {
            api_key: "mock".into(),
            base_url: "mock://".into(),
            model: "mock-embedding".into(),
        })
    }
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct EmbeddingsRequest<'a> {
|
||||
model: &'a str,
|
||||
input: Vec<&'a str>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct EmbeddingsResponse {
|
||||
data: Vec<EmbeddingItem>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct EmbeddingItem {
|
||||
embedding: Vec<f32>,
|
||||
}
|
||||
|
||||
pub async fn embed_batch(
|
||||
client: &Client,
|
||||
cfg: &EmbedConfig,
|
||||
texts: &[String],
|
||||
) -> Result<Vec<Vec<f32>>, String> {
|
||||
if cfg.base_url.starts_with("mock://") {
|
||||
return Ok(texts
|
||||
.iter()
|
||||
.map(|s| mock_vector_for_text(s.as_str()))
|
||||
.collect());
|
||||
}
|
||||
|
||||
let url = format!("{}/embeddings", cfg.base_url);
|
||||
let inputs: Vec<&str> = texts.iter().map(String::as_str).collect();
|
||||
let body = EmbeddingsRequest {
|
||||
model: &cfg.model,
|
||||
input: inputs,
|
||||
};
|
||||
let res = client
|
||||
.post(&url)
|
||||
.header("Authorization", format!("Bearer {}", cfg.api_key))
|
||||
.header("Content-Type", "application/json")
|
||||
.json(&body)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| e.to_string())?;
|
||||
if !res.status().is_success() {
|
||||
let t = res.text().await.unwrap_or_default();
|
||||
return Err(format!("embeddings HTTP error: {t}"));
|
||||
}
|
||||
let parsed: EmbeddingsResponse = res.json().await.map_err(|e| e.to_string())?;
|
||||
if parsed.data.len() != texts.len() {
|
||||
return Err(format!(
|
||||
"embeddings count mismatch: got {} for {} inputs",
|
||||
parsed.data.len(),
|
||||
texts.len()
|
||||
));
|
||||
}
|
||||
Ok(parsed.data.into_iter().map(|d| d.embedding).collect())
|
||||
}
|
||||
|
||||
/// Produces a deterministic 16-dimensional vector from the first 64 bytes of
/// `s`.
///
/// Byte `i` adds `byte / 255` to bucket `i % 16`; the result is then scaled
/// to unit length. Text with no bytes (or only NUL bytes) stays all-zero.
fn mock_vector_for_text(s: &str) -> Vec<f32> {
    const DIM: usize = 16;

    let mut buckets = [0f32; DIM];
    for (pos, byte) in s.bytes().take(DIM * 4).enumerate() {
        buckets[pos % DIM] += f32::from(byte) / 255.0;
    }

    let norm: f32 = buckets.iter().map(|x| x * x).sum::<f32>().sqrt();
    if norm > 0.0 {
        buckets.iter().map(|x| x / norm).collect()
    } else {
        buckets.to_vec()
    }
}
|
||||
|
||||
/// Cosine of the angle between `a` and `b`.
///
/// Returns `0.0` when the slices are empty, differ in length, or either one
/// has zero magnitude.
pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.is_empty() || a.len() != b.len() {
        return 0.0;
    }

    let magnitude = |v: &[f32]| v.iter().map(|x| x * x).sum::<f32>().sqrt();
    let (na, nb) = (magnitude(a), magnitude(b));
    if na == 0.0 || nb == 0.0 {
        return 0.0;
    }

    let dot: f32 = a.iter().zip(b).map(|(x, y)| x * y).sum();
    dot / (na * nb)
}
|
||||
219
rust/crates/claw-rag-service/src/ingest.rs
Normal file
219
rust/crates/claw-rag-service/src/ingest.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
//! Walk workspace and fill `SQLite` + embeddings.
|
||||
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use reqwest::Client;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::chunk::chunk_text;
|
||||
use crate::db::{
|
||||
delete_file_and_chunks, file_is_unchanged, insert_chunk, insert_embedding, list_all_files,
|
||||
open_db, upsert_file_meta,
|
||||
};
|
||||
use crate::embed::{embed_batch, EmbedConfig};
|
||||
#[cfg(feature = "qdrant-index")]
|
||||
use crate::qdrant_index::{upsert_points, ChunkPoint};
|
||||
|
||||
const DEFAULT_MAX_FILE_BYTES: u64 = 2 * 1024 * 1024;
|
||||
const CHUNK_CHARS: usize = 900;
|
||||
const CHUNK_OVERLAP: usize = 120;
|
||||
const EMBED_BATCH: usize = 16;
|
||||
|
||||
static SKIP_DIR_NAMES: &[&str] = &[".git", "target", "node_modules", "__pycache__", ".claw-rag"];
|
||||
|
||||
static TEXT_EXTENSIONS: &[&str] = &[
|
||||
"rs", "md", "toml", "txt", "json", "yaml", "yml", "js", "ts", "tsx", "jsx", "py", "go", "c",
|
||||
"h", "cpp", "hpp", "cs", "java", "kt", "swift", "rb", "php", "sh", "ps1", "html", "css", "sql",
|
||||
];
|
||||
|
||||
/// Counters reported by one ingest run.
#[derive(Default, Debug)]
pub struct IngestStats {
    /// Number of candidate files found while walking the workspaces; this
    /// includes files that were skipped later because they were unchanged.
    pub files_indexed: usize,
    /// Number of chunks produced for the files that were (re)indexed.
    pub chunks_total: usize,
    /// Number of embeddings stored in the database.
    pub embeddings_written: usize,
}
|
||||
|
||||
fn should_skip_dir(path: &Path) -> bool {
|
||||
path.file_name()
|
||||
.and_then(std::ffi::OsStr::to_str)
|
||||
.is_some_and(|n| SKIP_DIR_NAMES.contains(&n))
|
||||
}
|
||||
|
||||
fn is_text_extension(path: &Path) -> bool {
|
||||
path.extension()
|
||||
.and_then(std::ffi::OsStr::to_str)
|
||||
.is_some_and(|e| TEXT_EXTENSIONS.contains(&e.to_ascii_lowercase().as_str()))
|
||||
}
|
||||
|
||||
async fn flush_path_batch(
|
||||
conn: &rusqlite::Connection,
|
||||
path: &str,
|
||||
batch: &mut Vec<(i32, String)>,
|
||||
client: &Client,
|
||||
cfg: &EmbedConfig,
|
||||
stats: &mut IngestStats,
|
||||
) -> Result<(), String> {
|
||||
if batch.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
let texts: Vec<String> = batch.iter().map(|(_, t)| t.clone()).collect();
|
||||
let vecs = embed_batch(client, cfg, &texts).await?;
|
||||
if vecs.len() != batch.len() {
|
||||
return Err("embed batch size mismatch".into());
|
||||
}
|
||||
|
||||
#[cfg(feature = "qdrant-index")]
|
||||
let mut qdrant_points: Vec<ChunkPoint> = Vec::with_capacity(batch.len());
|
||||
|
||||
for ((ord, t), vec) in batch.drain(..).zip(vecs.into_iter()) {
|
||||
let dim = vec.len();
|
||||
let cid = insert_chunk(conn, path, ord, &t)?;
|
||||
insert_embedding(conn, cid, dim, &vec)?;
|
||||
stats.embeddings_written += 1;
|
||||
|
||||
#[cfg(feature = "qdrant-index")]
|
||||
{
|
||||
qdrant_points.push(ChunkPoint {
|
||||
id: cid,
|
||||
vec,
|
||||
path: path.to_string(),
|
||||
text: t,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "qdrant-index")]
|
||||
upsert_points(qdrant_points).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn run_ingest(
|
||||
workspaces: &[PathBuf],
|
||||
db_path: &Path,
|
||||
cfg: &EmbedConfig,
|
||||
client: &Client,
|
||||
) -> Result<IngestStats, String> {
|
||||
let conn = open_db(db_path)?;
|
||||
|
||||
let mut all_files: Vec<(String, PathBuf)> = Vec::new();
|
||||
let mut seen_paths: Vec<String> = Vec::new();
|
||||
|
||||
for ws in workspaces {
|
||||
let workspace = ws
|
||||
.canonicalize()
|
||||
.map_err(|e| format!("workspace: {}: {e}", ws.display()))?;
|
||||
let ws_prefix = workspace.clone();
|
||||
let repo_id = repo_id_for_workspace(&workspace);
|
||||
|
||||
for entry in WalkDir::new(&workspace)
|
||||
.into_iter()
|
||||
.filter_entry(|e| !should_skip_dir(e.path()))
|
||||
{
|
||||
let entry = entry.map_err(|e| e.to_string())?;
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
let path = entry.path();
|
||||
if !is_text_extension(path) {
|
||||
continue;
|
||||
}
|
||||
let meta = entry.metadata().map_err(|e| e.to_string())?;
|
||||
if meta.len() > DEFAULT_MAX_FILE_BYTES {
|
||||
continue;
|
||||
}
|
||||
let rel = path
|
||||
.strip_prefix(&ws_prefix)
|
||||
.unwrap_or(path)
|
||||
.to_string_lossy()
|
||||
.replace('\\', "/");
|
||||
let key = format!("{repo_id}:{rel}");
|
||||
seen_paths.push(key.clone());
|
||||
all_files.push((key, path.to_path_buf()));
|
||||
}
|
||||
}
|
||||
|
||||
all_files.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
seen_paths.sort();
|
||||
|
||||
let mut stats = IngestStats {
|
||||
files_indexed: all_files.len(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
for (rel, file) in all_files {
|
||||
let Ok(meta) = std::fs::metadata(&file) else {
|
||||
continue;
|
||||
};
|
||||
let size_bytes =
|
||||
i64::try_from(meta.len()).map_err(|_| "file size too large".to_string())?;
|
||||
let mtime_ms = meta
|
||||
.modified()
|
||||
.ok()
|
||||
.and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
|
||||
.and_then(|d| i64::try_from(d.as_millis()).ok())
|
||||
.unwrap_or(0);
|
||||
|
||||
let Ok(raw) = std::fs::read_to_string(&file) else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let content_hash = blake3::hash(raw.as_bytes()).to_hex().to_string();
|
||||
if file_is_unchanged(&conn, &rel, &content_hash, size_bytes, mtime_ms)? {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Re-index this file: delete previous chunks (and embeddings) for path.
|
||||
delete_file_and_chunks(&conn, &rel)?;
|
||||
|
||||
let pieces = chunk_text(&raw, CHUNK_CHARS, CHUNK_OVERLAP);
|
||||
if pieces.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut batch: Vec<(i32, String)> = Vec::new();
|
||||
for (ord, piece) in pieces.into_iter().enumerate() {
|
||||
stats.chunks_total += 1;
|
||||
let ord_i32 =
|
||||
i32::try_from(ord).map_err(|_| "file produced too many chunks".to_string())?;
|
||||
batch.push((ord_i32, piece));
|
||||
if batch.len() >= EMBED_BATCH {
|
||||
flush_path_batch(&conn, &rel, &mut batch, client, cfg, &mut stats).await?;
|
||||
}
|
||||
}
|
||||
flush_path_batch(&conn, &rel, &mut batch, client, cfg, &mut stats).await?;
|
||||
|
||||
let now_ms = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| i64::try_from(d.as_millis()).unwrap_or(0))
|
||||
.unwrap_or(0);
|
||||
upsert_file_meta(&conn, &rel, &content_hash, size_bytes, mtime_ms, now_ms)?;
|
||||
}
|
||||
|
||||
// Delete entries for files that no longer exist.
|
||||
// (We compare against file list from DB to avoid needing a SQL "NOT IN" temp table.)
|
||||
let mut seen_set = std::collections::BTreeSet::new();
|
||||
for p in &seen_paths {
|
||||
seen_set.insert(p.as_str());
|
||||
}
|
||||
for p in list_all_files(&conn)? {
|
||||
if !seen_set.contains(p.as_str()) {
|
||||
delete_file_and_chunks(&conn, &p)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(stats)
|
||||
}
|
||||
|
||||
fn repo_id_for_workspace(workspace: &Path) -> String {
|
||||
let name = workspace
|
||||
.file_name()
|
||||
.and_then(std::ffi::OsStr::to_str)
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or("workspace");
|
||||
let hash = blake3::hash(workspace.to_string_lossy().as_bytes())
|
||||
.to_hex()
|
||||
.to_string();
|
||||
format!("{name}-{h}", name = name, h = &hash[..8])
|
||||
}
|
||||
111
rust/crates/claw-rag-service/src/lib.rs
Normal file
111
rust/crates/claw-rag-service/src/lib.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
//! Workspace RAG: ingest files → `SQLite` + embeddings, query via cosine similarity (linear scan MVP).
|
||||
#![forbid(unsafe_code)]
|
||||
|
||||
mod chunk;
|
||||
mod db;
|
||||
mod embed;
|
||||
mod ingest;
|
||||
#[cfg(feature = "qdrant-index")]
|
||||
mod qdrant_index;
|
||||
mod search;
|
||||
|
||||
pub use db::{chunk_count, open_db};
|
||||
pub use embed::EmbedConfig;
|
||||
pub use ingest::{run_ingest, IngestStats};
|
||||
pub use search::query_index;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// One retrieved chunk for the model or UI.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct RagHit {
|
||||
pub path: String,
|
||||
pub snippet: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub score: Option<f32>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct QueryRequest {
|
||||
pub query: String,
|
||||
#[serde(default = "default_top_k")]
|
||||
pub top_k: u32,
|
||||
}
|
||||
|
||||
/// Serde default for `QueryRequest::top_k` when the field is omitted.
fn default_top_k() -> u32 {
    const DEFAULT_TOP_K: u32 = 8;
    DEFAULT_TOP_K
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct QueryResponse {
|
||||
pub hits: Vec<RagHit>,
|
||||
/// `0-stub` (legacy), `1-sqlite`, `1-sqlite-empty`, `1-sqlite-no-db`
|
||||
pub phase: &'static str,
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;

    use reqwest::Client;
    use tempfile::tempdir;

    use super::*;

    /// A query against a database file that does not exist succeeds and reports `1-sqlite-no-db`.
    #[tokio::test]
    async fn query_missing_db_reports_phase() {
        let http = Client::new();
        let embed_cfg = EmbedConfig {
            api_key: "x".into(),
            base_url: "mock://".into(),
            model: "m".into(),
        };
        let request = QueryRequest {
            query: "hello".into(),
            top_k: 3,
        };

        let response = query_index(
            Path::new("/no/such/claw_rag.sqlite"),
            &http,
            &embed_cfg,
            &request,
        )
        .await
        .unwrap();

        assert_eq!(response.phase, "1-sqlite-no-db");
    }

    /// Ingests two workspaces with the mock embedding provider and checks that a query returns hits.
    #[tokio::test]
    async fn ingest_and_query_roundtrip_mock() {
        std::env::set_var("CLAW_RAG_MOCK_PROVIDERS", "1");

        // Two separate workspace roots under one temp dir, each holding a single document.
        let scratch = tempdir().unwrap();
        let ws1 = scratch.path().join("ws1");
        let ws2 = scratch.path().join("ws2");
        std::fs::create_dir_all(&ws1).unwrap();
        std::fs::create_dir_all(&ws2).unwrap();
        std::fs::write(ws1.join("note.md"), "hello RAG service test content").unwrap();
        std::fs::write(ws2.join("docs.md"), "secondary repo doc about embeddings").unwrap();

        let db = scratch.path().join("idx.sqlite");
        let http = Client::new();
        let embed_cfg = EmbedConfig::mock_from_env().expect("mock");

        let stats = run_ingest(&[ws1.clone(), ws2.clone()], &db, &embed_cfg, &http)
            .await
            .unwrap();
        assert!(stats.embeddings_written >= 1);

        let request = QueryRequest {
            query: "RAG service".into(),
            top_k: 4,
        };
        let response = query_index(&db, &http, &embed_cfg, &request)
            .await
            .unwrap();

        assert_eq!(response.phase, "1-sqlite");
        assert!(!response.hits.is_empty());
        // Every hit path is expected to contain a `:` separator.
        assert!(response.hits.iter().all(|hit| hit.path.contains(':')));

        std::env::remove_var("CLAW_RAG_MOCK_PROVIDERS");
    }
}
|
||||
175
rust/crates/claw-rag-service/src/main.rs
Normal file
175
rust/crates/claw-rag-service/src/main.rs
Normal file
@@ -0,0 +1,175 @@
|
||||
//! `claw-rag-service` — HTTP API + `ingest` subcommand.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::{
|
||||
extract::State,
|
||||
http::StatusCode,
|
||||
response::Html,
|
||||
routing::{get, post},
|
||||
Json, Router,
|
||||
};
|
||||
use clap::{Parser, Subcommand};
|
||||
use claw_rag_service::{
|
||||
chunk_count, open_db, query_index, run_ingest, EmbedConfig, QueryRequest, QueryResponse,
|
||||
};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(
|
||||
name = "claw-rag-service",
|
||||
about = "Workspace RAG index + HTTP query API"
|
||||
)]
|
||||
struct Cli {
|
||||
#[command(subcommand)]
|
||||
command: Option<Cmd>,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Cmd {
|
||||
/// Run HTTP server (default when no subcommand).
|
||||
Serve(ServeArgs),
|
||||
/// Index a workspace into `SQLite` (calls embedding API).
|
||||
Ingest(IngestArgs),
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
struct ServeArgs {
|
||||
#[arg(long, env = "CLAW_RAG_DB", default_value = ".claw-rag/index.sqlite")]
|
||||
db: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Parser)]
|
||||
struct IngestArgs {
|
||||
/// Workspace roots to ingest. Repeat `--workspace` to ingest multiple repos (cross-repo RAG).
|
||||
#[arg(short, long)]
|
||||
workspace: Vec<PathBuf>,
|
||||
#[arg(long, env = "CLAW_RAG_DB", default_value = ".claw-rag/index.sqlite")]
|
||||
db: PathBuf,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AppState {
|
||||
db_path: PathBuf,
|
||||
client: reqwest::Client,
|
||||
cfg: EmbedConfig,
|
||||
}
|
||||
|
||||
/// Single-page UI for phase 3 (served at `GET /`).
|
||||
static INDEX_HTML: &str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/static/index.html"));
|
||||
|
||||
async fn ui_index() -> Html<&'static str> {
|
||||
Html(INDEX_HTML)
|
||||
}
|
||||
|
||||
fn rag_router(state: Arc<AppState>) -> Router {
|
||||
Router::new()
|
||||
.route("/", get(ui_index))
|
||||
.route("/health", get(|| async { "ok" }))
|
||||
.route("/v1/stats", get(stats))
|
||||
.route("/v1/query", post(query))
|
||||
.with_state(state)
|
||||
}
|
||||
|
||||
fn resolve_embed_config() -> Result<EmbedConfig, String> {
|
||||
if let Some(c) = EmbedConfig::mock_from_env() {
|
||||
return Ok(c);
|
||||
}
|
||||
EmbedConfig::from_env()
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
// Load `.env` if present (walks up parent directories).
|
||||
// This is a convenience for local development; CI/production should set real env vars.
|
||||
let _ = dotenvy::dotenv();
|
||||
|
||||
let cli = Cli::parse();
|
||||
|
||||
if let Some(Cmd::Ingest(a)) = cli.command {
|
||||
let cfg = resolve_embed_config()?;
|
||||
let client = reqwest::Client::new();
|
||||
let st = run_ingest(&a.workspace, &a.db, &cfg, &client).await?;
|
||||
eprintln!(
|
||||
"ingest: files={} chunks={} embeddings={}",
|
||||
st.files_indexed, st.chunks_total, st.embeddings_written
|
||||
);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let db = if let Some(Cmd::Serve(s)) = cli.command {
|
||||
s.db
|
||||
} else {
|
||||
PathBuf::from(
|
||||
std::env::var("CLAW_RAG_DB").unwrap_or_else(|_| ".claw-rag/index.sqlite".into()),
|
||||
)
|
||||
};
|
||||
|
||||
let cfg = resolve_embed_config()?;
|
||||
let state = Arc::new(AppState {
|
||||
db_path: db,
|
||||
client: reqwest::Client::new(),
|
||||
cfg,
|
||||
});
|
||||
|
||||
let app = rag_router(state.clone());
|
||||
|
||||
let port: u16 = std::env::var("CLAW_RAG_PORT")
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(8787);
|
||||
let host: std::net::IpAddr = std::env::var("CLAW_RAG_HOST")
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(std::net::IpAddr::V4(std::net::Ipv4Addr::LOCALHOST));
|
||||
let addr = std::net::SocketAddr::from((host, port));
|
||||
eprintln!(
|
||||
"claw-rag-service db={} listen=http://{addr}",
|
||||
state.db_path.display()
|
||||
);
|
||||
let listener = tokio::net::TcpListener::bind(addr).await?;
|
||||
axum::serve(listener, app).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn stats(State(state): State<Arc<AppState>>) -> Result<Json<serde_json::Value>, StatusCode> {
|
||||
let path = state.db_path.clone();
|
||||
if !path.is_file() {
|
||||
return Ok(Json(serde_json::json!({
|
||||
"chunks": 0,
|
||||
"phase": "1-sqlite-no-db"
|
||||
})));
|
||||
}
|
||||
let res = tokio::task::spawn_blocking(move || {
|
||||
let conn = open_db(&path).map_err(|_| ())?;
|
||||
chunk_count(&conn).map_err(|_| ())
|
||||
})
|
||||
.await
|
||||
.map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
|
||||
.map_err(|()| StatusCode::INTERNAL_SERVER_ERROR)?;
|
||||
Ok(Json(serde_json::json!({
|
||||
"chunks": res,
|
||||
"phase": "1-sqlite"
|
||||
})))
|
||||
}
|
||||
|
||||
async fn query(
|
||||
State(state): State<Arc<AppState>>,
|
||||
Json(req): Json<QueryRequest>,
|
||||
) -> Result<Json<QueryResponse>, (StatusCode, String)> {
|
||||
query_index(&state.db_path, &state.client, &state.cfg, &req)
|
||||
.await
|
||||
.map(Json)
|
||||
.map_err(|e| (StatusCode::BAD_REQUEST, e))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::INDEX_HTML;

    /// The embedded page must reference both API endpoints served by the router.
    #[test]
    fn index_html_wires_api_paths() {
        for endpoint in ["/v1/stats", "/v1/query"] {
            assert!(INDEX_HTML.contains(endpoint));
        }
    }
}
|
||||
177
rust/crates/claw-rag-service/src/qdrant_index.rs
Normal file
177
rust/crates/claw-rag-service/src/qdrant_index.rs
Normal file
@@ -0,0 +1,177 @@
|
||||
use crate::{QueryResponse, RagHit};
|
||||
use serde_json::json;
|
||||
|
||||
async fn ensure_collection(
|
||||
client: &qdrant_client::Qdrant,
|
||||
collection: &str,
|
||||
dim: usize,
|
||||
) -> Result<(), String> {
|
||||
let dim_u64 = u64::try_from(dim).map_err(|_| "embedding dim too large".to_string())?;
|
||||
|
||||
// Try to create the collection; if it already exists, Qdrant will error.
|
||||
// We treat "already exists" as success to keep ingest idempotent.
|
||||
let res = client
|
||||
.create_collection(
|
||||
qdrant_client::qdrant::CreateCollectionBuilder::new(collection).vectors_config(
|
||||
qdrant_client::qdrant::VectorParamsBuilder::new(
|
||||
dim_u64,
|
||||
qdrant_client::qdrant::Distance::Cosine,
|
||||
),
|
||||
),
|
||||
)
|
||||
.await;
|
||||
|
||||
match res {
|
||||
Ok(_) => Ok(()),
|
||||
Err(e) => {
|
||||
let msg = e.to_string();
|
||||
if msg.contains("already exists") || msg.contains("Already exists") {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(format!("qdrant create_collection: {e}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Connection settings for the optional Qdrant vector index.
#[derive(Debug, Clone)]
pub struct QdrantConfig {
    /// Qdrant endpoint URL (`CLAW_RAG_QDRANT_URL`).
    pub url: String,
    /// Optional API key (`CLAW_RAG_QDRANT_API_KEY`).
    pub api_key: Option<String>,
    /// Collection name (`CLAW_RAG_QDRANT_COLLECTION`, default `claw_rag_chunks`).
    pub collection: String,
}

impl QdrantConfig {
    /// Reads the Qdrant settings from the environment.
    ///
    /// Returns `None` when `CLAW_RAG_QDRANT_URL` is unset, which callers treat as
    /// "Qdrant disabled". Variables that are set but blank (empty or whitespace only,
    /// as left behind by `.env` templates) are treated as unset: a blank URL disables
    /// Qdrant, a blank collection falls back to the default name, and a blank API key
    /// means no key. Non-blank values are used exactly as given.
    pub fn from_env() -> Option<Self> {
        /// Value of `key`, or `None` when it is unset, not valid Unicode, or blank.
        fn non_blank(key: &str) -> Option<String> {
            std::env::var(key)
                .ok()
                .filter(|value| !value.trim().is_empty())
        }

        let url = non_blank("CLAW_RAG_QDRANT_URL")?;
        let collection = non_blank("CLAW_RAG_QDRANT_COLLECTION")
            .unwrap_or_else(|| "claw_rag_chunks".to_string());
        let api_key = non_blank("CLAW_RAG_QDRANT_API_KEY");
        Some(Self {
            url,
            api_key,
            collection,
        })
    }
}
|
||||
|
||||
pub async fn query_qdrant(q: &[f32], top_k: u32) -> Result<Option<QueryResponse>, String> {
|
||||
let Some(cfg) = QdrantConfig::from_env() else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let limit = top_k.min(64);
|
||||
let mut client = qdrant_client::Qdrant::from_url(&cfg.url);
|
||||
if let Some(key) = &cfg.api_key {
|
||||
client = client.api_key(key.clone());
|
||||
}
|
||||
let client = client.build().map_err(|e| format!("qdrant client: {e}"))?;
|
||||
|
||||
// If collection doesn't exist yet, treat it as "no results" and fall back.
|
||||
// (We avoid creating it on query because ingest controls dimension/model.)
|
||||
if let Err(e) = client.collection_info(&cfg.collection).await {
|
||||
let msg = e.to_string();
|
||||
if msg.contains("doesn't exist") || msg.contains("Not found") {
|
||||
return Ok(None);
|
||||
}
|
||||
return Err(format!("qdrant collection_info: {e}"));
|
||||
}
|
||||
|
||||
let res = client
|
||||
.query(
|
||||
qdrant_client::qdrant::QueryPointsBuilder::new(&cfg.collection)
|
||||
.query(q.to_vec())
|
||||
.limit(u64::from(limit))
|
||||
.with_payload(true),
|
||||
)
|
||||
.await
|
||||
.map_err(|e| format!("qdrant query: {e}"))?;
|
||||
|
||||
let mut hits = Vec::new();
|
||||
for p in res.result {
|
||||
let payload = p.payload;
|
||||
let path = payload
|
||||
.get("path")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(ToString::to_string)
|
||||
.unwrap_or_default();
|
||||
let text = payload
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(ToString::to_string)
|
||||
.unwrap_or_default();
|
||||
let score = p.score;
|
||||
if !path.is_empty() {
|
||||
hits.push(RagHit {
|
||||
path,
|
||||
snippet: truncate_snippet(&text, 480),
|
||||
score: Some(score),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Some(QueryResponse {
|
||||
hits,
|
||||
phase: "2-qdrant",
|
||||
}))
|
||||
}
|
||||
|
||||
/// One chunk ready to be written to Qdrant.
#[derive(Debug, Clone)]
pub struct ChunkPoint {
    /// Chunk identifier; must be non-negative because it is converted to an unsigned point id.
    pub id: i64,
    /// Embedding vector for the chunk.
    pub vec: Vec<f32>,
    /// Stored as the `path` payload field.
    pub path: String,
    /// Stored as the `text` payload field.
    pub text: String,
}
|
||||
|
||||
pub async fn upsert_points(points: Vec<ChunkPoint>) -> Result<(), String> {
|
||||
let Some(cfg) = QdrantConfig::from_env() else {
|
||||
return Ok(());
|
||||
};
|
||||
if points.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut client = qdrant_client::Qdrant::from_url(&cfg.url);
|
||||
if let Some(key) = &cfg.api_key {
|
||||
client = client.api_key(key.clone());
|
||||
}
|
||||
let client = client.build().map_err(|e| format!("qdrant client: {e}"))?;
|
||||
|
||||
let dim = points[0].vec.len();
|
||||
ensure_collection(&client, &cfg.collection, dim).await?;
|
||||
|
||||
let mut qpoints = Vec::with_capacity(points.len());
|
||||
for p in points {
|
||||
if p.vec.len() != dim {
|
||||
return Err("qdrant upsert: embedding dimension mismatch within batch".to_string());
|
||||
}
|
||||
let id = u64::try_from(p.id).map_err(|_| "chunk id must be non-negative".to_string())?;
|
||||
let payload_map = serde_json::Map::from_iter([
|
||||
("path".to_string(), json!(p.path)),
|
||||
("text".to_string(), json!(p.text)),
|
||||
]);
|
||||
let payload: qdrant_client::Payload = payload_map.into();
|
||||
|
||||
qpoints.push(qdrant_client::qdrant::PointStruct::new(id, p.vec, payload));
|
||||
}
|
||||
|
||||
client
|
||||
.upsert_points(qdrant_client::qdrant::UpsertPointsBuilder::new(
|
||||
&cfg.collection,
|
||||
qpoints,
|
||||
))
|
||||
.await
|
||||
.map_err(|e| format!("qdrant upsert: {e}"))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns `s` unchanged when it has at most `max_chars` characters; otherwise returns its
/// first `max_chars` characters followed by `…`. Lengths are counted in `char`s, not bytes.
fn truncate_snippet(s: &str, max_chars: usize) -> String {
    match s.char_indices().nth(max_chars) {
        // A character exists at position `max_chars`, so the text is too long: cut just before it.
        Some((cut, _)) => {
            let mut clipped = String::with_capacity(cut + '…'.len_utf8());
            clipped.push_str(&s[..cut]);
            clipped.push('…');
            clipped
        }
        None => s.to_string(),
    }
}
|
||||
87
rust/crates/claw-rag-service/src/search.rs
Normal file
87
rust/crates/claw-rag-service/src/search.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
//! Vector search over indexed chunks (linear scan MVP).
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use reqwest::Client;
|
||||
|
||||
use crate::db::{load_all_indexed, open_db};
|
||||
use crate::embed::{cosine_similarity, embed_batch, EmbedConfig};
|
||||
use crate::{QueryRequest, QueryResponse, RagHit};
|
||||
|
||||
pub async fn query_index(
|
||||
db_path: &Path,
|
||||
client: &Client,
|
||||
cfg: &EmbedConfig,
|
||||
req: &QueryRequest,
|
||||
) -> Result<QueryResponse, String> {
|
||||
if !db_path.is_file() {
|
||||
return Ok(QueryResponse {
|
||||
hits: Vec::new(),
|
||||
phase: "1-sqlite-no-db",
|
||||
});
|
||||
}
|
||||
|
||||
let conn = open_db(db_path)?;
|
||||
let qvecs = embed_batch(client, cfg, std::slice::from_ref(&req.query)).await?;
|
||||
let q = qvecs
|
||||
.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| "no query embedding".to_string())?;
|
||||
|
||||
#[cfg(feature = "qdrant-index")]
|
||||
if let Ok(Some(r)) = crate::qdrant_index::query_qdrant(&q, req.top_k).await {
|
||||
return Ok(r);
|
||||
}
|
||||
|
||||
let rows = load_all_indexed(&conn)?;
|
||||
drop(conn);
|
||||
|
||||
if rows.is_empty() {
|
||||
return Ok(QueryResponse {
|
||||
hits: Vec::new(),
|
||||
phase: "1-sqlite-empty",
|
||||
});
|
||||
}
|
||||
|
||||
let expected = rows[0].vec.len();
|
||||
if q.len() != expected {
|
||||
return Err(format!(
|
||||
"embedding dimension mismatch: index uses dim {} but query embedding has {} (same model/env as ingest required)",
|
||||
expected, q.len()
|
||||
));
|
||||
}
|
||||
|
||||
let mut scored: Vec<(f32, usize)> = rows
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, r)| (cosine_similarity(&q, &r.vec), i))
|
||||
.collect();
|
||||
scored.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
|
||||
let top = req.top_k.min(64) as usize;
|
||||
let hits: Vec<RagHit> = scored
|
||||
.into_iter()
|
||||
.take(top)
|
||||
.map(|(score, i)| {
|
||||
let r = &rows[i];
|
||||
RagHit {
|
||||
path: r.path.clone(),
|
||||
snippet: truncate_snippet(&r.text, 480),
|
||||
score: Some(score),
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(QueryResponse {
|
||||
hits,
|
||||
phase: "1-sqlite",
|
||||
})
|
||||
}
|
||||
|
||||
/// Limits `s` to `max_chars` characters (counted as `char`s, not bytes), appending `…`
/// only when something was actually cut off.
fn truncate_snippet(s: &str, max_chars: usize) -> String {
    let mut rest = s.chars();
    let mut clipped: String = rest.by_ref().take(max_chars).collect();
    // Anything still left in the iterator means the input was longer than the limit.
    if rest.next().is_some() {
        clipped.push('…');
    }
    clipped
}
|
||||
233
rust/crates/claw-rag-service/static/index.html
Normal file
233
rust/crates/claw-rag-service/static/index.html
Normal file
@@ -0,0 +1,233 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<title>claw-rag</title>
|
||||
<style>
|
||||
:root {
|
||||
--bg: #12141a;
|
||||
--surface: #1a1d26;
|
||||
--border: #2a3140;
|
||||
--text: #e8eaef;
|
||||
--muted: #8b93a8;
|
||||
--accent: #e8a035;
|
||||
--ok: #6daf8a;
|
||||
--err: #d97b7b;
|
||||
}
|
||||
* { box-sizing: border-box; }
|
||||
body {
|
||||
font-family: ui-sans-serif, system-ui, "Segoe UI", Roboto, sans-serif;
|
||||
margin: 0;
|
||||
min-height: 100vh;
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
line-height: 1.5;
|
||||
}
|
||||
header {
|
||||
padding: 1rem 1.25rem;
|
||||
border-bottom: 1px solid var(--border);
|
||||
background: var(--surface);
|
||||
}
|
||||
header h1 {
|
||||
margin: 0;
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.02em;
|
||||
}
|
||||
header p { margin: 0.35rem 0 0; font-size: 0.85rem; color: var(--muted); }
|
||||
main { max-width: 52rem; margin: 0 auto; padding: 1.25rem; }
|
||||
.stats {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
flex-wrap: wrap;
|
||||
margin-bottom: 1.25rem;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
.stats span { color: var(--muted); }
|
||||
.stats strong { color: var(--accent); }
|
||||
form {
|
||||
display: grid;
|
||||
gap: 0.75rem;
|
||||
margin-bottom: 1.5rem;
|
||||
padding: 1rem;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
}
|
||||
label { font-size: 0.8rem; color: var(--muted); }
|
||||
textarea, input[type="number"] {
|
||||
width: 100%;
|
||||
padding: 0.5rem 0.65rem;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 4px;
|
||||
background: var(--bg);
|
||||
color: var(--text);
|
||||
font: inherit;
|
||||
}
|
||||
textarea { min-height: 5rem; resize: vertical; }
|
||||
.row { display: flex; gap: 1rem; align-items: end; flex-wrap: wrap; }
|
||||
.row > div:first-child { flex: 1; min-width: 12rem; }
|
||||
button {
|
||||
padding: 0.55rem 1.1rem;
|
||||
background: var(--accent);
|
||||
color: #1a1206;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
}
|
||||
button:disabled { opacity: 0.5; cursor: not-allowed; }
|
||||
button:not(:disabled):hover { filter: brightness(1.05); }
|
||||
.status { font-size: 0.85rem; min-height: 1.25rem; }
|
||||
.status.err { color: var(--err); }
|
||||
.status.ok { color: var(--ok); }
|
||||
.hits { display: flex; flex-direction: column; gap: 1rem; }
|
||||
.hit {
|
||||
padding: 0.85rem 1rem;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
border-left: 3px solid var(--accent);
|
||||
}
|
||||
.hit header {
|
||||
padding: 0;
|
||||
border: none;
|
||||
background: transparent;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
.hit .path { font-family: ui-monospace, monospace; font-size: 0.85rem; color: var(--accent); }
|
||||
.hit .score { font-size: 0.75rem; color: var(--muted); }
|
||||
pre {
|
||||
margin: 0;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-word;
|
||||
font-size: 0.82rem;
|
||||
color: var(--muted);
|
||||
}
|
||||
footer {
|
||||
margin-top: 2rem;
|
||||
padding-top: 1rem;
|
||||
border-top: 1px solid var(--border);
|
||||
font-size: 0.75rem;
|
||||
color: var(--muted);
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>claw-rag-service</h1>
|
||||
<p>Local index · same-origin <code>/v1/*</code> API</p>
|
||||
</header>
|
||||
<main>
|
||||
<div class="stats" id="stats">
|
||||
<span>chunks: <strong id="chunks">—</strong></span>
|
||||
<span>phase: <strong id="phase">—</strong></span>
|
||||
<button type="button" id="refresh" style="margin-left:auto">Refresh stats</button>
|
||||
</div>
|
||||
|
||||
<form id="qform">
|
||||
<div>
|
||||
<label for="query">Query</label>
|
||||
<textarea id="query" name="query" placeholder="Natural language search…" required></textarea>
|
||||
</div>
|
||||
<div class="row">
|
||||
<div>
|
||||
<label for="top_k">top_k</label>
|
||||
<input type="number" id="top_k" name="top_k" value="8" min="1" max="64" />
|
||||
</div>
|
||||
<button type="submit" id="submit">Search</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<div class="status" id="status"></div>
|
||||
<div class="hits" id="hits"></div>
|
||||
|
||||
<footer>
|
||||
Index is read-only here; run <code>claw-rag-service ingest</code> to (re)build. Phase 3 UI — no auth; bind to loopback only in production.
|
||||
</footer>
|
||||
</main>
|
||||
<script>
|
||||
// Fetches /v1/stats and shows the chunk count and phase in the stats bar.
// If the request or JSON parsing fails, shows '?' for chunks and 'error' for phase.
async function loadStats() {
  const chunksEl = document.getElementById('chunks');
  const phaseEl = document.getElementById('phase');
  let chunks = '?';
  let phase = 'error';
  try {
    const response = await fetch('/v1/stats');
    const stats = await response.json();
    // Both fields are read before either fallback is overwritten.
    const reported = [stats.chunks ?? '?', stats.phase ?? '?'];
    [chunks, phase] = reported;
  } catch (e) {
    // Keep the failure placeholders set above.
  }
  chunksEl.textContent = chunks;
  phaseEl.textContent = phase;
}
|
||||
|
||||
// Shows `msg` in the status line; `cls` ('ok' / 'err' / empty) is added as an extra CSS class.
function setStatus(msg, cls) {
  const line = document.getElementById('status');
  line.textContent = msg || '';
  line.className = cls ? 'status ' + cls : 'status';
}
|
||||
|
||||
// Replaces the hit list with one card per hit from a /v1/query response
// and summarises the outcome in the status line.
function renderHits(data) {
  const container = document.getElementById('hits');
  container.innerHTML = '';

  const hits = data.hits || [];
  const phase = data.phase || '?';
  if (hits.length === 0) {
    setStatus('No hits (phase: ' + phase + ')', 'ok');
    return;
  }
  setStatus(hits.length + ' hit(s) · phase: ' + phase, 'ok');

  // Builds an element whose content is set through textContent, so hit data is never parsed as HTML.
  const make = (tag, className, text) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    if (text !== undefined) node.textContent = text;
    return node;
  };

  for (const hit of hits) {
    const card = make('article', 'hit');
    const head = make('header');
    head.appendChild(make('div', 'path', hit.path || ''));
    // The score line is shown only when the server sent a score.
    if (hit.score != null) {
      head.appendChild(make('div', 'score', 'score: ' + hit.score));
    }
    card.appendChild(head);
    card.appendChild(make('pre', '', hit.snippet || ''));
    container.appendChild(card);
  }
}
|
||||
|
||||
// "Refresh stats" button re-reads /v1/stats.
document.getElementById('refresh').addEventListener('click', loadStats);

// Search form: POST the query to /v1/query and render the hits or the error.
document.getElementById('qform').addEventListener('submit', async (ev) => {
  ev.preventDefault();

  const query = document.getElementById('query').value.trim();
  // Unparsable (or zero) input falls back to 8; the result is clamped to 1..64.
  const requested = parseInt(document.getElementById('top_k').value, 10) || 8;
  const top_k = Math.min(64, Math.max(1, requested));

  const submitBtn = document.getElementById('submit');
  submitBtn.disabled = true;
  setStatus('Searching…', '');
  document.getElementById('hits').innerHTML = '';

  try {
    const response = await fetch('/v1/query', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ query, top_k }),
    });
    // Read the body as text first so an error response can be shown verbatim.
    const bodyText = await response.text();
    if (response.ok) {
      renderHits(JSON.parse(bodyText));
    } else {
      setStatus('HTTP ' + response.status + ': ' + bodyText, 'err');
    }
  } catch (e) {
    setStatus(String(e), 'err');
  } finally {
    submitBtn.disabled = false;
  }
});

// Populate the stats bar once on page load.
loadStats();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -1180,6 +1180,9 @@ pub enum SlashCommand {
|
||||
count: Option<String>,
|
||||
},
|
||||
Unknown(String),
|
||||
Team {
|
||||
action: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -1277,6 +1280,7 @@ impl SlashCommand {
|
||||
Self::Tag { .. } => "/tag",
|
||||
Self::OutputStyle { .. } => "/output-style",
|
||||
Self::AddDir { .. } => "/add-dir",
|
||||
Self::Team { .. } => "/team",
|
||||
Self::Sandbox => "/sandbox",
|
||||
Self::Mcp { .. } => "/mcp",
|
||||
Self::Export { .. } => "/export",
|
||||
@@ -2256,7 +2260,7 @@ pub fn handle_plugins_slash_command(
|
||||
reload_runtime: true,
|
||||
})
|
||||
}
|
||||
Some("uninstall") => {
|
||||
Some("remove") | Some("uninstall") => {
|
||||
let Some(target) = target else {
|
||||
return Ok(PluginsCommandResult {
|
||||
message: "Usage: /plugins uninstall <plugin-id>".to_string(),
|
||||
@@ -2323,7 +2327,10 @@ pub fn handle_agents_slash_command(args: Option<&str>, cwd: &Path) -> std::io::R
|
||||
Ok(render_agents_report(&agents))
|
||||
}
|
||||
Some(args) if is_help_arg(args) => Ok(render_agents_usage(None)),
|
||||
Some(args) => Ok(render_agents_usage(Some(args))),
|
||||
Some(args) => Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!("unknown agents subcommand: {args}. Supported: list, help"),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2344,7 +2351,10 @@ pub fn handle_agents_slash_command_json(args: Option<&str>, cwd: &Path) -> std::
|
||||
Ok(render_agents_report_json(cwd, &agents))
|
||||
}
|
||||
Some(args) if is_help_arg(args) => Ok(render_agents_usage_json(None)),
|
||||
Some(args) => Ok(render_agents_usage_json(Some(args))),
|
||||
Some(args) => Err(std::io::Error::new(
|
||||
std::io::ErrorKind::InvalidInput,
|
||||
format!("unknown agents subcommand: {args}. Supported: list, help"),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3615,7 +3625,9 @@ fn render_agents_report_json(cwd: &Path, agents: &[AgentSummary]) -> Value {
|
||||
.count();
|
||||
json!({
|
||||
"kind": "agents",
|
||||
"status": "ok",
|
||||
"action": "list",
|
||||
"status": "ok",
|
||||
"working_directory": cwd.display().to_string(),
|
||||
"count": agents.len(),
|
||||
"summary": {
|
||||
@@ -3697,7 +3709,9 @@ fn render_skills_report_json(skills: &[SkillSummary]) -> Value {
|
||||
.count();
|
||||
json!({
|
||||
"kind": "skills",
|
||||
"status": "ok",
|
||||
"action": "list",
|
||||
"status": "ok",
|
||||
"summary": {
|
||||
"total": skills.len(),
|
||||
"active": active,
|
||||
@@ -3732,6 +3746,7 @@ fn render_skill_install_report_json(skill: &InstalledSkill) -> Value {
|
||||
json!({
|
||||
"kind": "skills",
|
||||
"action": "install",
|
||||
"status": "ok",
|
||||
"result": "installed",
|
||||
"invocation_name": &skill.invocation_name,
|
||||
"invoke_as": format!("${}", skill.invocation_name),
|
||||
@@ -3920,6 +3935,8 @@ fn render_agents_usage_json(unexpected: Option<&str>) -> Value {
|
||||
json!({
|
||||
"kind": "agents",
|
||||
"action": "help",
|
||||
"ok": unexpected.is_none(),
|
||||
"status": if unexpected.is_some() { "error" } else { "ok" },
|
||||
"usage": {
|
||||
"slash_command": "/agents [list|help]",
|
||||
"direct_cli": "claw agents [list|help]",
|
||||
@@ -3949,6 +3966,8 @@ fn render_skills_usage_json(unexpected: Option<&str>) -> Value {
|
||||
json!({
|
||||
"kind": "skills",
|
||||
"action": "help",
|
||||
"ok": unexpected.is_none(),
|
||||
"status": if unexpected.is_some() { "error" } else { "ok" },
|
||||
"usage": {
|
||||
"slash_command": "/skills [list|install <path>|help|<skill> [args]]",
|
||||
"aliases": ["/skill"],
|
||||
@@ -3991,6 +4010,8 @@ fn render_mcp_usage_json(unexpected: Option<&str>) -> Value {
|
||||
json!({
|
||||
"kind": "mcp",
|
||||
"action": "help",
|
||||
"ok": unexpected.is_none(),
|
||||
"status": if unexpected.is_some() { "error" } else { "ok" },
|
||||
"usage": {
|
||||
"slash_command": "/mcp [list|show <server>|help]",
|
||||
"direct_cli": "claw mcp [list|show <server>|help]",
|
||||
@@ -4312,6 +4333,7 @@ pub fn handle_slash_command(
|
||||
| SlashCommand::OutputStyle { .. }
|
||||
| SlashCommand::AddDir { .. }
|
||||
| SlashCommand::History { .. }
|
||||
| SlashCommand::Team { .. }
|
||||
| SlashCommand::Unknown(_) => None,
|
||||
}
|
||||
}
|
||||
@@ -5293,6 +5315,7 @@ mod tests {
|
||||
|
||||
assert_eq!(report["kind"], "agents");
|
||||
assert_eq!(report["action"], "list");
|
||||
assert_eq!(report["status"], "ok");
|
||||
assert_eq!(report["working_directory"], workspace.display().to_string());
|
||||
assert_eq!(report["count"], 3);
|
||||
assert_eq!(report["summary"]["active"], 2);
|
||||
@@ -5308,12 +5331,16 @@ mod tests {
|
||||
let help = handle_agents_slash_command_json(Some("help"), &workspace).expect("agents help");
|
||||
assert_eq!(help["kind"], "agents");
|
||||
assert_eq!(help["action"], "help");
|
||||
assert_eq!(help["status"], "ok");
|
||||
assert_eq!(help["usage"]["direct_cli"], "claw agents [list|help]");
|
||||
|
||||
let unexpected = handle_agents_slash_command_json(Some("show planner"), &workspace)
|
||||
.expect("agents usage");
|
||||
assert_eq!(unexpected["action"], "help");
|
||||
assert_eq!(unexpected["unexpected"], "show planner");
|
||||
// Unknown agents subcommands now return Err so CLI layer can exit 1.
|
||||
let unexpected_err = handle_agents_slash_command_json(Some("show planner"), &workspace);
|
||||
assert!(unexpected_err.is_err());
|
||||
assert!(unexpected_err
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("show planner"));
|
||||
|
||||
let _ = fs::remove_dir_all(workspace);
|
||||
let _ = fs::remove_dir_all(user_home);
|
||||
@@ -5419,6 +5446,7 @@ mod tests {
|
||||
);
|
||||
assert_eq!(report["kind"], "skills");
|
||||
assert_eq!(report["action"], "list");
|
||||
assert_eq!(report["status"], "ok");
|
||||
assert_eq!(report["summary"]["active"], 3);
|
||||
assert_eq!(report["summary"]["shadowed"], 1);
|
||||
assert_eq!(report["skills"][0]["name"], "plan");
|
||||
@@ -5430,6 +5458,7 @@ mod tests {
|
||||
let help = handle_skills_slash_command_json(Some("help"), &workspace).expect("skills help");
|
||||
assert_eq!(help["kind"], "skills");
|
||||
assert_eq!(help["action"], "help");
|
||||
assert_eq!(help["status"], "ok");
|
||||
assert_eq!(help["usage"]["aliases"][0], "/skill");
|
||||
assert_eq!(
|
||||
help["usage"]["direct_cli"],
|
||||
@@ -5451,9 +5480,14 @@ mod tests {
|
||||
assert!(agents_help
|
||||
.contains("Sources .claw/agents, ~/.claw/agents, $CLAW_CONFIG_HOME/agents"));
|
||||
|
||||
let agents_unexpected =
|
||||
super::handle_agents_slash_command(Some("show planner"), &cwd).expect("agents usage");
|
||||
assert!(agents_unexpected.contains("Unexpected show planner"));
|
||||
// Unknown agents subcommands now return Err (typed error) instead of Ok+help text
|
||||
// so that the CLI layer can exit 1. The error message names the unexpected input.
|
||||
let agents_unexpected_err = super::handle_agents_slash_command(Some("show planner"), &cwd);
|
||||
assert!(agents_unexpected_err.is_err());
|
||||
assert!(agents_unexpected_err
|
||||
.unwrap_err()
|
||||
.to_string()
|
||||
.contains("show planner"));
|
||||
|
||||
let skills_help =
|
||||
super::handle_skills_slash_command(Some("--help"), &cwd).expect("skills help");
|
||||
@@ -5489,6 +5523,7 @@ mod tests {
|
||||
let sources = skills_help_json["usage"]["sources"]
|
||||
.as_array()
|
||||
.expect("skills help sources");
|
||||
assert_eq!(skills_help_json["status"], "ok");
|
||||
assert_eq!(skills_help_json["usage"]["aliases"][0], "/skill");
|
||||
assert!(sources.iter().any(|value| value == ".omc/skills"));
|
||||
assert!(sources.iter().any(|value| value == ".agents/skills"));
|
||||
@@ -5874,6 +5909,13 @@ mod tests {
|
||||
assert!(report.contains("Invoke as $help"));
|
||||
assert!(report.contains(&install_root.display().to_string()));
|
||||
|
||||
let json_report = super::render_skill_install_report_json(&installed);
|
||||
assert_eq!(json_report["kind"], "skills");
|
||||
assert_eq!(json_report["action"], "install");
|
||||
assert_eq!(json_report["status"], "ok");
|
||||
assert_eq!(json_report["invocation_name"], "help");
|
||||
assert_eq!(json_report["invoke_as"], "$help");
|
||||
|
||||
let roots = vec![SkillRoot {
|
||||
source: DefinitionSource::UserCodexHome,
|
||||
path: install_root.clone(),
|
||||
|
||||
@@ -1050,8 +1050,59 @@ impl PluginManager {
|
||||
Self { config }
|
||||
}
|
||||
|
||||
/// Returns the default bundled plugins root directory.
|
||||
///
|
||||
/// Resolution order (first existing path wins):
|
||||
/// 1. `<exe_dir>/../share/claw/plugins/bundled` — standard install layout
|
||||
/// 2. `<exe_dir>/bundled` — simple relocated layout
|
||||
/// 3. `CARGO_MANIFEST_DIR/bundled` — dev/source-tree fallback (only if it exists)
|
||||
/// 4. `<exe_dir>/../share/claw/plugins/bundled` — canonical default even if missing
|
||||
///
|
||||
/// This avoids baking in a compile-time source-tree path that may be
|
||||
/// inaccessible at runtime (e.g. a root-owned repo directory).
|
||||
#[must_use]
|
||||
pub fn bundled_root() -> PathBuf {
|
||||
// Candidate 1: standard FHS install layout — <prefix>/bin/claw -> <prefix>/share/claw/plugins/bundled
|
||||
if let Ok(exe_path) = std::env::current_exe() {
|
||||
if let Some(exe_dir) = exe_path.parent() {
|
||||
let share_path = exe_dir
|
||||
.join("..")
|
||||
.join("share")
|
||||
.join("claw")
|
||||
.join("plugins")
|
||||
.join("bundled");
|
||||
if share_path.exists() {
|
||||
return share_path;
|
||||
}
|
||||
|
||||
// Candidate 2: simple adjacent layout — <exe_dir>/bundled
|
||||
let adjacent = exe_dir.join("bundled");
|
||||
if adjacent.exists() {
|
||||
return adjacent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Candidate 3: dev/source-tree fallback — only if the directory actually exists
|
||||
let dev_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("bundled");
|
||||
if dev_path.exists() {
|
||||
return dev_path;
|
||||
}
|
||||
|
||||
// Default (nothing found): return the canonical install path even if missing,
|
||||
// so callers get an empty plugin list rather than a permission error.
|
||||
if let Ok(exe_path) = std::env::current_exe() {
|
||||
if let Some(exe_dir) = exe_path.parent() {
|
||||
return exe_dir
|
||||
.join("..")
|
||||
.join("share")
|
||||
.join("claw")
|
||||
.join("plugins")
|
||||
.join("bundled");
|
||||
}
|
||||
}
|
||||
|
||||
// Last resort fallback
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("bundled")
|
||||
}
|
||||
|
||||
@@ -1370,12 +1421,24 @@ impl PluginManager {
|
||||
}
|
||||
|
||||
fn sync_bundled_plugins(&self) -> Result<(), PluginError> {
|
||||
let explicit_root = self.config.bundled_root.is_some();
|
||||
let bundled_root = self
|
||||
.config
|
||||
.bundled_root
|
||||
.clone()
|
||||
.unwrap_or_else(Self::bundled_root);
|
||||
let bundled_plugins = discover_plugin_dirs(&bundled_root)?;
|
||||
let bundled_plugins = match discover_plugin_dirs(&bundled_root) {
|
||||
Ok(plugins) => plugins,
|
||||
// When the bundled root is the auto-detected default and the directory is
|
||||
// inaccessible (e.g. a root-owned source tree), treat it as empty rather
|
||||
// than fatally failing. An explicit config override still surfaces errors.
|
||||
Err(PluginError::Io(ref error))
|
||||
if !explicit_root && error.kind() == std::io::ErrorKind::PermissionDenied =>
|
||||
{
|
||||
Vec::new()
|
||||
}
|
||||
Err(error) => return Err(error),
|
||||
};
|
||||
let mut registry = self.load_registry()?;
|
||||
let mut changed = false;
|
||||
let install_root = self.install_root();
|
||||
@@ -2989,17 +3052,139 @@ mod tests {
|
||||
fn default_bundled_root_loads_repo_bundles_as_installed_plugins() {
|
||||
let _guard = env_guard();
|
||||
let config_home = temp_dir("default-bundled-home");
|
||||
let manager = PluginManager::new(PluginManagerConfig::new(&config_home));
|
||||
|
||||
// Use the repo bundled path explicitly so the test is reliable regardless
|
||||
// of where the binary runs from.
|
||||
let repo_bundled = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("bundled");
|
||||
let mut config = PluginManagerConfig::new(&config_home);
|
||||
config.bundled_root = Some(repo_bundled.clone());
|
||||
let manager = PluginManager::new(config);
|
||||
|
||||
if repo_bundled.exists() {
|
||||
let installed = manager
|
||||
.list_installed_plugins()
|
||||
.expect("bundled plugins should auto-install from repo path");
|
||||
assert!(installed
|
||||
.iter()
|
||||
.any(|plugin| plugin.metadata.id == "example-bundled@bundled"));
|
||||
assert!(installed
|
||||
.iter()
|
||||
.any(|plugin| plugin.metadata.id == "sample-hooks@bundled"));
|
||||
}
|
||||
|
||||
let _ = fs::remove_dir_all(config_home);
|
||||
}
|
||||
|
||||
#[test]
fn default_bundled_root_is_not_blindly_cargo_manifest_dir() {
    // Regression guard: bundled_root() must not unconditionally hand back
    // CARGO_MANIFEST_DIR/bundled. Its result has to either exist (a runtime
    // or dev location was found) or differ from the compile-time source path
    // (a runtime-relative default was chosen).
    let source_tree_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("bundled");
    let resolved = PluginManager::bundled_root();

    // With the source tree absent (e.g. an installed binary), returning the
    // compile-time path would re-introduce the original bug.
    if !source_tree_path.exists() {
        assert_ne!(
            resolved, source_tree_path,
            "bundled_root() must not fall back to CARGO_MANIFEST_DIR when that path \
             does not exist — this would regress the root-owned-dir permission bug"
        );
    }

    // In every scenario the function must yield something usable as a path.
    assert!(
        !resolved.as_os_str().is_empty(),
        "bundled_root() should return a non-empty path"
    );
}
|
||||
|
||||
#[test]
|
||||
fn override_bundled_root_is_used_exactly() {
|
||||
let _guard = env_guard();
|
||||
let config_home = temp_dir("override-bundled-home");
|
||||
let bundled_root = temp_dir("override-bundled-root");
|
||||
write_bundled_plugin(
|
||||
&bundled_root.join("override-plugin"),
|
||||
"override-plugin",
|
||||
"1.0.0",
|
||||
false,
|
||||
);
|
||||
|
||||
let mut config = PluginManagerConfig::new(&config_home);
|
||||
config.bundled_root = Some(bundled_root.clone());
|
||||
let manager = PluginManager::new(config);
|
||||
|
||||
let installed = manager
|
||||
.list_installed_plugins()
|
||||
.expect("default bundled plugins should auto-install");
|
||||
assert!(installed
|
||||
.iter()
|
||||
.any(|plugin| plugin.metadata.id == "example-bundled@bundled"));
|
||||
assert!(installed
|
||||
.iter()
|
||||
.any(|plugin| plugin.metadata.id == "sample-hooks@bundled"));
|
||||
.expect("override bundled_root should be used");
|
||||
assert!(
|
||||
installed
|
||||
.iter()
|
||||
.any(|plugin| plugin.metadata.id == "override-plugin@bundled"),
|
||||
"only the override bundled root should be scanned, not CARGO_MANIFEST_DIR"
|
||||
);
|
||||
|
||||
let _ = fs::remove_dir_all(config_home);
|
||||
let _ = fs::remove_dir_all(bundled_root);
|
||||
}
|
||||
|
||||
#[test]
fn explicit_nonexistent_bundled_root_does_not_fail() {
    // An explicitly configured bundled_root that does not exist must yield a
    // successful listing with no bundled plugins instead of an error
    // (discover_plugin_dirs treats NotFound as empty).
    let _guard = env_guard();
    let config_home = temp_dir("missing-bundled-home");

    let missing_root = temp_dir("nonexistent-bundled-XXXXXXXX");
    assert!(
        !missing_root.exists(),
        "test precondition: path must not exist"
    );

    let mut config = PluginManagerConfig::new(&config_home);
    config.bundled_root = Some(missing_root);
    let manager = PluginManager::new(config);

    // Listing must succeed rather than surface ENOENT.
    let result = manager.list_installed_plugins();
    assert!(
        result.is_ok(),
        "nonexistent explicit bundled root should not fail: {result:?}"
    );

    let has_bundled = result
        .unwrap()
        .iter()
        .any(|plugin| plugin.metadata.kind == PluginKind::Bundled);
    assert!(
        !has_bundled,
        "no bundled plugins should be installed when bundled root path does not exist"
    );

    let _ = fs::remove_dir_all(config_home);
}
|
||||
|
||||
#[test]
fn no_bundled_root_config_uses_auto_detection_without_panic() {
    // With bundled_root left unset (None) the manager falls back to
    // auto-detection. Whatever path that resolves to — an existing dev
    // directory or a runtime-relative default — listing must neither panic
    // nor fail with EACCES.
    let _guard = env_guard();
    let config_home = temp_dir("auto-detect-bundled-home");

    // Default config: no bundled_root override, so bundled_root() decides.
    let manager = PluginManager::new(PluginManagerConfig::new(&config_home));

    let result = manager.list_installed_plugins();
    assert!(
        result.is_ok(),
        "auto-detected bundled root resolution must not fail: {result:?}"
    );

    let _ = fs::remove_dir_all(config_home);
}
|
||||
|
||||
@@ -16,5 +16,8 @@ telemetry = { path = "../telemetry" }
|
||||
tokio = { version = "1", features = ["io-std", "io-util", "macros", "process", "rt", "rt-multi-thread", "time"] }
|
||||
walkdir = "2"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
@@ -108,10 +108,18 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio
|
||||
.first()
|
||||
.and_then(extract_existing_compacted_summary);
|
||||
let compacted_prefix_len = usize::from(existing_summary.is_some());
|
||||
let raw_keep_from = session
|
||||
.messages
|
||||
.len()
|
||||
.saturating_sub(config.preserve_recent_messages);
|
||||
// When preserve_recent_messages is 0, the caller wants maximum compaction
|
||||
// (no recent messages preserved). Without this guard, saturating_sub(0)
|
||||
// returns messages.len(), which later indexes past the end of the array
|
||||
// at session.messages[k] because keep_from == messages.len() is out of bounds.
|
||||
let raw_keep_from = if config.preserve_recent_messages == 0 {
|
||||
session.messages.len()
|
||||
} else {
|
||||
session
|
||||
.messages
|
||||
.len()
|
||||
.saturating_sub(config.preserve_recent_messages)
|
||||
};
|
||||
// Ensure we do not split a tool-use / tool-result pair at the compaction
|
||||
// boundary. If the first preserved message is a user message whose first
|
||||
// block is a ToolResult, the assistant message with the matching ToolUse
|
||||
@@ -128,7 +136,7 @@ pub fn compact_session(session: &Session, config: CompactionConfig) -> Compactio
|
||||
// is NOT an assistant message that contains a ToolUse block (i.e. the
|
||||
// pair is actually broken at the boundary).
|
||||
loop {
|
||||
if k == 0 || k <= compacted_prefix_len {
|
||||
if k == 0 || k <= compacted_prefix_len || k >= session.messages.len() {
|
||||
break;
|
||||
}
|
||||
let first_preserved = &session.messages[k];
|
||||
@@ -291,12 +299,14 @@ fn merge_compact_summaries(existing_summary: Option<&str>, new_summary: &str) ->
|
||||
|
||||
let mut lines = vec!["<summary>".to_string(), "Conversation summary:".to_string()];
|
||||
|
||||
// Flatten prior highlights directly — do NOT re-nest them under
|
||||
// "- Previously compacted context:" or the nesting compounds with each
|
||||
// compaction cycle, inflating the summary by ~depth * overhead per turn.
|
||||
if !previous_highlights.is_empty() {
|
||||
lines.push("- Previously compacted context:".to_string());
|
||||
lines.extend(
|
||||
previous_highlights
|
||||
.into_iter()
|
||||
.map(|line| format!(" {line}")),
|
||||
.map(|line| format!("- {line}")),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -678,7 +688,9 @@ mod tests {
|
||||
second_session.messages = follow_up_messages;
|
||||
let second = compact_session(&second_session, config);
|
||||
|
||||
assert!(second
|
||||
// "Previously compacted context:" header is intentionally flattened
|
||||
// (no re-nesting) to avoid summary inflation on repeated compaction.
|
||||
assert!(!second
|
||||
.formatted_summary
|
||||
.contains("Previously compacted context:"));
|
||||
assert!(second
|
||||
@@ -693,7 +705,7 @@ mod tests {
|
||||
assert!(matches!(
|
||||
&second.compacted_session.messages[0].blocks[0],
|
||||
ContentBlock::Text { text }
|
||||
if text.contains("Previously compacted context:")
|
||||
if !text.contains("Previously compacted context:")
|
||||
&& text.contains("Newly compacted context:")
|
||||
));
|
||||
assert!(matches!(
|
||||
|
||||
@@ -1,7 +1,22 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::collections::{BTreeMap, HashSet};
|
||||
use std::fmt::{Display, Formatter};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
|
||||
/// Process-lifetime set of already-emitted config deprecation warning strings.
/// Prevents duplicate warnings when `ConfigLoader::load()` is called multiple
/// times within a single CLI invocation. (ROADMAP #698)
static EMITTED_CONFIG_WARNINGS: std::sync::OnceLock<Mutex<HashSet<String>>> =
    std::sync::OnceLock::new();

/// Prints `warning: {warning}` to stderr the first time a given warning text
/// is seen in this process; later calls with the same text are silent.
///
/// A poisoned mutex is recovered rather than propagated, so a panic in another
/// thread never suppresses or aborts warning output.
fn emit_config_warning_once(warning: &str) {
    let set = EMITTED_CONFIG_WARNINGS.get_or_init(|| Mutex::new(HashSet::new()));
    let first_time = {
        let mut guard = set.lock().unwrap_or_else(|e| e.into_inner());
        // Look up by `&str` first so repeated warnings (the common case on
        // reload) do not allocate a throwaway `String`.
        if guard.contains(warning) {
            false
        } else {
            guard.insert(warning.to_owned())
        }
    };
    // The guard is dropped above: stderr is written without holding the lock.
    if first_time {
        eprintln!("warning: {warning}");
    }
}
|
||||
|
||||
use crate::json::JsonValue;
|
||||
use crate::sandbox::{FilesystemIsolationMode, SandboxConfig};
|
||||
@@ -90,6 +105,10 @@ pub struct RuntimePermissionRuleConfig {
|
||||
allow: Vec<String>,
|
||||
deny: Vec<String>,
|
||||
ask: Vec<String>,
|
||||
/// #159: simple tool-name denials parsed from the `deniedTools` config field.
|
||||
/// Unlike the `deny` rules (pattern-based), `denied_tools` is a flat list of
|
||||
/// tool names that are unconditionally denied regardless of permission mode.
|
||||
denied_tools: Vec<String>,
|
||||
}
|
||||
|
||||
/// Collection of configured MCP servers after scope-aware merging.
|
||||
@@ -297,7 +316,7 @@ impl ConfigLoader {
|
||||
}
|
||||
|
||||
for warning in &all_warnings {
|
||||
eprintln!("warning: {warning}");
|
||||
emit_config_warning_once(&warning.to_string());
|
||||
}
|
||||
|
||||
let merged_value = JsonValue::Object(merged.clone());
|
||||
@@ -592,6 +611,104 @@ pub fn default_config_home() -> PathBuf {
|
||||
.unwrap_or_else(|| PathBuf::from(".claw"))
|
||||
}
|
||||
|
||||
/// Save provider settings to the user-level `~/.claw/settings.json`.
|
||||
/// Creates the file and directory if they don't exist. Sets file permissions
|
||||
/// to `0o600` (owner read/write only) to protect stored API keys.
|
||||
pub fn save_user_provider_settings(
|
||||
kind: &str,
|
||||
api_key: &str,
|
||||
base_url: Option<&str>,
|
||||
model: Option<&str>,
|
||||
) -> Result<(), ConfigError> {
|
||||
let config_home = default_config_home();
|
||||
fs::create_dir_all(&config_home).map_err(ConfigError::Io)?;
|
||||
let settings_path = config_home.join("settings.json");
|
||||
|
||||
let mut root = read_settings_root(&settings_path);
|
||||
|
||||
let mut provider = serde_json::Map::new();
|
||||
provider.insert(
|
||||
"kind".to_string(),
|
||||
serde_json::Value::String(kind.to_string()),
|
||||
);
|
||||
provider.insert(
|
||||
"apiKey".to_string(),
|
||||
serde_json::Value::String(api_key.to_string()),
|
||||
);
|
||||
if let Some(base_url) = base_url {
|
||||
provider.insert(
|
||||
"baseUrl".to_string(),
|
||||
serde_json::Value::String(base_url.to_string()),
|
||||
);
|
||||
} else {
|
||||
provider.remove("baseUrl");
|
||||
}
|
||||
root.insert("provider".to_string(), serde_json::Value::Object(provider));
|
||||
if let Some(model) = model {
|
||||
root.insert(
|
||||
"model".to_string(),
|
||||
serde_json::Value::String(model.to_string()),
|
||||
);
|
||||
} else {
|
||||
root.remove("model");
|
||||
}
|
||||
|
||||
write_settings_root(&settings_path, &root)?;
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let perms = std::fs::Permissions::from_mode(0o600);
|
||||
fs::set_permissions(&settings_path, perms).map_err(ConfigError::Io)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Remove the `provider` section from the user-level `~/.claw/settings.json`.
|
||||
pub fn clear_user_provider_settings() -> Result<(), ConfigError> {
|
||||
let config_home = default_config_home();
|
||||
let settings_path = config_home.join("settings.json");
|
||||
|
||||
if !settings_path.exists() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut root = read_settings_root(&settings_path);
|
||||
if root.remove("provider").is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
root.remove("model");
|
||||
|
||||
write_settings_root(&settings_path, &root)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn read_settings_root(path: &Path) -> serde_json::Map<String, serde_json::Value> {
|
||||
match fs::read_to_string(path) {
|
||||
Ok(contents) if !contents.trim().is_empty() => {
|
||||
serde_json::from_str::<serde_json::Value>(&contents)
|
||||
.ok()
|
||||
.and_then(|v| v.as_object().cloned())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
_ => serde_json::Map::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn write_settings_root(
|
||||
path: &Path,
|
||||
root: &serde_json::Map<String, serde_json::Value>,
|
||||
) -> Result<(), ConfigError> {
|
||||
if let Some(parent) = path.parent() {
|
||||
fs::create_dir_all(parent).map_err(ConfigError::Io)?;
|
||||
}
|
||||
let rendered = serde_json::to_string_pretty(&serde_json::Value::Object(root.clone()))
|
||||
.map_err(|e| ConfigError::Parse(e.to_string()))?;
|
||||
fs::write(path, format!("{rendered}\n")).map_err(ConfigError::Io)
|
||||
}
|
||||
|
||||
impl RuntimeHookConfig {
|
||||
#[must_use]
|
||||
pub fn new(
|
||||
@@ -640,8 +757,18 @@ impl RuntimeHookConfig {
|
||||
|
||||
impl RuntimePermissionRuleConfig {
|
||||
#[must_use]
|
||||
pub fn new(allow: Vec<String>, deny: Vec<String>, ask: Vec<String>) -> Self {
|
||||
Self { allow, deny, ask }
|
||||
pub fn new(
|
||||
allow: Vec<String>,
|
||||
deny: Vec<String>,
|
||||
ask: Vec<String>,
|
||||
denied_tools: Vec<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
allow,
|
||||
deny,
|
||||
ask,
|
||||
denied_tools,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
@@ -658,6 +785,11 @@ impl RuntimePermissionRuleConfig {
|
||||
pub fn ask(&self) -> &[String] {
|
||||
&self.ask
|
||||
}
|
||||
|
||||
/// Tool names parsed from the `deniedTools` config field, returned as a
/// borrowed slice of the stored list.
#[must_use]
|
||||
pub fn denied_tools(&self) -> &[String] {
|
||||
&self.denied_tools
|
||||
}
|
||||
}
|
||||
|
||||
impl McpConfigCollection {
|
||||
@@ -828,6 +960,12 @@ fn parse_optional_permission_rules(
|
||||
.unwrap_or_default(),
|
||||
ask: optional_string_array(permissions, "ask", "merged settings.permissions")?
|
||||
.unwrap_or_default(),
|
||||
denied_tools: optional_string_array(
|
||||
permissions,
|
||||
"deniedTools",
|
||||
"merged settings.permissions",
|
||||
)?
|
||||
.unwrap_or_default(),
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -197,6 +197,10 @@ const TOP_LEVEL_FIELDS: &[FieldSpec] = &[
|
||||
name: "trustedRoots",
|
||||
expected: FieldType::StringArray,
|
||||
},
|
||||
FieldSpec {
|
||||
name: "provider",
|
||||
expected: FieldType::Object,
|
||||
},
|
||||
];
|
||||
|
||||
const HOOKS_FIELDS: &[FieldSpec] = &[
|
||||
@@ -223,6 +227,10 @@ const PERMISSIONS_FIELDS: &[FieldSpec] = &[
|
||||
name: "allow",
|
||||
expected: FieldType::StringArray,
|
||||
},
|
||||
FieldSpec {
|
||||
name: "deniedTools",
|
||||
expected: FieldType::StringArray,
|
||||
},
|
||||
FieldSpec {
|
||||
name: "deny",
|
||||
expected: FieldType::StringArray,
|
||||
@@ -310,6 +318,25 @@ const OAUTH_FIELDS: &[FieldSpec] = &[
|
||||
},
|
||||
];
|
||||
|
||||
/// Field specs for the `provider` settings object: `kind`, `apiKey`,
/// `baseUrl`, and `model`, each expected to be a string. Passed to
/// `validate_object_keys` when a config file contains a `provider` object.
const PROVIDER_FIELDS: &[FieldSpec] = &[
|
||||
FieldSpec {
|
||||
name: "kind",
|
||||
expected: FieldType::String,
|
||||
},
|
||||
FieldSpec {
|
||||
name: "apiKey",
|
||||
expected: FieldType::String,
|
||||
},
|
||||
FieldSpec {
|
||||
name: "baseUrl",
|
||||
expected: FieldType::String,
|
||||
},
|
||||
FieldSpec {
|
||||
name: "model",
|
||||
expected: FieldType::String,
|
||||
},
|
||||
];
|
||||
|
||||
const DEPRECATED_FIELDS: &[DeprecatedField] = &[
|
||||
DeprecatedField {
|
||||
name: "permissionMode",
|
||||
@@ -501,6 +528,15 @@ pub fn validate_config_file(
|
||||
&path_display,
|
||||
));
|
||||
}
|
||||
if let Some(provider) = object.get("provider").and_then(JsonValue::as_object) {
|
||||
result.merge(validate_object_keys(
|
||||
provider,
|
||||
PROVIDER_FIELDS,
|
||||
"provider",
|
||||
source,
|
||||
&path_display,
|
||||
));
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
@@ -39,6 +39,7 @@ mod report_schema;
|
||||
pub mod sandbox;
|
||||
mod session;
|
||||
pub mod session_control;
|
||||
pub mod trident;
|
||||
pub use session_control::SessionStore;
|
||||
mod sse;
|
||||
pub mod stale_base;
|
||||
|
||||
@@ -102,6 +102,10 @@ pub struct PermissionPolicy {
|
||||
allow_rules: Vec<PermissionRule>,
|
||||
deny_rules: Vec<PermissionRule>,
|
||||
ask_rules: Vec<PermissionRule>,
|
||||
/// #159: simple tool-name denials. Tools in this list are unconditionally
|
||||
/// denied regardless of permission mode, checked before the rule-based
|
||||
/// deny/allow/ask evaluation.
|
||||
denied_tools: Vec<String>,
|
||||
}
|
||||
|
||||
impl PermissionPolicy {
|
||||
@@ -113,6 +117,7 @@ impl PermissionPolicy {
|
||||
allow_rules: Vec::new(),
|
||||
deny_rules: Vec::new(),
|
||||
ask_rules: Vec::new(),
|
||||
denied_tools: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -144,6 +149,7 @@ impl PermissionPolicy {
|
||||
.iter()
|
||||
.map(|rule| PermissionRule::parse(rule))
|
||||
.collect();
|
||||
self.denied_tools = config.denied_tools().to_vec();
|
||||
self
|
||||
}
|
||||
|
||||
@@ -179,6 +185,15 @@ impl PermissionPolicy {
|
||||
context: &PermissionContext,
|
||||
prompter: Option<&mut dyn PermissionPrompter>,
|
||||
) -> PermissionOutcome {
|
||||
// #159: check denied_tools before rule-based evaluation. Tools listed
|
||||
// in the denied_tools config are unconditionally denied regardless of
|
||||
// permission mode.
|
||||
if self.denied_tools.iter().any(|t| t == tool_name) {
|
||||
return PermissionOutcome::Deny {
|
||||
reason: format!("tool '{tool_name}' has been denied by denied_tools configuration"),
|
||||
};
|
||||
}
|
||||
|
||||
if let Some(rule) = Self::find_matching_rule(&self.deny_rules, tool_name, input) {
|
||||
return PermissionOutcome::Deny {
|
||||
reason: format!(
|
||||
@@ -571,6 +586,7 @@ mod tests {
|
||||
vec!["bash(git:*)".to_string()],
|
||||
vec!["bash(rm -rf:*)".to_string()],
|
||||
Vec::new(),
|
||||
Vec::new(),
|
||||
);
|
||||
let policy = PermissionPolicy::new(PermissionMode::ReadOnly)
|
||||
.with_tool_requirement("bash", PermissionMode::DangerFullAccess)
|
||||
@@ -586,12 +602,39 @@ mod tests {
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
fn denied_tools_denies_listed_tools_unconditionally() {
    // Only `denied_tools` is populated; allow/deny/ask rule lists stay empty.
    let denied = vec!["bash".to_string(), "write_file".to_string()];
    let rules = RuntimePermissionRuleConfig::new(Vec::new(), Vec::new(), Vec::new(), denied);
    let policy = PermissionPolicy::new(PermissionMode::Allow).with_permission_rules(&rules);

    // Every listed tool is rejected with a reason naming denied_tools, even
    // though the active mode is Allow.
    for (tool, input) in [("bash", "echo hello"), ("write_file", "{}")] {
        let outcome = policy.authorize(tool, input, None);
        assert!(matches!(
            outcome,
            PermissionOutcome::Deny { reason } if reason.contains("denied_tools")
        ));
    }

    // A tool that is not listed is unaffected.
    let outcome = policy.authorize("read_file", "{}", None);
    assert_eq!(outcome, PermissionOutcome::Allow);
}
|
||||
|
||||
#[test]
|
||||
fn ask_rules_force_prompt_even_when_mode_allows() {
|
||||
let rules = RuntimePermissionRuleConfig::new(
|
||||
Vec::new(),
|
||||
Vec::new(),
|
||||
vec!["bash(git:*)".to_string()],
|
||||
Vec::new(),
|
||||
);
|
||||
let policy = PermissionPolicy::new(PermissionMode::DangerFullAccess)
|
||||
.with_tool_requirement("bash", PermissionMode::DangerFullAccess)
|
||||
@@ -617,6 +660,7 @@ mod tests {
|
||||
Vec::new(),
|
||||
Vec::new(),
|
||||
vec!["bash(git:*)".to_string()],
|
||||
Vec::new(),
|
||||
);
|
||||
let policy = PermissionPolicy::new(PermissionMode::ReadOnly)
|
||||
.with_tool_requirement("bash", PermissionMode::DangerFullAccess)
|
||||
|
||||
@@ -42,6 +42,7 @@ pub const SYSTEM_PROMPT_DYNAMIC_BOUNDARY: &str = "__SYSTEM_PROMPT_DYNAMIC_BOUNDA
|
||||
pub const FRONTIER_MODEL_NAME: &str = "Claude Opus 4.6";
|
||||
const MAX_INSTRUCTION_FILE_CHARS: usize = 4_000;
|
||||
const MAX_TOTAL_INSTRUCTION_CHARS: usize = 12_000;
|
||||
const MAX_GIT_DIFF_CHARS: usize = 50_000;
|
||||
|
||||
/// Neutral identity for the model family line in generated prompts.
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
@@ -295,10 +296,22 @@ fn read_git_diff(cwd: &Path) -> Option<String> {
|
||||
if sections.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(sections.join("\n\n"))
|
||||
Some(truncate_diff(sections.join("\n\n")))
|
||||
}
|
||||
}
|
||||
|
||||
fn truncate_diff(mut diff: String) -> String {
|
||||
if diff.len() > MAX_GIT_DIFF_CHARS {
|
||||
let mut end = MAX_GIT_DIFF_CHARS;
|
||||
while !diff.is_char_boundary(end) {
|
||||
end -= 1;
|
||||
}
|
||||
diff.truncate(end);
|
||||
diff.push_str("\n\n... [diff truncated — too large for system prompt]");
|
||||
}
|
||||
diff
|
||||
}
|
||||
|
||||
fn read_git_output(cwd: &Path, args: &[&str]) -> Option<String> {
|
||||
let output = Command::new("git")
|
||||
.args(args)
|
||||
@@ -549,9 +562,9 @@ fn get_actions_section() -> String {
|
||||
mod tests {
|
||||
use super::{
|
||||
collapse_blank_lines, display_context_path, normalize_instruction_content,
|
||||
render_instruction_content, render_instruction_files, truncate_instruction_content,
|
||||
ContextFile, ModelFamilyIdentity, ProjectContext, SystemPromptBuilder,
|
||||
SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
|
||||
render_instruction_content, render_instruction_files, truncate_diff,
|
||||
truncate_instruction_content, ContextFile, ModelFamilyIdentity, ProjectContext,
|
||||
SystemPromptBuilder, MAX_GIT_DIFF_CHARS, SYSTEM_PROMPT_DYNAMIC_BOUNDARY,
|
||||
};
|
||||
use crate::config::ConfigLoader;
|
||||
use std::fs;
|
||||
@@ -981,4 +994,46 @@ mod tests {
|
||||
assert!(rendered.contains("scope: /tmp/project"));
|
||||
assert!(rendered.contains("Project rules"));
|
||||
}
|
||||
|
||||
#[test]
fn truncate_diff_preserves_short_content() {
    // Input under the limit must come back unchanged, with no marker appended.
    let input = "a".repeat(1_000);
    let output = truncate_diff(input.clone());
    assert_eq!(output, input);
    assert!(!output.contains("[diff truncated"));
}
|
||||
|
||||
#[test]
fn truncate_diff_caps_oversized_content() {
    let marker = "\n\n... [diff truncated — too large for system prompt]";
    let output = truncate_diff("x".repeat(MAX_GIT_DIFF_CHARS + 5_000));

    assert!(output.contains("... [diff truncated — too large for system prompt]"));
    // Whatever precedes the marker may occupy at most MAX_GIT_DIFF_CHARS bytes.
    let body_len = output.len() - marker.len();
    assert!(body_len <= MAX_GIT_DIFF_CHARS);
}
|
||||
|
||||
#[test]
fn truncate_diff_respects_utf8_char_boundaries() {
    // Build a string where MAX_GIT_DIFF_CHARS falls in the middle of a
    // multi-byte character (U+1F600 = 4 bytes in UTF-8).
    let prefix_len = MAX_GIT_DIFF_CHARS - 2;
    let mut input = "a".repeat(prefix_len);
    // Append a 4-byte emoji so bytes [prefix_len..prefix_len+4] are the
    // emoji. MAX_GIT_DIFF_CHARS lands at prefix_len+2, inside the emoji.
    input.push('\u{1F600}');
    input.push_str(&"b".repeat(10_000));

    let result = truncate_diff(input);
    assert!(result.contains("[diff truncated"));

    let marker = "\n\n... [diff truncated — too large for system prompt]";
    let body = result
        .strip_suffix(marker)
        .expect("truncated output must end with the truncation marker");
    assert!(body.len() <= MAX_GIT_DIFF_CHARS);
    // The cut has to back off to the start of the emoji, so the body is
    // exactly the ASCII prefix: no partial emoji and nothing after it.
    assert_eq!(body, "a".repeat(prefix_len));
}
|
||||
}
|
||||
|
||||
@@ -158,9 +158,15 @@ impl SessionStore {
|
||||
}
|
||||
|
||||
pub fn latest_session(&self) -> Result<ManagedSessionSummary, SessionControlError> {
|
||||
self.list_sessions()?.into_iter().next().ok_or_else(|| {
|
||||
SessionControlError::Format(format_no_managed_sessions(&self.sessions_root))
|
||||
})
|
||||
if let Some(latest) = self.list_sessions()?.into_iter().next() {
|
||||
return Ok(latest);
|
||||
}
|
||||
if let Some(latest) = self.scan_global_sessions()?.into_iter().next() {
|
||||
return Ok(latest);
|
||||
}
|
||||
Err(SessionControlError::Format(format_no_managed_sessions(
|
||||
&self.sessions_root,
|
||||
)))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
@@ -190,6 +196,38 @@ impl SessionStore {
|
||||
})
|
||||
}
|
||||
|
||||
/// Load a session by reference, allowing cross-workspace resume for aliases.
|
||||
/// When the reference is an alias ("latest", "last", "recent"), workspace
|
||||
/// mismatch validation is skipped so `/resume latest` works across workspaces.
|
||||
/// For explicit session references, workspace validation is still enforced.
|
||||
pub fn load_session_loose(
|
||||
&self,
|
||||
reference: &str,
|
||||
) -> Result<LoadedManagedSession, SessionControlError> {
|
||||
match self.load_session(reference) {
|
||||
Ok(loaded) => Ok(loaded),
|
||||
Err(SessionControlError::WorkspaceMismatch { expected, actual })
|
||||
if is_session_reference_alias(reference) =>
|
||||
{
|
||||
let handle = self.resolve_reference(reference)?;
|
||||
let session = Session::load_from_path(&handle.path)?;
|
||||
eprintln!(
|
||||
" Note: resuming session from a different workspace (origin: {})",
|
||||
actual.display()
|
||||
);
|
||||
let _ = expected; // suppress unused warning
|
||||
Ok(LoadedManagedSession {
|
||||
handle: SessionHandle {
|
||||
id: session.session_id.clone(),
|
||||
path: handle.path,
|
||||
},
|
||||
session,
|
||||
})
|
||||
}
|
||||
Err(other) => Err(other),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fork_session(
|
||||
&self,
|
||||
session: &Session,
|
||||
@@ -221,6 +259,47 @@ impl SessionStore {
|
||||
.map(Path::to_path_buf)
|
||||
}
|
||||
|
||||
/// Scan all known session storage locations for sessions from any workspace.
|
||||
/// Checks both the global root (~/.claw/sessions/) and the project-local
|
||||
/// .claw/sessions/ parent directory. Used as a fallback when the current
|
||||
/// workspace has no sessions.
|
||||
#[allow(clippy::unnecessary_wraps)]
|
||||
fn scan_global_sessions(&self) -> Result<Vec<ManagedSessionSummary>, SessionControlError> {
|
||||
let mut sessions = Vec::new();
|
||||
|
||||
// Scan global root: ~/.claw/sessions/<fingerprint>/
|
||||
let global_root = global_sessions_root();
|
||||
if let Ok(entries) = fs::read_dir(&global_root) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_dir() {
|
||||
let _ = Self::collect_sessions_from_dir_unvalidated(&path, &mut sessions);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Scan project-local parent: <cwd>/.claw/sessions/<fingerprint>/
|
||||
// Sessions are stored here by from_cwd(), so we must check all
|
||||
// fingerprint subdirs, not just the current workspace's.
|
||||
if let Some(local_parent) = self.legacy_sessions_root() {
|
||||
if let Ok(entries) = fs::read_dir(&local_parent) {
|
||||
for entry in entries.flatten() {
|
||||
let path = entry.path();
|
||||
if path.is_dir() && path != self.sessions_root {
|
||||
let _ = Self::collect_sessions_from_dir_unvalidated(&path, &mut sessions);
|
||||
} else if path == self.sessions_root {
|
||||
// Already searched in list_sessions(), but include here
|
||||
// in case this is called standalone
|
||||
let _ = Self::collect_sessions_from_dir_unvalidated(&path, &mut sessions);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sort_managed_sessions(&mut sessions);
|
||||
Ok(sessions)
|
||||
}
|
||||
|
||||
fn validate_loaded_session(
|
||||
&self,
|
||||
session_path: &Path,
|
||||
@@ -305,6 +384,65 @@ impl SessionStore {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Like `collect_sessions_from_dir` but skips workspace validation.
|
||||
/// Used by the global scan fallback to discover sessions from any workspace.
|
||||
fn collect_sessions_from_dir_unvalidated(
|
||||
directory: &Path,
|
||||
sessions: &mut Vec<ManagedSessionSummary>,
|
||||
) -> Result<(), SessionControlError> {
|
||||
let entries = match fs::read_dir(directory) {
|
||||
Ok(entries) => entries,
|
||||
Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
|
||||
Err(err) => return Err(err.into()),
|
||||
};
|
||||
for entry in entries {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
if !is_managed_session_file(&path) {
|
||||
continue;
|
||||
}
|
||||
let metadata = entry.metadata()?;
|
||||
let modified_epoch_millis = metadata
|
||||
.modified()
|
||||
.ok()
|
||||
.and_then(|time| time.duration_since(UNIX_EPOCH).ok())
|
||||
.map(|duration| duration.as_millis())
|
||||
.unwrap_or_default();
|
||||
let summary = match Session::load_from_path(&path) {
|
||||
Ok(session) => ManagedSessionSummary {
|
||||
id: session.session_id,
|
||||
path,
|
||||
updated_at_ms: session.updated_at_ms,
|
||||
modified_epoch_millis,
|
||||
message_count: session.messages.len(),
|
||||
parent_session_id: session
|
||||
.fork
|
||||
.as_ref()
|
||||
.map(|fork| fork.parent_session_id.clone()),
|
||||
branch_name: session
|
||||
.fork
|
||||
.as_ref()
|
||||
.and_then(|fork| fork.branch_name.clone()),
|
||||
},
|
||||
Err(_) => ManagedSessionSummary {
|
||||
id: path
|
||||
.file_stem()
|
||||
.and_then(|value| value.to_str())
|
||||
.unwrap_or("unknown")
|
||||
.to_string(),
|
||||
path,
|
||||
updated_at_ms: 0,
|
||||
modified_epoch_millis,
|
||||
message_count: 0,
|
||||
parent_session_id: None,
|
||||
branch_name: None,
|
||||
},
|
||||
};
|
||||
sessions.push(summary);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Stable hex fingerprint of a workspace path.
|
||||
@@ -322,6 +460,13 @@ pub fn workspace_fingerprint(workspace_root: &Path) -> String {
|
||||
format!("{hash:016x}")
|
||||
}
|
||||
|
||||
/// The global sessions directory shared across all workspaces.
|
||||
/// Points to `~/.claw/sessions/` (or `$CLAW_CONFIG_HOME/sessions/`).
|
||||
#[must_use]
|
||||
pub fn global_sessions_root() -> PathBuf {
|
||||
crate::config::default_config_home().join("sessions")
|
||||
}
|
||||
|
||||
pub const PRIMARY_SESSION_EXTENSION: &str = "jsonl";
|
||||
pub const LEGACY_SESSION_EXTENSION: &str = "json";
|
||||
pub const LATEST_SESSION_REFERENCE: &str = "latest";
|
||||
@@ -574,7 +719,7 @@ fn format_no_managed_sessions(sessions_root: &Path) -> String {
|
||||
.and_then(|f| f.to_str())
|
||||
.unwrap_or("<unknown>");
|
||||
format!(
|
||||
"no managed sessions found in .claw/sessions/{fingerprint_dir}/\nStart `claw` to create a session, then rerun with `--resume {LATEST_SESSION_REFERENCE}`.\nNote: claw partitions sessions per workspace fingerprint; sessions from other CWDs are invisible."
|
||||
"no managed sessions found in .claw/sessions/{fingerprint_dir}/\nStart `claw` to create a session, then rerun with `--resume {LATEST_SESSION_REFERENCE}`.\nNote: /resume {LATEST_SESSION_REFERENCE} searches all workspaces."
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1085,4 +1230,44 @@ mod tests {
|
||||
);
|
||||
fs::remove_dir_all(base).expect("temp dir should clean up");
|
||||
}
|
||||
|
||||
/// #160 regression: the store-level `list_sessions` / `session_exists` /
/// `delete_session` lifecycle works end-to-end.
#[test]
fn session_store_lifecycle_regression_160() {
    // given: a store rooted in a fresh temp dir holding one persisted session
    let workspace = temp_dir();
    fs::create_dir_all(&workspace).expect("base dir should exist");
    let store = SessionStore::from_cwd(&workspace).expect("store should build");
    let persisted = persist_session_via_store(&store, "160 regression test");
    let session_id = &persisted.session_id;

    // then: the session is listed and reported as existing before deletion
    let listed_before = store.list_sessions().expect("list");
    assert!(
        !listed_before.is_empty(),
        "store should have at least one session"
    );
    assert!(
        store.session_exists(session_id),
        "session should exist before deletion"
    );

    // when: the session is deleted
    let deleted = store
        .delete_session(session_id)
        .expect("delete should succeed");

    // then: the file, the existence check and the listing all agree it is gone
    assert_eq!(&deleted.id, session_id);
    assert!(!deleted.path.exists(), "session file should be removed");
    assert!(
        !store.session_exists(session_id),
        "session should not exist after deletion"
    );
    let listed_after = store.list_sessions().expect("list");
    assert!(
        listed_after.is_empty(),
        "store should have no sessions after deletion"
    );

    fs::remove_dir_all(workspace).expect("temp dir should clean up");
}
|
||||
}
|
||||
|
||||
849
rust/crates/runtime/src/trident.rs
Normal file
849
rust/crates/runtime/src/trident.rs
Normal file
@@ -0,0 +1,849 @@
|
||||
use crate::compact::{compact_session, CompactionConfig, CompactionResult};
|
||||
use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};
|
||||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
/// Configuration for the Trident compaction pipeline.
///
/// Each `*_enabled` flag switches one stage of `trident_compact_session` on or
/// off; the remaining fields are the tuning values handed to those stages.
#[derive(Debug, Clone, PartialEq)]
pub struct TridentConfig {
    /// Run stage 1 (supersede).
    pub supersede_enabled: bool,
    /// Run stage 2 (collapse).
    pub collapse_enabled: bool,
    /// Run stage 3 (cluster).
    pub cluster_enabled: bool,
    /// Minimum length of a run of chatty messages before stage 2 collapses it.
    pub collapse_threshold: usize,
    /// Minimum number of similar messages stage 3 needs to form a cluster.
    pub cluster_min_size: usize,
    /// Similarity score at or above which stage 3 groups two messages.
    pub cluster_similarity_threshold: f64,
    /// NOTE(review): not read by any stage in this file — confirm whether it is
    /// consumed elsewhere or reserved for later use.
    pub max_file_operations: usize,
}

impl Default for TridentConfig {
    /// All three stages enabled, with the stock tuning values.
    fn default() -> Self {
        let stage_enabled = true;
        Self {
            supersede_enabled: stage_enabled,
            collapse_enabled: stage_enabled,
            cluster_enabled: stage_enabled,
            collapse_threshold: 4,
            cluster_min_size: 3,
            cluster_similarity_threshold: 0.6,
            max_file_operations: 100,
        }
    }
}
|
||||
|
||||
/// Statistics from a Trident compaction run.
///
/// `Default` yields all-zero counters, which is exactly what the derived
/// implementation produces, so no hand-written `impl Default` is needed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TridentStats {
    /// Messages removed by stage 1 (supersede).
    pub superseded_count: usize,
    /// Runs of chatty messages that stage 2 replaced with a summary.
    pub collapsed_chains: usize,
    /// Messages folded into those stage 2 summaries.
    pub messages_collapsed: usize,
    /// Clusters formed by stage 3.
    pub clusters_found: usize,
    /// Messages folded into those stage 3 clusters.
    pub messages_clustered: usize,
    /// Estimated tokens saved: the estimate before the stages minus the
    /// estimate after them, saturating at zero.
    pub tokens_saved_estimate: usize,
    /// Message count before any stage ran.
    pub original_message_count: usize,
    /// Message count after the enabled stages ran.
    pub final_message_count: usize,
}
|
||||
|
||||
impl TridentStats {
|
||||
pub fn format_report(&self) -> String {
|
||||
let compression = if self.final_message_count > 0 {
|
||||
self.original_message_count as f64 / self.final_message_count as f64
|
||||
} else {
|
||||
1.0
|
||||
};
|
||||
let mut lines = vec![
|
||||
"Trident Compaction Complete".to_string(),
|
||||
format!(
|
||||
" Stage 1 (Supersede): {} obsolete removed",
|
||||
self.superseded_count
|
||||
),
|
||||
format!(
|
||||
" Stage 2 (Collapse): {} -> {} summaries",
|
||||
self.messages_collapsed, self.collapsed_chains
|
||||
),
|
||||
format!(
|
||||
" Stage 3 (Cluster): {} -> {} clusters",
|
||||
self.messages_clustered, self.clusters_found
|
||||
),
|
||||
format!(" Original: {} messages", self.original_message_count),
|
||||
format!(
|
||||
" Final: {} messages ({:.1}x compression)",
|
||||
self.final_message_count, compression
|
||||
),
|
||||
];
|
||||
if self.tokens_saved_estimate > 0 {
|
||||
lines.push(format!(
|
||||
" Est. tokens saved: ~{}",
|
||||
self.tokens_saved_estimate
|
||||
));
|
||||
}
|
||||
lines.join("\n")
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of the Trident compaction pipeline.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct TridentResult {
|
||||
pub compacted_session: Session,
|
||||
pub stats: TridentStats,
|
||||
}
|
||||
|
||||
/// Run the full Trident compaction pipeline on a session, then apply
|
||||
/// the standard summary-based compaction.
|
||||
pub fn trident_compact_session(
|
||||
session: &Session,
|
||||
compaction_config: CompactionConfig,
|
||||
trident_config: &TridentConfig,
|
||||
) -> CompactionResult {
|
||||
let original_count = session.messages.len();
|
||||
let original_tokens: usize = session.messages.iter().map(estimate_message_tokens).sum();
|
||||
|
||||
let mut stats = TridentStats {
|
||||
original_message_count: original_count,
|
||||
..TridentStats::default()
|
||||
};
|
||||
|
||||
let mut messages = session.messages.clone();
|
||||
|
||||
if trident_config.supersede_enabled {
|
||||
let (kept, superseded_count) = stage1_supersede(&messages);
|
||||
stats.superseded_count = superseded_count;
|
||||
messages = kept;
|
||||
}
|
||||
|
||||
if trident_config.collapse_enabled {
|
||||
let (collapsed, chains, collapsed_count) =
|
||||
stage2_collapse(&messages, trident_config.collapse_threshold);
|
||||
stats.collapsed_chains = chains;
|
||||
stats.messages_collapsed = collapsed_count;
|
||||
messages = collapsed;
|
||||
}
|
||||
|
||||
if trident_config.cluster_enabled {
|
||||
let (clustered, clusters_found, messages_clustered) = stage3_cluster(
|
||||
&messages,
|
||||
trident_config.cluster_min_size,
|
||||
trident_config.cluster_similarity_threshold,
|
||||
);
|
||||
stats.clusters_found = clusters_found;
|
||||
stats.messages_clustered = messages_clustered;
|
||||
messages = clustered;
|
||||
}
|
||||
|
||||
stats.final_message_count = messages.len();
|
||||
|
||||
let final_tokens: usize = messages.iter().map(estimate_message_tokens).sum();
|
||||
stats.tokens_saved_estimate = original_tokens.saturating_sub(final_tokens);
|
||||
|
||||
let mut trident_session = session.clone();
|
||||
trident_session.messages = messages;
|
||||
|
||||
let result = compact_session(&trident_session, compaction_config);
|
||||
|
||||
if stats.superseded_count > 0 || stats.collapsed_chains > 0 || stats.clusters_found > 0 {
|
||||
eprintln!("{}", stats.format_report());
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// STAGE 1: SUPERSEDE — Zero-cost factual pruning
|
||||
// =============================================================================
|
||||
|
||||
/// Kind of file access performed by a recognised file tool.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileOp {
    /// Produced for the `read_file` / `Read` tools.
    Read,
    /// Produced for the `write_file` / `Write` tools.
    Write,
    /// Produced for the `edit_file` / `Edit` tools.
    Edit,
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct FileOperation {
|
||||
index: usize,
|
||||
op_type: FileOp,
|
||||
}
|
||||
|
||||
fn stage1_supersede(messages: &[ConversationMessage]) -> (Vec<ConversationMessage>, usize) {
|
||||
let mut file_ops: BTreeMap<String, Vec<FileOperation>> = BTreeMap::new();
|
||||
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
for block in &msg.blocks {
|
||||
if let Some((path, op_type)) = extract_file_operation(block) {
|
||||
file_ops
|
||||
.entry(path)
|
||||
.or_default()
|
||||
.push(FileOperation { index: i, op_type });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut obsolete_indices: BTreeSet<usize> = BTreeSet::new();
|
||||
|
||||
for (_path, ops) in &file_ops {
|
||||
if ops.len() < 2 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let last_write_idx = ops
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|op| op.op_type == FileOp::Write || op.op_type == FileOp::Edit)
|
||||
.map(|op| op.index);
|
||||
|
||||
if let Some(last_write) = last_write_idx {
|
||||
for op in ops {
|
||||
if op.op_type == FileOp::Read && op.index < last_write {
|
||||
obsolete_indices.insert(op.index);
|
||||
} else if (op.op_type == FileOp::Write || op.op_type == FileOp::Edit)
|
||||
&& op.index < last_write
|
||||
{
|
||||
obsolete_indices.insert(op.index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let superseded_count = obsolete_indices.len();
|
||||
let kept: Vec<ConversationMessage> = messages
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(i, _)| !obsolete_indices.contains(i))
|
||||
.map(|(_, msg)| msg.clone())
|
||||
.collect();
|
||||
|
||||
(kept, superseded_count)
|
||||
}
|
||||
|
||||
fn extract_file_operation(block: &ContentBlock) -> Option<(String, FileOp)> {
|
||||
match block {
|
||||
ContentBlock::ToolUse { name, input, .. } => {
|
||||
let path = extract_path_from_tool_input(name, input)?;
|
||||
let op_type = match name.as_str() {
|
||||
"read_file" | "Read" => FileOp::Read,
|
||||
"write_file" | "Write" => FileOp::Write,
|
||||
"edit_file" | "Edit" => FileOp::Edit,
|
||||
_ => return None,
|
||||
};
|
||||
Some((path, op_type))
|
||||
}
|
||||
ContentBlock::ToolResult {
|
||||
tool_name, output, ..
|
||||
} => {
|
||||
let path = extract_path_from_tool_output(tool_name, output)?;
|
||||
let op_type = match tool_name.as_str() {
|
||||
"read_file" | "Read" => FileOp::Read,
|
||||
"write_file" | "Write" => FileOp::Write,
|
||||
"edit_file" | "Edit" => FileOp::Edit,
|
||||
_ => return None,
|
||||
};
|
||||
Some((path, op_type))
|
||||
}
|
||||
ContentBlock::Text { .. } => None,
|
||||
ContentBlock::Thinking { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_path_from_tool_input(tool_name: &str, input: &str) -> Option<String> {
|
||||
if !matches!(
|
||||
tool_name,
|
||||
"read_file" | "write_file" | "edit_file" | "Read" | "Write" | "Edit"
|
||||
) {
|
||||
return None;
|
||||
}
|
||||
serde_json::from_str::<serde_json::Value>(input)
|
||||
.ok()
|
||||
.and_then(|v| v.get("path")?.as_str().map(String::from))
|
||||
.or_else(|| {
|
||||
serde_json::from_str::<serde_json::Value>(input)
|
||||
.ok()
|
||||
.and_then(|v| v.get("file_path")?.as_str().map(String::from))
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_path_from_tool_output(tool_name: &str, output: &str) -> Option<String> {
|
||||
if !matches!(
|
||||
tool_name,
|
||||
"read_file" | "write_file" | "edit_file" | "Read" | "Write" | "Edit"
|
||||
) {
|
||||
return None;
|
||||
}
|
||||
serde_json::from_str::<serde_json::Value>(output)
|
||||
.ok()
|
||||
.and_then(|v| v.get("path")?.as_str().map(String::from))
|
||||
.or_else(|| {
|
||||
output
|
||||
.lines()
|
||||
.next()
|
||||
.and_then(|line| line.strip_prefix("path: "))
|
||||
.map(String::from)
|
||||
})
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// STAGE 2: COLLAPSE — Summarize chatty exchanges
|
||||
// =============================================================================
|
||||
|
||||
fn stage2_collapse(
|
||||
messages: &[ConversationMessage],
|
||||
threshold: usize,
|
||||
) -> (Vec<ConversationMessage>, usize, usize) {
|
||||
if messages.len() < threshold {
|
||||
return (messages.to_vec(), 0, 0);
|
||||
}
|
||||
|
||||
let mut result: Vec<ConversationMessage> = Vec::new();
|
||||
let mut buffer: Vec<ConversationMessage> = Vec::new();
|
||||
let mut total_chains = 0;
|
||||
let mut total_collapsed = 0;
|
||||
|
||||
for msg in messages {
|
||||
if is_chatty_message(msg) {
|
||||
buffer.push(msg.clone());
|
||||
} else {
|
||||
if buffer.len() >= threshold {
|
||||
let summary = generate_collapse_summary(&buffer);
|
||||
total_chains += 1;
|
||||
total_collapsed += buffer.len();
|
||||
result.push(ConversationMessage {
|
||||
role: MessageRole::System,
|
||||
blocks: vec![ContentBlock::Text {
|
||||
text: format!("[Collapsed Conversation]\n{summary}"),
|
||||
}],
|
||||
usage: None,
|
||||
});
|
||||
} else {
|
||||
result.extend(buffer.drain(..));
|
||||
}
|
||||
buffer.clear();
|
||||
result.push(msg.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if buffer.len() >= threshold {
|
||||
let summary = generate_collapse_summary(&buffer);
|
||||
total_chains += 1;
|
||||
total_collapsed += buffer.len();
|
||||
result.push(ConversationMessage {
|
||||
role: MessageRole::System,
|
||||
blocks: vec![ContentBlock::Text {
|
||||
text: format!("[Collapsed Conversation]\n{summary}"),
|
||||
}],
|
||||
usage: None,
|
||||
});
|
||||
} else {
|
||||
result.extend(buffer);
|
||||
}
|
||||
|
||||
(result, total_chains, total_collapsed)
|
||||
}
|
||||
|
||||
fn is_chatty_message(msg: &ConversationMessage) -> bool {
|
||||
let total_chars: usize = msg
|
||||
.blocks
|
||||
.iter()
|
||||
.map(|b| match b {
|
||||
ContentBlock::Text { text } => text.len(),
|
||||
ContentBlock::ToolUse { input, .. } => input.len(),
|
||||
ContentBlock::ToolResult { output, .. } => output.len(),
|
||||
ContentBlock::Thinking { thinking, .. } => thinking.len(),
|
||||
})
|
||||
.sum();
|
||||
|
||||
let has_tool_use = msg
|
||||
.blocks
|
||||
.iter()
|
||||
.any(|b| matches!(b, ContentBlock::ToolUse { .. }));
|
||||
let has_tool_result = msg
|
||||
.blocks
|
||||
.iter()
|
||||
.any(|b| matches!(b, ContentBlock::ToolResult { .. }));
|
||||
|
||||
if has_tool_use || has_tool_result {
|
||||
return false;
|
||||
}
|
||||
|
||||
total_chars < 200
|
||||
}
|
||||
|
||||
fn generate_collapse_summary(messages: &[ConversationMessage]) -> String {
|
||||
let user_count = messages
|
||||
.iter()
|
||||
.filter(|m| m.role == MessageRole::User)
|
||||
.count();
|
||||
let assistant_count = messages
|
||||
.iter()
|
||||
.filter(|m| m.role == MessageRole::Assistant)
|
||||
.count();
|
||||
|
||||
let mut topics: Vec<String> = messages
|
||||
.iter()
|
||||
.filter_map(|m| {
|
||||
m.blocks.iter().find_map(|b| match b {
|
||||
ContentBlock::Text { text } if !text.trim().is_empty() => {
|
||||
Some(truncate_text(text, 80))
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.take(5)
|
||||
.collect();
|
||||
topics.dedup();
|
||||
|
||||
let mut lines = vec![format!(
|
||||
"Collapsed {} messages ({} user, {} assistant).",
|
||||
messages.len(),
|
||||
user_count,
|
||||
assistant_count
|
||||
)];
|
||||
|
||||
if !topics.is_empty() {
|
||||
lines.push("Topics:".to_string());
|
||||
for topic in &topics {
|
||||
lines.push(format!(" - {topic}"));
|
||||
}
|
||||
}
|
||||
|
||||
lines.join("\n")
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// STAGE 3: CLUSTER — Semantic grouping and deep storage
|
||||
// =============================================================================
|
||||
|
||||
fn stage3_cluster(
|
||||
messages: &[ConversationMessage],
|
||||
min_cluster_size: usize,
|
||||
similarity_threshold: f64,
|
||||
) -> (Vec<ConversationMessage>, usize, usize) {
|
||||
if messages.len() < min_cluster_size {
|
||||
return (messages.to_vec(), 0, 0);
|
||||
}
|
||||
|
||||
let fingerprints: Vec<MessageFingerprint> = messages
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, msg)| fingerprint_message(i, msg))
|
||||
.collect();
|
||||
|
||||
if fingerprints.len() < min_cluster_size {
|
||||
return (messages.to_vec(), 0, 0);
|
||||
}
|
||||
|
||||
let mut cluster_assignments: BTreeMap<usize, usize> = BTreeMap::new();
|
||||
let mut cluster_id = 0;
|
||||
|
||||
for i in 0..fingerprints.len() {
|
||||
if cluster_assignments.contains_key(&fingerprints[i].index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut cluster_members: Vec<usize> = vec![fingerprints[i].index];
|
||||
|
||||
for j in (i + 1)..fingerprints.len() {
|
||||
if cluster_assignments.contains_key(&fingerprints[j].index) {
|
||||
continue;
|
||||
}
|
||||
|
||||
let similarity = compute_similarity(&fingerprints[i], &fingerprints[j]);
|
||||
if similarity >= similarity_threshold {
|
||||
cluster_members.push(fingerprints[j].index);
|
||||
}
|
||||
}
|
||||
|
||||
if cluster_members.len() >= min_cluster_size {
|
||||
for member_idx in &cluster_members {
|
||||
cluster_assignments.insert(*member_idx, cluster_id);
|
||||
}
|
||||
cluster_id += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if cluster_assignments.is_empty() {
|
||||
return (messages.to_vec(), 0, 0);
|
||||
}
|
||||
|
||||
let total_clustered: usize = cluster_assignments.len();
|
||||
let clusters_found = cluster_id as usize;
|
||||
|
||||
let mut result: Vec<ConversationMessage> = Vec::new();
|
||||
let mut cluster_buffers: BTreeMap<usize, Vec<usize>> = BTreeMap::new();
|
||||
|
||||
for (msg_idx, &cid) in &cluster_assignments {
|
||||
cluster_buffers.entry(cid).or_default().push(*msg_idx);
|
||||
}
|
||||
|
||||
for (i, msg) in messages.iter().enumerate() {
|
||||
if let Some(&cid) = cluster_assignments.get(&i) {
|
||||
if let Some(buffer) = cluster_buffers.get_mut(&cid) {
|
||||
if buffer[0] == i {
|
||||
let cluster_messages: Vec<&ConversationMessage> =
|
||||
buffer.iter().filter_map(|&idx| messages.get(idx)).collect();
|
||||
let summary = generate_cluster_summary(&cluster_messages);
|
||||
result.push(ConversationMessage {
|
||||
role: MessageRole::System,
|
||||
blocks: vec![ContentBlock::Text {
|
||||
text: format!("[Clustered {} messages]\n{summary}", buffer.len()),
|
||||
}],
|
||||
usage: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result.push(msg.clone());
|
||||
}
|
||||
}
|
||||
|
||||
(result, clusters_found, total_clustered)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct MessageFingerprint {
|
||||
index: usize,
|
||||
tool_names: BTreeSet<String>,
|
||||
file_paths: BTreeSet<String>,
|
||||
role: MessageRole,
|
||||
text_length: usize,
|
||||
}
|
||||
|
||||
fn fingerprint_message(index: usize, msg: &ConversationMessage) -> Option<MessageFingerprint> {
|
||||
if msg.role == MessageRole::System {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut tool_names: BTreeSet<String> = BTreeSet::new();
|
||||
let mut file_paths: BTreeSet<String> = BTreeSet::new();
|
||||
let mut text_length = 0;
|
||||
|
||||
for block in &msg.blocks {
|
||||
match block {
|
||||
ContentBlock::ToolUse { name, input, .. } => {
|
||||
tool_names.insert(name.clone());
|
||||
if let Some(path) = extract_path_from_tool_input(name, input) {
|
||||
file_paths.insert(path);
|
||||
}
|
||||
text_length += input.len();
|
||||
}
|
||||
ContentBlock::ToolResult {
|
||||
tool_name, output, ..
|
||||
} => {
|
||||
tool_names.insert(tool_name.clone());
|
||||
if let Some(path) = extract_path_from_tool_output(tool_name, output) {
|
||||
file_paths.insert(path);
|
||||
}
|
||||
text_length += output.len();
|
||||
}
|
||||
ContentBlock::Text { text } => {
|
||||
text_length += text.len();
|
||||
}
|
||||
ContentBlock::Thinking { thinking, .. } => {
|
||||
text_length += thinking.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some(MessageFingerprint {
|
||||
index,
|
||||
tool_names,
|
||||
file_paths,
|
||||
role: msg.role,
|
||||
text_length,
|
||||
})
|
||||
}
|
||||
|
||||
fn compute_similarity(a: &MessageFingerprint, b: &MessageFingerprint) -> f64 {
|
||||
if a.role != b.role {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
let tool_overlap = if a.tool_names.is_empty() && b.tool_names.is_empty() {
|
||||
1.0
|
||||
} else if a.tool_names.is_empty() || b.tool_names.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
let intersection: usize = a.tool_names.intersection(&b.tool_names).count();
|
||||
let union: usize = a.tool_names.union(&b.tool_names).count();
|
||||
intersection as f64 / union as f64
|
||||
};
|
||||
|
||||
let file_overlap = if a.file_paths.is_empty() && b.file_paths.is_empty() {
|
||||
1.0
|
||||
} else if a.file_paths.is_empty() || b.file_paths.is_empty() {
|
||||
0.0
|
||||
} else {
|
||||
let intersection: usize = a.file_paths.intersection(&b.file_paths).count();
|
||||
let union: usize = a.file_paths.union(&b.file_paths).count();
|
||||
intersection as f64 / union as f64
|
||||
};
|
||||
|
||||
let length_similarity = if a.text_length == 0 && b.text_length == 0 {
|
||||
1.0
|
||||
} else if a.text_length == 0 || b.text_length == 0 {
|
||||
0.0
|
||||
} else {
|
||||
let min_len = a.text_length.min(b.text_length) as f64;
|
||||
let max_len = a.text_length.max(b.text_length) as f64;
|
||||
min_len / max_len
|
||||
};
|
||||
|
||||
0.4 * tool_overlap + 0.4 * file_overlap + 0.2 * length_similarity
|
||||
}
|
||||
|
||||
fn generate_cluster_summary(messages: &[&ConversationMessage]) -> String {
|
||||
let mut tool_names: BTreeSet<String> = BTreeSet::new();
|
||||
let mut file_paths: BTreeSet<String> = BTreeSet::new();
|
||||
|
||||
for msg in messages {
|
||||
for block in &msg.blocks {
|
||||
match block {
|
||||
ContentBlock::ToolUse { name, input, .. } => {
|
||||
tool_names.insert(name.clone());
|
||||
if let Some(path) = extract_path_from_tool_input(name, input) {
|
||||
file_paths.insert(path);
|
||||
}
|
||||
}
|
||||
ContentBlock::ToolResult {
|
||||
tool_name, output, ..
|
||||
} => {
|
||||
tool_names.insert(tool_name.clone());
|
||||
if let Some(path) = extract_path_from_tool_output(tool_name, output) {
|
||||
file_paths.insert(path);
|
||||
}
|
||||
}
|
||||
ContentBlock::Text { .. } => {}
|
||||
ContentBlock::Thinking { .. } => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut lines = vec![format!("{} similar messages grouped.", messages.len())];
|
||||
|
||||
if !tool_names.is_empty() {
|
||||
lines.push(format!(
|
||||
"Tools: {}.",
|
||||
tool_names.iter().cloned().collect::<Vec<_>>().join(", ")
|
||||
));
|
||||
}
|
||||
|
||||
if !file_paths.is_empty() {
|
||||
let paths: Vec<String> = file_paths.iter().take(5).cloned().collect();
|
||||
lines.push(format!("Files: {}.", paths.join(", ")));
|
||||
}
|
||||
|
||||
lines.join("\n")
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Utilities
|
||||
// =============================================================================
|
||||
|
||||
fn estimate_message_tokens(message: &ConversationMessage) -> usize {
|
||||
message
|
||||
.blocks
|
||||
.iter()
|
||||
.map(|block| match block {
|
||||
ContentBlock::Text { text } => text.len() / 4 + 1,
|
||||
ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1,
|
||||
ContentBlock::ToolResult {
|
||||
tool_name, output, ..
|
||||
} => (tool_name.len() + output.len()) / 4 + 1,
|
||||
ContentBlock::Thinking { thinking, .. } => thinking.len() / 4 + 1,
|
||||
})
|
||||
.sum()
|
||||
}
|
||||
|
||||
/// Shortens `text` to at most `max_chars` characters.
///
/// Text that already fits is returned unchanged. Longer text is cut after
/// `max_chars` characters (never inside a multi-byte character) and an
/// ellipsis (`…`) is appended.
fn truncate_text(text: &str, max_chars: usize) -> String {
    // The byte offset of character number `max_chars` exists only when the
    // text is longer than the limit; that offset is where the cut goes.
    match text.char_indices().nth(max_chars) {
        None => text.to_string(),
        Some((cut, _)) => {
            let mut shortened = String::with_capacity(cut + '…'.len_utf8());
            shortened.push_str(&text[..cut]);
            shortened.push('…');
            shortened
        }
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::compact::CompactionConfig;
|
||||
use crate::session::{ContentBlock, ConversationMessage, MessageRole, Session};
|
||||
|
||||
#[test]
|
||||
fn stage1_removes_obsolete_file_reads() {
|
||||
let messages = vec![
|
||||
ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "1".to_string(),
|
||||
name: "read_file".to_string(),
|
||||
input: r#"{"path":"src/main.rs"}"#.to_string(),
|
||||
}]),
|
||||
ConversationMessage::tool_result(
|
||||
"1",
|
||||
"read_file",
|
||||
r#"{"path":"src/main.rs","content":"old"}"#,
|
||||
false,
|
||||
),
|
||||
ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "2".to_string(),
|
||||
name: "edit_file".to_string(),
|
||||
input: r#"{"path":"src/main.rs","old":"old","new":"new"}"#.to_string(),
|
||||
}]),
|
||||
ConversationMessage::tool_result(
|
||||
"2",
|
||||
"edit_file",
|
||||
r#"{"path":"src/main.rs","ok":true}"#,
|
||||
false,
|
||||
),
|
||||
];
|
||||
|
||||
let (kept, superseded) = stage1_supersede(&messages);
|
||||
assert!(superseded > 0, "should supersede the earlier read");
|
||||
assert!(kept.len() < messages.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stage1_keeps_standalone_reads() {
|
||||
let messages = vec![
|
||||
ConversationMessage::assistant(vec![ContentBlock::ToolUse {
|
||||
id: "1".to_string(),
|
||||
name: "read_file".to_string(),
|
||||
input: r#"{"path":"src/main.rs"}"#.to_string(),
|
||||
}]),
|
||||
ConversationMessage::tool_result(
|
||||
"1",
|
||||
"read_file",
|
||||
r#"{"path":"src/main.rs","content":"data"}"#,
|
||||
false,
|
||||
),
|
||||
];
|
||||
|
||||
let (kept, superseded) = stage1_supersede(&messages);
|
||||
assert_eq!(superseded, 0);
|
||||
assert_eq!(kept.len(), messages.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stage2_collapses_chatty_messages() {
|
||||
let mut messages = vec![];
|
||||
for i in 0..6 {
|
||||
messages.push(ConversationMessage::user_text(&format!("ok {i}")));
|
||||
messages.push(ConversationMessage::assistant(vec![ContentBlock::Text {
|
||||
text: format!("got {i}"),
|
||||
}]));
|
||||
}
|
||||
messages.push(ConversationMessage::assistant(vec![
|
||||
ContentBlock::ToolUse {
|
||||
id: "t".to_string(),
|
||||
name: "bash".to_string(),
|
||||
input: r#"{"command":"ls"}"#.to_string(),
|
||||
},
|
||||
]));
|
||||
|
||||
let (result, chains, collapsed) = stage2_collapse(&messages, 4);
|
||||
assert!(chains > 0, "should collapse at least one chain");
|
||||
assert!(collapsed > 0);
|
||||
assert!(result.len() < messages.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stage3_clusters_similar_messages() {
|
||||
let mut messages = vec![];
|
||||
for i in 0..5 {
|
||||
messages.push(ConversationMessage::assistant(vec![
|
||||
ContentBlock::ToolUse {
|
||||
id: format!("read_{i}"),
|
||||
name: "read_file".to_string(),
|
||||
input: format!(r#"{{"path":"src/{i}.rs"}}"#),
|
||||
},
|
||||
]));
|
||||
messages.push(ConversationMessage::tool_result(
|
||||
&format!("read_{i}"),
|
||||
"read_file",
|
||||
&format!(r#"{{"path":"src/{i}.rs","content":"data {i}"}}"#),
|
||||
false,
|
||||
));
|
||||
}
|
||||
|
||||
let (result, clusters, clustered) = stage3_cluster(&messages, 3, 0.4);
|
||||
assert!(clusters > 0, "should find at least one cluster");
|
||||
assert!(clustered > 0);
|
||||
assert!(result.len() < messages.len());
|
||||
}
|
||||
|
||||
#[test]
fn trident_full_pipeline_preserves_important_content() {
    // A short read -> edit -> confirm conversation about a single file.
    let read_request = ConversationMessage::assistant(vec![ContentBlock::ToolUse {
        id: "1".to_string(),
        name: "read_file".to_string(),
        input: r#"{"path":"src/main.rs"}"#.to_string(),
    }]);
    let read_response = ConversationMessage::tool_result(
        "1",
        "read_file",
        r#"{"path":"src/main.rs","content":"fn main() { buggy }"}"#,
        false,
    );
    let edit_request = ConversationMessage::assistant(vec![ContentBlock::ToolUse {
        id: "2".to_string(),
        name: "edit_file".to_string(),
        input: r#"{"path":"src/main.rs","old":"buggy","new":"fixed"}"#.to_string(),
    }]);
    let edit_response = ConversationMessage::tool_result(
        "2",
        "edit_file",
        r#"{"path":"src/main.rs","ok":true}"#,
        false,
    );
    let wrap_up = ConversationMessage::assistant(vec![ContentBlock::Text {
        text: "Fixed the bug in main.rs".to_string(),
    }]);

    let mut session = Session::new();
    session.messages = vec![
        ConversationMessage::user_text("Read and fix main.rs"),
        read_request,
        read_response,
        edit_request,
        edit_response,
        wrap_up,
    ];

    // A one-token budget forces compaction to kick in.
    let compaction = CompactionConfig {
        preserve_recent_messages: 4,
        max_estimated_tokens: 1,
    };
    let trident_config = TridentConfig::default();
    let result = trident_compact_session(&session, compaction, &trident_config);

    // Either messages were reported as removed, or the compacted session is
    // shorter than the input.
    let reported_removal = result.removed_message_count > 0;
    let session_shrank = result.compacted_session.messages.len() < session.messages.len();
    assert!(reported_removal || session_shrank);
}
|
||||
|
||||
#[test]
fn trident_stats_report() {
    // Render a report from fixed counters and check each stage line.
    let report = TridentStats {
        superseded_count: 5,
        collapsed_chains: 2,
        messages_collapsed: 8,
        clusters_found: 1,
        messages_clustered: 3,
        tokens_saved_estimate: 1200,
        original_message_count: 20,
        final_message_count: 8,
    }
    .format_report();

    for expected in [
        "Stage 1 (Supersede): 5",
        "Stage 2 (Collapse): 8 -> 2",
        "Stage 3 (Cluster): 3 -> 1",
    ] {
        assert!(report.contains(expected));
    }

    // The token estimate may be printed with or without a thousands separator.
    let shows_token_estimate = ["1200", "1,200"]
        .iter()
        .any(|needle| report.contains(*needle));
    assert!(shows_token_estimate);
}
|
||||
}
|
||||
@@ -438,13 +438,24 @@ fn normalize_path(path: &Path) -> PathBuf {
|
||||
/// Extract repository name from a path for event context.
|
||||
fn extract_repo_name(cwd: &str) -> Option<String> {
|
||||
let path = Path::new(cwd);
|
||||
// Try to find a .git directory to identify repo root
|
||||
let mut current = Some(path);
|
||||
while let Some(p) = current {
|
||||
if p.join(".git").is_dir() {
|
||||
return p.file_name().map(|n| n.to_string_lossy().to_string());
|
||||
// Ask git from the cwd itself. Walking ancestors manually can accidentally
|
||||
// classify synthetic/nonexistent paths as an unrelated parent repo (for
|
||||
// example `/tmp/.git`), which makes trust events point at the wrong repo.
|
||||
if path.is_dir() {
|
||||
if let Ok(output) = std::process::Command::new("git")
|
||||
.args(["rev-parse", "--show-toplevel"])
|
||||
.current_dir(path)
|
||||
.output()
|
||||
{
|
||||
if output.status.success() {
|
||||
let root = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||
if !root.is_empty() {
|
||||
return Path::new(&root)
|
||||
.file_name()
|
||||
.map(|n| n.to_string_lossy().to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
current = p.parent();
|
||||
}
|
||||
// Fallback: use the last component of the path
|
||||
path.file_name().map(|n| n.to_string_lossy().to_string())
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
|
||||
@@ -73,6 +74,7 @@ pub struct WorkerFailure {
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum WorkerEventKind {
|
||||
Spawning,
|
||||
StartupPreflightWarning,
|
||||
TrustRequired,
|
||||
ToolPermissionRequired,
|
||||
TrustResolved,
|
||||
@@ -102,6 +104,21 @@ pub enum WorkerPromptTarget {
|
||||
Unknown,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum WorkerStartupPreflightWarningKind {
|
||||
FileAbsentOnBranch,
|
||||
GitMetadataNotWritable,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct WorkerStartupPreflightWarning {
|
||||
pub kind: WorkerStartupPreflightWarningKind,
|
||||
pub message: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub path: Option<String>,
|
||||
}
|
||||
|
||||
/// Classification of startup failure when no evidence is available.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
@@ -212,6 +229,12 @@ pub enum WorkerEventPayload {
|
||||
evidence: StartupEvidenceBundle,
|
||||
classification: StartupFailureClassification,
|
||||
},
|
||||
StartupPreflightWarning {
|
||||
kind: WorkerStartupPreflightWarningKind,
|
||||
message: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
path: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
|
||||
@@ -329,6 +352,34 @@ impl WorkerRegistry {
|
||||
inner.workers.get(worker_id).cloned()
|
||||
}
|
||||
|
||||
pub fn observe_startup_preflight(
|
||||
&self,
|
||||
worker_id: &str,
|
||||
task_prompt: &str,
|
||||
) -> Result<Worker, String> {
|
||||
let mut inner = self.inner.lock().expect("worker registry lock poisoned");
|
||||
let worker = inner
|
||||
.workers
|
||||
.get_mut(worker_id)
|
||||
.ok_or_else(|| format!("worker not found: {worker_id}"))?;
|
||||
|
||||
for warning in startup_preflight_warnings(Path::new(&worker.cwd), task_prompt) {
|
||||
push_event(
|
||||
worker,
|
||||
WorkerEventKind::StartupPreflightWarning,
|
||||
worker.status,
|
||||
Some(warning.message.clone()),
|
||||
Some(WorkerEventPayload::StartupPreflightWarning {
|
||||
kind: warning.kind,
|
||||
message: warning.message,
|
||||
path: warning.path,
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
Ok(worker.clone())
|
||||
}
|
||||
|
||||
pub fn observe(&self, worker_id: &str, screen_text: &str) -> Result<Worker, String> {
|
||||
let mut inner = self.inner.lock().expect("worker registry lock poisoned");
|
||||
let worker = inner
|
||||
@@ -1064,6 +1115,118 @@ fn extract_server_from_qualified_tool(tool: &str) -> Option<String> {
|
||||
(!server.is_empty()).then(|| server.to_string())
|
||||
}
|
||||
|
||||
pub fn startup_preflight_warnings(
|
||||
cwd: &Path,
|
||||
task_prompt: &str,
|
||||
) -> Vec<WorkerStartupPreflightWarning> {
|
||||
let mut warnings = Vec::new();
|
||||
|
||||
if let Some(git_path) = git_metadata_path(cwd) {
|
||||
if !path_is_writable(&git_path) {
|
||||
warnings.push(WorkerStartupPreflightWarning {
|
||||
kind: WorkerStartupPreflightWarningKind::GitMetadataNotWritable,
|
||||
message: format!(
|
||||
"git metadata is not writable; commits or pushes may fail: {}",
|
||||
git_path.display()
|
||||
),
|
||||
path: Some(git_path.display().to_string()),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
for path in mentioned_repo_paths(task_prompt) {
|
||||
if !git_tracks_path(cwd, &path) {
|
||||
warnings.push(WorkerStartupPreflightWarning {
|
||||
kind: WorkerStartupPreflightWarningKind::FileAbsentOnBranch,
|
||||
message: format!(
|
||||
"task mentions {path}, but git does not track it on the current branch"
|
||||
),
|
||||
path: Some(path),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
warnings
|
||||
}
|
||||
|
||||
/// Extracts the repo-relative file paths mentioned in a free-form task prompt.
///
/// A whitespace-separated token counts as a path when, after stripping
/// surrounding quotes/brackets/punctuation and any leading `./`, it:
/// - originally contained a `/` and is not a URL (`://`),
/// - is not absolute and contains no `..`,
/// - consists only of ASCII alphanumerics, `/`, `_`, `-` and `.`, and
/// - has a final component containing a `.` (looks like a file name).
///
/// Trailing sentence punctuation (`.`, `!`, `?`) is removed so that
/// "Fix src/lib.rs." yields `src/lib.rs` rather than `src/lib.rs.`.
/// Results keep first-seen order and contain no duplicates.
fn mentioned_repo_paths(task_prompt: &str) -> Vec<String> {
    /// Quote, bracket and separator characters that may wrap a path.
    fn is_wrapper(ch: char) -> bool {
        matches!(
            ch,
            '`' | '"' | '\'' | '(' | ')' | '[' | ']' | '{' | '}' | ',' | ';' | ':'
        )
    }

    /// Characters that may follow a path at the end of a token.
    fn is_trailing_noise(ch: char) -> bool {
        is_wrapper(ch) || matches!(ch, '.' | '!' | '?')
    }

    let mut out: Vec<String> = Vec::new();
    for raw in task_prompt.split_whitespace() {
        let token = raw
            .trim_start_matches(is_wrapper)
            .trim_end_matches(is_trailing_noise);
        if !token.contains('/') || token.contains("://") {
            continue;
        }

        // Normalize `./foo` first so the absolute-path check below also
        // catches inputs such as `.//etc/hosts.txt`.
        let token = token.trim_start_matches("./");
        if token.starts_with('/') || token.contains("..") {
            continue;
        }

        let only_path_chars = token
            .chars()
            .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '/' | '_' | '-' | '.'));
        let names_a_file = token
            .rsplit('/')
            .next()
            .is_some_and(|name| name.contains('.'));

        if only_path_chars && names_a_file && !out.iter().any(|seen| seen == token) {
            out.push(token.to_string());
        }
    }
    out
}
|
||||
|
||||
/// Reports whether `git ls-files --error-unmatch -- <path>`, run in `cwd`,
/// exits successfully.
///
/// Returns `false` when the command exits unsuccessfully or cannot be run
/// at all.
fn git_tracks_path(cwd: &Path, path: &str) -> bool {
    let outcome = Command::new("git")
        .args(["ls-files", "--error-unmatch", "--"])
        .arg(path)
        .current_dir(cwd)
        .output();

    match outcome {
        Ok(output) => output.status.success(),
        Err(_) => false,
    }
}
|
||||
|
||||
/// Asks git (`git rev-parse --git-path .`, run in `cwd`) where the
/// repository metadata lives.
///
/// Returns `None` when the command cannot be run, exits unsuccessfully, or
/// prints nothing. A relative answer is joined onto `cwd`; an absolute one is
/// returned as-is.
fn git_metadata_path(cwd: &Path) -> Option<PathBuf> {
    let output = Command::new("git")
        .args(["rev-parse", "--git-path", "."])
        .current_dir(cwd)
        .output()
        .ok()
        .filter(|output| output.status.success())?;

    let stdout = String::from_utf8_lossy(&output.stdout);
    let reported = stdout.trim();
    if reported.is_empty() {
        return None;
    }

    let reported = PathBuf::from(reported.to_string());
    if reported.is_absolute() {
        Some(reported)
    } else {
        Some(cwd.join(reported))
    }
}
|
||||
|
||||
fn path_is_writable(path: &Path) -> bool {
|
||||
let probe_dir = if path.is_dir() {
|
||||
path.to_path_buf()
|
||||
} else {
|
||||
path.parent().unwrap_or(path).to_path_buf()
|
||||
};
|
||||
let probe = probe_dir.join(format!(".claw-write-probe-{}", now_secs()));
|
||||
std::fs::OpenOptions::new()
|
||||
.write(true)
|
||||
.create_new(true)
|
||||
.open(&probe)
|
||||
.and_then(|_| std::fs::remove_file(&probe))
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
fn detect_trust_prompt(lowered: &str) -> bool {
|
||||
[
|
||||
"do you trust the files in this folder",
|
||||
@@ -1285,6 +1448,8 @@ fn cwd_matches_observed_target(expected_cwd: &str, observed_cwd: &str) -> bool {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
|
||||
#[test]
|
||||
fn allowlisted_trust_prompt_auto_resolves_then_reaches_ready_state() {
|
||||
@@ -1431,6 +1596,66 @@ mod tests {
|
||||
assert!(!readiness.ready);
|
||||
}
|
||||
|
||||
#[test]
fn startup_preflight_warns_when_task_file_is_absent_on_branch() {
    // Throwaway repository in which only `src/lib.rs` is staged.
    let repo = tempfile::tempdir().expect("tempdir");
    let root = repo.path();
    Command::new("git")
        .arg("init")
        .current_dir(root)
        .output()
        .expect("git init should run");
    fs::create_dir_all(root.join("src")).expect("src dir");
    fs::write(root.join("src/lib.rs"), "pub fn present() {}\n").expect("write file");
    Command::new("git")
        .args(["add", "src/lib.rs"])
        .current_dir(root)
        .output()
        .expect("git add should run");

    let warnings = startup_preflight_warnings(
        root,
        "Fix src/lib.rs and rust/crates/runtime/src/trident.rs before testing.",
    );

    // Paths flagged as absent on the branch.
    let absent: Vec<&str> = warnings
        .iter()
        .filter(|warning| warning.kind == WorkerStartupPreflightWarningKind::FileAbsentOnBranch)
        .filter_map(|warning| warning.path.as_deref())
        .collect();

    // The untracked file is flagged; the staged one is not.
    assert!(absent.contains(&"rust/crates/runtime/src/trident.rs"));
    assert!(!absent.contains(&"src/lib.rs"));
}
|
||||
|
||||
#[test]
fn startup_preflight_records_structured_warning_event() {
    // Empty repository: any file mentioned in the prompt is untracked.
    let repo = tempfile::tempdir().expect("tempdir");
    Command::new("git")
        .arg("init")
        .current_dir(repo.path())
        .output()
        .expect("git init should run");

    let registry = WorkerRegistry::new();
    let cwd = repo.path().display().to_string();
    let worker = registry.create(&cwd, &[], true);

    let observed = registry
        .observe_startup_preflight(&worker.worker_id, "Open missing/file.rs")
        .expect("preflight should run");

    // The first preflight warning event must carry a typed payload.
    let event = observed
        .events
        .iter()
        .find(|event| event.kind == WorkerEventKind::StartupPreflightWarning)
        .expect("preflight warning event");
    let is_file_absent_payload = matches!(
        event.payload,
        Some(WorkerEventPayload::StartupPreflightWarning {
            kind: WorkerStartupPreflightWarningKind::FileAbsentOnBranch,
            ..
        })
    );
    assert!(is_file_absent_payload);
}
|
||||
|
||||
#[test]
|
||||
fn startup_timeout_classifies_tool_permission_prompt() {
|
||||
let registry = WorkerRegistry::new();
|
||||
|
||||
@@ -23,6 +23,8 @@ serde_json.workspace = true
|
||||
syntect = "5"
|
||||
tokio = { version = "1", features = ["rt-multi-thread", "signal", "time"] }
|
||||
tools = { path = "../tools" }
|
||||
log = "0.4"
|
||||
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
@@ -2,6 +2,13 @@
|
||||
dead_code,
|
||||
unused_imports,
|
||||
unused_variables,
|
||||
clippy::doc_markdown,
|
||||
clippy::len_zero,
|
||||
clippy::manual_string_new,
|
||||
clippy::match_same_arms,
|
||||
clippy::result_large_err,
|
||||
clippy::too_many_lines,
|
||||
clippy::uninlined_format_args,
|
||||
clippy::unneeded_struct_pattern,
|
||||
clippy::unnecessary_wraps,
|
||||
clippy::unused_self
|
||||
@@ -23,6 +30,8 @@ use std::sync::{Arc, Mutex};
|
||||
use std::thread::{self, JoinHandle};
|
||||
use std::time::{Duration, Instant, UNIX_EPOCH};
|
||||
|
||||
use log::debug;
|
||||
|
||||
use api::{
|
||||
detect_provider_kind, model_family_identity_for, resolve_startup_auth_source, AnthropicClient,
|
||||
AuthSource, ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest,
|
||||
@@ -58,7 +67,7 @@ use tools::{
|
||||
execute_tool, mvp_tool_specs, GlobalToolRegistry, RuntimeToolDefinition, ToolSearchOutput,
|
||||
};
|
||||
|
||||
/// Model used when none is specified, in `provider/model` form.
const DEFAULT_MODEL: &str = "anthropic/claude-opus-4-6";
|
||||
|
||||
/// #148: Model provenance for `claw status` JSON/text output. Records where
|
||||
/// the resolved model string came from so claws don't have to re-read argv
|
||||
@@ -265,6 +274,8 @@ fn classify_error_kind(message: &str) -> &'static str {
|
||||
"no_managed_sessions"
|
||||
} else if message.contains("unsupported ACP invocation") {
|
||||
"unsupported_acp_invocation"
|
||||
} else if message.contains("unsupported skills action") {
|
||||
"unsupported_skills_action"
|
||||
} else if message.contains("unrecognized argument") || message.contains("unknown option") {
|
||||
"cli_parse"
|
||||
} else if message.contains("invalid model syntax") {
|
||||
@@ -277,6 +288,16 @@ fn classify_error_kind(message: &str) -> &'static str {
|
||||
"confirmation_required"
|
||||
} else if message.contains("api failed") || message.contains("api returned") {
|
||||
"api_http_error"
|
||||
} else if message.contains("mcpServers") {
|
||||
"malformed_mcp_config"
|
||||
} else if message.starts_with("empty prompt") {
|
||||
"empty_prompt"
|
||||
} else if message.starts_with("interactive_only:") || message.contains("stdin is not a TTY") {
|
||||
"interactive_only"
|
||||
} else if message.starts_with("unknown agents subcommand:") {
|
||||
"unknown_agents_subcommand"
|
||||
} else if message.contains("is not installed") {
|
||||
"plugin_not_found"
|
||||
} else {
|
||||
"unknown"
|
||||
}
|
||||
@@ -718,15 +739,19 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
let value = args
|
||||
.get(index + 1)
|
||||
.ok_or_else(|| "missing value for --model".to_string())?;
|
||||
validate_model_syntax(value)?;
|
||||
model = resolve_model_alias_with_config(value);
|
||||
let resolved = resolve_model_alias_with_config(value);
|
||||
debug!("Resolved --model '{}' -> '{}'", value, resolved);
|
||||
validate_model_syntax(&resolved)?;
|
||||
model = resolved;
|
||||
model_flag_raw = Some(value.clone()); // #148
|
||||
index += 2;
|
||||
}
|
||||
flag if flag.starts_with("--model=") => {
|
||||
let value = &flag[8..];
|
||||
validate_model_syntax(value)?;
|
||||
model = resolve_model_alias_with_config(value);
|
||||
let resolved = resolve_model_alias_with_config(value);
|
||||
debug!("Resolved --model='{}' -> '{}'", value, resolved);
|
||||
validate_model_syntax(&resolved)?;
|
||||
model = resolved;
|
||||
model_flag_raw = Some(value.to_string()); // #148
|
||||
index += 1;
|
||||
}
|
||||
@@ -860,6 +885,35 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
}
|
||||
|
||||
if wants_help {
|
||||
// #684: --help before subcommand should still route to subcommand-specific
|
||||
// help when the subcommand is one of the local-help-topic commands.
|
||||
if let Some(action) = parse_local_help_action(&rest, output_format) {
|
||||
return action;
|
||||
}
|
||||
// When --help was consumed before the subcommand, rest has no help flag.
|
||||
// If rest is a simple local-help subcommand with no extra args, route there.
|
||||
if !rest.is_empty() && rest[1..].iter().all(|a| is_help_flag(a)) {
|
||||
let topic = match rest[0].as_str() {
|
||||
"status" => Some(LocalHelpTopic::Status),
|
||||
"sandbox" => Some(LocalHelpTopic::Sandbox),
|
||||
"doctor" => Some(LocalHelpTopic::Doctor),
|
||||
"acp" => Some(LocalHelpTopic::Acp),
|
||||
"init" => Some(LocalHelpTopic::Init),
|
||||
"state" => Some(LocalHelpTopic::State),
|
||||
"export" => Some(LocalHelpTopic::Export),
|
||||
"version" => Some(LocalHelpTopic::Version),
|
||||
"system-prompt" => Some(LocalHelpTopic::SystemPrompt),
|
||||
"dump-manifests" => Some(LocalHelpTopic::DumpManifests),
|
||||
"bootstrap-plan" => Some(LocalHelpTopic::BootstrapPlan),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(topic) = topic {
|
||||
return Ok(CliAction::HelpTopic {
|
||||
topic,
|
||||
output_format,
|
||||
});
|
||||
}
|
||||
}
|
||||
return Ok(CliAction::Help { output_format });
|
||||
}
|
||||
|
||||
@@ -892,6 +946,12 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
allow_broad_cwd,
|
||||
});
|
||||
}
|
||||
// Non-TTY stdin with no piped content: refuse to start the interactive
|
||||
// REPL (it would block forever waiting for input that will never arrive).
|
||||
// (#696: emit a typed error instead of hanging indefinitely)
|
||||
// Skip this guard in test builds (parse_args tests run in non-TTY context).
|
||||
#[cfg(not(test))]
|
||||
return Err("interactive_only: claw requires an interactive terminal (stdin is not a TTY and no prompt was provided — pipe a prompt or run in a TTY)".into());
|
||||
}
|
||||
return Ok(CliAction::Repl {
|
||||
model,
|
||||
@@ -1003,6 +1063,14 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
|
||||
),
|
||||
"skills" => {
|
||||
let args = join_optional_args(&rest[1..]);
|
||||
if let Some(action) = args.as_deref() {
|
||||
let first_word = action.split_whitespace().next().unwrap_or(action);
|
||||
if matches!(first_word, "remove" | "add" | "uninstall" | "delete") {
|
||||
return Err(format!(
|
||||
"unsupported skills action: {first_word}. Supported actions: list, install <path>, help, or <skill> [args]"
|
||||
));
|
||||
}
|
||||
}
|
||||
match classify_skills_slash_command(args.as_deref()) {
|
||||
SkillSlashDispatch::Invoke(prompt) => Ok(CliAction::Prompt {
|
||||
prompt,
|
||||
@@ -1100,7 +1168,10 @@ fn parse_local_help_action(
|
||||
rest: &[String],
|
||||
output_format: CliOutputFormat,
|
||||
) -> Option<Result<CliAction, String>> {
|
||||
if rest.len() != 2 || !is_help_flag(&rest[1]) {
|
||||
if rest.is_empty() {
|
||||
return None;
|
||||
}
|
||||
if !rest.iter().any(|a| is_help_flag(a)) {
|
||||
return None;
|
||||
}
|
||||
|
||||
@@ -1109,10 +1180,6 @@ fn parse_local_help_action(
|
||||
"sandbox" => LocalHelpTopic::Sandbox,
|
||||
"doctor" => LocalHelpTopic::Doctor,
|
||||
"acp" => LocalHelpTopic::Acp,
|
||||
// #141: add the subcommands that were previously falling back
|
||||
// to global help (init/state/export/version) or erroring out
|
||||
// (system-prompt/dump-manifests) or printing their primary
|
||||
// output instead of help text (bootstrap-plan).
|
||||
"init" => LocalHelpTopic::Init,
|
||||
"state" => LocalHelpTopic::State,
|
||||
"export" => LocalHelpTopic::Export,
|
||||
@@ -1122,6 +1189,10 @@ fn parse_local_help_action(
|
||||
"bootstrap-plan" => LocalHelpTopic::BootstrapPlan,
|
||||
_ => return None,
|
||||
};
|
||||
let has_non_help = rest[1..].iter().any(|a| !is_help_flag(a));
|
||||
if has_non_help {
|
||||
return None;
|
||||
}
|
||||
Some(Ok(CliAction::HelpTopic {
|
||||
topic,
|
||||
output_format,
|
||||
@@ -1155,8 +1226,9 @@ fn parse_single_word_command_alias(
|
||||
|
||||
if is_diagnostic && rest.len() > 1 {
|
||||
// Diagnostic verb with trailing args: reject unrecognized suffix
|
||||
if is_help_flag(&rest[1]) && rest.len() == 2 {
|
||||
// "doctor --help" is valid, routed to parse_local_help_action() instead
|
||||
let all_extra_are_help = rest[1..].iter().all(|a| is_help_flag(a));
|
||||
if all_extra_are_help {
|
||||
// "doctor --help -h" is valid, routed to parse_local_help_action() instead
|
||||
return None;
|
||||
}
|
||||
// Unrecognized suffix like "--json"
|
||||
@@ -1512,9 +1584,9 @@ fn levenshtein_distance(left: &str, right: &str) -> usize {
|
||||
|
||||
/// Expands a built-in short alias (`opus`, `sonnet`, `haiku`) to its
/// provider-qualified model id; any other input is returned unchanged.
///
/// The expansions use the `provider/model` form (e.g.
/// `anthropic/claude-opus-4-6`). Each alias appears exactly once; duplicate
/// arms would leave the later ones unreachable.
fn resolve_model_alias(model: &str) -> &str {
    match model {
        "opus" => "anthropic/claude-opus-4-6",
        "sonnet" => "anthropic/claude-sonnet-4-6",
        "haiku" => "anthropic/claude-haiku-4-5-20251213",
        _ => model,
    }
}
|
||||
@@ -1538,11 +1610,6 @@ fn validate_model_syntax(model: &str) -> Result<(), String> {
|
||||
if trimmed.is_empty() {
|
||||
return Err("model string cannot be empty".to_string());
|
||||
}
|
||||
// Known aliases are always valid
|
||||
match trimmed {
|
||||
"opus" | "sonnet" | "haiku" => return Ok(()),
|
||||
_ => {}
|
||||
}
|
||||
// Check for spaces (malformed)
|
||||
if trimmed.contains(' ') {
|
||||
return Err(format!(
|
||||
@@ -1555,7 +1622,7 @@ fn validate_model_syntax(model: &str) -> Result<(), String> {
|
||||
if parts.len() != 2 || parts[0].is_empty() || parts[1].is_empty() {
|
||||
// #154: hint if the model looks like it belongs to a different provider
|
||||
let mut err_msg = format!(
|
||||
"invalid model syntax: '{}'. Expected provider/model (e.g., anthropic/claude-opus-4-6) or known alias (opus, sonnet, haiku)",
|
||||
"invalid model syntax: '{}'. Expected provider/model (e.g., anthropic/claude-opus-4-6)",
|
||||
trimmed
|
||||
);
|
||||
if trimmed.starts_with("gpt-") || trimmed.starts_with("gpt_") {
|
||||
@@ -2076,6 +2143,7 @@ fn render_doctor_report() -> Result<DoctorReport, Box<dyn std::error::Error>> {
|
||||
// Doctor path has its own config check; StatusContext here is only
|
||||
// fed into health renderers that don't read config_load_error.
|
||||
config_load_error: config.as_ref().err().map(ToString::to_string),
|
||||
config_load_error_kind: None,
|
||||
};
|
||||
Ok(DoctorReport {
|
||||
checks: vec![
|
||||
@@ -2186,25 +2254,34 @@ fn check_auth_health() -> DiagnosticCheck {
|
||||
let auth_token_present = env::var("ANTHROPIC_AUTH_TOKEN")
|
||||
.ok()
|
||||
.is_some_and(|value| !value.trim().is_empty());
|
||||
let openai_key_present = env::var("OPENAI_API_KEY")
|
||||
.ok()
|
||||
.is_some_and(|value| !value.trim().is_empty());
|
||||
let any_auth_present = api_key_present || auth_token_present || openai_key_present;
|
||||
let env_details = format!(
|
||||
"Environment api_key={} auth_token={}",
|
||||
"Environment api_key={} auth_token={} openai_key={}",
|
||||
if api_key_present { "present" } else { "absent" },
|
||||
if auth_token_present {
|
||||
"present"
|
||||
} else {
|
||||
"absent"
|
||||
},
|
||||
if openai_key_present {
|
||||
"present"
|
||||
} else {
|
||||
"absent"
|
||||
}
|
||||
);
|
||||
|
||||
match load_oauth_credentials() {
|
||||
Ok(Some(token_set)) => DiagnosticCheck::new(
|
||||
"Auth",
|
||||
if api_key_present || auth_token_present {
|
||||
if any_auth_present {
|
||||
DiagnosticLevel::Ok
|
||||
} else {
|
||||
DiagnosticLevel::Warn
|
||||
},
|
||||
if api_key_present || auth_token_present {
|
||||
if any_auth_present {
|
||||
"supported auth env vars are configured; legacy saved OAuth is ignored"
|
||||
} else {
|
||||
"legacy saved OAuth credentials are present but unsupported"
|
||||
@@ -2247,12 +2324,12 @@ fn check_auth_health() -> DiagnosticCheck {
|
||||
])),
|
||||
Ok(None) => DiagnosticCheck::new(
|
||||
"Auth",
|
||||
if api_key_present || auth_token_present {
|
||||
if any_auth_present {
|
||||
DiagnosticLevel::Ok
|
||||
} else {
|
||||
DiagnosticLevel::Warn
|
||||
},
|
||||
if api_key_present || auth_token_present {
|
||||
if any_auth_present {
|
||||
"supported auth env vars are configured"
|
||||
} else {
|
||||
"no supported auth env vars were found"
|
||||
@@ -2514,7 +2591,12 @@ fn check_boot_preflight_health(context: &StatusContext) -> DiagnosticCheck {
|
||||
format!("Worktree exists {}", preflight.worktree_exists),
|
||||
format!("Git dir exists {}", preflight.git_dir_exists),
|
||||
format!("Branch behind {}", preflight.branch_freshness.behind),
|
||||
format!("Trust allowlist {:?}", preflight.trust_gate_allowed),
|
||||
format!(
|
||||
"Trust allowlist {}",
|
||||
preflight
|
||||
.trust_gate_allowed
|
||||
.map_or("unknown".to_string(), |v| v.to_string())
|
||||
),
|
||||
format!("Trusted roots {}", preflight.trusted_roots_count),
|
||||
format!(
|
||||
"MCP eligible {} · servers {}",
|
||||
@@ -2802,6 +2884,7 @@ fn print_system_prompt(
|
||||
"{}",
|
||||
serde_json::to_string_pretty(&json!({
|
||||
"kind": "system-prompt",
|
||||
"status": "ok",
|
||||
"message": message,
|
||||
"sections": sections,
|
||||
}))?
|
||||
@@ -2824,6 +2907,7 @@ fn version_json_value() -> serde_json::Value {
|
||||
let executable_path = env::current_exe().ok().map(|p| p.display().to_string());
|
||||
json!({
|
||||
"kind": "version",
|
||||
"status": "ok",
|
||||
"message": render_version_report(),
|
||||
"version": VERSION,
|
||||
"git_sha": GIT_SHA,
|
||||
@@ -3015,6 +3099,11 @@ struct StatusContext {
|
||||
/// `status: "degraded"` so claws can distinguish "status ran but config
|
||||
/// is broken" from "status ran cleanly".
|
||||
config_load_error: Option<String>,
|
||||
/// #143: machine-readable kind for the config load error, derived from
|
||||
/// `classify_error_kind`. Included in JSON output alongside the human
|
||||
/// readable string so downstream claws can switch on the kind token
|
||||
/// instead of regex-scraping the prose.
|
||||
config_load_error_kind: Option<&'static str>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
@@ -3834,12 +3923,13 @@ fn run_resume_command(
|
||||
json: Some(serde_json::json!({ "kind": "help", "text": render_repl_help() })),
|
||||
}),
|
||||
SlashCommand::Compact => {
|
||||
let result = runtime::compact_session(
|
||||
let result = runtime::trident::trident_compact_session(
|
||||
session,
|
||||
CompactionConfig {
|
||||
max_estimated_tokens: 0,
|
||||
..CompactionConfig::default()
|
||||
},
|
||||
&runtime::trident::TridentConfig::default(),
|
||||
);
|
||||
let removed = result.removed_message_count;
|
||||
let kept = result.compacted_session.messages.len();
|
||||
@@ -4181,7 +4271,8 @@ fn run_resume_command(
|
||||
| SlashCommand::Ide { .. }
|
||||
| SlashCommand::Tag { .. }
|
||||
| SlashCommand::OutputStyle { .. }
|
||||
| SlashCommand::AddDir { .. } => Err("unsupported resumed slash command".into()),
|
||||
| SlashCommand::AddDir { .. }
|
||||
| SlashCommand::Team { .. } => Err("unsupported resumed slash command".into()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5037,6 +5128,135 @@ impl LiveCli {
|
||||
TerminalRenderer::new().color_theme(),
|
||||
&mut stdout,
|
||||
)?;
|
||||
|
||||
// ============================================================================
|
||||
// Auto-compact retry on context window errors
|
||||
// ============================================================================
|
||||
// When the model API returns a context_window_blocked error (because the request
|
||||
// exceeds the model's context window), we automatically:
|
||||
// 1. Compact the session (remove old messages to free up space)
|
||||
// 2. Retry the original request with the compacted session
|
||||
// 3. Report results to the user
|
||||
//
|
||||
// This eliminates the need for users to manually run /compact when they
|
||||
// hit context limits - the recovery happens automatically.
|
||||
//
|
||||
// Detection: We look for "context_window" or "Context window" in the error
|
||||
// message, which covers error types like:
|
||||
// - "context_window_blocked"
|
||||
// - "Context window blocked"
|
||||
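// NOTE(review): a provider message such as "This model's maximum context length is X tokens..." contains none of the matched substrings by itself; it is only detected if the error's Display output also carries a `context_window` / `Context window` marker — confirm.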
|
||||
// ============================================================================
|
||||
|
||||
let error_str = error.to_string();
|
||||
// Detect context window overflow. Some providers (e.g. OpenAI-compat backends)
|
||||
// return 400 with "no parseable body" instead of a proper context_length_exceeded
|
||||
// error when the request is too large to even parse — treat that as context overflow too.
|
||||
let is_context_window = error_str.contains("context_window")
|
||||
|| error_str.contains("Context window")
|
||||
|| error_str.contains("no parseable body");
|
||||
|
||||
if is_context_window {
|
||||
// A single compaction pass may not free enough context space.
|
||||
// Progressive retry: each round preserves fewer recent messages (4→2→1→0),
|
||||
// trading conversation continuity for a smaller payload until it fits.
|
||||
// Max 4 rounds before giving up and surfacing the error to the user.
|
||||
let max_compact_rounds = 4;
|
||||
let preserve_schedule = [4, 2, 1, 0];
|
||||
|
||||
for round in 0..max_compact_rounds {
|
||||
let preserve = preserve_schedule[round];
|
||||
println!(
|
||||
" Auto-compacting session (round {}/{}, preserving {} recent messages)...",
|
||||
round + 1,
|
||||
max_compact_rounds,
|
||||
preserve
|
||||
);
|
||||
|
||||
// Run Trident pipeline then summary-based compaction
|
||||
let result = runtime::trident::trident_compact_session(
|
||||
runtime.session(),
|
||||
CompactionConfig {
|
||||
preserve_recent_messages: preserve,
|
||||
max_estimated_tokens: 0,
|
||||
},
|
||||
&runtime::trident::TridentConfig::default(),
|
||||
);
|
||||
let removed = result.removed_message_count;
|
||||
|
||||
if removed == 0 && round > 0 {
|
||||
// No more messages to compact — further rounds won't help
|
||||
println!(" No further compaction possible.");
|
||||
break;
|
||||
}
|
||||
|
||||
if removed > 0 {
|
||||
println!(
|
||||
"{}",
|
||||
format_compact_report(
|
||||
removed,
|
||||
result.compacted_session.messages.len(),
|
||||
false
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
// Without this, prepare_turn_runtime() reads from self.runtime.session()
|
||||
// which still holds the ORIGINAL un-compacted session, so every retry round
|
||||
// would send the same bloated request — compaction was wasted.
|
||||
*self.runtime.session_mut() = result.compacted_session.clone();
|
||||
|
||||
// Build a new runtime with the compacted session and retry
|
||||
let (mut new_runtime, hook_abort_monitor) =
|
||||
self.prepare_turn_runtime(true)?;
|
||||
drop(hook_abort_monitor);
|
||||
|
||||
let mut rp = CliPermissionPrompter::new(self.permission_mode);
|
||||
match new_runtime.run_turn(input, Some(&mut rp)) {
|
||||
Ok(summary) => {
|
||||
self.replace_runtime(new_runtime)?;
|
||||
spinner.finish(
|
||||
if round == 0 {
|
||||
"✨ Done (after auto-compact)"
|
||||
} else {
|
||||
"✨ Done (after aggressive auto-compact)"
|
||||
},
|
||||
TerminalRenderer::new().color_theme(),
|
||||
&mut stdout,
|
||||
)?;
|
||||
println!();
|
||||
if let Some(event) = summary.auto_compaction {
|
||||
println!(
|
||||
"{}",
|
||||
format_auto_compaction_notice(event.removed_message_count)
|
||||
);
|
||||
}
|
||||
self.persist_session()?;
|
||||
return Ok(());
|
||||
}
|
||||
Err(retry_error) => {
|
||||
let retry_str = retry_error.to_string();
|
||||
let still_context_window = retry_str.contains("context_window")
|
||||
|| retry_str.contains("Context window")
|
||||
|| retry_str.contains("no parseable body");
|
||||
|
||||
if still_context_window && round + 1 < max_compact_rounds {
|
||||
// The compacted session was still too large for the model's context.
|
||||
// Shut down the old runtime, adopt the partially-compacted one,
|
||||
// and loop — the next round will compact more aggressively.
|
||||
runtime.shutdown_plugins()?;
|
||||
runtime = new_runtime;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Not a context window error, or out of rounds
|
||||
return Err(Box::new(retry_error));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If not a context window error, return original error
|
||||
Err(Box::new(error))
|
||||
}
|
||||
}
|
||||
@@ -5231,14 +5451,20 @@ impl LiveCli {
|
||||
self.handle_plugins_command(action.as_deref(), target.as_deref())?
|
||||
}
|
||||
SlashCommand::Agents { args } => {
|
||||
Self::print_agents(args.as_deref(), CliOutputFormat::Text)?;
|
||||
if let Err(error) = Self::print_agents(args.as_deref(), CliOutputFormat::Text) {
|
||||
eprintln!("{error}");
|
||||
}
|
||||
false
|
||||
}
|
||||
SlashCommand::Skills { args } => {
|
||||
match classify_skills_slash_command(args.as_deref()) {
|
||||
SkillSlashDispatch::Invoke(prompt) => self.run_turn(&prompt)?,
|
||||
SkillSlashDispatch::Local => {
|
||||
Self::print_skills(args.as_deref(), CliOutputFormat::Text)?;
|
||||
if let Err(error) =
|
||||
Self::print_skills(args.as_deref(), CliOutputFormat::Text)
|
||||
{
|
||||
eprintln!("{error}");
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
@@ -5293,7 +5519,8 @@ impl LiveCli {
|
||||
| SlashCommand::Ide { .. }
|
||||
| SlashCommand::Tag { .. }
|
||||
| SlashCommand::OutputStyle { .. }
|
||||
| SlashCommand::AddDir { .. } => {
|
||||
| SlashCommand::AddDir { .. }
|
||||
| SlashCommand::Team { .. } => {
|
||||
let cmd_name = command.slash_name();
|
||||
eprintln!("{cmd_name} is not yet implemented in this build.");
|
||||
false
|
||||
@@ -6058,9 +6285,16 @@ fn latest_managed_session() -> Result<ManagedSessionSummary, Box<dyn std::error:
|
||||
fn load_session_reference(
|
||||
reference: &str,
|
||||
) -> Result<(SessionHandle, Session), Box<dyn std::error::Error>> {
|
||||
let loaded = current_session_store()?
|
||||
.load_session(reference)
|
||||
.map_err(|e| Box::new(e) as Box<dyn std::error::Error>)?;
|
||||
let store = current_session_store()?;
|
||||
// For alias references ("latest", "last", "recent"), allow cross-workspace
|
||||
// resume so /resume latest finds the most recent session globally.
|
||||
// For explicit references, workspace validation is enforced.
|
||||
let result = if runtime::session_control::is_session_reference_alias(reference) {
|
||||
store.load_session_loose(reference)
|
||||
} else {
|
||||
store.load_session(reference)
|
||||
};
|
||||
let loaded = result.map_err(|e| Box::new(e) as Box<dyn std::error::Error>)?;
|
||||
Ok((
|
||||
SessionHandle {
|
||||
id: loaded.handle.id,
|
||||
@@ -6395,6 +6629,8 @@ fn status_json_value(
|
||||
// are still populated). `config_load_error` carries the parse-error string
|
||||
// when present; it's a string rather than a typed object in Phase 1 and
|
||||
// will join the typed-error taxonomy in Phase 2 (ROADMAP §4.44).
|
||||
// `config_load_error_kind` is the machine-readable kind token derived from
|
||||
// `classify_error_kind` so downstream claws can switch on it directly.
|
||||
let degraded = context.config_load_error.is_some();
|
||||
let model_source = provenance.map(|p| p.source.as_str());
|
||||
let model_raw = provenance.and_then(|p| p.raw.clone());
|
||||
@@ -6403,6 +6639,7 @@ fn status_json_value(
|
||||
"kind": "status",
|
||||
"status": if degraded { "degraded" } else { "ok" },
|
||||
"config_load_error": context.config_load_error,
|
||||
"config_load_error_kind": context.config_load_error_kind,
|
||||
"model": model,
|
||||
"model_source": model_source,
|
||||
"model_raw": model_raw,
|
||||
@@ -6488,23 +6725,30 @@ fn status_context(
|
||||
// health surface (workspace, git, model, permission, sandbox can still be
|
||||
// reported independently).
|
||||
let runtime_config = loader.load();
|
||||
let (loaded_config_files, sandbox_status, config_load_error) = match runtime_config.as_ref() {
|
||||
Ok(runtime_config) => (
|
||||
runtime_config.loaded_entries().len(),
|
||||
resolve_sandbox_status(runtime_config.sandbox(), &cwd),
|
||||
None,
|
||||
),
|
||||
Err(err) => (
|
||||
0,
|
||||
// Fall back to defaults for sandbox resolution so claws still see
|
||||
// a populated sandbox section instead of a missing field. Defaults
|
||||
// produce the same output as a runtime config with no sandbox
|
||||
// overrides, which is the right degraded-mode shape: we cannot
|
||||
// report what the user *intended*, only what is actually in effect.
|
||||
resolve_sandbox_status(&runtime::SandboxConfig::default(), &cwd),
|
||||
Some(err.to_string()),
|
||||
),
|
||||
};
|
||||
let (loaded_config_files, sandbox_status, config_load_error, config_load_error_kind) =
|
||||
match runtime_config.as_ref() {
|
||||
Ok(cfg) => (
|
||||
cfg.loaded_entries().len(),
|
||||
resolve_sandbox_status(cfg.sandbox(), &cwd),
|
||||
None,
|
||||
None,
|
||||
),
|
||||
Err(err) => {
|
||||
let err_string = err.to_string();
|
||||
let err_kind = classify_error_kind(&err_string);
|
||||
(
|
||||
0,
|
||||
// Fall back to defaults for sandbox resolution so claws still see
|
||||
// a populated sandbox section instead of a missing field. Defaults
|
||||
// produce the same output as a runtime config with no sandbox
|
||||
// overrides, which is the right degraded-mode shape: we cannot
|
||||
// report what the user *intended*, only what is actually in effect.
|
||||
resolve_sandbox_status(&runtime::SandboxConfig::default(), &cwd),
|
||||
Some(err_string),
|
||||
Some(err_kind),
|
||||
)
|
||||
}
|
||||
};
|
||||
let project_context = ProjectContext::discover_with_git(&cwd, DEFAULT_DATE)?;
|
||||
let (project_root, git_branch) =
|
||||
parse_git_status_metadata(project_context.git_status.as_deref());
|
||||
@@ -6533,6 +6777,7 @@ fn status_context(
|
||||
boot_preflight,
|
||||
sandbox_status,
|
||||
config_load_error,
|
||||
config_load_error_kind,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -6733,8 +6978,23 @@ fn print_sandbox_status_snapshot(
|
||||
}
|
||||
|
||||
fn sandbox_json_value(status: &runtime::SandboxStatus) -> serde_json::Value {
|
||||
// Derive top-level status so automation can do a single field check
|
||||
// instead of combining enabled/active/supported booleans.
|
||||
// ok = not enabled (not requested), OR enabled and active
|
||||
// warn = enabled and supported but not yet active (degraded)
|
||||
// error = enabled but unsupported on this platform
|
||||
let top_status = if !status.enabled {
|
||||
"ok"
|
||||
} else if status.active {
|
||||
"ok"
|
||||
} else if status.supported {
|
||||
"warn"
|
||||
} else {
|
||||
"error"
|
||||
};
|
||||
json!({
|
||||
"kind": "sandbox",
|
||||
"status": top_status,
|
||||
"enabled": status.enabled,
|
||||
"active": status.active,
|
||||
"supported": status.supported,
|
||||
@@ -7225,8 +7485,12 @@ fn run_init(output_format: CliOutputFormat) -> Result<(), Box<dyn std::error::Er
|
||||
/// string so claws can detect per-artifact state without substring matching.
|
||||
fn init_json_value(report: &crate::init::InitReport, message: &str) -> serde_json::Value {
|
||||
use crate::init::InitStatus;
|
||||
// Derive top-level status: "ok" when all artifacts succeeded (created or
|
||||
// skipped = idempotent); no failure path exists today so always "ok".
|
||||
let status = "ok";
|
||||
json!({
|
||||
"kind": "init",
|
||||
"status": status,
|
||||
"project_path": report.project_root.display().to_string(),
|
||||
"created": report.artifacts_with_status(InitStatus::Created),
|
||||
"updated": report.artifacts_with_status(InitStatus::Updated),
|
||||
@@ -8767,6 +9031,8 @@ impl AnthropicRuntimeClient {
|
||||
let mut markdown_stream = MarkdownStreamState::default();
|
||||
let mut events = Vec::new();
|
||||
let mut pending_tool: Option<(String, String, String)> = None;
|
||||
// Accumulate reasoning_content into a Thinking block (fixes the DeepSeek V4 reasoning_content protocol bug)
|
||||
let mut pending_thinking: Option<(String, Option<String>)> = None;
|
||||
let mut block_has_thinking_summary = false;
|
||||
let mut saw_stop = false;
|
||||
let mut received_any_event = false;
|
||||
@@ -8808,6 +9074,14 @@ impl AnthropicRuntimeClient {
|
||||
}
|
||||
}
|
||||
ApiStreamEvent::ContentBlockStart(start) => {
|
||||
// Special-case Thinking blocks: initialize pending_thinking (used to accumulate the ThinkingDelta events that follow)
|
||||
if let OutputContentBlock::Thinking {
|
||||
thinking,
|
||||
signature,
|
||||
} = &start.content_block
|
||||
{
|
||||
pending_thinking = Some((thinking.clone(), signature.clone()));
|
||||
}
|
||||
push_output_block(
|
||||
start.content_block,
|
||||
out,
|
||||
@@ -8836,13 +9110,22 @@ impl AnthropicRuntimeClient {
|
||||
input.push_str(&partial_json);
|
||||
}
|
||||
}
|
||||
ContentBlockDelta::ThinkingDelta { .. } => {
|
||||
ContentBlockDelta::ThinkingDelta { thinking } => {
|
||||
if !block_has_thinking_summary {
|
||||
render_thinking_block_summary(out, None, false)?;
|
||||
block_has_thinking_summary = true;
|
||||
}
|
||||
// Accumulate the thinking text into pending_thinking (so session persistence can pick it up)
|
||||
if let Some((t, _)) = &mut pending_thinking {
|
||||
t.push_str(&thinking);
|
||||
}
|
||||
}
|
||||
ContentBlockDelta::SignatureDelta { signature } => {
|
||||
// Accumulate the signature into pending_thinking
|
||||
if let Some((_, sig)) = &mut pending_thinking {
|
||||
sig.get_or_insert_with(String::new).push_str(&signature);
|
||||
}
|
||||
}
|
||||
ContentBlockDelta::SignatureDelta { .. } => {}
|
||||
},
|
||||
ApiStreamEvent::ContentBlockStop(_) => {
|
||||
block_has_thinking_summary = false;
|
||||
@@ -8851,6 +9134,13 @@ impl AnthropicRuntimeClient {
|
||||
.and_then(|()| out.flush())
|
||||
.map_err(|error| RuntimeError::new(error.to_string()))?;
|
||||
}
|
||||
// Turn the accumulated thinking into AssistantEvent::Thinking (so build_assistant_message writes it to the session)
|
||||
if let Some((thinking, signature)) = pending_thinking.take() {
|
||||
events.push(AssistantEvent::Thinking {
|
||||
thinking,
|
||||
signature,
|
||||
});
|
||||
}
|
||||
if let Some((id, name, input)) = pending_tool.take() {
|
||||
if let Some(progress_reporter) = &self.progress_reporter {
|
||||
progress_reporter.mark_tool_phase(&name, &input);
|
||||
@@ -9987,7 +10277,17 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec<InputMessage> {
|
||||
ContentBlock::Text { text } => {
|
||||
Some(InputContentBlock::Text { text: text.clone() })
|
||||
}
|
||||
ContentBlock::Thinking { .. } => None,
|
||||
ContentBlock::Thinking {
|
||||
thinking,
|
||||
signature,
|
||||
} => {
|
||||
// Keep the Thinking block: the OpenAI-compatible protocol converts it into the reasoning_content field
|
||||
// that is sent back to DeepSeek V4 (avoids the 400 "reasoning_content must be passed back" error)
|
||||
Some(InputContentBlock::Thinking {
|
||||
thinking: thinking.clone(),
|
||||
signature: signature.clone(),
|
||||
})
|
||||
}
|
||||
ContentBlock::ToolUse { id, name, input } => Some(InputContentBlock::ToolUse {
|
||||
id: id.clone(),
|
||||
name: name.clone(),
|
||||
@@ -10308,7 +10608,7 @@ mod tests {
|
||||
#[test]
|
||||
fn context_window_preflight_errors_render_recovery_steps() {
|
||||
let error = ApiError::ContextWindowExceeded {
|
||||
model: "claude-sonnet-4-6".to_string(),
|
||||
model: "anthropic/claude-sonnet-4-6".to_string(),
|
||||
estimated_input_tokens: 182_000,
|
||||
requested_output_tokens: 64_000,
|
||||
estimated_total_tokens: 246_000,
|
||||
@@ -10323,7 +10623,7 @@ mod tests {
|
||||
"{rendered}"
|
||||
);
|
||||
assert!(
|
||||
rendered.contains("Model claude-sonnet-4-6"),
|
||||
rendered.contains("Model anthropic/claude-sonnet-4-6"),
|
||||
"{rendered}"
|
||||
);
|
||||
assert!(
|
||||
@@ -10777,7 +11077,7 @@ mod tests {
|
||||
parse_args(&args).expect("args should parse"),
|
||||
CliAction::Prompt {
|
||||
prompt: "explain this".to_string(),
|
||||
model: "claude-opus-4-6".to_string(),
|
||||
model: "anthropic/claude-opus-4-6".to_string(),
|
||||
output_format: CliOutputFormat::Json,
|
||||
allowed_tools: None,
|
||||
permission_mode: PermissionMode::DangerFullAccess,
|
||||
@@ -10851,7 +11151,7 @@ mod tests {
|
||||
parse_args(&args).expect("args should parse"),
|
||||
CliAction::Prompt {
|
||||
prompt: "explain this".to_string(),
|
||||
model: "claude-opus-4-6".to_string(),
|
||||
model: "anthropic/claude-opus-4-6".to_string(),
|
||||
output_format: CliOutputFormat::Text,
|
||||
allowed_tools: None,
|
||||
permission_mode: PermissionMode::DangerFullAccess,
|
||||
@@ -10865,9 +11165,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn resolves_known_model_aliases() {
|
||||
assert_eq!(resolve_model_alias("opus"), "claude-opus-4-6");
|
||||
assert_eq!(resolve_model_alias("sonnet"), "claude-sonnet-4-6");
|
||||
assert_eq!(resolve_model_alias("haiku"), "claude-haiku-4-5-20251213");
|
||||
assert_eq!(resolve_model_alias("opus"), "anthropic/claude-opus-4-6");
|
||||
assert_eq!(resolve_model_alias("sonnet"), "anthropic/claude-sonnet-4-6");
|
||||
assert_eq!(
|
||||
resolve_model_alias("haiku"),
|
||||
"anthropic/claude-haiku-4-5-20251213"
|
||||
);
|
||||
assert_eq!(resolve_model_alias("claude-opus"), "claude-opus");
|
||||
}
|
||||
|
||||
@@ -10882,7 +11185,7 @@ mod tests {
|
||||
std::fs::create_dir_all(&config_home).expect("config home should exist");
|
||||
std::fs::write(
|
||||
cwd.join(".claw").join("settings.json"),
|
||||
r#"{"aliases":{"fast":"claude-haiku-4-5-20251213","smart":"opus","cheap":"grok-3-mini"}}"#,
|
||||
r#"{"aliases":{"fast":"anthropic/claude-haiku-4-5-20251213","smart":"opus","cheap":"grok-3-mini"}}"#,
|
||||
)
|
||||
.expect("project config should write");
|
||||
|
||||
@@ -10903,11 +11206,11 @@ mod tests {
|
||||
std::fs::remove_dir_all(root).expect("temp config root should clean up");
|
||||
|
||||
// then
|
||||
assert_eq!(direct, "claude-haiku-4-5-20251213");
|
||||
assert_eq!(chained, "claude-opus-4-6");
|
||||
assert_eq!(direct, "anthropic/claude-haiku-4-5-20251213");
|
||||
assert_eq!(chained, "anthropic/claude-opus-4-6");
|
||||
assert_eq!(cross_provider, "grok-3-mini");
|
||||
assert_eq!(unknown, "unknown-model");
|
||||
assert_eq!(builtin, "claude-haiku-4-5-20251213");
|
||||
assert_eq!(builtin, "anthropic/claude-haiku-4-5-20251213");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -11341,7 +11644,10 @@ mod tests {
|
||||
model_flag_raw,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(model, "claude-sonnet-4-6", "sonnet alias should resolve");
|
||||
assert_eq!(
|
||||
model, "anthropic/claude-sonnet-4-6",
|
||||
"sonnet alias should resolve"
|
||||
);
|
||||
assert_eq!(
|
||||
model_flag_raw.as_deref(),
|
||||
Some("sonnet"),
|
||||
@@ -11975,6 +12281,18 @@ mod tests {
|
||||
classify_error_kind("api failed after 3 attempts: ..."),
|
||||
"api_http_error"
|
||||
);
|
||||
assert_eq!(
|
||||
classify_error_kind("/tmp/settings.json: mcpServers.foo: expected JSON object"),
|
||||
"malformed_mcp_config"
|
||||
);
|
||||
assert_eq!(
|
||||
classify_error_kind("settings.json: mcpServers: field must be an object"),
|
||||
"malformed_mcp_config"
|
||||
);
|
||||
assert_eq!(
|
||||
classify_error_kind("empty prompt: provide a subcommand or a non-empty prompt string"),
|
||||
"empty_prompt"
|
||||
);
|
||||
assert_eq!(
|
||||
classify_error_kind("something completely unknown"),
|
||||
"unknown"
|
||||
@@ -12313,7 +12631,7 @@ mod tests {
|
||||
.expect("prompt shorthand should still work"),
|
||||
CliAction::Prompt {
|
||||
prompt: "please debug this".to_string(),
|
||||
model: "claude-opus-4-6".to_string(),
|
||||
model: "anthropic/claude-opus-4-6".to_string(),
|
||||
output_format: CliOutputFormat::Text,
|
||||
allowed_tools: None,
|
||||
permission_mode: crate::default_permission_mode(),
|
||||
@@ -12476,6 +12794,23 @@ mod tests {
|
||||
assert!(error.contains("skills"));
|
||||
}
|
||||
|
||||
/// Every unsupported `skills` sub-action must be rejected with a message that
/// mentions "unsupported skills action" and that `classify_error_kind` maps to
/// the `unsupported_skills_action` kind token.
#[test]
fn unsupported_skills_actions_return_typed_error_683() {
    for action in ["remove", "add", "uninstall", "delete"] {
        // let-else keeps the failure message lazy: it is only formatted when
        // parsing unexpectedly succeeds (avoids clippy::expect_fun_call).
        let Err(error) = parse_args(&["skills".to_string(), action.to_string()]) else {
            panic!("skills {action} should error");
        };
        assert!(
            error.contains("unsupported skills action"),
            "skills {action} should contain 'unsupported skills action', got: {error}"
        );
        assert_eq!(
            classify_error_kind(&error),
            "unsupported_skills_action",
            "skills {action} should classify as unsupported_skills_action, got: {error}"
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn typoed_status_subcommand_returns_did_you_mean_error() {
|
||||
let error = parse_args(&["statuss".to_string()]).expect_err("statuss should error");
|
||||
@@ -12815,7 +13150,7 @@ mod tests {
|
||||
vec!["session-old".to_string()],
|
||||
);
|
||||
|
||||
assert!(completions.contains(&"/model claude-sonnet-4-6".to_string()));
|
||||
assert!(completions.contains(&"/model anthropic/claude-sonnet-4-6".to_string()));
|
||||
assert!(completions.contains(&"/permissions workspace-write".to_string()));
|
||||
assert!(completions.contains(&"/session list".to_string()));
|
||||
assert!(completions.contains(&"/session switch session-current".to_string()));
|
||||
@@ -12834,7 +13169,7 @@ mod tests {
|
||||
|
||||
let banner = with_current_dir(&root, || {
|
||||
LiveCli::new(
|
||||
"claude-sonnet-4-6".to_string(),
|
||||
"anthropic/claude-sonnet-4-6".to_string(),
|
||||
true,
|
||||
None,
|
||||
PermissionMode::DangerFullAccess,
|
||||
@@ -12852,11 +13187,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn format_connected_line_renders_anthropic_provider_for_claude_model() {
|
||||
let model = "claude-sonnet-4-6";
|
||||
let model = "anthropic/claude-sonnet-4-6";
|
||||
|
||||
let line = format_connected_line(model);
|
||||
|
||||
assert_eq!(line, "Connected: claude-sonnet-4-6 via anthropic");
|
||||
assert_eq!(line, "Connected: anthropic/claude-sonnet-4-6 via anthropic");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -12870,11 +13205,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn resolve_repl_model_returns_user_supplied_model_unchanged_when_explicit() {
|
||||
let user_model = "claude-sonnet-4-6".to_string();
|
||||
let user_model = "anthropic/claude-sonnet-4-6".to_string();
|
||||
|
||||
let resolved = resolve_repl_model(user_model);
|
||||
|
||||
assert_eq!(resolved, "claude-sonnet-4-6");
|
||||
assert_eq!(resolved, "anthropic/claude-sonnet-4-6");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -12890,7 +13225,7 @@ mod tests {
|
||||
|
||||
let resolved = with_current_dir(&root, || resolve_repl_model(DEFAULT_MODEL.to_string()));
|
||||
|
||||
assert_eq!(resolved, "claude-sonnet-4-6");
|
||||
assert_eq!(resolved, "anthropic/claude-sonnet-4-6");
|
||||
|
||||
std::env::remove_var("ANTHROPIC_MODEL");
|
||||
std::env::remove_var("CLAW_CONFIG_HOME");
|
||||
@@ -13126,6 +13461,7 @@ mod tests {
|
||||
boot_preflight: test_boot_preflight(),
|
||||
sandbox_status: runtime::SandboxStatus::default(),
|
||||
config_load_error: None,
|
||||
config_load_error_kind: None,
|
||||
},
|
||||
None, // #148
|
||||
);
|
||||
@@ -13270,6 +13606,7 @@ mod tests {
|
||||
boot_preflight: test_boot_preflight(),
|
||||
sandbox_status: runtime::SandboxStatus::default(),
|
||||
config_load_error: None,
|
||||
config_load_error_kind: None,
|
||||
};
|
||||
|
||||
let check = super::check_workspace_health(&context);
|
||||
@@ -13307,6 +13644,7 @@ mod tests {
|
||||
boot_preflight: test_boot_preflight(),
|
||||
sandbox_status: runtime::SandboxStatus::default(),
|
||||
config_load_error: None,
|
||||
config_load_error_kind: None,
|
||||
};
|
||||
|
||||
let value = status_json_value(
|
||||
@@ -14380,7 +14718,7 @@ UU conflicted.rs",
|
||||
MessageResponse {
|
||||
id: "msg-1".to_string(),
|
||||
kind: "message".to_string(),
|
||||
model: "claude-opus-4-6".to_string(),
|
||||
model: "anthropic/claude-opus-4-6".to_string(),
|
||||
role: "assistant".to_string(),
|
||||
content: vec![OutputContentBlock::ToolUse {
|
||||
id: "tool-1".to_string(),
|
||||
@@ -14415,7 +14753,7 @@ UU conflicted.rs",
|
||||
MessageResponse {
|
||||
id: "msg-2".to_string(),
|
||||
kind: "message".to_string(),
|
||||
model: "claude-opus-4-6".to_string(),
|
||||
model: "anthropic/claude-opus-4-6".to_string(),
|
||||
role: "assistant".to_string(),
|
||||
content: vec![OutputContentBlock::ToolUse {
|
||||
id: "tool-2".to_string(),
|
||||
@@ -14450,7 +14788,7 @@ UU conflicted.rs",
|
||||
MessageResponse {
|
||||
id: "msg-3".to_string(),
|
||||
kind: "message".to_string(),
|
||||
model: "claude-opus-4-6".to_string(),
|
||||
model: "anthropic/claude-opus-4-6".to_string(),
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
OutputContentBlock::Thinking {
|
||||
@@ -15055,3 +15393,55 @@ mod dump_manifests_tests {
|
||||
let _ = fs::remove_dir_all(&root);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod alias_resolution_tests {
    use super::{resolve_model_alias_with_config, validate_model_syntax};

    /// Each built-in alias expands to its fully qualified `provider/model` id.
    #[test]
    fn test_alias_resolution_builtin() {
        let expectations = [
            ("opus", "anthropic/claude-opus-4-6"),
            ("sonnet", "anthropic/claude-sonnet-4-6"),
            ("haiku", "anthropic/claude-haiku-4-5-20251213"),
        ];
        for &(alias, full_id) in &expectations {
            assert_eq!(resolve_model_alias_with_config(alias), full_id);
        }
    }

    /// Aliases must be resolved before validation: the expanded id is accepted,
    /// while the bare alias is rejected by the syntax check.
    #[test]
    fn test_alias_resolution_syntax_validation() {
        let expanded = resolve_model_alias_with_config("opus");
        assert!(validate_model_syntax(&expanded).is_ok());

        assert!(validate_model_syntax("opus").is_err());
    }

    /// An alias nobody defined is returned untouched and then fails validation
    /// with an "invalid model syntax" message.
    #[test]
    fn test_unknown_alias_fails_validation() {
        let passthrough = resolve_model_alias_with_config("unknown-alias");
        assert_eq!(passthrough, "unknown-alias");

        let outcome = validate_model_syntax(&passthrough);
        assert!(outcome.is_err());
        let message = outcome.unwrap_err();
        assert!(message.contains("invalid model syntax"));
    }

    /// A literal `provider/model` string is neither rewritten nor rejected.
    #[test]
    fn test_direct_provider_model_passes() {
        let literal = "openai/gpt-4o";
        assert_eq!(resolve_model_alias_with_config(literal), literal);
        assert!(validate_model_syntax(literal).is_ok());
    }
}
|
||||
|
||||
287
rust/crates/rusty-claude-cli/src/setup_wizard.rs
Normal file
287
rust/crates/rusty-claude-cli/src/setup_wizard.rs
Normal file
@@ -0,0 +1,287 @@
|
||||
use std::io::{self, IsTerminal, Write};
|
||||
|
||||
use runtime::{save_user_provider_settings, ConfigLoader, RuntimeProviderConfig};
|
||||
|
||||
use serde_json;
|
||||
|
||||
/// Provider menu rendered by `prompt_provider`: `(menu number, display label, provider kind)`.
///
/// Entry 5 ("Custom (OpenAI-compat)") reuses the `"openai"` kind, so kind-based
/// lookups (the "(current)" marker, the default selection) match entry 3 as well.
const PROVIDERS: &[(&str, &str, &str)] = &[
|
||||
("1", "Anthropic", "anthropic"),
|
||||
("2", "xAI / Grok", "xai"),
|
||||
("3", "OpenAI", "openai"),
|
||||
("4", "DashScope (Qwen/Kimi)", "dashscope"),
|
||||
("5", "Custom (OpenAI-compat)", "openai"),
|
||||
];
|
||||
|
||||
/// Suggested model names per provider kind, printed as "Common:" by `prompt_model`.
/// The first entry of a provider's list is the default offered when no model is configured.
const PROVIDER_MODELS: &[(&str, &[&str])] = &[
|
||||
("anthropic", &["opus", "sonnet", "haiku"]),
|
||||
("xai", &["grok", "grok-mini", "grok-2"]),
|
||||
("openai", &["gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]),
|
||||
("dashscope", &["qwen-plus", "qwen-max", "kimi"]),
|
||||
];
|
||||
|
||||
/// Default API base URL per provider kind. `prompt_base_url` shows it as the
/// fallback and returns `None` (nothing to store) when the chosen URL equals it.
const DEFAULT_BASE_URLS: &[(&str, &str)] = &[
|
||||
("anthropic", "https://api.anthropic.com"),
|
||||
("xai", "https://api.x.ai/v1"),
|
||||
("openai", "https://api.openai.com/v1"),
|
||||
("dashscope", "https://dashscope.aliyuncs.com/compatible-mode/v1"),
|
||||
];
|
||||
|
||||
/// Name of the environment variable that carries the API key for each provider kind.
/// `prompt_api_key` falls back to the generic name `API_KEY` for kinds not listed here.
const API_KEY_ENV_VARS: &[(&str, &str)] = &[
|
||||
("anthropic", "ANTHROPIC_API_KEY"),
|
||||
("xai", "XAI_API_KEY"),
|
||||
("openai", "OPENAI_API_KEY"),
|
||||
("dashscope", "DASHSCOPE_API_KEY"),
|
||||
];
|
||||
|
||||
pub fn run_setup_wizard() -> Result<(), Box<dyn std::error::Error>> {
|
||||
if !io::stdin().is_terminal() {
|
||||
return Err("setup wizard requires an interactive terminal".into());
|
||||
}
|
||||
|
||||
let current = load_current_provider_config();
|
||||
|
||||
println!();
|
||||
println!(" \x1b[1mClaw Code Setup Wizard\x1b[0m");
|
||||
println!(" Configure your provider, API key, and model.");
|
||||
println!(" Press Enter to keep current value.\n");
|
||||
|
||||
let kind = prompt_provider(¤t)?;
|
||||
let api_key = prompt_api_key(&kind, ¤t)?;
|
||||
let base_url = prompt_base_url(&kind, ¤t)?;
|
||||
let model = prompt_model(&kind, ¤t)?;
|
||||
let fast_model = prompt_fast_model(¤t, model.as_deref())?;
|
||||
|
||||
save_user_provider_settings(
|
||||
&kind,
|
||||
&api_key,
|
||||
base_url.as_deref(),
|
||||
model.as_deref(),
|
||||
)?;
|
||||
|
||||
if let Some(fast) = &fast_model {
|
||||
save_settings_field("subagentModel", fast)?;
|
||||
}
|
||||
|
||||
println!();
|
||||
println!(" \x1b[32mProvider saved to ~/.claw/settings.json\x1b[0m");
|
||||
println!(" Run \x1b[1m/model {}\x1b[0m or restart claw to activate.", model.as_deref().unwrap_or(&kind));
|
||||
println!();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn load_current_provider_config() -> RuntimeProviderConfig {
|
||||
let cwd = std::env::current_dir().unwrap_or_default();
|
||||
ConfigLoader::default_for(&cwd)
|
||||
.load()
|
||||
.map(|c| c.provider().clone())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn prompt_provider(current: &RuntimeProviderConfig) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let current_kind = current.kind().unwrap_or("anthropic");
|
||||
println!(" \x1b[1mProvider\x1b[0m");
|
||||
for (num, label, kind) in PROVIDERS {
|
||||
let marker = if *kind == current_kind { " (current)" } else { "" };
|
||||
println!(" [{num}] {label}{marker}");
|
||||
}
|
||||
let default = PROVIDERS
|
||||
.iter()
|
||||
.position(|(_, _, k)| *k == current_kind)
|
||||
.map_or_else(|| "1".to_string(), |i| (i + 1).to_string());
|
||||
|
||||
let input = read_line(&format!(" Select provider [{default}]: "))?;
|
||||
let choice = if input.trim().is_empty() {
|
||||
default
|
||||
} else {
|
||||
input.trim().to_string()
|
||||
};
|
||||
|
||||
let kind = PROVIDERS
|
||||
.iter()
|
||||
.find(|(num, _, _)| *num == choice)
|
||||
.map(|(_, _, kind)| *kind)
|
||||
.ok_or_else(|| format!("invalid provider choice: {choice}"))?;
|
||||
|
||||
Ok(kind.to_string())
|
||||
}
|
||||
|
||||
fn prompt_api_key(
|
||||
kind: &str,
|
||||
current: &RuntimeProviderConfig,
|
||||
) -> Result<String, Box<dyn std::error::Error>> {
|
||||
let env_var = API_KEY_ENV_VARS
|
||||
.iter()
|
||||
.find(|(k, _)| *k == kind)
|
||||
.map_or("API_KEY", |(_, v)| *v);
|
||||
|
||||
let current_key = current.api_key();
|
||||
let hint = match current_key {
|
||||
Some(key) if !key.is_empty() => {
|
||||
let masked = if key.len() > 4 {
|
||||
format!("****{}", &key[key.len() - 4..])
|
||||
} else {
|
||||
"****".to_string()
|
||||
};
|
||||
format!("[{masked}]")
|
||||
}
|
||||
_ => "(none)".to_string(),
|
||||
};
|
||||
|
||||
// Check if env var is already set
|
||||
let env_set = std::env::var(env_var)
|
||||
.ok()
|
||||
.is_some_and(|v| !v.is_empty());
|
||||
if env_set {
|
||||
println!(" {env_var} is set in environment (will take priority over stored key)");
|
||||
}
|
||||
|
||||
let input = read_line(&format!(" API key ({env_var}) {hint}: "))?;
|
||||
let key = if input.trim().is_empty() {
|
||||
current_key.unwrap_or("").to_string()
|
||||
} else {
|
||||
input.trim().to_string()
|
||||
};
|
||||
|
||||
if key.is_empty() && !env_set {
|
||||
eprintln!(" \x1b[33mWarning: no API key configured. Set {env_var} or re-run setup.\x1b[0m");
|
||||
}
|
||||
|
||||
Ok(key)
|
||||
}
|
||||
|
||||
fn prompt_base_url(
|
||||
kind: &str,
|
||||
current: &RuntimeProviderConfig,
|
||||
) -> Result<Option<String>, Box<dyn std::error::Error>> {
|
||||
let default_url = DEFAULT_BASE_URLS
|
||||
.iter()
|
||||
.find(|(k, _)| *k == kind)
|
||||
.map_or("", |(_, v)| *v);
|
||||
|
||||
let current_url = current.base_url().unwrap_or(default_url);
|
||||
let display = if current_url.is_empty() {
|
||||
default_url.to_string()
|
||||
} else {
|
||||
current_url.to_string()
|
||||
};
|
||||
|
||||
// Check if the relevant env var is already set
|
||||
let env_var = match kind {
|
||||
"anthropic" => "ANTHROPIC_BASE_URL",
|
||||
"xai" => "XAI_BASE_URL",
|
||||
"openai" => "OPENAI_BASE_URL",
|
||||
"dashscope" => "DASHSCOPE_BASE_URL",
|
||||
_ => "BASE_URL",
|
||||
};
|
||||
let env_set = std::env::var(env_var)
|
||||
.ok()
|
||||
.is_some_and(|v| !v.is_empty());
|
||||
if env_set {
|
||||
println!(" {env_var} is set in environment (will take priority over stored URL)");
|
||||
}
|
||||
|
||||
let input = read_line(&format!(" Base URL [{display}]: "))?;
|
||||
if input.trim().is_empty() {
|
||||
if current_url == default_url || current_url.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(current_url.to_string()))
|
||||
}
|
||||
} else {
|
||||
Ok(Some(input.trim().to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
fn prompt_model(
|
||||
kind: &str,
|
||||
current: &RuntimeProviderConfig,
|
||||
) -> Result<Option<String>, Box<dyn std::error::Error>> {
|
||||
let empty: &[&str] = &[];
|
||||
let aliases = PROVIDER_MODELS
|
||||
.iter()
|
||||
.find(|(k, _)| *k == kind)
|
||||
.map_or(empty, |(_, models)| *models);
|
||||
|
||||
let current_model = current.model().unwrap_or(aliases.first().copied().unwrap_or(""));
|
||||
|
||||
println!(" \x1b[1mModel\x1b[0m");
|
||||
if !aliases.is_empty() {
|
||||
println!(" Common: {}", aliases.join(", "));
|
||||
}
|
||||
println!(" Or enter any model name (e.g. openai/gpt-4.1-mini for custom routing)");
|
||||
|
||||
let input = read_line(&format!(" Model [{current_model}]: "))?;
|
||||
if input.trim().is_empty() {
|
||||
if current_model.is_empty() {
|
||||
Ok(None)
|
||||
} else {
|
||||
Ok(Some(current_model.to_string()))
|
||||
}
|
||||
} else {
|
||||
Ok(Some(input.trim().to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
fn prompt_fast_model(
|
||||
current: &RuntimeProviderConfig,
|
||||
main_model: Option<&str>,
|
||||
) -> Result<Option<String>, Box<dyn std::error::Error>> {
|
||||
println!();
|
||||
println!(" \x1b[1mFast Model (for Agent subtasks)\x1b[0m");
|
||||
println!(" A smaller/cheaper model used by the Agent tool when spawning");
|
||||
println!(" Explore, Plan, or Verification sub-agents. This saves tokens");
|
||||
println!(" by using a fast model for information-gathering tasks.");
|
||||
println!(" Press Enter to skip (agents will use your main model).");
|
||||
|
||||
let current_fast = load_current_settings_field("subagentModel");
|
||||
let default_hint = current_fast
|
||||
.as_deref()
|
||||
.or(main_model)
|
||||
.unwrap_or("");
|
||||
|
||||
let input = read_line(&format!(" Fast model [{}]: ", if default_hint.is_empty() { "same as main" } else { default_hint }))?;
|
||||
if input.trim().is_empty() {
|
||||
Ok(current_fast)
|
||||
} else {
|
||||
Ok(Some(input.trim().to_string()))
|
||||
}
|
||||
}
|
||||
|
||||
fn load_current_settings_field(field: &str) -> Option<String> {
|
||||
let home = std::env::var("HOME").ok()?;
|
||||
let settings_path = std::path::Path::new(&home).join(".claw/settings.json");
|
||||
let content = std::fs::read_to_string(&settings_path).ok()?;
|
||||
let json: serde_json::Value = serde_json::from_str(&content).ok()?;
|
||||
json.get(field)?.as_str().map(|s| s.to_string())
|
||||
}
|
||||
|
||||
fn save_settings_field(field: &str, value: &str) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let home = std::env::var("HOME")?;
|
||||
let settings_dir = std::path::Path::new(&home).join(".claw");
|
||||
let settings_path = settings_dir.join("settings.json");
|
||||
|
||||
let mut settings: serde_json::Value = if settings_path.exists() {
|
||||
let content = std::fs::read_to_string(&settings_path)?;
|
||||
serde_json::from_str(&content)?
|
||||
} else {
|
||||
serde_json::json!({})
|
||||
};
|
||||
|
||||
if let Some(obj) = settings.as_object_mut() {
|
||||
obj.insert(field.to_string(), serde_json::Value::String(value.to_string()));
|
||||
}
|
||||
|
||||
std::fs::create_dir_all(&settings_dir)?;
|
||||
std::fs::write(&settings_path, serde_json::to_string_pretty(&settings)?)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes `prompt` to stdout, flushes it, and returns the next line read from
/// stdin exactly as received (including any trailing newline).
///
/// # Errors
///
/// Fails when stdout cannot be written or flushed, or stdin cannot be read.
fn read_line(prompt: &str) -> Result<String, Box<dyn std::error::Error>> {
    {
        // Flush so the prompt is visible before blocking on input.
        let mut out = io::stdout();
        out.write_all(prompt.as_bytes())?;
        out.flush()?;
    }

    let mut line = String::new();
    io::stdin().read_line(&mut line)?;
    Ok(line)
}
|
||||
@@ -31,7 +31,7 @@ fn status_command_applies_model_and_permission_mode_flags() {
|
||||
assert_success(&output);
|
||||
let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8");
|
||||
assert!(stdout.contains("Status"));
|
||||
assert!(stdout.contains("Model claude-sonnet-4-6"));
|
||||
assert!(stdout.contains("Model anthropic/claude-sonnet-4-6"));
|
||||
assert!(stdout.contains("Permission mode read-only"));
|
||||
|
||||
fs::remove_dir_all(temp_dir).expect("cleanup temp dir");
|
||||
|
||||
@@ -239,7 +239,7 @@ stderr:
|
||||
"Mock streaming says hello from the parity harness."
|
||||
);
|
||||
assert_eq!(parsed["compact"], true);
|
||||
assert_eq!(parsed["model"], "claude-sonnet-4-6");
|
||||
assert_eq!(parsed["model"], "anthropic/claude-sonnet-4-6");
|
||||
assert!(parsed["usage"].is_object());
|
||||
|
||||
fs::remove_dir_all(&workspace).expect("workspace cleanup should succeed");
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::fs;
|
||||
use std::io::Write;
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Output, Stdio};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
@@ -426,11 +426,15 @@ fn prepare_plugin_fixture(workspace: &HarnessWorkspace) {
|
||||
"#!/bin/sh\nINPUT=$(cat)\nprintf '{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n",
|
||||
)
|
||||
.expect("plugin script should write");
|
||||
let mut permissions = fs::metadata(&script_path)
|
||||
.expect("plugin script metadata")
|
||||
.permissions();
|
||||
permissions.set_mode(0o755);
|
||||
fs::set_permissions(&script_path, permissions).expect("plugin script should be executable");
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let mut permissions = fs::metadata(&script_path)
|
||||
.expect("plugin script metadata")
|
||||
.permissions();
|
||||
permissions.set_mode(0o755);
|
||||
fs::set_permissions(&script_path, permissions).expect("plugin script should be executable");
|
||||
}
|
||||
|
||||
fs::write(
|
||||
manifest_dir.join("plugin.json"),
|
||||
|
||||
@@ -108,7 +108,7 @@ fn status_command_applies_cli_flags_end_to_end() {
|
||||
|
||||
let stdout = String::from_utf8(output.stdout).expect("stdout should be utf8");
|
||||
assert!(stdout.contains("Status"));
|
||||
assert!(stdout.contains("Model claude-sonnet-4-6"));
|
||||
assert!(stdout.contains("Model anthropic/claude-sonnet-4-6"));
|
||||
assert!(stdout.contains("Permission mode read-only"));
|
||||
}
|
||||
|
||||
@@ -289,7 +289,7 @@ fn resumed_status_surfaces_persisted_model() {
|
||||
let session_path = temp_dir.join("session.jsonl");
|
||||
|
||||
let mut session = workspace_session(&temp_dir);
|
||||
session.model = Some("claude-sonnet-4-6".to_string());
|
||||
session.model = Some("anthropic/claude-sonnet-4-6".to_string());
|
||||
session
|
||||
.push_user_text("model persistence fixture")
|
||||
.expect("write ok");
|
||||
@@ -317,7 +317,7 @@ fn resumed_status_surfaces_persisted_model() {
|
||||
let parsed: Value = serde_json::from_str(stdout.trim()).expect("should be json");
|
||||
assert_eq!(parsed["kind"], "status");
|
||||
assert_eq!(
|
||||
parsed["model"], "claude-sonnet-4-6",
|
||||
parsed["model"], "anthropic/claude-sonnet-4-6",
|
||||
"model should round-trip through session metadata"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -15,6 +15,10 @@ reqwest = { version = "0.12", default-features = false, features = ["blocking",
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json.workspace = true
|
||||
tokio = { version = "1", features = ["rt-multi-thread"] }
|
||||
aspect-core = "0.1"
|
||||
aspect-macros = "0.1"
|
||||
aspect-std = "0.1"
|
||||
log = "0.4"
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
157
rust/crates/tools/GIT_TOOLS_README.md
Normal file
157
rust/crates/tools/GIT_TOOLS_README.md
Normal file
@@ -0,0 +1,157 @@
|
||||
# Git-Aware Context Tools
|
||||
|
||||
Adds five native git tools to claw-code that provide structured, read-only access to repository state. These replace ad-hoc `git` commands via bash with purpose-built tool definitions the model can discover and invoke directly.
|
||||
|
||||
## Tools
|
||||
|
||||
### GitStatus
|
||||
|
||||
Show the working tree status (branch, staged, unstaged, untracked). Equivalent to `git status --short --branch`.
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| `short` | boolean | no | `true` | Use `--short --branch` format for concise output |
|
||||
|
||||
**Example input:**
|
||||
```json
|
||||
{}
|
||||
```
|
||||
|
||||
**Example output:**
|
||||
```json
|
||||
{
|
||||
"output": "## feat/git-aware-tools...upstream/main [ahead 1]\nM rust/crates/tools/src/lib.rs"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### GitDiff
|
||||
|
||||
Show changes between commits, the index, and the working tree. Supports staged changes, specific paths, commit ranges, and comparing two commits.
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| `staged` | boolean | no | `false` | Show staged changes (`git diff --cached`) |
|
||||
| `commit` | string | no | — | Commit hash, tag, or branch to diff against |
|
||||
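| `commit2` | string | no | — | Second commit, passed to git as `commit...commit2` (three-dot form: changes on `commit2` since its merge base with `commit`). Only used together with `commit` |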
|
||||
| `path` | string | no | — | File path to restrict the diff to |
|
||||
|
||||
**Example inputs:**
|
||||
```json
|
||||
{}
|
||||
```
|
||||
```json
|
||||
{ "staged": true }
|
||||
```
|
||||
```json
|
||||
{ "commit": "HEAD~3", "path": "rust/crates/tools/src/lib.rs" }
|
||||
```
|
||||
```json
|
||||
{ "commit": "main", "commit2": "feat/git-aware-tools" }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### GitLog
|
||||
|
||||
Show commit history. Supports limiting count, filtering by author/date/path, and oneline format.
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| `count` | integer | no | `20` | Maximum number of commits to return |
|
||||
| `oneline` | boolean | no | `false` | Use `--oneline` format (hash + subject only) |
|
||||
| `author` | string | no | — | Filter commits by author pattern |
|
||||
| `since` | string | no | — | Filter commits since date (e.g. `"2024-01-01"` or `"2.weeks"`) |
|
||||
| `until` | string | no | — | Filter commits until date |
|
||||
| `path` | string | no | — | File or directory path to filter commits by |
|
||||
|
||||
**Example inputs:**
|
||||
```json
|
||||
{ "count": 5, "oneline": true }
|
||||
```
|
||||
```json
|
||||
{ "author": "alice", "since": "1.week", "path": "src/main.rs" }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### GitShow
|
||||
|
||||
Show a commit, tag, or tree object with its diff. Supports showing a specific file at a commit and stat-only mode.
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| `commit` | string | **yes** | — | Commit hash, tag, or branch ref to show |
|
||||
| `path` | string | no | — | Show only this file at the given commit (`commit:path` syntax) |
|
||||
| `stat` | boolean | no | `false` | Show diffstat summary instead of full diff |
|
||||
|
||||
**Example inputs:**
|
||||
```json
|
||||
{ "commit": "HEAD" }
|
||||
```
|
||||
```json
|
||||
{ "commit": "abc1234", "stat": true }
|
||||
```
|
||||
```json
|
||||
{ "commit": "main", "path": "src/lib.rs" }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### GitBlame
|
||||
|
||||
Show what revision and author last modified each line of a file. Supports line range filtering.
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| `path` | string | **yes** | — | File path to blame |
|
||||
| `start_line` | integer | no | — | Start of line range (1-based); the range is applied only when `end_line` is also set |
|
||||
| `end_line` | integer | no | — | End of line range (1-based); the range is applied only when `start_line` is also set |
|
||||
|
||||
**Example inputs:**
|
||||
```json
|
||||
{ "path": "src/main.rs" }
|
||||
```
|
||||
```json
|
||||
{ "path": "src/main.rs", "start_line": 100, "end_line": 150 }
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
All five tools follow the same pattern:
|
||||
|
||||
1. **ToolSpec** — Defines the tool name, description, JSON input schema, and `PermissionMode::ReadOnly`
|
||||
2. **Input struct** — Derives `Deserialize` with `#[serde(default)]` on optional fields
|
||||
3. **Run function** — Builds git arguments, calls `git_stdout()`, wraps result in JSON via `to_pretty_json()`
|
||||
4. **Dispatch** — Matched in `execute_tool_with_enforcer()` like all other tools
|
||||
|
||||
The existing `git_stdout(args: &[&str]) -> Option<String>` helper (at `tools/src/lib.rs`) handles running the `git` subprocess and returning trimmed stdout. Git tools simply construct the right arguments and delegate to this helper.
|
||||
|
||||
## Why native git tools?
|
||||
|
||||
Before this PR, the model had to use the `bash` tool for git operations, which has several drawbacks:
|
||||
|
||||
- **No structured output** — Bash returns raw text that the model must parse
|
||||
- **Over-permissioned** — Bash requires `DangerFullAccess` even for read-only git commands
|
||||
- **No discoverability** — The model can't search for git-capable tools via `ToolSearch`
|
||||
- **Inconsistent** — Each invocation may use different flags or formatting
|
||||
|
||||
With native git tools:
|
||||
|
||||
- All five are `ReadOnly` — safe in restricted permission modes
|
||||
- Structured JSON output — consistent, parseable results
|
||||
- Discoverable via `ToolSearch` with keywords like "git", "diff", "blame"
|
||||
- Model-friendly descriptions explain when to use each tool vs bash
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
cd rust
|
||||
cargo build --release
|
||||
cargo test -p tools
|
||||
```
|
||||
|
||||
The 3 pre-existing test failures (agent_fake_runner, agent_persists_handoff, worker_create_merges_config) are unrelated to this change — they fail due to local settings.json incompatibilities.
|
||||
@@ -3,6 +3,9 @@ use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use aspect_macros::aspect;
|
||||
use aspect_std::LoggingAspect;
|
||||
|
||||
use api::{
|
||||
max_tokens_for_model, model_family_identity_for, resolve_model_alias, ApiError,
|
||||
ContentBlockDelta, InputContentBlock, InputMessage, MessageRequest, MessageResponse,
|
||||
@@ -1176,6 +1179,80 @@ pub fn mvp_tool_specs() -> Vec<ToolSpec> {
|
||||
}),
|
||||
required_permission: PermissionMode::DangerFullAccess,
|
||||
},
|
||||
ToolSpec {
|
||||
name: "GitStatus",
|
||||
description: "Show the working tree status (branch, staged, unstaged, untracked). Equivalent to 'git status --short --branch'. Use this instead of running git status via bash to get structured, parseable output.",
|
||||
input_schema: json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"short": { "type": "boolean" }
|
||||
},
|
||||
"additionalProperties": false
|
||||
}),
|
||||
required_permission: PermissionMode::ReadOnly,
|
||||
},
|
||||
ToolSpec {
|
||||
name: "GitDiff",
|
||||
description: "Show changes between commits, the index, and the working tree. Supports staged changes ('git diff --cached'), specific paths, commit ranges, and comparing two commits. Use this instead of running git diff via bash to get structured output.",
|
||||
input_schema: json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": { "type": "string" },
|
||||
"staged": { "type": "boolean" },
|
||||
"commit": { "type": "string" },
|
||||
"commit2": { "type": "string" }
|
||||
},
|
||||
"additionalProperties": false
|
||||
}),
|
||||
required_permission: PermissionMode::ReadOnly,
|
||||
},
|
||||
ToolSpec {
|
||||
name: "GitLog",
|
||||
description: "Show commit history. Supports limiting count, filtering by author/date/path, and oneline format. Defaults to the last 20 commits. Use this instead of running git log via bash to get structured output.",
|
||||
input_schema: json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": { "type": "string" },
|
||||
"count": { "type": "integer", "minimum": 1 },
|
||||
"oneline": { "type": "boolean" },
|
||||
"author": { "type": "string" },
|
||||
"since": { "type": "string" },
|
||||
"until": { "type": "string" }
|
||||
},
|
||||
"additionalProperties": false
|
||||
}),
|
||||
required_permission: PermissionMode::ReadOnly,
|
||||
},
|
||||
ToolSpec {
|
||||
name: "GitShow",
|
||||
description: "Show a commit, tag, or tree object with its diff. Supports showing a specific file at a commit (commit:path) and stat-only mode. Use this instead of running git show via bash to get structured output.",
|
||||
input_schema: json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"commit": { "type": "string" },
|
||||
"path": { "type": "string" },
|
||||
"stat": { "type": "boolean" }
|
||||
},
|
||||
"required": ["commit"],
|
||||
"additionalProperties": false
|
||||
}),
|
||||
required_permission: PermissionMode::ReadOnly,
|
||||
},
|
||||
ToolSpec {
|
||||
name: "GitBlame",
|
||||
description: "Show what revision and author last modified each line of a file. Supports line range filtering (start_line, end_line). Use this instead of running git blame via bash to get structured output.",
|
||||
input_schema: json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"path": { "type": "string" },
|
||||
"start_line": { "type": "integer", "minimum": 1 },
|
||||
"end_line": { "type": "integer", "minimum": 1 }
|
||||
},
|
||||
"required": ["path"],
|
||||
"additionalProperties": false
|
||||
}),
|
||||
required_permission: PermissionMode::ReadOnly,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1199,6 +1276,7 @@ pub fn execute_tool(name: &str, input: &Value) -> Result<String, String> {
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_lines)]
|
||||
#[aspect(LoggingAspect::new().log_args().log_result())]
|
||||
fn execute_tool_with_enforcer(
|
||||
enforcer: Option<&PermissionEnforcer>,
|
||||
name: &str,
|
||||
@@ -1305,6 +1383,11 @@ fn execute_tool_with_enforcer(
|
||||
"TestingPermission" => {
|
||||
from_value::<TestingPermissionInput>(input).and_then(run_testing_permission)
|
||||
}
|
||||
"GitStatus" => from_value::<GitStatusInput>(input).and_then(run_git_status),
|
||||
"GitDiff" => from_value::<GitDiffInput>(input).and_then(run_git_diff),
|
||||
"GitLog" => from_value::<GitLogInput>(input).and_then(run_git_log),
|
||||
"GitShow" => from_value::<GitShowInput>(input).and_then(run_git_show),
|
||||
"GitBlame" => from_value::<GitBlameInput>(input).and_then(run_git_blame),
|
||||
_ => Err(format!("unsupported tool: {name}")),
|
||||
}
|
||||
}
|
||||
@@ -1840,6 +1923,133 @@ fn run_testing_permission(input: TestingPermissionInput) -> Result<String, Strin
|
||||
"message": "Testing permission tool stub"
|
||||
}))
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
/// Execute `git status --short --branch` and return structured JSON output.
|
||||
/// Falls back to full `git status` if `short` is explicitly set to false.
|
||||
fn run_git_status(input: GitStatusInput) -> Result<String, String> {
|
||||
let mut args: Vec<&str> = vec!["status"];
|
||||
if input.short.unwrap_or(true) {
|
||||
args.push("--short");
|
||||
args.push("--branch");
|
||||
}
|
||||
match git_stdout(&args) {
|
||||
Some(output) => to_pretty_json(json!({
|
||||
"output": output
|
||||
})),
|
||||
None => Err(
|
||||
"git status failed. Ensure the current directory is inside a git repository."
|
||||
.to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
/// Execute `git diff` with optional --cached, commit, and path filters.
|
||||
/// Returns the diff output wrapped in a JSON object.
|
||||
fn run_git_diff(input: GitDiffInput) -> Result<String, String> {
|
||||
let mut args: Vec<String> = vec!["diff".to_string()];
|
||||
if input.staged.unwrap_or(false) {
|
||||
args.push("--cached".to_string());
|
||||
}
|
||||
if let Some(ref commit) = input.commit {
|
||||
if let Some(ref commit2) = input.commit2 {
|
||||
args.push(format!("{commit}...{commit2}"));
|
||||
} else {
|
||||
args.push(commit.clone());
|
||||
}
|
||||
}
|
||||
if let Some(ref path) = input.path {
|
||||
args.push("--".to_string());
|
||||
args.push(path.clone());
|
||||
}
|
||||
let arg_refs: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
|
||||
match git_stdout(&arg_refs) {
|
||||
Some(output) => to_pretty_json(json!({
|
||||
"output": output
|
||||
})),
|
||||
None => Err(
|
||||
"git diff failed. Ensure the current directory is inside a git repository.".to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
/// Execute `git log` with count, author, date, and path filters.
|
||||
/// Defaults to the last 20 commits.
|
||||
fn run_git_log(input: GitLogInput) -> Result<String, String> {
|
||||
let mut args: Vec<String> = vec!["log".to_string()];
|
||||
let count = input.count.unwrap_or(20);
|
||||
args.push(format!("-n{count}"));
|
||||
if input.oneline.unwrap_or(false) {
|
||||
args.push("--oneline".to_string());
|
||||
}
|
||||
if let Some(ref author) = input.author {
|
||||
args.push(format!("--author={author}"));
|
||||
}
|
||||
if let Some(ref since) = input.since {
|
||||
args.push(format!("--since={since}"));
|
||||
}
|
||||
if let Some(ref until) = input.until {
|
||||
args.push(format!("--until={until}"));
|
||||
}
|
||||
if let Some(ref path) = input.path {
|
||||
args.push("--".to_string());
|
||||
args.push(path.clone());
|
||||
}
|
||||
let arg_refs: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
|
||||
match git_stdout(&arg_refs) {
|
||||
Some(output) => to_pretty_json(json!({
|
||||
"output": output
|
||||
})),
|
||||
None => Err(
|
||||
"git log failed. Ensure the current directory is inside a git repository.".to_string(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
/// Execute `git show` for a given commit, optionally with --stat or a file path.
|
||||
/// Uses the `commit:path` syntax when a path is specified.
|
||||
fn run_git_show(input: GitShowInput) -> Result<String, String> {
|
||||
let mut args: Vec<String> = vec!["show".to_string()];
|
||||
if input.stat.unwrap_or(false) {
|
||||
args.push("--stat".to_string());
|
||||
}
|
||||
if let Some(ref path) = input.path {
|
||||
args.push(format!("{}:{}", input.commit, path));
|
||||
} else {
|
||||
args.push(input.commit.clone());
|
||||
}
|
||||
let arg_refs: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
|
||||
match git_stdout(&arg_refs) {
|
||||
Some(output) => to_pretty_json(json!({
|
||||
"output": output
|
||||
})),
|
||||
None => Err(format!(
|
||||
"git show {} failed. Ensure the commit exists.",
|
||||
input.commit
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
/// Execute `git blame` on a file, optionally restricted to a line range.
|
||||
fn run_git_blame(input: GitBlameInput) -> Result<String, String> {
|
||||
let mut args: Vec<String> = vec!["blame".to_string()];
|
||||
if let (Some(start), Some(end)) = (input.start_line, input.end_line) {
|
||||
args.push(format!("-L{start},{end}"));
|
||||
}
|
||||
args.push(input.path.clone());
|
||||
let arg_refs: Vec<&str> = args.iter().map(|s| s.as_str()).collect();
|
||||
match git_stdout(&arg_refs) {
|
||||
Some(output) => to_pretty_json(json!({
|
||||
"output": output
|
||||
})),
|
||||
None => Err(format!("git blame {} failed. Ensure the file exists and the directory is inside a git repository.", input.path)),
|
||||
}
|
||||
}
|
||||
|
||||
fn from_value<T: for<'de> Deserialize<'de>>(input: &Value) -> Result<T, String> {
|
||||
serde_json::from_value(input.clone()).map_err(|error| error.to_string())
|
||||
}
|
||||
@@ -2692,6 +2902,85 @@ struct TestingPermissionInput {
|
||||
action: String,
|
||||
}
|
||||
|
||||
/// Input for the GitStatus tool: shows working tree status.
|
||||
/// Defaults to --short --branch mode for concise, parseable output.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GitStatusInput {
|
||||
#[serde(default)]
|
||||
/// If true, use --short --branch format. Defaults to true.
|
||||
short: Option<bool>,
|
||||
}
|
||||
|
||||
/// Input for the GitDiff tool: shows changes between commits, index, and working tree.
|
||||
/// All fields are optional - calling with no options is equivalent to `git diff`.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GitDiffInput {
|
||||
#[serde(default)]
|
||||
/// File path to diff. Prepends `--` before the path.
|
||||
path: Option<String>,
|
||||
#[serde(default)]
|
||||
/// If true, show staged changes (`git diff --cached`).
|
||||
staged: Option<bool>,
|
||||
#[serde(default)]
|
||||
/// A commit hash, tag, or branch to diff against.
|
||||
commit: Option<String>,
|
||||
#[serde(default)]
|
||||
/// A second commit for range diffs (commit...commit2).
|
||||
commit2: Option<String>,
|
||||
}
|
||||
|
||||
/// Input for the GitLog tool: shows commit history.
|
||||
/// Defaults to the last 20 commits in full format.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GitLogInput {
|
||||
#[serde(default)]
|
||||
/// File or directory path to filter commits by.
|
||||
path: Option<String>,
|
||||
#[serde(default)]
|
||||
/// Maximum number of commits to return. Defaults to 20.
|
||||
count: Option<usize>,
|
||||
#[serde(default)]
|
||||
/// If true, use --oneline format (hash + subject only).
|
||||
oneline: Option<bool>,
|
||||
#[serde(default)]
|
||||
/// Filter commits by author pattern.
|
||||
author: Option<String>,
|
||||
#[serde(default)]
|
||||
/// Filter commits since date (e.g. "2024-01-01" or "2.weeks").
|
||||
since: Option<String>,
|
||||
#[serde(default)]
|
||||
/// Filter commits until date.
|
||||
until: Option<String>,
|
||||
}
|
||||
|
||||
/// Input for the GitShow tool: shows a commit, tag, or tree object.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GitShowInput {
|
||||
/// Commit hash, tag, or branch ref to show. Required.
|
||||
commit: String,
|
||||
#[serde(default)]
|
||||
/// If set, show only this file at the given commit (commit:path syntax).
|
||||
path: Option<String>,
|
||||
#[serde(default)]
|
||||
/// If true, show diffstat summary instead of full diff.
|
||||
stat: Option<bool>,
|
||||
}
|
||||
|
||||
/// Input for the GitBlame tool: shows per-line author/revision info for a file.
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct GitBlameInput {
|
||||
/// File path to blame. Required.
|
||||
path: String,
|
||||
#[serde(rename = "start_line")]
|
||||
#[serde(default)]
|
||||
/// Start of line range (1-based). Only used if end_line is also set.
|
||||
start_line: Option<usize>,
|
||||
#[serde(rename = "end_line")]
|
||||
#[serde(default)]
|
||||
/// End of line range (1-based). Only used if start_line is also set.
|
||||
end_line: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
struct WebFetchOutput {
|
||||
bytes: usize,
|
||||
|
||||
11
rust/scripts/install.sh
Executable file
11
rust/scripts/install.sh
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Builds the release `claw` binary and symlinks it into ~/.local/bin.
#
# -e: stop at the first failing command; -u: treat unset variables (e.g. HOME) as errors;
# pipefail: a failure anywhere in a pipeline fails the pipeline.
set -euo pipefail

# Run from the workspace root (the parent of this script's directory) so the build and
# the symlink target do not depend on the caller's current directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$script_dir/.."

# Build the release binary
cargo build --release

# Link to ~/.local/bin
mkdir -p "$HOME/.local/bin"
ln -sf "$(pwd)/target/release/claw" "$HOME/.local/bin/claw"

echo "✓ Claw installed to ~/.local/bin/claw"
|
||||
Reference in New Issue
Block a user