From ae30bf4f04f92f3a9db1ddc1489bb3702b8d1cc7 Mon Sep 17 00:00:00 2001 From: gismo212 <125292781+gismo212@users.noreply.github.com> Date: Mon, 25 May 2026 05:25:28 +0300 Subject: [PATCH] feat(analog): add claw-analog minimal harness Adds claw-analog minimal harness for lean, predictable tool execution. --- how_to_run.md | 389 +++ rust/crates/claw-analog/Cargo.toml | 33 + rust/crates/claw-analog/src/agents.rs | 489 ++++ rust/crates/claw-analog/src/config_cmd.rs | 144 + rust/crates/claw-analog/src/doctor.rs | 733 ++++++ rust/crates/claw-analog/src/lib.rs | 2889 +++++++++++++++++++++ rust/crates/claw-analog/src/main.rs | 522 ++++ 7 files changed, 5199 insertions(+) create mode 100644 how_to_run.md create mode 100644 rust/crates/claw-analog/Cargo.toml create mode 100644 rust/crates/claw-analog/src/agents.rs create mode 100644 rust/crates/claw-analog/src/config_cmd.rs create mode 100644 rust/crates/claw-analog/src/doctor.rs create mode 100644 rust/crates/claw-analog/src/lib.rs create mode 100644 rust/crates/claw-analog/src/main.rs diff --git a/how_to_run.md b/how_to_run.md new file mode 100644 index 00000000..ac8cba72 --- /dev/null +++ b/how_to_run.md @@ -0,0 +1,389 @@ +# claw-analog — как запускать и как это устроено + +Минимальный агент поверх того же стека API, что и основной CLI [`claw`](rust/README.md): провайдеры Anthropic / OpenAI‑совместимые / xAI выбираются по модели и переменным окружения (см. [USAGE.md](USAGE.md)). + +Дальше в примерах **рабочий каталог** — папка **`claw-code-main\rust`** (внутри клона репозитория). Если приглашение PowerShell уже `…\claw-code-main\rust>`, **не** выполняйте второй раз `cd rust` (иначе будет `rust\rust` и ошибка пути). + +## Требования + +- Установленный **Rust** и **cargo** (в PATH: обычно `%USERPROFILE%\.cargo\bin` на Windows). +- Ключ API для выбранного провайдера (например `ANTHROPIC_API_KEY`). 
+ +## Сборка и справка + +```powershell +cd D:\path\to\claw-code-main\rust +cargo build -p claw-analog +cargo run -p claw-analog -- --help +``` + +### Диагностика (`doctor`) + +Подкоманда **`claw-analog doctor`** (у неё свой `--help`, отдельно от основного режима): + +- **превью конфигурации** — итог после слияния **`.claw-analog.toml`** (путь `<workspace>/.claw-analog.toml` или **`--config`**) и **тех же флагов**, что у основного run: **`--model`**, **`--permission`**, **`--preset`**, **`--output-format`**, **`--stream`**, **`--no-stream`**, **`--no-runtime-enforcer`**, **`--accept-danger-non-interactive`**, плюс **`--profile`** для отображения пути к профилю. Печатаются контракт NDJSON (`schema`, `format_version`), эффективные поля и строки **provenance** (что победило: CLI, TOML или default); +- статус типовых переменных (**без** значений: только `set` / `unset` и длина строки); +- поиск workspace вверх от cwd (или **`--manifest-dir`**) и по умолчанию **`cargo check -p claw-analog`** (только компиляция, **не** перезаписывает `target\debug\claw-analog.exe` — иначе на Windows при `cargo run … doctor` часто «Отказано в доступе» при вложенном `cargo build`); +- **`--release-build`** — **`cargo build --release -p claw-analog`** (бинарь в `target\release\`, не конфликтует с запущенным debug‑exe); +- **`--no-build`** — пропустить cargo; +- **`--tcp-ping`** (алиас **`--mock`**) — TCP **`connect`** к хосту:порту из **`ANTHROPIC_BASE_URL`** (или к дефолтному `https://api.anthropic.com`); не проверяет HTTP/TLS и тело ответа. 
+ +Примеры (из каталога `…\claw-code-main\rust`): + +```powershell +cargo run -p claw-analog -- doctor +cargo run -p claw-analog -- doctor --no-build +cargo run -p claw-analog -- doctor --tcp-ping +cargo run -p claw-analog -- doctor -w D:\path\to\repo --preset implement +cargo run -p claw-analog -- doctor --release-build +``` + +### Проверка конфигурации без API (`config validate`) + +Подкоманда **`claw-analog config validate`**: + +- парсит **`.claw-analog.toml`** (по умолчанию `<workspace>/.claw-analog.toml`, переопределение **`--config`**) и выводит краткий **merge preview** (как у `doctor`, но **только TOML + defaults**, без флагов основного run); +- проверяет **`profile.toml`**: тот же порядок, что у run (`--profile`, поле `profile` в TOML, иначе дефолтный `~/.claw-analog/profile.toml` при наличии файла); +- **никаких** запросов к LLM и сети API. + +**`--strict`** — ошибка (код выхода 1), если файла конфигурации нет или профиль не читается. + +```powershell +cargo run -p claw-analog -- config validate -w D:\path\to\repo +cargo run -p claw-analog -- config validate --strict -w . +``` + +### Дополнение оболочки (`complete`) + +Скрипт автодополнения в **stdout** (перенаправьте в файл из документации вашей оболочки): + +```powershell +cargo run -p claw-analog -- complete powershell >> $PROFILE +# bash, zsh, fish — см. вывод `complete --help` +``` + +Доступные значения: **`bash`**, **`zsh`**, **`fish`**, **`powershell`** (алиас **`pwsh`**). + +## Основные команды + +Одна задача в аргументе (или текст с **stdin**): + +```powershell +# из ...\claw-code-main\rust +cargo run -p claw-analog -- -w D:\path\to\repo "Кратко опиши структуру rust/crates" +``` + +С **живым выводом** (SSE через `stream_message`): + +```powershell +cargo run -p claw-analog -- --stream -w . "Объясни claw-analog в двух предложениях" +``` + +Разрешить **запись файлов** в workspace: + +```powershell +cargo run -p claw-analog -- --permission workspace-write -w . 
"Добавь комментарий в начало crates/claw-analog/Cargo.toml" +``` + +Отключить проверку через **`runtime::PermissionEnforcer`** (только своя тюрьма путей; не рекомендуется): + +```powershell +cargo run -p claw-analog -- --no-runtime-enforcer -w . "…" +``` + +Полезные лимиты (CLI **перекрывает** значения из `.claw-analog.toml`, см. ниже): + +| Флаг | Значение по умолчанию | Назначение | +|------|------------------------|------------| +| `--max-read-bytes` | 262144 | Максимум байт для `read_file` / `grep_workspace` / `git_diff` / `git_log` | +| `--max-turns` | 24 | Максимум раундов «модель → инструменты → модель» | +| `--max-list-entries` | 500 | Лимит строк `list_dir` | +| `--grep-max-lines` | 200 | Верхняя граница **суммарных** строк совпадений в `grep_workspace` (в т.ч. по нескольким файлам; в одном файле можно задать меньше через `max_lines`) | +| `--glob-max-paths` | 2000 | Максимум путей, возвращаемых `glob_workspace` и при расширении `glob` внутри `grep_workspace` | +| `--glob-max-depth` | 32 | Глубина обхода каталогов для glob (через `walkdir`), без бесконечной рекурсии | +| `--output-format` | `rich` | `json` — NDJSON на stdout для скриптов и агентов | +| `--print-tools` | — | Список эффективных инструментов для итоговых `permission` / enforcer, затем выход (**без** промпта и API) | +| `--lang` | `en` | Подсказка в system: `en` или `ru` (язык ответов; **не** меняет id модели в API) | +| `--preset` | — | `none` \| `audit` \| `explain` \| `implement` — см. раздел ниже | +| `--session` | — | Путь к JSON-сессии (относительно `-w`, если не абсолютный): сохранение истории и resume | +| `--save-session` | — | Дополнительный путь: тот же снимок сессии пишется сюда при каждом сохранении (можно **без** `--session`, чтобы только экспортировать JSON после прогона) | +| `--profile` | — | TOML с полем `line` (подмешивается в system). 
Без флага: пробуется `%USERPROFILE%\.claw-analog\profile.toml` (Windows) / `~/.claw-analog/profile.toml` | +| `--permission` | `read-only` | см. ниже: `read-only`, `workspace-write`, `prompt`, `danger-full-access`, `allow` | +| `--accept-danger-non-interactive` | — | Разрешить `danger-full-access` / `allow`, когда stdin **не** TTY (CI; осознанный риск). В TOML: `accept_danger_non_interactive = true` | + +Конфиг по умолчанию читается из **`<workspace>/.claw-analog.toml`**, если файл существует. Другой путь: **`--config PATH`**. Неизвестные ключи в TOML — ошибка парсинга (строгая схема). + +Пример `.claw-analog.toml`: + +```toml +model = "sonnet" +stream = true +output_format = "rich" +permission = "read-only" +language = "en" +preset = "audit" +session = ".claw-analog.session.json" +profile = "~/.claw-analog/profile.toml" +no_runtime_enforcer = false +accept_danger_non_interactive = false +max_read_bytes = 262144 +max_turns = 24 +max_list_entries = 500 +grep_max_lines = 200 +glob_max_paths = 2000 +glob_max_depth = 32 +# Опционально: RAG (`claw-rag-service`) — см. раздел про RAG ниже +# rag_base_url = "http://127.0.0.1:8787" +# rag_timeout_secs = 30 +# rag_top_k_max = 32 +``` + +**RAG (`retrieve_context`):** если задана переменная окружения **`RAG_BASE_URL`** или непустой **`rag_base_url`** в `.claw-analog.toml`, в набор инструментов добавляется **`retrieve_context`** (семантический поиск по уже проиндексированному воркспейсу). Значение — корень HTTP сервиса, без суффикса `/v1` (запрос идёт на `{base}/v1/query`). Таймаут и верхняя граница **`top_k`** задаются **`rag_timeout_secs`** и **`rag_top_k_max`** (по умолчанию 30 с и 32; «жёсткий» потолок 256). Индексация по-прежнему выполняется отдельной командой **`claw-rag-service`**, см. [`docs/rag-web-ui.md`](docs/rag-web-ui.md). 
+ +**`permission`** (как у полного `claw`, те же строки в TOML): + +| Значение | Инструмент `write_file` | Неинтерактив (stdin не TTY) | +|----------|-------------------------|------------------------------| +| `read-only` | нет | OK | +| `workspace-write` | да (в пределах `-w`) | OK | +| `prompt` | нет (в этом harness Enforcer не даёт писать без подтверждений) | предупреждение в stderr; для автозаписи используйте `workspace-write` | +| `danger-full-access`, `allow` | да | **запрещено**, пока не задан `--accept-danger-non-interactive` или `accept_danger_non_interactive = true` в TOML | + +**`--stream`** в командной строке включает стриминг; **`--no-stream`** явно выключает (полезно поверх `stream = true` в файле). + +**`language`** в TOML: `en` или `ru` (те же значения, что у **`--lang`**); CLI имеет приоритет. + +### Сессия (`--session`) + +Файл JSON (версия `1`): метаданные `workspace`, `model`, опционально `preset`, массив `messages` в формате API (`role` + `content`). При запуске с существующим файлом история **догружается**, текущий текст запроса (аргумент или stdin) добавляется как **новое** пользовательское сообщение. Состояние сохраняется после каждого полного раунда с инструментами и при завершении без `tool_use`. + +**`--save-session`** — тот же формат файла, что и у `--session`: при каждом шаге, где обновлялся бы файл сессии, запись дублируется (если путь совпадает с `--session`, вторая запись не выполняется). Без **`--session`** можно собрать историю одного прогона в JSON для скриптов или последующего **`--session`** без ручной сборки `messages`. + +**Риски:** в файле могут оказаться **секреты** (вывод `read_file`, ключи из логов), файл не шифруется; длинная история **дороже** по токенам API. В stderr печатается напоминание при **`--session`** или **`--save-session`**. Несовпадение `workspace` / `model` / `preset` с текущим запуском даёт **предупреждение**, но прогон продолжается. 
+ +### Пресеты (`--preset`) + +Добавляют краткий абзац к system prompt (аудит / обучение / правки). Набор инструментов по-прежнему задаётся **permission**: для **`implement`**, если ни CLI, ни файл не задали `permission`, по умолчанию подставляется **workspace-write** (чтобы был `write_file`). Явный `permission = "read-only"` в файле или `--permission read-only` в CLI имеет приоритет. + +### Профиль (`profile.toml`) + +Мини-файл: + +```toml +line = "Короткая подсказка стиля (одна строка в system)." +``` + +Ограничения: размер файла не больше **2048** байт; длина строки после trim — не больше **512** символов Unicode (иначе усечение с предупреждением). Содержимое добавляется в system одной строкой: `Learner hint: …`. + +## Инструменты (без произвольного shell) + +| Имя | Режим | Описание | +|-----|--------|----------| +| `read_file` | read-only+ | Чтение UTF‑8 файла под `-w` | +| `list_dir` | read-only+ | Список каталога (не рекурсивно) | +| `glob_workspace` | read-only+ | Список **путей файлов** под `-w`: аргументы `pattern` (glob относительно `root`, слэши `/`), опционально `root` (по умолчанию `.`), `max_paths` (урезается лимитом CLI). В шаблоне нельзя `..`. | +| `grep_workspace` | read-only+ | Та же **литеральная** подстрока по строкам, что и раньше; ровно один из селекторов: `path`, массив `paths` или `glob` (+ опционально `glob_root`). Общий бюджет строк — `max_lines` и `--grep-max-lines`. В нескольких файлах формат строк: `относительный/путь:номер_строки:содержимое`. | +| `grep_search` | read-only+ | Тот же обработчик, что у `grep_workspace` (совместимость промптов с полным `claw`). | +| `git_diff` | read-only+ | `git diff` (без цвета) внутри репозитория в `-w`. Опционально `cached` (staged), `rev_range`, `context_lines`, `paths`. Вывод ограничен `--max-read-bytes`. | +| `git_log` | read-only+ | `git log` (без цвета) внутри репозитория в `-w`. Опционально `max_count` (по умолчанию 20), `rev_range`, `paths`. Вывод ограничен `--max-read-bytes`. 
| +| `retrieve_context` | read-only+ | Только если задан **`RAG_BASE_URL`** или **`rag_base_url`** в TOML: HTTP **`POST {base}/v1/query`** к `claw-rag-service`, ответ — пути и сниппеты чанков (лимиты см. выше). | +| `write_file` | `workspace-write`, `danger-full-access` или `allow` | Запись файла; родительские каталоги создаются при необходимости (`prompt` не даёт записать через Enforcer) | + +## Принципы работы + +1. **Корень workspace** (`-w`) приводится к каноническому пути; все пути в инструментах **относительные**, без `..` и без абсолютных сегментов. +2. Перед доступом к файлу проверяется, что реальный путь остаётся **внутри** корня (symlink/`canonicalize`). +3. **Политика прав** (если не отключена `--no-runtime-enforcer`): те же сущности, что у основного CLI — `PermissionPolicy` + `PermissionEnforcer::check` для инструмента и `check_file_write` для записи. +4. **Цикл агента**: запрос к провайдеру → если `stop_reason == tool_use`, выполняются вызовы, результаты уходят в историю как `tool_result` → следующий раунд. +5. **Стриминг**: при `--stream` текст ассистента печатается по мере прихода дельт; история для следующего раунда собирается из SSE так же, как в полном пайплайне (индексы блоков + JSON tool input). Отключить стриминг при настройке из файла можно флагом **`--no-stream`**. + +Логи вида `[claw-analog] ...` пишутся в **stderr**. В режиме **rich** ответ модели — обычный текст в **stdout**; в режиме **json** в **stdout** идёт только **NDJSON** (см. ниже). + +## Вывод JSON (CI и внешние агенты) + +Флаг **`--output-format json`** переключает stdout на **поток строк JSON** (один объект = одна строка). Поля стабильны по смыслу, но набор может расширяться. 
+ +Основные `type`: + +| `type` | Когда | +|--------|--------| +| `run_start` | Старт прогона: **`schema`** (`claw-analog-ndjson`), **`format_version`**, далее `workspace`, `model`, `stream`, `permission`, опционально `preset`, `session`, опционально `session_save`, булево **`rag_enabled`** (есть ли база для `retrieve_context`) | +| `turn_start` | Начало раунда с моделью (`turn`) | +| `assistant_text_delta` | Только при `--stream`: фрагмент текста ассистента | +| `assistant_turn` | Итог раунда: `stop_reason`, `usage`, полный `text`, массив `tool_calls` | +| `tool_result` | После выполнения инструмента: `name`, `tool_use_id`, `is_error`, `output` (может быть усечён), `truncated`, `output_len_chars` | +| `run_end` | Успешное завершение (`ok: true`) | +| `error` | Ошибка (печатается отдельной строкой при падении или пустом промпте) | + +Пример (PowerShell): поток удобно разбирать построчно с помощью **`jq`** или любого JSON‑парсера. + +```powershell +# из ...\claw-code-main\rust +$env:ANTHROPIC_API_KEY = "sk-ant-..." +cargo run -p claw-analog -- --output-format json -w . "Summarize rust/README.md" 2>$null | ForEach-Object { $_ | ConvertFrom-Json | Select-Object -ExpandProperty type } +``` + +С **`--stream`** в stdout сначала идут события `assistant_text_delta`, затем для того же раунда — одна строка `assistant_turn` с полным собранным `text` (удобно для воспроизводимых логов). + +### Ограничения и риски для агентов + +- В **`tool_result.output`** большие файлы обрезаются (~32 KiB UTF‑8), поле **`truncated`: true**. +- **Секреты**: не перенаправляйте stderr сырьём в публичные логи без фильтра; в `output` теоретически может попасть содержимое прочитанных файлов. +- Контракт для оркестраторов: NDJSON из stdout, диагностика из stderr; код возврата ≠ 0 при ошибке. На первой строке **`run_start`** имеет смысл сверять **`schema`** и **`format_version`**; **`run_start`** также раскрывает путь workspace и модель — учитывайте при шаринге логов. 
+ +## Автотесты без реальной сети + +Юнит‑тесты и интеграция с локальным **mock-anthropic-service**: + +```powershell +# из ...\claw-code-main\rust +cargo test -p claw-analog +``` + +В **GitHub Actions** отдельный job **`claw-analog (test + clippy -p)`** гоняет `cargo test -p claw-analog` и `cargo clippy -p claw-analog --no-deps` (в дополнение к полному `cargo test` / `clippy` по workspace). + +При параллельном запуске тестов переменные окружения Anthropic изолированы **mutex**‑ом только для mock‑сценария; при сбоях можно запустить `cargo test -p claw-analog -- --test-threads=1`. + +## Отдельно: `claw-rag-service` (RAG) + +Индексация воркспейса и HTTP API живут в **`cargo run -p claw-rag-service`** (`ingest` + `serve`). После `serve` откройте **`http://127.0.0.1:8787/`** — лёгкий UI (stats + поиск). К `claw-analog` подключается через **`RAG_BASE_URL`** / `retrieve_context`. Подробности и env: [`docs/rag-web-ui.md`](docs/rag-web-ui.md). + +### Ingest (один или несколько репозиториев) + +`ingest` принимает **повторяемый** `--workspace` — это позволяет сделать **cross-repo RAG** (несколько реп в одну БД/коллекцию). + +```powershell +# из ...\claw-code-main\rust + +# один workspace +cargo run -p claw-rag-service -- ingest --workspace "D:\v\kria\s6" + +# несколько workspace (cross-repo) +cargo run -p claw-rag-service -- ingest --workspace "D:\repo1" --workspace "D:\repo2" +``` + +В ответах `path` будет вида `repoId:relative/path` (чтобы не было коллизий одинаковых путей между репозиториями). + +### Mock embeddings (без ключей / без сети) + +Для локальных прогонов/тестов можно включить mock-эмбеддинги: + +```powershell +$env:CLAW_RAG_MOCK_PROVIDERS = "1" +cargo run -p claw-rag-service -- ingest --workspace "D:\v\kria\s6" +``` + +### Qdrant (рекомендуемый локальный вариант) через Docker + +Для больших репозиториев лучше поднять локальный Qdrant: это снимает нагрузку с линейного сканирования `SQLite` и ускоряет запросы. 
+ +Запуск Qdrant (gRPC на 6334): + +```powershell +docker run --rm -p 6333:6333 -p 6334:6334 -e QDRANT__SERVICE__GRPC_PORT=6334 qdrant/qdrant +``` + +#### Qdrant с persist volume (чтобы индекс сохранялся) + +Вариант через именованный volume Docker: + +```powershell +docker volume create claw-qdrant-data +docker run --rm -p 6333:6333 -p 6334:6334 ` + -e QDRANT__SERVICE__GRPC_PORT=6334 ` + -v claw-qdrant-data:/qdrant/storage ` + qdrant/qdrant +``` + +Вариант через bind-mount (путь на хосте): + +```powershell +mkdir .claw-qdrant | Out-Null +docker run --rm -p 6333:6333 -p 6334:6334 ` + -e QDRANT__SERVICE__GRPC_PORT=6334 ` + -v "${PWD}/.claw-qdrant:/qdrant/storage" ` + qdrant/qdrant +``` + +Затем включите env и запускайте ingest с фичей `qdrant-index`: + +```powershell +$env:CLAW_RAG_QDRANT_URL = "http://127.0.0.1:6334" +$env:CLAW_RAG_QDRANT_COLLECTION = "claw_rag_chunks" + +# (опционально) без реального API для эмбеддингов +$env:CLAW_RAG_MOCK_PROVIDERS = "1" + +cargo run -p claw-rag-service --features qdrant-index -- ingest --workspace "D:\v\kria\s6" +``` + +`ingest` сам создаст коллекцию, если её ещё нет (по размерности эмбеддингов). + +### Запуск через Docker (Qdrant + claw-rag-service) + +Если хочется поднимать всё одной командой, удобнее использовать `docker compose`. + +1) Запуск сервисов: + +```powershell +cd D:\path\to\claw-code-main +docker compose up --build +``` + +Примечание: образ `rag-serve`/`rag-ingest` собирается на достаточно свежем Rust (см. `rust/crates/claw-rag-service/Dockerfile`), потому что `qdrant-client` может требовать более новую версию Rust, чем старые pinned-теги. 
+ +Если сборка Docker падает и вы видите строки вроде `transferring context: 21.02GB`, проверьте что: + +- вы запускаете compose из корня репозитория (где лежит `docker-compose.yml`) +- используется `.dockerignore` (уменьшает build-context, особенно если есть `target/` и локальные индексы) + +Если сборка падает сразу с `EOF` на шаге `load local bake definitions`, попробуйте: + +```powershell +$env:COMPOSE_BAKE = "0" +$env:DOCKER_BUILDKIT = "0" +docker compose up --build +``` + +2) Ingest (запускать отдельно, т.к. это batch job). Пример для одного workspace: + +```powershell +docker compose run --rm rag-ingest ingest --workspace "/workspaces/main" +``` + +По умолчанию `rag-ingest` пишет индекс в общий volume, так что `rag-serve` сразу увидит чанки. + +### Подключение к `claw-analog` + +```powershell +$env:RAG_BASE_URL = "http://127.0.0.1:8787" +cargo run -p claw-analog -- -w "D:\v\kria\s6" "Найди где реализован ingest в RAG сервисе" +``` + +## Auto‑TDD (автопроверки после `write_file`/`edit_file`) + +В полном `claw` (и в других потребителях `runtime`) можно включить автозапуск линтера/тестов после успешных write-инструментов через `.claw/settings.json`: + +```json +{ + "autoTdd": { + "enabled": true, + "tools": ["write_file", "edit_file"], + "commands": [ + "cd rust && cargo fmt", + "cd rust && cargo clippy --workspace --all-targets -- -D warnings", + "cd rust && cargo test --workspace" + ] + } +} +``` + +## Отличия от полного `claw` + +- Узкий набор инструментов (нет bash/MCP/плагинов). +- Проще аудировать и ограничивать по `--permission` и лимитам. +- Основной продукт по-прежнему `cargo run -p rusty-claude-cli` → бинарь `claw`. + +## Дальнейшая разработка + +План и чеклист идей (в т.ч. заимствованные из продуктового слоя вроде DeepTutor): [`futute.md`](futute.md) в корне репозитория. 
diff --git a/rust/crates/claw-analog/Cargo.toml b/rust/crates/claw-analog/Cargo.toml new file mode 100644 index 00000000..472b45e8 --- /dev/null +++ b/rust/crates/claw-analog/Cargo.toml @@ -0,0 +1,33 @@ +[package] +name = "claw-analog" +version.workspace = true +edition.workspace = true +license.workspace = true +publish.workspace = true +description = "Minimal agent harness: tool loop with explicit permissions and workspace jail." + +[lib] +name = "claw_analog" +path = "src/lib.rs" + +[[bin]] +name = "claw-analog" +path = "src/main.rs" + +[dependencies] +api = { path = "../api" } +clap = { version = "4", features = ["derive"] } +clap_complete = "4" +globset = "0.4" +reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] } +runtime = { path = "../runtime" } +serde = { version = "1", features = ["derive"] } +serde_json.workspace = true +tokio = { version = "1", features = ["macros", "rt-multi-thread"] } +toml = "0.8" +walkdir = "2" +ignore = "0.4" + +[dev-dependencies] +mock-anthropic-service = { path = "../mock-anthropic-service" } +tempfile = "3" diff --git a/rust/crates/claw-analog/src/agents.rs b/rust/crates/claw-analog/src/agents.rs new file mode 100644 index 00000000..7f8111c7 --- /dev/null +++ b/rust/crates/claw-analog/src/agents.rs @@ -0,0 +1,489 @@ +//! `claw-analog agents` — run multiple specialized sub-agents sequentially. 
+ +use std::path::{Path, PathBuf}; + +use api::InputMessage; +use clap::{Parser, ValueEnum}; +use claw_analog::{ + enforce_non_interactive_permission_rules, load_analog_toml, resolve_analog_options, + resolve_analog_profile_path, resolve_rag_base_url, AnalogConfig, AnalogDoctorOverrides, + AnalogFileConfig, OutputFormat, PermissionMode, Preset, StreamOverride, +}; + +const DEF_MAX_READ: u64 = 256 * 1024; +const DEF_MAX_TURNS: u32 = 24; +const DEF_MAX_LIST: usize = 500; +const DEF_GREP_MAX: usize = 200; +const DEF_GLOB_PATHS: usize = 2000; +const DEF_GLOB_DEPTH: usize = 32; +const DEF_RAG_TIMEOUT_SECS: u64 = 30; +const DEF_RAG_TOP_K_MAX: u32 = 32; +const RAG_TOP_K_ABS_CAP: u32 = 256; + +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum AgentsPresetArg { + Audit, + Explain, + Implement, +} + +impl From for Preset { + fn from(p: AgentsPresetArg) -> Self { + match p { + AgentsPresetArg::Audit => Preset::Audit, + AgentsPresetArg::Explain => Preset::Explain, + AgentsPresetArg::Implement => Preset::Implement, + } + } +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum AgentsPermissionArg { + ReadOnly, + WorkspaceWrite, + Prompt, + #[value(name = "danger-full-access")] + DangerFullAccess, + Allow, +} + +impl From for PermissionMode { + fn from(p: AgentsPermissionArg) -> Self { + match p { + AgentsPermissionArg::ReadOnly => PermissionMode::ReadOnly, + AgentsPermissionArg::WorkspaceWrite => PermissionMode::WorkspaceWrite, + AgentsPermissionArg::Prompt => PermissionMode::Prompt, + AgentsPermissionArg::DangerFullAccess => PermissionMode::DangerFullAccess, + AgentsPermissionArg::Allow => PermissionMode::Allow, + } + } +} + +#[derive(Debug, Clone)] +pub struct AgentSpec { + pub name: String, + pub preset: Preset, + pub permission: PermissionMode, + pub model: Option, + pub prompt: Option, +} + +fn default_permission_for_preset(p: Preset) -> PermissionMode { + match p { + Preset::Audit | Preset::Explain => PermissionMode::ReadOnly, + Preset::Implement => 
PermissionMode::WorkspaceWrite, + Preset::None => PermissionMode::ReadOnly, + } +} + +fn parse_agent_spec(s: &str) -> Result { + // Allowed forms: + // - "audit" | "explain" | "implement" + // - "name=audit,preset=audit,permission=read-only,model=...,prompt=..." + let raw = s.trim(); + if raw.is_empty() { + return Err("empty --agent spec".to_string()); + } + + if !raw.contains('=') { + let preset = match raw.to_ascii_lowercase().as_str() { + "audit" => Preset::Audit, + "explain" => Preset::Explain, + "implement" | "fix" => Preset::Implement, + other => return Err(format!("unknown agent shorthand: {other}")), + }; + return Ok(AgentSpec { + name: raw.to_string(), + preset, + permission: default_permission_for_preset(preset), + model: None, + prompt: None, + }); + } + + let mut name: Option = None; + let mut preset: Option = None; + let mut permission: Option = None; + let mut model: Option = None; + let mut prompt: Option = None; + + for part in raw.split(',') { + let (k, v) = part + .split_once('=') + .ok_or_else(|| format!("invalid agent spec part {part:?} (expected k=v)"))?; + let k = k.trim().to_ascii_lowercase(); + let v = v.trim(); + if v.is_empty() { + continue; + } + match k.as_str() { + "name" => name = Some(v.to_string()), + "preset" => { + let p = match v.to_ascii_lowercase().as_str() { + "audit" => Preset::Audit, + "explain" => Preset::Explain, + "implement" | "fix" => Preset::Implement, + "none" => Preset::None, + other => return Err(format!("unknown preset {other:?}")), + }; + preset = Some(p); + } + "permission" => { + let pm = match v.to_ascii_lowercase().replace('_', "-").as_str() { + "read-only" | "readonly" => PermissionMode::ReadOnly, + "workspace-write" | "write" => PermissionMode::WorkspaceWrite, + "prompt" => PermissionMode::Prompt, + "danger-full-access" | "danger" => PermissionMode::DangerFullAccess, + "allow" => PermissionMode::Allow, + other => return Err(format!("unknown permission {other:?}")), + }; + permission = Some(pm); + } + "model" 
=> model = Some(v.to_string()), + "prompt" => prompt = Some(v.to_string()), + other => return Err(format!("unknown agent spec key {other:?}")), + } + } + + let preset = preset.unwrap_or(Preset::Audit); + let permission = permission.unwrap_or_else(|| default_permission_for_preset(preset)); + let name = name.unwrap_or_else(|| preset.label().unwrap_or("agent").to_string()); + + Ok(AgentSpec { + name, + preset, + permission, + model, + prompt, + }) +} + +#[derive(Debug, Parser)] +pub struct AgentsCli { + /// Workspace root. + #[arg(short = 'w', long, default_value = ".", value_name = "DIR")] + pub workspace: PathBuf, + + /// Config path (default: `/.claw-analog.toml`). + #[arg(long, value_name = "PATH")] + pub config: Option, + + /// Base session path. If missing, it will be created from the base prompt. + #[arg(long, value_name = "PATH")] + pub base_session: PathBuf, + + /// Base prompt. If omitted, reads from stdin. + #[arg(long)] + pub prompt: Option, + + /// Repeatable agent specs, e.g. `--agent audit` or `--agent name=fix,preset=implement,permission=workspace-write`. + #[arg(long, required = true)] + pub agent: Vec, + + /// If set, each agent writes its own session file next to base session. 
+ #[arg(long, default_value_t = true)] + pub split_sessions: bool, +} + +fn load_file_config(path: &Path) -> AnalogFileConfig { + if !path.is_file() { + return AnalogFileConfig::default(); + } + load_analog_toml(path).unwrap_or_default() +} + +fn config_path(args: &AgentsCli) -> PathBuf { + args.config + .clone() + .unwrap_or_else(|| args.workspace.join(".claw-analog.toml")) +} + +fn derive_agent_session_path(base: &Path, agent_name: &str) -> PathBuf { + let base_s = base.to_string_lossy(); + PathBuf::from(format!("{base_s}.agent-{agent_name}.json")) +} + +fn read_stdin_prompt() -> Result { + use std::io::Read; + let mut buf = String::new(); + std::io::stdin() + .read_to_string(&mut buf) + .map_err(|e| e.to_string())?; + let t = buf.trim(); + if t.is_empty() { + return Err("empty prompt (pass --prompt or stdin)".to_string()); + } + Ok(t.to_string()) +} + +fn ensure_base_session(base_session: &Path, workspace: &Path, prompt: &str) -> Result<(), String> { + if base_session.exists() { + return Ok(()); + } + let ws_s = workspace.display().to_string(); + let model = "base".to_string(); + let messages = if prompt.trim().is_empty() { + Vec::new() + } else { + vec![InputMessage::user_text(prompt.to_string())] + }; + claw_analog::session_save(base_session, &ws_s, &model, Preset::None, &messages)?; + Ok(()) +} + +pub fn run_agents(args: AgentsCli) -> Result<(), String> { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(|e| e.to_string())?; + rt.block_on(async { run_agents_async(args).await }) +} + +pub async fn run_agents_async(args: AgentsCli) -> Result<(), String> { + run_agents_inner(args, |cfg, out| { + Box::pin(async move { + claw_analog::run(cfg, out) + .await + .map_err(|e| e.to_string())?; + Ok(()) + }) + }) + .await +} + +type RunFuture<'a> = std::pin::Pin> + 'a>>; + +async fn run_agents_inner(args: AgentsCli, mut run_one: F) -> Result<(), String> +where + for<'a> F: FnMut(AnalogConfig, &'a mut Vec) -> RunFuture<'a>, +{ 
+ let workspace = if args.workspace.is_absolute() { + args.workspace.clone() + } else { + std::env::current_dir() + .map_err(|e| e.to_string())? + .join(&args.workspace) + }; + let cfg_path = config_path(&args); + let file_cfg = load_file_config(&cfg_path); + + let base_prompt = match args.prompt.clone() { + Some(p) => p, + None => read_stdin_prompt()?, + }; + ensure_base_session(&args.base_session, &workspace, base_prompt.as_str())?; + + let mut specs = Vec::new(); + for a in &args.agent { + specs.push(parse_agent_spec(a)?); + } + + println!("claw-analog agents (sequential)\n"); + println!(" workspace: {}", workspace.display()); + println!(" base_session: {}", args.base_session.display()); + println!(" agents: {}", specs.len()); + println!(); + + for (i, spec) in specs.into_iter().enumerate() { + println!( + "== Agent {} / {}: {} ==", + i + 1, + args.agent.len(), + spec.name + ); + println!(" preset: {}", spec.preset.label().unwrap_or("none")); + println!(" permission: {}", spec.permission.as_str()); + if let Some(m) = &spec.model { + println!(" model: {m}"); + } + + enforce_non_interactive_permission_rules(spec.permission, false)?; + + let agent_session = if args.split_sessions { + derive_agent_session_path(&args.base_session, spec.name.as_str()) + } else { + args.base_session.clone() + }; + if args.split_sessions { + std::fs::copy(&args.base_session, &agent_session).map_err(|e| e.to_string())?; + } + + let overrides = AnalogDoctorOverrides { + model: spec.model.clone(), + permission: Some(spec.permission), + preset: Some(spec.preset), + output_format: Some(OutputFormat::Rich), + stream: StreamOverride::ForceOff, + ..Default::default() + }; + let resolved = resolve_analog_options(&file_cfg, &overrides); + + let profile_path = + resolve_analog_profile_path(&workspace, None, file_cfg.profile.as_deref()); + let profile_hint = if let Some(ref p) = profile_path { + claw_analog::load_profile_hint(p).unwrap_or(None) + } else { + None + }; + + let rag_base_url = 
/// Returns the last `n` characters (Unicode scalar values, not bytes) of `s`.
///
/// The whole string is returned when it has `n` characters or fewer; `n == 0`
/// yields an empty string.
fn tail_chars(s: &str, n: usize) -> String {
    if n == 0 {
        return String::new();
    }
    // The n-th character counted from the end marks where the tail begins;
    // if the string is shorter than that, everything is kept.
    match s.char_indices().rev().nth(n - 1) {
        Some((start, _)) => s[start..].to_string(),
        None => s.to_string(),
    }
}

/// Prefixes every line of `s` with `spaces` space characters.
///
/// Lines are split with `str::lines`, so a trailing newline is dropped and the
/// result is joined with `\n` without a final newline. Empty lines still get
/// the padding.
fn indent_lines(s: &str, spaces: usize) -> String {
    let pad = " ".repeat(spaces);
    let mut out = String::with_capacity(s.len() + spaces);
    for (idx, line) in s.lines().enumerate() {
        if idx > 0 {
            out.push('\n');
        }
        out.push_str(&pad);
        out.push_str(line);
    }
    out
}
let rt = tokio::runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .expect("runtime"); + rt.block_on(async { + run_agents_inner(args, move |_cfg, out| { + let called3 = called2.clone(); + Box::pin(async move { + called3.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + out.extend_from_slice(b"stub ok"); + Ok(()) + }) + }) + .await + .expect("agents should run"); + }); + assert_eq!(called.load(std::sync::atomic::Ordering::Relaxed), 2); + + assert!(derive_agent_session_path(&base_session, "audit").is_file()); + assert!(derive_agent_session_path(&base_session, "explain").is_file()); + } +} diff --git a/rust/crates/claw-analog/src/config_cmd.rs b/rust/crates/claw-analog/src/config_cmd.rs new file mode 100644 index 00000000..38ea3ae8 --- /dev/null +++ b/rust/crates/claw-analog/src/config_cmd.rs @@ -0,0 +1,144 @@ +//! `claw-analog config validate` — parse TOML and profile without calling the API. + +use std::path::PathBuf; + +use clap::Parser; +use claw_analog::{ + load_analog_toml, load_profile_hint, resolve_analog_options, resolve_analog_profile_path, + AnalogDoctorOverrides, AnalogFileConfig, AnalogLanguage, OutputFormat, +}; + +#[derive(Parser, Debug)] +pub struct ValidateCli { + #[arg(short = 'w', long, default_value = ".", value_name = "DIR")] + pub workspace: PathBuf, + #[arg(long, value_name = "PATH")] + pub config: Option, + /// Require `/.claw-analog.toml` (or `--config`) to exist and parse. 
+ #[arg(long, default_value_t = false, action = clap::ArgAction::SetTrue)] + pub strict: bool, + #[arg(long, value_name = "PATH")] + pub profile: Option, +} + +pub fn run_validate(cli: ValidateCli) -> i32 { + let cfg_path = cli + .config + .clone() + .unwrap_or_else(|| cli.workspace.join(".claw-analog.toml")); + + let file_cfg = if cfg_path.is_file() { + match load_analog_toml(&cfg_path) { + Ok(c) => { + println!("OK: {} parses", cfg_path.display()); + c + } + Err(e) => { + eprintln!("ERROR: {}: {e}", cfg_path.display()); + return 1; + } + } + } else if cli.strict { + eprintln!( + "ERROR: --strict: config file missing: {}", + cfg_path.display() + ); + return 1; + } else { + println!( + "Note: {} absent — using empty TOML defaults for preview", + cfg_path.display() + ); + AnalogFileConfig::default() + }; + + let prof_path = resolve_analog_profile_path( + &cli.workspace, + cli.profile.clone(), + file_cfg.profile.as_deref(), + ); + let mut ok = true; + match &prof_path { + None => println!( + "Profile: (none — no CLI/TOML path and no default ~/.claw-analog/profile.toml)" + ), + Some(p) => match load_profile_hint(p) { + Ok(Some(line)) => println!( + "OK: profile {} (line: {} chars)", + p.display(), + line.chars().count() + ), + Ok(None) => println!("OK: profile {} (empty `line`)", p.display()), + Err(e) => { + eprintln!("ERROR: profile {}: {e}", p.display()); + ok = false; + } + }, + } + + let lang = file_cfg + .language + .as_deref() + .and_then(AnalogLanguage::from_toml_str) + .unwrap_or_default(); + + let r = resolve_analog_options(&file_cfg, &AnalogDoctorOverrides::default()); + println!("\nMerge preview (TOML + defaults only; main-run CLI flags not applied):"); + println!(" language (TOML): {}", lang.as_str()); + println!(" model: {}", r.model); + println!(" permission: {}", r.permission_mode.as_str()); + println!(" preset: {}", r.preset.label().unwrap_or("none")); + println!( + " output_format: {}", + match r.output_format { + OutputFormat::Rich => "rich", + 
OutputFormat::Json => "json", + } + ); + println!(" stream: {}", r.use_stream); + println!( + " runtime_enforcer: {}", + if r.use_runtime_enforcer { "on" } else { "off" } + ); + println!( + " accept_danger_non_interactive: {}", + r.accept_danger_non_interactive + ); + println!(" Provenance:"); + for line in &r.provenance { + println!(" - {line}"); + } + + i32::from(!ok) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn strict_fails_when_config_missing() { + let dir = tempfile::tempdir().unwrap(); + let code = run_validate(ValidateCli { + workspace: dir.path().to_path_buf(), + config: None, + strict: true, + profile: None, + }); + assert_eq!(code, 1); + } + + #[test] + fn parses_when_config_present() { + let dir = tempfile::tempdir().unwrap(); + let p = dir.path().join(".claw-analog.toml"); + std::fs::write(&p, r#"model = "sonnet""#).unwrap(); + let code = run_validate(ValidateCli { + workspace: dir.path().to_path_buf(), + config: None, + strict: true, + profile: None, + }); + assert_eq!(code, 0); + } +} diff --git a/rust/crates/claw-analog/src/doctor.rs b/rust/crates/claw-analog/src/doctor.rs new file mode 100644 index 00000000..2d193236 --- /dev/null +++ b/rust/crates/claw-analog/src/doctor.rs @@ -0,0 +1,733 @@ +//! `claw-analog doctor` — environment and Cargo sanity checks. 
+ +use std::net::{TcpStream, ToSocketAddrs}; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::time::Duration; + +use clap::ValueEnum; +use claw_analog::{ + load_analog_toml, load_profile_hint, resolve_analog_options, AnalogDoctorOverrides, + AnalogFileConfig, OutputFormat, PermissionMode, Preset, StreamOverride, NDJSON_FORMAT_VERSION, + NDJSON_SCHEMA, +}; +use reqwest::header::{HeaderMap, HeaderName, HeaderValue}; + +const ENV_CHECK: &[&str] = &[ + "ANTHROPIC_API_KEY", + "ANTHROPIC_AUTH_TOKEN", + "ANTHROPIC_BASE_URL", + "OPENAI_API_KEY", + "OPENAI_BASE_URL", + "XAI_API_KEY", + "RAG_BASE_URL", +]; + +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum DoctorPermissionArg { + ReadOnly, + WorkspaceWrite, + Prompt, + #[value(name = "danger-full-access")] + DangerFullAccess, + Allow, +} + +impl From for PermissionMode { + fn from(p: DoctorPermissionArg) -> Self { + match p { + DoctorPermissionArg::ReadOnly => PermissionMode::ReadOnly, + DoctorPermissionArg::WorkspaceWrite => PermissionMode::WorkspaceWrite, + DoctorPermissionArg::Prompt => PermissionMode::Prompt, + DoctorPermissionArg::DangerFullAccess => PermissionMode::DangerFullAccess, + DoctorPermissionArg::Allow => PermissionMode::Allow, + } + } +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum DoctorOutputArg { + Rich, + Json, +} + +impl From for OutputFormat { + fn from(o: DoctorOutputArg) -> Self { + match o { + DoctorOutputArg::Rich => OutputFormat::Rich, + DoctorOutputArg::Json => OutputFormat::Json, + } + } +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +pub enum DoctorPresetCli { + None, + Audit, + Explain, + Implement, +} + +impl From for Preset { + fn from(p: DoctorPresetCli) -> Self { + match p { + DoctorPresetCli::None => Preset::None, + DoctorPresetCli::Audit => Preset::Audit, + DoctorPresetCli::Explain => Preset::Explain, + DoctorPresetCli::Implement => Preset::Implement, + } + } +} + +#[derive(Debug, clap::Args)] +pub struct DoctorCli { + /// Workspace root (same as 
`claw-analog -w`; config defaults to `/.claw-analog.toml`). + #[arg(short = 'w', long, default_value = ".", value_name = "DIR")] + pub workspace: PathBuf, + /// Config path (default: `/.claw-analog.toml`). + #[arg(long, value_name = "PATH")] + pub config: Option, + /// Override model (same precedence as main CLI). + #[arg(long)] + pub model: Option, + #[arg(long, value_enum)] + pub permission: Option, + #[arg(long, value_enum)] + pub preset: Option, + #[arg(long, value_enum)] + pub output_format: Option, + #[arg(long, default_value_t = false, conflicts_with = "no_stream")] + pub stream: bool, + #[arg(long, default_value_t = false, conflicts_with = "stream")] + pub no_stream: bool, + /// Disable `runtime::PermissionEnforcer` (same as main CLI). + #[arg( + long = "no-runtime-enforcer", + default_value_t = false, + action = clap::ArgAction::SetTrue + )] + pub no_runtime_enforcer: bool, + #[arg( + long = "accept-danger-non-interactive", + default_value_t = false, + action = clap::ArgAction::SetTrue + )] + pub accept_danger_non_interactive: bool, + /// Profile TOML path (optional; if omitted, uses TOML `profile` or default `~/.claw-analog/profile.toml`). + #[arg(long, value_name = "PATH")] + pub profile: Option, + /// TCP connect to host:port from `ANTHROPIC_BASE_URL` (or default API URL); not a full HTTP check. + #[arg(long, visible_alias = "mock")] + pub tcp_ping: bool, + /// Skip HTTPS/TLS + auth + quota header checks against configured providers. + #[arg(long, default_value_t = false)] + pub no_http_check: bool, + /// Also probe the embeddings endpoint for OpenAI-compatible providers (may incur minimal cost). + #[arg(long, default_value_t = false)] + pub embeddings_check: bool, + /// Skip compile check (`cargo check` / `build --release`). + #[arg(long)] + pub no_build: bool, + /// Run `cargo build --release -p claw-analog` (writes `target/release/…`, safe while `cargo run` holds `target/debug/…` on Windows). 
+ #[arg(long, conflicts_with = "no_build")] + pub release_build: bool, + /// Directory containing the repo workspace `Cargo.toml` (default: search upward from cwd). + #[arg(long, value_name = "DIR")] + pub manifest_dir: Option, +} + +pub fn run_doctor(args: DoctorCli) -> i32 { + println!("claw-analog doctor — environment and build checks\n"); + + let workspace = args.workspace.clone(); + let canon_ws = std::fs::canonicalize(&workspace).unwrap_or_else(|_| workspace.clone()); + let cfg_path = args + .config + .clone() + .unwrap_or_else(|| workspace.join(".claw-analog.toml")); + let (file_cfg, cfg_note) = if cfg_path.is_file() { + match load_analog_toml(&cfg_path) { + Ok(c) => (c, "loaded"), + Err(e) => { + eprintln!( + "[claw-analog] doctor: failed to parse {}: {e} (using empty TOML defaults)", + cfg_path.display() + ); + (AnalogFileConfig::default(), "parse error (defaults)") + } + } + } else { + (AnalogFileConfig::default(), "file missing (defaults only)") + }; + + let stream_ov = if args.no_stream { + StreamOverride::ForceOff + } else if args.stream { + StreamOverride::ForceOn + } else { + StreamOverride::FromFile + }; + let overrides = AnalogDoctorOverrides { + model: args.model.clone(), + permission: args.permission.map(Into::into), + preset: args.preset.map(Into::into), + output_format: args.output_format.map(Into::into), + stream: stream_ov, + no_runtime_enforcer: args.no_runtime_enforcer, + accept_danger_non_interactive: args.accept_danger_non_interactive, + }; + let resolved = resolve_analog_options(&file_cfg, &overrides); + + println!("NDJSON contract (for `--output-format json` runs):"); + println!(" schema: {NDJSON_SCHEMA}"); + println!(" format_version: {NDJSON_FORMAT_VERSION}\n"); + + println!("Effective config (merge of `.claw-analog.toml` + flags below):"); + println!(" workspace: {}", canon_ws.display()); + println!(" config: {} ({cfg_note})", cfg_path.display()); + println!(" model: {}", resolved.model); + println!(" permission: {}", 
/// Returns the current user's home directory from the environment
/// (`USERPROFILE` on Windows, `HOME` elsewhere).
///
/// An unset or empty variable yields `None`, so callers never build paths
/// relative to an empty "home".
fn home_dir() -> Option<PathBuf> {
    let var = if cfg!(windows) { "USERPROFILE" } else { "HOME" };
    std::env::var_os(var)
        .filter(|value| !value.is_empty())
        .map(PathBuf::from)
}

/// Expands a leading `~` in `raw` to the user's home directory.
///
/// Recognised forms are a bare `~`, `~/rest`, and (on Windows only) `~\rest`.
/// Anything else, including `~user/...`, is returned unchanged. The input is
/// also returned unchanged when no home directory can be determined.
fn expand_user_path(raw: &str) -> PathBuf {
    let rest = if raw == "~" {
        Some("")
    } else {
        raw.strip_prefix("~/").or_else(|| {
            if cfg!(windows) {
                raw.strip_prefix("~\\")
            } else {
                None
            }
        })
    };
    let Some(rest) = rest else {
        return PathBuf::from(raw);
    };
    match home_dir() {
        // Avoid `join("")`, which would append a trailing separator.
        Some(home) if rest.is_empty() => home,
        Some(home) => home.join(rest),
        None => PathBuf::from(raw),
    }
}
p.is_absolute() { + p + } else { + workspace.join(p) + }); + } + let def = home_dir()?.join(".claw-analog").join("profile.toml"); + if def.is_file() { + Some(def) + } else { + None + } +} + +fn print_profile_hint_section(path: &Option) { + println!("Profile (system prompt snippet):"); + match path { + None => println!(" (none — no --profile, no `profile` in TOML, default file absent)"), + Some(p) => { + print!(" path: {}", p.display()); + match load_profile_hint(p) { + Ok(Some(h)) => println!(" — loaded, {} chars", h.chars().count()), + Ok(None) => println!(" — file ok, empty `line`"), + Err(e) => println!(" — error: {e}"), + } + } + } +} + +fn mask_env_line(name: &str) { + match std::env::var(name) { + Ok(v) if !v.trim().is_empty() => { + println!(" {name}: set ({} chars)", v.chars().count()); + } + Ok(_) => println!(" {name}: set but empty"), + Err(_) => println!(" {name}: unset"), + } +} + +fn check_env() { + println!("Environment (values are not printed):"); + for name in ENV_CHECK { + mask_env_line(name); + } + let anthro_ok = std::env::var("ANTHROPIC_API_KEY") + .map(|s| !s.trim().is_empty()) + .unwrap_or(false) + || std::env::var("ANTHROPIC_AUTH_TOKEN") + .map(|s| !s.trim().is_empty()) + .unwrap_or(false); + let openai_ok = std::env::var("OPENAI_API_KEY") + .map(|s| !s.trim().is_empty()) + .unwrap_or(false); + println!(); + if anthro_ok { + println!("Anthropic credentials: OK (API key and/or auth token)."); + } else { + println!("Anthropic credentials: not set — needed for default Claude/Anthropic models."); + } + if openai_ok { + println!("OpenAI API key: set — use `openai/...` model prefix for that provider."); + } else { + println!("OpenAI API key: unset — only relevant for `openai/` models."); + } + if !anthro_ok && !openai_ok { + println!("\nNote: neither Anthropic nor OpenAI keys are set; live runs will fail until you export credentials (see USAGE.md)."); + } +} + +/// Walk upward from `start` for a `Cargo.toml` that defines `[workspace]`. 
/// Walks upward from `start` (at most 32 levels) looking for a `Cargo.toml`
/// that declares a workspace, and returns the directory containing it.
///
/// A manifest counts as a workspace root when one of its lines begins (after
/// leading whitespace) with the `[workspace]` table header or a
/// `[workspace.*]` sub-table header. Matching whole header lines means a
/// commented-out `# [workspace]` or the text appearing inside a string no
/// longer counts, and manifests that only define `[workspace.package]` /
/// `[workspace.dependencies]` are recognised.
///
/// A relative `start` is anchored at the current directory so the walk can
/// climb above it; the returned path is then absolute. `..` components are
/// not resolved lexically.
///
/// Returns `None` when no such manifest is found before the filesystem root
/// or the depth limit is reached.
pub fn discover_cargo_workspace(start: &Path) -> Option<PathBuf> {
    /// True when any line of the manifest is a `[workspace]` / `[workspace.*]` header.
    fn declares_workspace(manifest_text: &str) -> bool {
        manifest_text.lines().any(|line| {
            let line = line.trim_start();
            line.starts_with("[workspace]") || line.starts_with("[workspace.")
        })
    }

    let mut dir = if start.is_relative() {
        match std::env::current_dir() {
            Ok(cwd) => cwd.join(start),
            // Best effort: fall back to the path as given.
            Err(_) => start.to_path_buf(),
        }
    } else {
        start.to_path_buf()
    };

    for _ in 0..32 {
        let manifest = dir.join("Cargo.toml");
        if manifest.is_file() {
            // Unreadable manifests are skipped rather than treated as fatal.
            if let Ok(txt) = std::fs::read_to_string(&manifest) {
                if declares_workspace(&txt) {
                    return Some(dir);
                }
            }
        }
        if !dir.pop() {
            return None;
        }
    }
    None
}
/// Extracts `(host, port)` from an `http://` or `https://` URL.
///
/// - The authority ends at the first `/`, `?` or `#`; any `user:pass@`
///   prefix is ignored.
/// - Bracketed IPv6 literals (`[::1]`, `[::1]:8443`) are returned without the
///   brackets, whether or not a port follows.
/// - A missing or empty port falls back to the scheme default (443 / 80).
///
/// # Errors
/// Returns a message when the scheme is not http/https, the host is empty,
/// a `[` is not closed, or the port is not a valid `u16` (previously an
/// invalid port was silently kept as part of the host name).
fn parse_host_port(url: &str) -> Result<(String, u16), String> {
    let url = url.trim();
    let (default_port, rest) = if let Some(r) = url.strip_prefix("https://") {
        (443, r)
    } else if let Some(r) = url.strip_prefix("http://") {
        (80, r)
    } else {
        return Err("URL must start with http:// or https://".into());
    };

    let authority = rest
        .split(|c: char| matches!(c, '/' | '?' | '#'))
        .next()
        .unwrap_or("");
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, after_userinfo)| after_userinfo);

    let (host, port_text) = if let Some(bracketed) = host_port.strip_prefix('[') {
        // IPv6 literal: the colons inside the brackets are not port separators.
        let (host, tail) = bracketed
            .split_once(']')
            .ok_or_else(|| "unterminated '[' in host".to_string())?;
        match tail.strip_prefix(':') {
            Some(port) => (host, Some(port)),
            None if tail.is_empty() => (host, None),
            None => return Err(format!("unexpected text after ']': {tail}")),
        }
    } else {
        match host_port.rsplit_once(':') {
            Some((host, port)) => (host, Some(port)),
            None => (host_port, None),
        }
    };

    if host.is_empty() {
        return Err("missing host".to_string());
    }
    let port = match port_text {
        // "host:" with nothing after the colon means "default port".
        None | Some("") => default_port,
        Some(text) => text
            .parse::<u16>()
            .map_err(|_| format!("invalid port: {text}"))?,
    };
    Ok((host.to_string(), port))
}
/// Checks TCP reachability of `host:port` with a plain `connect()`.
///
/// Every address the name resolves to is tried in order, each with a
/// 3-second timeout, and the first successful connection wins. Trying only
/// the first address would report a dual-stack host as unreachable whenever
/// that one address (e.g. an IPv6 record without a route) is dead.
///
/// # Errors
/// Returns the resolver error, `"no resolved addresses"` when resolution
/// yields nothing, or the last connection error prefixed with the address
/// that produced it.
fn tcp_ping(host: &str, port: u16) -> Result<(), String> {
    let candidates = (host, port)
        .to_socket_addrs()
        .map_err(|e| e.to_string())?;

    let mut last_err = "no resolved addresses".to_string();
    for addr in candidates {
        match TcpStream::connect_timeout(&addr, Duration::from_secs(3)) {
            Ok(_) => return Ok(()),
            Err(e) => last_err = format!("{addr}: {e}"),
        }
    }
    Err(last_err)
}
+ .await; + } else { + println!(" openai embeddings: skipped (pass --embeddings-check to enable)"); + } + } else { + println!(" openai: skipped (OPENAI_API_KEY empty)"); + } + } else { + println!(" openai: skipped (OPENAI_API_KEY unset)"); + } + + // Anthropic (ANTHROPIC_BASE_URL, ANTHROPIC_API_KEY/AUTH_TOKEN) + let a_key = std::env::var("ANTHROPIC_API_KEY").ok(); + let a_tok = std::env::var("ANTHROPIC_AUTH_TOKEN").ok(); + let a_base = std::env::var("ANTHROPIC_BASE_URL") + .ok() + .unwrap_or_else(|| "https://api.anthropic.com".to_string()); + if a_key.as_deref().is_some_and(|s| !s.trim().is_empty()) + || a_tok.as_deref().is_some_and(|s| !s.trim().is_empty()) + { + let url = anthropic_models_url(a_base.as_str()); + let mut headers = HeaderMap::new(); + headers.insert( + HeaderName::from_static("anthropic-version"), + HeaderValue::from_static("2023-06-01"), + ); + if let Some(k) = a_key.as_deref().map(str::trim).filter(|s| !s.is_empty()) { + if let Ok(v) = HeaderValue::from_str(k) { + headers.insert(HeaderName::from_static("x-api-key"), v); + } + } else if let Some(t) = a_tok.as_deref().map(str::trim).filter(|s| !s.is_empty()) { + if let Ok(v) = HeaderValue::from_str(format!("Bearer {t}").as_str()) { + headers.insert(reqwest::header::AUTHORIZATION, v); + } + } + let _ = http_check_and_print("anthropic", url.as_str(), headers).await; + } else { + println!(" anthropic: skipped (no API key/token)"); + } + + // RAG service (RAG_BASE_URL) — just basic health + stats. 
/// Joins `leaf` onto `base` under exactly one trailing `/v1` segment.
///
/// Surrounding whitespace and trailing slashes on `base` are ignored; a base
/// that already ends in `/v1` does not get a second one appended.
fn v1_endpoint(base: &str, leaf: &str) -> String {
    let root = base.trim().trim_end_matches('/');
    let root = root.strip_suffix("/v1").unwrap_or(root);
    format!("{root}/v1/{leaf}")
}

/// URL of the model-list endpoint for an OpenAI-compatible base URL.
fn openai_models_url(base: &str) -> String {
    v1_endpoint(base, "models")
}

/// URL of the embeddings endpoint for an OpenAI-compatible base URL.
fn openai_embeddings_url(base: &str) -> String {
    v1_endpoint(base, "embeddings")
}

/// URL of the Anthropic model-list endpoint, limited to one entry.
///
/// Unlike the OpenAI helpers, `/v1` is always appended, even when `base`
/// already ends in `/v1`.
fn anthropic_models_url(base: &str) -> String {
    let root = base.trim().trim_end_matches('/');
    [root, "/v1/models?limit=1"].concat()
}
name.contains("quota") { + if let Ok(s) = v.to_str() { + out.push((k.as_str().to_string(), s.to_string())); + } + } + // OpenAI-compatible common headers: + if name.starts_with("x-ratelimit-") { + if let Ok(s) = v.to_str() { + out.push((k.as_str().to_string(), s.to_string())); + } + } + } + out.sort(); + out.dedup(); + for (k, v) in out { + println!(" {k}: {v}"); + } +} + +async fn openai_embeddings_probe( + label: &str, + url: &str, + model: &str, + headers: HeaderMap, +) -> Result<(), ()> { + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(12)) + .build(); + let Ok(client) = client else { + println!(" {label}: FAIL (client build)"); + return Err(()); + }; + + // Minimal request: one short string. We don't parse the embedding content. + let body = serde_json::json!({ + "model": model, + "input": ["ping"] + }); + + let resp = client.post(url).headers(headers).json(&body).send().await; + match resp { + Ok(r) => { + let status = r.status(); + println!(" {label}: {status} ({url}) model={model}"); + print_quota_headers(r.headers()); + if !status.is_success() { + let t = r.text().await.unwrap_or_default(); + if !t.trim().is_empty() { + println!(" body: {}", t.chars().take(400).collect::()); + } + return Err(()); + } + Ok(()) + } + Err(e) => { + let msg = e.to_string(); + if msg.to_ascii_lowercase().contains("certificate") + || msg.to_ascii_lowercase().contains("tls") + { + println!(" {label}: FAIL (TLS/cert) ({url}) — {msg}"); + } else { + println!(" {label}: FAIL ({url}) — {msg}"); + } + Err(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_base_url_host_port() { + assert_eq!( + parse_host_port("http://127.0.0.1:8080/v1").unwrap(), + ("127.0.0.1".into(), 8080) + ); + assert_eq!( + parse_host_port("https://api.anthropic.com").unwrap(), + ("api.anthropic.com".into(), 443) + ); + } +} diff --git a/rust/crates/claw-analog/src/lib.rs b/rust/crates/claw-analog/src/lib.rs new file mode 100644 index 00000000..e4ed517b 
--- /dev/null +++ b/rust/crates/claw-analog/src/lib.rs @@ -0,0 +1,2889 @@ +//! Lean agent harness: tool loop, optional streaming, optional `PermissionEnforcer`. +#![forbid(unsafe_code)] + +use std::collections::BTreeMap; +use std::fmt::Write; +use std::io::{self, IsTerminal}; +use std::path::{Component, Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::time::Duration; + +use api::{ + max_tokens_for_model, resolve_model_alias, ApiError, ContentBlockDelta, ContentBlockStartEvent, + InputContentBlock, InputMessage, MessageDeltaEvent, MessageRequest, MessageResponse, + MessageStartEvent, MessageStopEvent, OutputContentBlock, ProviderClient, StreamEvent, + ToolChoice, ToolDefinition, ToolResultContentBlock, +}; +use globset::{Glob, GlobSet, GlobSetBuilder}; +use ignore::WalkBuilder; +use runtime::permission_enforcer::{EnforcementResult, PermissionEnforcer}; +pub use runtime::PermissionMode; +use runtime::PermissionPolicy; +use serde_json::{json, Value}; + +/// Refuses unrestricted permission modes in non-interactive runs unless explicitly opted in (same spirit as full `claw` CLI). +pub fn enforce_non_interactive_permission_rules( + mode: PermissionMode, + accept_danger_non_interactive: bool, +) -> Result<(), String> { + enforce_non_interactive_permission_rules_with_tty( + mode, + accept_danger_non_interactive, + io::stdin().is_terminal(), + ) +} + +/// Same as [`enforce_non_interactive_permission_rules`] but with an explicit stdin-TTY flag (for tests and tooling). +pub fn enforce_non_interactive_permission_rules_with_tty( + mode: PermissionMode, + accept_danger_non_interactive: bool, + stdin_is_tty: bool, +) -> Result<(), String> { + if matches!( + mode, + PermissionMode::DangerFullAccess | PermissionMode::Allow + ) && !stdin_is_tty + && !accept_danger_non_interactive + { + return Err( + "permission modes 'danger-full-access' and 'allow' are refused when stdin is not a TTY (non-interactive). 
/// Reply-language hint for the assistant (fed into the system prompt); it does
/// not change which API model is used. English is the default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum AnalogLanguage {
    #[default]
    En,
    Ru,
}

impl AnalogLanguage {
    /// Parses a TOML `language` value.
    ///
    /// Accepts `en` / `english` and `ru` / `russian`, ignoring surrounding
    /// whitespace and ASCII case. Any other value yields `None`.
    #[must_use]
    pub fn from_toml_str(s: &str) -> Option<Self> {
        let tag = s.trim();
        let is_any_of = |names: &[&str]| names.iter().any(|name| tag.eq_ignore_ascii_case(name));
        if is_any_of(&["en", "english"]) {
            Some(Self::En)
        } else if is_any_of(&["ru", "russian"]) {
            Some(Self::Ru)
        } else {
            None
        }
    }

    /// Short lowercase code for this language (`"en"` or `"ru"`).
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Ru => "ru",
            Self::En => "en",
        }
    }
}
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum Preset { + #[default] + None, + Audit, + Explain, + Implement, +} + +impl Preset { + #[must_use] + pub fn from_toml_str(s: &str) -> Option { + match s.trim().to_ascii_lowercase().as_str() { + "" | "none" => Some(Self::None), + "audit" => Some(Self::Audit), + "explain" => Some(Self::Explain), + "implement" => Some(Self::Implement), + _ => None, + } + } + + pub fn label(self) -> Option<&'static str> { + match self { + Self::None => None, + Self::Audit => Some("audit"), + Self::Explain => Some("explain"), + Self::Implement => Some("implement"), + } + } + + fn extra_system(self) -> Option<&'static str> { + match self { + Self::None => None, + Self::Audit => Some( + "Preset: audit — prioritize security, correctness, and suspicious patterns; cite file paths and evidence; prefer read-only investigation.", + ), + Self::Explain => Some( + "Preset: explain — teach clearly; define terms; ground claims in repository content; avoid unnecessary jargon.", + ), + Self::Implement => Some( + "Preset: implement — make focused edits; read before writing when unsure; keep changes small and explain what you changed.", + ), + } + } +} + +/// Infer a reasonable preset from the initial user prompt. +/// +/// This is intentionally heuristic and conservative: +/// - Prefer `audit` when security/review intent is detected. +/// - Prefer `implement` when the user asks to change/fix/add/refactor something. +/// - Prefer `explain` for "why/how/explain" style questions. +/// - Fall back to `none`. +#[must_use] +pub fn infer_preset_from_prompt(prompt: &str) -> Preset { + let p = prompt.trim().to_ascii_lowercase(); + if p.is_empty() { + return Preset::None; + } + + // High priority: audit / security review intent. 
+ let audit_hits = [ + "audit", + "security", + "secure", + "vuln", + "vulnerability", + "threat", + "review", + "pentest", + "опасн", + "безопас", + "уязв", + "аудит", + "ревью", + ]; + if audit_hits.iter().any(|k| p.contains(k)) { + return Preset::Audit; + } + + // Next: implement intent (do work / change code). + let implement_hits = [ + "implement", + "add", + "build", + "create", + "change", + "update", + "refactor", + "optimize", + "fix", + "bug", + "feature", + "сделай", + "сделать", + "добав", + "передел", + "измен", + "обнов", + "рефактор", + "оптимиз", + "почин", + "исправ", + "баг", + "фича", + ]; + if implement_hits.iter().any(|k| p.contains(k)) { + return Preset::Implement; + } + + // Then: explain intent. + let explain_hits = [ + "explain", + "why", + "how", + "what is", + "help me understand", + "объясни", + "объяснить", + "почему", + "как", + "что такое", + "разъясни", + ]; + if explain_hits.iter().any(|k| p.contains(k)) { + return Preset::Explain; + } + + Preset::None +} + +/// Stable NDJSON contract id for consumers. Bump [`NDJSON_FORMAT_VERSION`] when event shapes break compatibility. +pub const NDJSON_SCHEMA: &str = "claw-analog-ndjson"; +/// Increment when NDJSON event types or required `run_start` fields change incompatibly. +pub const NDJSON_FORMAT_VERSION: u32 = 1; + +/// Default `model` when CLI and TOML omit it. +pub const ANALOG_DEFAULT_MODEL: &str = "sonnet"; + +/// Map TOML / policy strings to [`PermissionMode`] (same rules as the main `claw-analog` CLI). 
+#[must_use] +pub fn permission_mode_from_toml_str(s: &str) -> Option { + match s.to_ascii_lowercase().replace('_', "-").as_str() { + "read-only" | "readonly" => Some(PermissionMode::ReadOnly), + "workspace-write" | "write" => Some(PermissionMode::WorkspaceWrite), + "prompt" => Some(PermissionMode::Prompt), + "danger-full-access" | "danger" => Some(PermissionMode::DangerFullAccess), + "allow" => Some(PermissionMode::Allow), + _ => None, + } +} + +fn output_format_from_toml_str(s: &str) -> Option { + match s.to_ascii_lowercase().as_str() { + "json" => Some(OutputFormat::Json), + "rich" => Some(OutputFormat::Rich), + _ => None, + } +} + +/// How doctor (or tooling) overrides `stream` relative to TOML. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum StreamOverride { + #[default] + FromFile, + ForceOn, + ForceOff, +} + +/// Optional CLI knobs for [`resolve_analog_options`] (subset of the real run CLI). +#[derive(Debug, Clone, Default)] +pub struct AnalogDoctorOverrides { + pub model: Option, + pub permission: Option, + pub preset: Option, + pub output_format: Option, + pub stream: StreamOverride, + pub no_runtime_enforcer: bool, + pub accept_danger_non_interactive: bool, +} + +#[derive(Debug, Clone)] +pub struct ResolvedAnalogOptions { + pub model: String, + pub permission_mode: PermissionMode, + pub preset: Preset, + pub output_format: OutputFormat, + pub use_stream: bool, + pub use_runtime_enforcer: bool, + pub accept_danger_non_interactive: bool, + /// One line per knob: human-readable provenance (`model ← CLI`, etc.). + pub provenance: Vec, +} + +/// Effective options after merging `.claw-analog.toml` with optional CLI overrides (same precedence as `claw-analog` run). 
+#[must_use] +pub fn resolve_analog_options( + file: &AnalogFileConfig, + overrides: &AnalogDoctorOverrides, +) -> ResolvedAnalogOptions { + let (model, m_src) = if let Some(ref m) = overrides.model { + (m.trim().to_string(), "CLI") + } else if let Some(ref fm) = file.model { + let fm = fm.trim(); + if fm.is_empty() { + (ANALOG_DEFAULT_MODEL.to_string(), "default (empty in TOML)") + } else { + (fm.to_string(), ".claw-analog.toml") + } + } else { + (ANALOG_DEFAULT_MODEL.to_string(), "default") + }; + + let (preset, p_src) = if let Some(p) = overrides.preset { + (p, "CLI") + } else if let Some(s) = file.preset.as_deref().and_then(Preset::from_toml_str) { + (s, ".claw-analog.toml") + } else { + (Preset::None, "default (none)") + }; + + let (permission_mode, perm_src) = if let Some(p) = overrides.permission { + (p, "CLI") + } else if let Some(s) = file + .permission + .as_deref() + .and_then(permission_mode_from_toml_str) + { + (s, ".claw-analog.toml") + } else { + match preset { + Preset::Implement => ( + PermissionMode::WorkspaceWrite, + "default for preset implement", + ), + _ => (PermissionMode::ReadOnly, "default (read-only)"), + } + }; + + let (output_format, of_src) = if let Some(o) = overrides.output_format { + (o, "CLI") + } else if let Some(s) = file + .output_format + .as_deref() + .and_then(output_format_from_toml_str) + { + (s, ".claw-analog.toml") + } else { + (OutputFormat::Rich, "default (rich)") + }; + + let (use_stream, stream_src) = match overrides.stream { + StreamOverride::ForceOn => (true, "CLI (--stream)"), + StreamOverride::ForceOff => (false, "CLI (--no-stream)"), + StreamOverride::FromFile => { + if let Some(b) = file.stream { + (b, ".claw-analog.toml") + } else { + (false, "default (off)") + } + } + }; + + let use_runtime_enforcer = + !overrides.no_runtime_enforcer && !file.no_runtime_enforcer.unwrap_or(false); + let re_src = if overrides.no_runtime_enforcer { + "CLI (--no-runtime-enforcer)" + } else if file.no_runtime_enforcer == Some(true) 
{ + ".claw-analog.toml" + } else { + "default (on)" + }; + + let accept_danger_non_interactive = overrides.accept_danger_non_interactive + || file.accept_danger_non_interactive.unwrap_or(false); + let ad_src = match ( + overrides.accept_danger_non_interactive, + file.accept_danger_non_interactive.unwrap_or(false), + ) { + (true, true) => "CLI and .claw-analog.toml", + (true, false) => "CLI", + (false, true) => ".claw-analog.toml", + (false, false) => "default (off)", + }; + + let provenance = vec![ + format!("model ← {m_src}"), + format!("preset ← {p_src}"), + format!("permission ← {perm_src}"), + format!("output_format ← {of_src}"), + format!("stream ← {stream_src}"), + format!("runtime_enforcer ← {re_src}"), + format!("accept_danger_non_interactive ← {ad_src}"), + ]; + + ResolvedAnalogOptions { + model, + permission_mode, + preset, + output_format, + use_stream, + use_runtime_enforcer, + accept_danger_non_interactive, + provenance, + } +} + +/// User home directory (`USERPROFILE` on Windows, `HOME` elsewhere). +#[must_use] +pub fn analog_user_home_dir() -> Option { + #[cfg(windows)] + { + std::env::var_os("USERPROFILE").map(PathBuf::from) + } + #[cfg(not(windows))] + { + std::env::var_os("HOME").map(PathBuf::from) + } +} + +/// Expand a leading `~/` path using [`analog_user_home_dir`]. +#[must_use] +pub fn analog_expand_tilde_path(raw: &str) -> PathBuf { + if let Some(rest) = raw.strip_prefix("~/") { + analog_user_home_dir() + .map(|h| h.join(rest)) + .unwrap_or_else(|| PathBuf::from(raw)) + } else { + PathBuf::from(raw) + } +} + +/// Match main CLI profile resolution: `--profile`, then TOML `profile`, then default `~/.claw-analog/profile.toml` if it exists. 
+#[must_use] +pub fn resolve_analog_profile_path( + workspace: &Path, + profile_cli: Option, + profile_from_toml: Option<&str>, +) -> Option { + if let Some(p) = profile_cli { + return Some(if p.is_absolute() { + p + } else { + workspace.join(&p) + }); + } + if let Some(s) = profile_from_toml { + let p = analog_expand_tilde_path(s.trim()); + return Some(if p.is_absolute() { + p + } else { + workspace.join(p) + }); + } + let def = analog_user_home_dir()? + .join(".claw-analog") + .join("profile.toml"); + if def.is_file() { + Some(def) + } else { + None + } +} + +fn persist_conversation_sessions( + config: &AnalogConfig, + ws_str: &str, + model: &str, + messages: &[InputMessage], +) -> Result<(), String> { + if let Some(p) = &config.session_path { + session_save(p, ws_str, model, config.preset, messages)?; + } + if let Some(p) = &config.session_save_path { + let duplicate = config.session_path.as_ref() == Some(p); + if !duplicate { + session_save(p, ws_str, model, config.preset, messages)?; + } + } + Ok(()) +} + +/// Max bytes read from `profile.toml`; line is truncated to this many **Unicode scalars** after trim. +pub const PROFILE_FILE_MAX_BYTES: usize = 2048; +pub const PROFILE_LINE_MAX_CHARS: usize = 512; + +const SESSION_FILE_VERSION: u32 = 1; + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +struct SessionFile { + version: u32, + workspace: String, + model: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + preset: Option, + messages: Vec, +} + +/// Load a session file without appending a new user prompt. 
+pub fn session_load_messages(path: &Path) -> Result, String> { + if !path.exists() { + return Ok(Vec::new()); + } + let raw = std::fs::read_to_string(path).map_err(|e| e.to_string())?; + let file: SessionFile = serde_json::from_str(&raw).map_err(|e| e.to_string())?; + if file.version != SESSION_FILE_VERSION { + return Err(format!( + "session file version {} not supported (expected {SESSION_FILE_VERSION})", + file.version + )); + } + Ok(file.messages) +} + +fn atomic_write(path: &Path, contents: &[u8]) -> Result<(), String> { + if let Some(parent) = path.parent() { + if !parent.as_os_str().is_empty() { + std::fs::create_dir_all(parent).map_err(|e| e.to_string())?; + } + } + let tmp = path.with_extension("tmp_session_write"); + std::fs::write(&tmp, contents).map_err(|e| e.to_string())?; + #[cfg(windows)] + { + let _ = std::fs::remove_file(path); + } + std::fs::rename(&tmp, path).map_err(|e| e.to_string())?; + Ok(()) +} + +/// Load prior turns (if the file exists), append `new_prompt` as a new user message. 
+pub fn session_bootstrap_messages( + path: &Path, + workspace: &str, + model: &str, + preset: Preset, + new_prompt: &str, +) -> Result, String> { + if !path.exists() { + return Ok(vec![InputMessage::user_text(new_prompt.to_string())]); + } + let raw = std::fs::read_to_string(path).map_err(|e| e.to_string())?; + let file: SessionFile = serde_json::from_str(&raw).map_err(|e| e.to_string())?; + if file.version != SESSION_FILE_VERSION { + return Err(format!( + "session file version {} not supported (expected {SESSION_FILE_VERSION})", + file.version + )); + } + if file.workspace != workspace { + eprintln!( + "[claw-analog] warning: session workspace differs (file: {}, current: {})", + file.workspace, workspace + ); + } + if file.model != model { + eprintln!( + "[claw-analog] warning: session model differs (file: {}, current: {})", + file.model, model + ); + } + let want_preset = preset.label().map(String::from); + if want_preset.as_deref() != file.preset.as_deref() { + eprintln!( + "[claw-analog] warning: session preset {:?} vs current {:?}", + file.preset, want_preset + ); + } + let mut messages = file.messages; + messages.push(InputMessage::user_text(new_prompt.to_string())); + Ok(messages) +} + +pub fn session_save( + path: &Path, + workspace: &str, + model: &str, + preset: Preset, + messages: &[InputMessage], +) -> Result<(), String> { + let data = SessionFile { + version: SESSION_FILE_VERSION, + workspace: workspace.into(), + model: model.into(), + preset: preset.label().map(String::from), + messages: messages.to_vec(), + }; + let json = serde_json::to_string_pretty(&data).map_err(|e| e.to_string())?; + atomic_write(path, json.as_bytes())?; + Ok(()) +} + +fn session_warn_common() { + eprintln!( + "[claw-analog] session: files may contain secrets (tool output, pasted keys). Do not share. Large histories increase API cost." 
+ ); +} + +#[derive(Debug, serde::Deserialize)] +#[serde(deny_unknown_fields)] +struct ProfileToml { + line: Option, +} + +/// Read `~/.claw-analog/profile.toml`-style file: single `line` merged into system prompt. +pub fn load_profile_hint(path: &Path) -> Result, String> { + let meta = std::fs::metadata(path).map_err(|e| e.to_string())?; + if meta.len() as usize > PROFILE_FILE_MAX_BYTES { + return Err(format!( + "profile file too large ({} bytes; max {})", + meta.len(), + PROFILE_FILE_MAX_BYTES + )); + } + let raw = std::fs::read_to_string(path).map_err(|e| e.to_string())?; + let parsed: ProfileToml = toml::from_str(&raw).map_err(|e| e.to_string())?; + let line = parsed.line.unwrap_or_default(); + let line = line.trim(); + if line.is_empty() { + return Ok(None); + } + let nchars = line.chars().count(); + if nchars > PROFILE_LINE_MAX_CHARS { + eprintln!( + "[claw-analog] warning: profile line truncated ({} chars; max {})", + nchars, PROFILE_LINE_MAX_CHARS + ); + let truncated: String = line.chars().take(PROFILE_LINE_MAX_CHARS).collect(); + return Ok(Some(truncated)); + } + Ok(Some(line.to_string())) +} + +#[derive(Debug, Clone)] +pub struct AnalogConfig { + pub model: String, + pub workspace: PathBuf, + /// Active [`PermissionMode`] (read-only, workspace-write, prompt, danger-full-access, allow). + pub permission_mode: PermissionMode, + /// Allow `danger-full-access` / `allow` when stdin is not a TTY (automation opt-in). + pub accept_danger_non_interactive: bool, + pub use_stream: bool, + pub output_format: OutputFormat, + /// Gate tools with [`PermissionEnforcer`] (aligned with main CLI policy). + pub use_runtime_enforcer: bool, + pub max_read_bytes: u64, + pub max_turns: u32, + pub max_list_entries: usize, + pub grep_max_lines: usize, + /// Cap for `glob_workspace` and for `grep_workspace` when using `glob`. + pub glob_max_paths: usize, + /// `walkdir` max depth from the search root (prevents unbounded recursion). 
+ pub glob_max_depth: usize, + pub preset: Preset, + /// Bias assistant replies toward English or Russian (system prompt only). + pub language: AnalogLanguage, + /// When set, load/save turn history (resume with the same path). See session warnings in `how_to_run.md`. + pub session_path: Option, + /// After each session snapshot, also write this path (export without resuming from `--session`, or copy of the same file). + pub session_save_path: Option, + /// One short line from profile TOML, merged into the system prompt. + pub profile_hint: Option, + pub prompt: String, + /// When set (TOML `rag_base_url` or env `RAG_BASE_URL`), exposes `retrieve_context` and calls `POST {base}/v1/query`. + pub rag_base_url: Option, + pub rag_http_timeout: Duration, + /// Upper bound for `top_k` accepted from the model (default 32). + pub rag_top_k_max: u32, +} + +/// Optional defaults from `.claw-analog.toml` (see `load_analog_toml`). +#[derive(Debug, Clone, Default, serde::Deserialize)] +#[serde(deny_unknown_fields)] +pub struct AnalogFileConfig { + pub model: Option, + pub stream: Option, + pub output_format: Option, + pub permission: Option, + pub no_runtime_enforcer: Option, + /// Acknowledge danger/allow mode in CI when stdin is not a TTY. + pub accept_danger_non_interactive: Option, + pub max_read_bytes: Option, + pub max_turns: Option, + pub max_list_entries: Option, + pub grep_max_lines: Option, + pub glob_max_paths: Option, + pub glob_max_depth: Option, + pub preset: Option, + /// `en` or `ru` — reply language hint in system prompt (not the API model id). + pub language: Option, + /// Session file path (relative to workspace if not absolute). + pub session: Option, + /// Profile snippet path (default `~/.claw-analog/profile.toml` when omitted; see `profile` CLI). + pub profile: Option, + /// Override env `RAG_BASE_URL` when non-empty (HTTP root of `claw-rag-service`, no trailing `/v1` path). 
+ pub rag_base_url: Option, + /// Timeout for `retrieve_context` HTTP calls (seconds). + pub rag_timeout_secs: Option, + /// Max `top_k` the model may request (default 32, hard-capped at 256). + pub rag_top_k_max: Option, +} + +/// Read `.claw-analog.toml`; relative paths are the caller's responsibility. +pub fn load_analog_toml(path: &Path) -> Result { + let raw = std::fs::read_to_string(path).map_err(|e| e.to_string())?; + toml::from_str(&raw).map_err(|e| e.to_string()) +} + +/// Non-empty `rag_base_url` from TOML wins; otherwise `RAG_BASE_URL` from the environment. +#[must_use] +pub fn resolve_rag_base_url(file: &AnalogFileConfig) -> Option { + let from_file = file + .rag_base_url + .as_ref() + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()); + if from_file.is_some() { + return from_file; + } + std::env::var("RAG_BASE_URL") + .ok() + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) +} + +const MAX_JSON_TOOL_OUTPUT_BYTES: usize = 32 * 1024; +const RAG_QUERY_MAX_CHARS: usize = 12_000; + +fn write_json_line(out: &mut impl std::io::Write, value: &Value) -> std::io::Result<()> { + serde_json::to_writer(&mut *out, value).map_err(std::io::Error::other)?; + writeln!(out) +} + +fn truncate_for_json(s: &str) -> (String, bool) { + let max = MAX_JSON_TOOL_OUTPUT_BYTES; + if s.len() <= max { + return (s.to_string(), false); + } + let mut end = max; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + (format!("{}…", &s[..end]), true) +} + +fn assistant_plain_text(content: &[OutputContentBlock]) -> String { + content + .iter() + .filter_map(|b| { + if let OutputContentBlock::Text { text } = b { + Some(text.as_str()) + } else { + None + } + }) + .collect::>() + .concat() +} + +fn tool_calls_for_json(content: &[OutputContentBlock]) -> Vec { + content + .iter() + .filter_map(|b| { + if let OutputContentBlock::ToolUse { id, name, input } = b { + Some(json!({ + "id": id, + "name": name, + "input": input, + })) + } else { + None + } + 
}) + .collect() +} + +fn git_gate_is_repo(workspace: &Path) -> Result<(), String> { + let out = Command::new("git") + .args(["rev-parse", "--is-inside-work-tree"]) + .current_dir(workspace) + .output() + .map_err(|e| format!("git not available: {e}"))?; + if !out.status.success() { + return Err("not a git work tree".to_string()); + } + Ok(()) +} + +fn is_safe_git_rev_range(s: &str) -> bool { + let t = s.trim(); + if t.is_empty() || t.len() > 200 { + return false; + } + // Conservative allowlist: alnum, common ref/range punctuation. + t.chars().all(|c| { + c.is_ascii_alphanumeric() || matches!(c, '.' | '/' | '_' | '-' | '^' | '~' | ':' | '@') + }) +} + +fn read_pipe_capped(r: impl std::io::Read, cap: usize) -> std::io::Result<(Vec, bool)> { + use std::io::Read; + let mut buf = Vec::new(); + let mut limited = r.take(u64::try_from(cap.saturating_add(1)).unwrap_or(u64::MAX)); + limited.read_to_end(&mut buf)?; + let truncated = buf.len() > cap; + if truncated { + buf.truncate(cap); + } + Ok((buf, truncated)) +} + +fn run_git_capped(workspace: &Path, args: &[String], cap: usize) -> Result { + git_gate_is_repo(workspace)?; + let mut child = Command::new("git") + .arg("--no-optional-locks") + .args(args) + .current_dir(workspace) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .map_err(|e| format!("git spawn failed: {e}"))?; + + let stdout = child.stdout.take().ok_or("git stdout unavailable")?; + let stderr = child.stderr.take().ok_or("git stderr unavailable")?; + + let out_handle = std::thread::spawn(move || read_pipe_capped(stdout, cap)); + let err_handle = std::thread::spawn(move || read_pipe_capped(stderr, cap)); + + let status = child.wait().map_err(|e| format!("git wait failed: {e}"))?; + let (out_bytes, out_trunc) = out_handle + .join() + .map_err(|_| "git stdout thread panicked".to_string())? 
+ .map_err(|e| format!("git stdout read failed: {e}"))?; + let (err_bytes, err_trunc) = err_handle + .join() + .map_err(|_| "git stderr thread panicked".to_string())? + .map_err(|e| format!("git stderr read failed: {e}"))?; + + let mut out = String::from_utf8_lossy(&out_bytes).into_owned(); + let err = String::from_utf8_lossy(&err_bytes).into_owned(); + if !err.trim().is_empty() { + if !out.trim().is_empty() { + out.push_str("\n\n"); + } + out.push_str(err.trim_end()); + } + if out_trunc || err_trunc { + if !out.ends_with('\n') { + out.push('\n'); + } + out.push_str(&format!("… truncated to {cap} bytes")); + } + + if status.success() { + Ok(out) + } else if out.trim().is_empty() { + Err(format!("git failed (exit={})", status)) + } else { + Err(out) + } +} + +fn build_policy(mode: PermissionMode) -> PermissionPolicy { + PermissionPolicy::new(mode) + .with_tool_requirement("read_file", PermissionMode::ReadOnly) + .with_tool_requirement("list_dir", PermissionMode::ReadOnly) + .with_tool_requirement("glob_workspace", PermissionMode::ReadOnly) + .with_tool_requirement("grep_workspace", PermissionMode::ReadOnly) + .with_tool_requirement("grep_search", PermissionMode::ReadOnly) + .with_tool_requirement("git_diff", PermissionMode::ReadOnly) + .with_tool_requirement("git_log", PermissionMode::ReadOnly) + .with_tool_requirement("retrieve_context", PermissionMode::ReadOnly) + .with_tool_requirement("write_file", PermissionMode::WorkspaceWrite) +} + +fn tool_definitions(mode: PermissionMode, rag_base_url: Option<&str>) -> Vec { + let mut tools = vec![ + ToolDefinition { + name: "read_file".to_string(), + description: Some("Read a UTF-8 file under the workspace.".to_string()), + input_schema: json!({ + "type": "object", + "properties": { + "path": { "type": "string", "description": "Relative path from workspace root" } + }, + "required": ["path"] + }), + }, + ToolDefinition { + name: "list_dir".to_string(), + description: Some("Non-recursive directory listing (use `.` for 
root).".to_string()), + input_schema: json!({ + "type": "object", + "properties": { + "path": { "type": "string", "description": "Relative directory" } + } + }), + }, + ToolDefinition { + name: "glob_workspace".to_string(), + description: Some( + "List UTF-8 file paths under workspace matching a glob (relative to search root). Recursive depth and path count are capped. For Rust monorepos, crates often live under `rust/crates//`; use `root` `.` and patterns like `**/my-crate/**/*.rs` if a direct path is unknown.".to_string(), + ), + input_schema: json!({ + "type": "object", + "properties": { + "root": { "type": "string", "description": "Relative directory under workspace (default `.`)" }, + "pattern": { "type": "string", "description": "Glob relative to root, use `/` e.g. `**/*.rs`" }, + "max_paths": { "type": "integer", "description": "Max paths to return (capped by server)" } + }, + "required": ["pattern"] + }), + }, + ToolDefinition { + name: "grep_workspace".to_string(), + description: Some( + "Literal substring search per line (no regex, no shell). Pass `path`, or `paths`, or `glob` + optional `glob_root`.".to_string(), + ), + input_schema: json!({ + "type": "object", + "properties": { + "path": { "type": "string", "description": "Single relative file" }, + "paths": { "type": "array", "items": { "type": "string" }, "description": "Several relative files" }, + "glob": { "type": "string", "description": "Glob of files under workspace (same rules as glob_workspace)" }, + "glob_root": { "type": "string", "description": "Directory for `glob` (default `.`)" }, + "pattern": { "type": "string", "description": "Literal substring" }, + "max_lines": { "type": "integer", "description": "Total max matching lines across all files (capped)" } + }, + "required": ["pattern"] + }), + }, + ToolDefinition { + name: "grep_search".to_string(), + description: Some("Alias of `grep_workspace` (prompt compatibility). 
Same inputs.".to_string()), + input_schema: json!({ + "type": "object", + "properties": { + "path": { "type": "string" }, + "paths": { "type": "array", "items": { "type": "string" } }, + "glob": { "type": "string" }, + "glob_root": { "type": "string" }, + "pattern": { "type": "string" }, + "max_lines": { "type": "integer" } + }, + "required": ["pattern"] + }), + }, + ToolDefinition { + name: "git_diff".to_string(), + description: Some( + "Read-only `git diff` from the workspace repo (no color). Optional `cached` for staged diff; optional `rev_range`; optional path filters.".to_string(), + ), + input_schema: json!({ + "type": "object", + "properties": { + "cached": { "type": "boolean", "description": "Use --cached (staged diff)" }, + "rev_range": { "type": "string", "description": "Revision range like `HEAD~3..HEAD` or `main...HEAD`" }, + "context_lines": { "type": "integer", "description": "Unified diff context lines (passed as -U)" }, + "paths": { "type": "array", "items": { "type": "string" }, "description": "Relative paths to limit the diff" } + } + }), + }, + ToolDefinition { + name: "git_log".to_string(), + description: Some( + "Read-only `git log` from the workspace repo (no color). Supports `max_count`, optional `rev_range`, optional path filters.".to_string(), + ), + input_schema: json!({ + "type": "object", + "properties": { + "max_count": { "type": "integer", "description": "Max commits (default 20; capped by server)" }, + "rev_range": { "type": "string", "description": "Revision range like `HEAD~20..HEAD`" }, + "paths": { "type": "array", "items": { "type": "string" }, "description": "Relative paths to limit the log" } + } + }), + }, + ]; + if rag_base_url.is_some() { + tools.push(ToolDefinition { + name: "retrieve_context".to_string(), + description: Some( + "Semantic search over the workspace RAG index (separate claw-rag-service). 
Returns paths and snippets.".to_string(), + ), + input_schema: json!({ + "type": "object", + "properties": { + "query": { "type": "string", "description": "Natural-language query" }, + "top_k": { "type": "integer", "description": "Max hits (default 8; capped by server)" } + }, + "required": ["query"] + }), + }); + } + if matches!( + mode, + PermissionMode::WorkspaceWrite | PermissionMode::DangerFullAccess | PermissionMode::Allow + ) { + tools.push(ToolDefinition { + name: "write_file".to_string(), + description: Some( + "Create or overwrite a UTF-8 file (parents created if needed).".to_string(), + ), + input_schema: json!({ + "type": "object", + "properties": { + "path": { "type": "string" }, + "content": { "type": "string" } + }, + "required": ["path", "content"] + }), + }); + } + tools +} + +/// Nudge models away from answering “implementation” questions from ops wiring alone. +const SOURCE_GROUNDING_HINT: &str = "When asked where something is implemented or how an internal pipeline works, ground the answer in program source (e.g. crate modules, `main`/CLI entrypoints), not only deployment manifests (`docker-compose`, CI YAML, shell scripts) unless the question is explicitly about ops. Open the relevant service sources before concluding. If `list_dir`/`glob_workspace` under a short name (e.g. 
a service folder) returns empty, this repo is often a monorepo: try `glob_workspace` with `root` `.` and a broad `pattern` such as `**/claw-rag-service/**/*.rs` or `rust/crates/**/src/**/*.rs` before concluding the code is missing."; + +fn system_prompt( + mode: PermissionMode, + root: &Path, + preset: Preset, + profile_hint: Option<&str>, + language: AnalogLanguage, + rag_enabled: bool, +) -> String { + let root_s = root.display(); + let rag_blurb = if rag_enabled { + ", `retrieve_context` (RAG over indexed workspace via HTTP)" + } else { + "" + }; + let git_blurb = ", `git_diff`, `git_log` (read-only git context)"; + let base = match mode { + PermissionMode::ReadOnly => format!( + "You are a read-only coding assistant. Workspace root: {root_s}. \ + Tools: `read_file`, `list_dir`, `glob_workspace`, `grep_workspace` / `grep_search` (literal substring){git_blurb}{rag_blurb}. Paths relative; use `/`; no `..`." + ), + PermissionMode::WorkspaceWrite => format!( + "You are a coding assistant with read/list/glob/grep/write{git_blurb}{rag_blurb}. Workspace root: {root_s}. \ + Relative paths only; no `..`." + ), + PermissionMode::Prompt => format!( + "You are a coding assistant in prompt-style permission mode (workspace root: {root_s}). \ + Read/list/glob/grep{git_blurb}{rag_blurb} tools available; `write_file` is gated — in this harness writes require workspace-write or higher unless an interactive prompt is available (non-interactive runs deny writes per PolicyEnforcer)." + ), + PermissionMode::DangerFullAccess | PermissionMode::Allow => format!( + "You are a coding assistant with read/list/glob/grep/write{git_blurb}{rag_blurb} and expanded permission mode '{}' (workspace root: {root_s}). 
\ + Still use only the provided tools; paths must stay under workspace.", + mode.as_str() + ), + }; + let mut out = base; + out.push('\n'); + out.push_str(SOURCE_GROUNDING_HINT); + if let Some(x) = preset.extra_system() { + out.push('\n'); + out.push_str(x); + } + if let Some(h) = profile_hint.filter(|s| !s.is_empty()) { + out.push('\n'); + out.push_str("Learner hint: "); + out.push_str(h); + } + if let Some(h) = language_system_hint(language) { + out.push('\n'); + out.push_str(h); + } + out +} + +/// Print effective tool names and policy summary (no network; for `--print-tools` dry-run). +pub fn print_tools_dry_run( + permission_mode: PermissionMode, + use_runtime_enforcer: bool, + rag_base_url: Option<&str>, + out: &mut impl io::Write, +) -> std::io::Result<()> { + let tools = tool_definitions(permission_mode, rag_base_url); + writeln!(out, "claw-analog — effective tools (dry-run, no API calls)")?; + writeln!( + out, + "permission_mode: {} runtime::PermissionEnforcer: {}", + permission_mode.as_str(), + if use_runtime_enforcer { "on" } else { "off" } + )?; + writeln!(out, "\nTools:")?; + for t in tools { + let desc = t.description.as_deref().unwrap_or("—"); + writeln!(out, " - {} — {desc}", t.name)?; + } + Ok(()) +} + +#[derive(Debug)] +enum BlockKind { + Text, + Tool { + id: String, + name: String, + json: String, + }, +} + +pub(crate) fn format_rag_query_json_for_model(body: &str) -> Result { + let v: Value = serde_json::from_str(body).map_err(|e| format!("invalid JSON: {e}"))?; + let phase = v.get("phase").and_then(|x| x.as_str()).unwrap_or("unknown"); + let hits = v + .get("hits") + .and_then(|h| h.as_array()) + .ok_or_else(|| "missing hits array".to_string())?; + let mut out = String::new(); + writeln!(&mut out, "phase: {phase}").map_err(|e| e.to_string())?; + if hits.is_empty() { + writeln!(&mut out, "(no hits)").map_err(|e| e.to_string())?; + return Ok(out); + } + for (i, h) in hits.iter().enumerate() { + let path = h.get("path").and_then(|x| 
x.as_str()).unwrap_or(""); + let snippet = h.get("snippet").and_then(|x| x.as_str()).unwrap_or(""); + let score = h.get("score").and_then(|x| x.as_f64()); + write!(&mut out, "{}. ", i + 1).map_err(|e| e.to_string())?; + if let Some(s) = score { + write!(&mut out, "score={s:.4} ").map_err(|e| e.to_string())?; + } + writeln!(&mut out, "path={path}").map_err(|e| e.to_string())?; + let lines: Vec<&str> = snippet.lines().collect(); + for line in lines.iter().take(32) { + writeln!(&mut out, " {line}").map_err(|e| e.to_string())?; + } + if lines.len() > 32 { + writeln!(&mut out, " …").map_err(|e| e.to_string())?; + } + writeln!(&mut out).map_err(|e| e.to_string())?; + } + Ok(out) +} + +async fn retrieve_context_tool( + http: &reqwest::Client, + rag_base_url: &str, + top_k_cap: u32, + enforcer: Option<&PermissionEnforcer>, + input: &Value, +) -> String { + if let Err(e) = enforce_tool(enforcer, "retrieve_context", input) { + return format!("error: permission denied: {e}"); + } + let Some(q) = input.get("query").and_then(|v| v.as_str()) else { + return "error: missing query".to_string(); + }; + let q = q.trim(); + if q.is_empty() { + return "error: empty query".to_string(); + } + if q.chars().count() > RAG_QUERY_MAX_CHARS { + return format!("error: query too long (max {RAG_QUERY_MAX_CHARS} chars)"); + } + let cap = top_k_cap.max(1); + let top_k = input + .get("top_k") + .and_then(|v| v.as_u64()) + .map(|n| n as u32) + .unwrap_or(8) + .clamp(1, cap); + let base = rag_base_url.trim_end_matches('/'); + let url = format!("{base}/v1/query"); + let body = json!({ "query": q, "top_k": top_k }); + let resp = match http.post(url).json(&body).send().await { + Ok(r) => r, + Err(e) => return format!("error: RAG request failed: {e}"), + }; + let status = resp.status(); + let text = match resp.text().await { + Ok(t) => t, + Err(e) => return format!("error: RAG response body: {e}"), + }; + if !status.is_success() { + return format!("error: RAG HTTP {status}: {text}"); + } + match 
format_rag_query_json_for_model(&text) { + Ok(s) => s, + Err(e) => format!("error: {e}\nraw: {text}"), + } +} + +/// Run the agent loop; assistant text is written to `out` (streaming deltas when `use_stream`). +pub async fn run( + config: AnalogConfig, + out: &mut impl std::io::Write, +) -> Result<(), Box> { + let workspace = config.workspace.canonicalize()?; + enforce_non_interactive_permission_rules( + config.permission_mode, + config.accept_danger_non_interactive, + )?; + let model = resolve_model_alias(&config.model); + let client = ProviderClient::from_model(model.as_str())?; + let rag_enabled = config.rag_base_url.is_some(); + let rag_http = if rag_enabled { + Some( + reqwest::Client::builder() + .timeout(config.rag_http_timeout) + .build()?, + ) + } else { + None + }; + let tools = tool_definitions(config.permission_mode, config.rag_base_url.as_deref()); + let profile_ref = config.profile_hint.as_deref(); + let system = system_prompt( + config.permission_mode, + &workspace, + config.preset, + profile_ref, + config.language, + rag_enabled, + ); + let ws_str = workspace.display().to_string(); + + if config.session_path.is_some() || config.session_save_path.is_some() { + session_warn_common(); + } + + let mut messages = if let Some(ref sp) = config.session_path { + session_bootstrap_messages(sp, &ws_str, model.as_str(), config.preset, &config.prompt)? 
+ } else { + vec![InputMessage::user_text(config.prompt.clone())] + }; + let max_tokens = max_tokens_for_model(model.as_str()); + let policy = build_policy(config.permission_mode); + let enforcer = PermissionEnforcer::new(policy); + + if config.output_format == OutputFormat::Json { + write_json_line( + out, + &json!({ + "type": "run_start", + "schema": NDJSON_SCHEMA, + "format_version": NDJSON_FORMAT_VERSION, + "workspace": ws_str.clone(), + "model": model.clone(), + "stream": config.use_stream, + "permission": config.permission_mode.as_str(), + "preset": config.preset.label(), + "session": config.session_path.as_ref().map(|p| p.display().to_string()), + "session_save": config.session_save_path.as_ref().map(|p| p.display().to_string()), + "rag_enabled": rag_enabled, + }), + )?; + } + + for turn in 0..config.max_turns { + if config.output_format == OutputFormat::Json { + write_json_line(out, &json!({ "type": "turn_start", "turn": turn + 1 }))?; + } + + let request = MessageRequest { + model: model.clone(), + max_tokens, + messages: messages.clone(), + system: Some(system.clone()), + tools: Some(tools.clone()), + tool_choice: Some(ToolChoice::Auto), + ..Default::default() + }; + + let response = if config.use_stream { + stream_to_message_response(&client, &request, out, config.output_format).await? 
+ } else { + let r = client.send_message(&request).await?; + if config.output_format == OutputFormat::Rich { + for block in &r.content { + if let OutputContentBlock::Text { text } = block { + write!(out, "{text}")?; + } + } + } + r + }; + + eprintln!( + "[claw-analog] turn {} stop_reason={:?} tokens≈{}", + turn + 1, + response.stop_reason, + response.total_tokens(), + ); + + if config.output_format == OutputFormat::Json { + let text_full = assistant_plain_text(&response.content); + write_json_line( + out, + &json!({ + "type": "assistant_turn", + "turn": turn + 1, + "stop_reason": response.stop_reason, + "usage": { + "input_tokens": response.usage.input_tokens, + "output_tokens": response.usage.output_tokens, + "cache_creation_input_tokens": response.usage.cache_creation_input_tokens, + "cache_read_input_tokens": response.usage.cache_read_input_tokens, + "total_tokens": response.total_tokens(), + }, + "text": text_full, + "tool_calls": tool_calls_for_json(&response.content), + "request_id": response.request_id, + }), + )?; + } + + messages.push(InputMessage { + role: "assistant".to_string(), + content: output_to_input_blocks(&response.content), + }); + + let tool_uses = collect_tool_uses(&response.content); + if tool_uses.is_empty() || response.stop_reason.as_deref() != Some("tool_use") { + persist_conversation_sessions(&config, &ws_str, model.as_str(), &messages)?; + break; + } + + let mut results: Vec = Vec::new(); + for tu in tool_uses { + let text = if tu.name == "retrieve_context" { + match (&rag_http, &config.rag_base_url) { + (Some(http), Some(base)) => { + retrieve_context_tool( + http, + base, + config.rag_top_k_max, + config.use_runtime_enforcer.then_some(&enforcer), + tu.input, + ) + .await + } + _ => "error: retrieve_context is not configured (set RAG_BASE_URL or rag_base_url in .claw-analog.toml)".to_string(), + } + } else { + dispatch_tool( + tu.name, + tu.input, + &workspace, + &ws_str, + config.permission_mode, + 
config.use_runtime_enforcer.then_some(&enforcer), + config.max_read_bytes, + config.max_list_entries, + config.grep_max_lines, + config.glob_max_paths, + config.glob_max_depth, + ) + }; + let is_err = text.starts_with("error:"); + eprintln!( + "[claw-analog] tool {} -> {} chars is_error={}", + tu.name, + text.len(), + is_err, + ); + if config.output_format == OutputFormat::Json { + let (output, truncated) = truncate_for_json(&text); + write_json_line( + out, + &json!({ + "type": "tool_result", + "turn": turn + 1, + "tool_use_id": tu.id, + "name": tu.name, + "is_error": is_err, + "output": output, + "output_len_chars": text.chars().count(), + "truncated": truncated, + }), + )?; + } + results.push(InputContentBlock::ToolResult { + tool_use_id: tu.id.to_string(), + content: vec![ToolResultContentBlock::Text { text }], + is_error: is_err, + }); + } + messages.push(InputMessage { + role: "user".to_string(), + content: results, + }); + persist_conversation_sessions(&config, &ws_str, model.as_str(), &messages)?; + } + + if config.output_format == OutputFormat::Json { + write_json_line(out, &json!({ "type": "run_end", "ok": true }))?; + } + + Ok(()) +} + +async fn stream_to_message_response( + client: &ProviderClient, + request: &MessageRequest, + out: &mut impl std::io::Write, + output_format: OutputFormat, +) -> Result { + let mut stream = client.stream_message(request).await?; + let mut block_kind: BTreeMap = BTreeMap::new(); + let mut text_buf: BTreeMap = BTreeMap::new(); + let mut message_id = String::from("stream"); + let mut message_model = request.model.clone(); + let mut stop_reason: Option = None; + let mut usage = api::Usage::default(); + let mut saw_stop = false; + let mut finished: BTreeMap = BTreeMap::new(); + + while let Some(event) = stream.next_event().await? 
{ + match event { + StreamEvent::MessageStart(MessageStartEvent { message }) => { + message_id = message.id; + message_model = message.model; + for block in message.content { + if let OutputContentBlock::Text { text } = block { + if text.is_empty() { + continue; + } + match output_format { + OutputFormat::Rich => { + write!(out, "{text}").ok(); + } + OutputFormat::Json => { + write_json_line( + out, + &json!({ "type": "assistant_text_delta", "text": text }), + ) + .map_err(ApiError::from)?; + } + } + } + } + } + StreamEvent::ContentBlockStart(ContentBlockStartEvent { + index, + content_block, + }) => match content_block { + OutputContentBlock::Text { text } => { + block_kind.insert(index, BlockKind::Text); + text_buf.insert(index, text); + } + OutputContentBlock::ToolUse { id, name, input } => { + let json = if input.as_object().is_some_and(|m| m.is_empty()) { + String::new() + } else { + input.to_string() + }; + block_kind.insert(index, BlockKind::Tool { id, name, json }); + } + OutputContentBlock::Thinking { .. } + | OutputContentBlock::RedactedThinking { .. } => {} + }, + StreamEvent::ContentBlockDelta(delta) => match delta.delta { + ContentBlockDelta::TextDelta { text } => { + if !text.is_empty() { + match output_format { + OutputFormat::Rich => { + write!(out, "{text}").ok(); + } + OutputFormat::Json => { + write_json_line( + out, + &json!({ "type": "assistant_text_delta", "text": text }), + ) + .map_err(ApiError::from)?; + } + } + text_buf.entry(delta.index).or_default().push_str(&text); + } + } + ContentBlockDelta::InputJsonDelta { partial_json } => { + if let Some(BlockKind::Tool { json, .. }) = block_kind.get_mut(&delta.index) { + json.push_str(&partial_json); + } + } + ContentBlockDelta::ThinkingDelta { .. } + | ContentBlockDelta::SignatureDelta { .. 
} => {} + }, + StreamEvent::ContentBlockStop(stop) => { + let idx = stop.index; + match block_kind.remove(&idx) { + Some(BlockKind::Text) => { + let t = text_buf.remove(&idx).unwrap_or_default(); + if !t.is_empty() { + finished.insert(idx, OutputContentBlock::Text { text: t }); + } + } + Some(BlockKind::Tool { id, name, json }) => { + let input = serde_json::from_str::(&json) + .unwrap_or_else(|_| json!({ "raw": json })); + finished.insert(idx, OutputContentBlock::ToolUse { id, name, input }); + } + None => {} + } + } + StreamEvent::MessageDelta(MessageDeltaEvent { delta, usage: u }) => { + usage = u; + stop_reason = delta.stop_reason.or(stop_reason); + } + StreamEvent::MessageStop(MessageStopEvent {}) => { + saw_stop = true; + break; + } + } + } + + if !saw_stop { + return client.send_message(request).await; + } + + let content: Vec = finished.into_values().collect(); + if content.is_empty() { + return client.send_message(request).await; + } + let has_tools = content + .iter() + .any(|b| matches!(b, OutputContentBlock::ToolUse { .. 
})); + let stop_reason = stop_reason.or_else(|| { + Some(if has_tools { + "tool_use".to_string() + } else { + "end_turn".to_string() + }) + }); + + Ok(MessageResponse { + id: message_id, + kind: "message".to_string(), + role: "assistant".to_string(), + content, + model: message_model, + stop_reason, + stop_sequence: None, + usage, + request_id: stream.request_id().map(ToString::to_string), + }) +} + +struct ToolUse<'a> { + id: &'a str, + name: &'a str, + input: &'a Value, +} + +fn collect_tool_uses(content: &[OutputContentBlock]) -> Vec> { + content + .iter() + .filter_map(|b| { + if let OutputContentBlock::ToolUse { id, name, input } = b { + Some(ToolUse { + id: id.as_str(), + name: name.as_str(), + input, + }) + } else { + None + } + }) + .collect() +} + +fn output_to_input_blocks(blocks: &[OutputContentBlock]) -> Vec { + blocks + .iter() + .filter_map(|b| match b { + OutputContentBlock::Text { text } => { + Some(InputContentBlock::Text { text: text.clone() }) + } + OutputContentBlock::ToolUse { id, name, input } => Some(InputContentBlock::ToolUse { + id: id.clone(), + name: name.clone(), + input: input.clone(), + }), + OutputContentBlock::Thinking { .. } | OutputContentBlock::RedactedThinking { .. } => { + None + } + }) + .collect() +} + +pub fn validate_rel_path(rel: &str) -> Result<(), String> { + let p = Path::new(rel); + for c in p.components() { + match c { + Component::Normal(_) | Component::CurDir => {} + Component::Prefix(_) | Component::RootDir | Component::ParentDir => { + return Err("path must be relative with no '..' 
or absolute segments".into()); + } + } + } + Ok(()) +} + +fn join_under_root(root: &Path, rel: &str) -> Result { + validate_rel_path(rel)?; + Ok(root.join(rel)) +} + +fn assert_workspace_path(root: &Path, path: &Path) -> Result<(), String> { + let root_canon = root.canonicalize().map_err(|e| e.to_string())?; + + if path.exists() { + let c = path.canonicalize().map_err(|e| e.to_string())?; + return if c.starts_with(&root_canon) { + Ok(()) + } else { + Err("path escapes workspace".into()) + }; + } + + if let Some(parent) = path.parent() { + if parent.as_os_str().is_empty() { + return Ok(()); + } + let mut cur = parent; + loop { + if cur == root { + break; + } + if cur.exists() { + let pc = cur.canonicalize().map_err(|e| e.to_string())?; + if !pc.starts_with(&root_canon) { + return Err("path escapes workspace".into()); + } + break; + } + cur = cur.parent().ok_or_else(|| "invalid path".to_string())?; + } + } + Ok(()) +} + +fn enforce_tool( + enforcer: Option<&PermissionEnforcer>, + tool: &str, + input: &Value, +) -> Result<(), String> { + let Some(e) = enforcer else { + return Ok(()); + }; + let payload = input.to_string(); + match e.check(tool, &payload) { + EnforcementResult::Allowed => Ok(()), + EnforcementResult::Denied { reason, .. } => Err(reason), + } +} + +fn assert_safe_glob_pattern(pattern: &str) -> Result<(), String> { + if pattern.contains("..") { + return Err("glob pattern must not contain '..'".into()); + } + Ok(()) +} + +/// Returns workspace-relative paths using `/`, sorted; `truncated` if `max_paths` reached. 
+pub fn glob_workspace_collect( + workspace: &Path, + rel_root: &str, + glob_pat: &str, + max_depth: usize, + max_paths: usize, +) -> Result<(Vec, bool), String> { + assert_safe_glob_pattern(glob_pat)?; + if max_paths == 0 { + return Ok((Vec::new(), false)); + } + let root_path = join_under_root(workspace, rel_root)?; + assert_workspace_path(workspace, &root_path)?; + let g = Glob::new(glob_pat).map_err(|e| e.to_string())?; + let mut b = GlobSetBuilder::new(); + b.add(g); + let set: GlobSet = b.build().map_err(|e| e.to_string())?; + + let mut out = Vec::new(); + let mut truncated = false; + let depth = max_depth.max(1); + let mut walker = WalkBuilder::new(&root_path); + walker + .follow_links(false) + .max_depth(Some(depth)) + .git_ignore(true) + .git_exclude(true) + .ignore(true) + .hidden(false) + .add_custom_ignore_filename(".clawignore"); + for result in walker.build() { + let entry = match result { + Ok(e) => e, + Err(_) => continue, + }; + if !entry.file_type().is_some_and(|t| t.is_file()) { + continue; + } + let full = entry.path(); + let rel_search = full + .strip_prefix(&root_path) + .map_err(|_| "internal path prefix".to_string())?; + let rel_s = rel_search.to_string_lossy().replace('\\', "/"); + if rel_s.is_empty() { + continue; + } + if set.is_match(rel_s.as_str()) { + let ws_rel = full + .strip_prefix(workspace) + .map_err(|_| "internal workspace prefix".to_string())?; + let line = ws_rel.to_string_lossy().replace('\\', "/"); + out.push(line); + if out.len() >= max_paths { + truncated = true; + break; + } + } + } + out.sort(); + out.dedup(); + Ok((out, truncated)) +} + +/// Literal substring per line; capped lines; no regex/shell. 
+pub fn grep_in_file( + path: &Path, + pattern: &str, + max_file_bytes: u64, + max_matching_lines: usize, +) -> Result { + let cap = max_matching_lines.max(1); + let (s, _) = grep_in_file_labeled(path, pattern, max_file_bytes, cap, None)?; + Ok(s) +} + +fn grep_in_file_labeled( + path: &Path, + pattern: &str, + max_file_bytes: u64, + max_matching_lines: usize, + path_label: Option<&str>, +) -> Result<(String, usize), String> { + if max_matching_lines == 0 { + return Ok((String::new(), 0)); + } + let bytes = std::fs::read(path).map_err(|e| e.to_string())?; + if bytes.iter().take(8 * 1024).any(|b| *b == 0) { + return Err("file looks binary (NUL byte)".into()); + } + if bytes.len() as u64 > max_file_bytes { + return Err(format!( + "file too large ({} bytes; max {})", + bytes.len(), + max_file_bytes + )); + } + let text = String::from_utf8(bytes).map_err(|_| "invalid UTF-8".to_string())?; + let mut out = String::new(); + let mut count = 0usize; + let cap = max_matching_lines; + for (lineno, line) in text.lines().enumerate() { + if line.contains(pattern) { + count += 1; + if out.len() < 256 * 1024 { + match path_label { + Some(label) => { + let _ = writeln!(&mut out, "{label}:{}:{}", lineno + 1, line); + } + None => { + let _ = writeln!(&mut out, "{}:{}", lineno + 1, line); + } + } + } + if count >= cap { + let _ = writeln!(&mut out, "… truncated after {cap} matching lines"); + break; + } + } + } + if out.is_empty() { + if path_label.is_none() { + Ok(("(no matches)".into(), 0)) + } else { + Ok((String::new(), 0)) + } + } else { + Ok((out, count)) + } +} + +fn dispatch_grep_workspace( + input: &Value, + workspace: &Path, + max_read: u64, + grep_cap: usize, + glob_max_paths: usize, + glob_max_depth: usize, +) -> String { + let Some(pattern) = input.get("pattern").and_then(|p| p.as_str()) else { + return "error: missing pattern".to_string(); + }; + if pattern.is_empty() { + return "error: empty pattern".to_string(); + } + let max_lines_total = input + .get("max_lines") + 
.and_then(|v| v.as_u64()) + .map(|n| n as usize) + .unwrap_or(grep_cap) + .min(grep_cap.max(1)); + + let path_single = input + .get("path") + .and_then(|p| p.as_str()) + .filter(|s| !s.is_empty()); + let paths_arr = input.get("paths").and_then(|p| p.as_array()); + let glob = input + .get("glob") + .and_then(|p| p.as_str()) + .filter(|s| !s.is_empty()); + + let mut selector_count = 0u8; + if path_single.is_some() { + selector_count += 1; + } + if paths_arr.is_some_and(|a| !a.is_empty()) { + selector_count += 1; + } + if glob.is_some() { + selector_count += 1; + } + if selector_count > 1 { + return "error: specify only one of path, paths, or glob".to_string(); + } + if selector_count == 0 { + return "error: provide path, paths, or glob".to_string(); + } + + let mut files: Vec = Vec::new(); + if let Some(g) = glob { + let glob_root = input + .get("glob_root") + .and_then(|p| p.as_str()) + .filter(|s| !s.is_empty()) + .unwrap_or("."); + match glob_workspace_collect(workspace, glob_root, g, glob_max_depth, glob_max_paths) { + Ok((mut v, _)) => { + if v.is_empty() { + return "(no matches)".into(); + } + files.append(&mut v); + } + Err(e) => return format!("error: {e}"), + } + } else if let Some(arr) = paths_arr { + files.reserve(arr.len()); + for p in arr { + let Some(s) = p.as_str() else { + return "error: paths must be strings".to_string(); + }; + if !s.is_empty() { + files.push(s.to_string()); + } + } + if files.is_empty() { + return "error: paths is empty".to_string(); + } + } else if let Some(p) = path_single { + files.push(p.to_string()); + } + + files.sort(); + files.dedup(); + + let multi = files.len() > 1; + let mut combined = String::new(); + let mut total_matches = 0usize; + + for rel in files { + if total_matches >= max_lines_total { + break; + } + let remaining = max_lines_total.saturating_sub(total_matches); + if remaining == 0 { + break; + } + let Ok(full) = join_under_root(workspace, &rel) else { + return format!("error: invalid path {rel:?}"); + }; + if 
let Err(e) = assert_workspace_path(workspace, &full) { + return format!("error: {e}"); + } + let label = if multi { Some(rel.as_str()) } else { None }; + match grep_in_file_labeled(&full, pattern, max_read, remaining, label) { + Ok((chunk, n)) => { + if multi { + if n > 0 { + combined.push_str(&chunk); + total_matches += n; + } + } else { + return chunk; + } + } + Err(e) => return format!("error: {e}"), + } + } + + if combined.is_empty() { + "(no matches)".into() + } else { + combined + } +} + +#[allow(clippy::too_many_arguments)] +pub fn dispatch_tool( + name: &str, + input: &Value, + workspace: &Path, + workspace_str: &str, + mode: PermissionMode, + enforcer: Option<&PermissionEnforcer>, + max_read: u64, + max_list: usize, + grep_cap: usize, + glob_max_paths: usize, + glob_max_depth: usize, +) -> String { + match name { + "read_file" => { + if let Err(e) = enforce_tool(enforcer, name, input) { + return format!("error: permission denied: {e}"); + } + let Some(path_s) = input.get("path").and_then(|p| p.as_str()) else { + return "error: missing path".to_string(); + }; + let Ok(full) = join_under_root(workspace, path_s) else { + return format!("error: invalid path {path_s:?}"); + }; + if let Err(e) = assert_workspace_path(workspace, &full) { + return format!("error: {e}"); + } + match std::fs::read(&full) { + Ok(bytes) => { + if bytes.iter().take(8 * 1024).any(|b| *b == 0) { + return "error: file looks binary (NUL byte)".to_string(); + } + if bytes.len() as u64 > max_read { + return format!( + "error: file too large ({} bytes; max {})", + bytes.len(), + max_read + ); + } + String::from_utf8_lossy(&bytes).into_owned() + } + Err(e) => format!("error: read failed: {e}"), + } + } + "list_dir" => { + if let Err(e) = enforce_tool(enforcer, "list_dir", input) { + return format!("error: permission denied: {e}"); + } + let path_s = input + .get("path") + .and_then(|p| p.as_str()) + .filter(|s| !s.is_empty()) + .unwrap_or("."); + let Ok(full) = join_under_root(workspace, 
path_s) else { + return format!("error: invalid path {path_s:?}"); + }; + if let Err(e) = assert_workspace_path(workspace, &full) { + return format!("error: {e}"); + } + // Use ignore-aware walker to respect .gitignore/.clawignore. + let mut walker = WalkBuilder::new(&full); + walker + .follow_links(false) + .max_depth(Some(1)) + .git_ignore(true) + .git_exclude(true) + .ignore(true) + .hidden(false) + .add_custom_ignore_filename(".clawignore"); + let mut names: Vec = walker + .build() + .filter_map(|r| r.ok()) + .filter_map(|e| { + let p = e.path(); + if p == full { + return None; + } + p.file_name().map(|n| n.to_string_lossy().into_owned()) + }) + .take(max_list.saturating_add(1)) + .collect(); + names.sort(); + names.dedup(); + let truncated = names.len() > max_list; + names.truncate(max_list); + let body = names.join("\n"); + if truncated { + format!("{body}\n… truncated to {max_list} entries") + } else { + body + } + } + "glob_workspace" => { + if let Err(e) = enforce_tool(enforcer, name, input) { + return format!("error: permission denied: {e}"); + } + let root = input + .get("root") + .and_then(|r| r.as_str()) + .filter(|s| !s.is_empty()) + .unwrap_or("."); + let Some(pat) = input.get("pattern").and_then(|p| p.as_str()) else { + return "error: missing pattern".to_string(); + }; + if pat.is_empty() { + return "error: empty pattern".to_string(); + } + let cap = input + .get("max_paths") + .and_then(|v| v.as_u64()) + .map(|n| n as usize) + .map(|n| n.min(glob_max_paths)) + .unwrap_or(glob_max_paths); + match glob_workspace_collect(workspace, root, pat, glob_max_depth, cap) { + Ok((paths, truncated)) => { + if paths.is_empty() { + "(no matches)".into() + } else { + let body = paths.join("\n"); + if truncated { + format!("{body}\n… truncated (max_paths={cap})") + } else { + body + } + } + } + Err(e) => format!("error: {e}"), + } + } + "grep_workspace" => { + if let Err(e) = enforce_tool(enforcer, name, input) { + return format!("error: permission denied: {e}"); + 
} + dispatch_grep_workspace( + input, + workspace, + max_read, + grep_cap, + glob_max_paths, + glob_max_depth, + ) + } + "grep_search" => { + if let Err(e) = enforce_tool(enforcer, name, input) { + return format!("error: permission denied: {e}"); + } + dispatch_grep_workspace( + input, + workspace, + max_read, + grep_cap, + glob_max_paths, + glob_max_depth, + ) + } + "retrieve_context" => { + "error: retrieve_context runs via async HTTP only (configure RAG_BASE_URL)".to_string() + } + "write_file" => { + if !matches!( + mode, + PermissionMode::WorkspaceWrite + | PermissionMode::DangerFullAccess + | PermissionMode::Allow + ) { + return format!( + "error: write_file requires workspace-write, danger-full-access, or allow (current: {})", + mode.as_str() + ); + } + if let Err(e) = enforce_tool(enforcer, name, input) { + return format!("error: permission denied: {e}"); + } + let Some(path_s) = input.get("path").and_then(|p| p.as_str()) else { + return "error: missing path".to_string(); + }; + let Some(content) = input.get("content").and_then(|p| p.as_str()) else { + return "error: missing content".to_string(); + }; + let Ok(full) = join_under_root(workspace, path_s) else { + return format!("error: invalid path {path_s:?}"); + }; + if let Err(e) = assert_workspace_path(workspace, &full) { + return format!("error: {e}"); + } + if let Some(e) = enforcer { + match e.check_file_write(&full.display().to_string(), workspace_str) { + EnforcementResult::Allowed => {} + EnforcementResult::Denied { reason, .. 
} => { + return format!("error: permission denied: {reason}"); + } + } + } + if let Some(parent) = full.parent() { + if let Err(e) = std::fs::create_dir_all(parent) { + return format!("error: mkdir: {e}"); + } + } + match std::fs::write(&full, content.as_bytes()) { + Ok(()) => format!("wrote {} bytes to {}", content.len(), full.display()), + Err(e) => format!("error: write failed: {e}"), + } + } + "git_diff" => { + if let Err(e) = enforce_tool(enforcer, name, input) { + return format!("error: permission denied: {e}"); + } + let cached = input + .get("cached") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + let context_lines = input.get("context_lines").and_then(|v| v.as_i64()); + let rev_range = input + .get("rev_range") + .and_then(|v| v.as_str()) + .map(str::trim); + if let Some(rr) = rev_range { + if !is_safe_git_rev_range(rr) { + return "error: invalid rev_range".to_string(); + } + } + + let mut args: Vec = vec![ + "diff".to_string(), + "--no-color".to_string(), + "--no-ext-diff".to_string(), + ]; + if let Some(n) = context_lines { + let n = n.clamp(0, 100); + args.push(format!("-U{n}")); + } + if cached { + args.push("--cached".to_string()); + } + if let Some(rr) = rev_range { + if !rr.is_empty() { + args.push(rr.to_string()); + } + } + if let Some(arr) = input.get("paths").and_then(|v| v.as_array()) { + let mut paths: Vec = Vec::new(); + for p in arr.iter().filter_map(|v| v.as_str()) { + if validate_rel_path(p).is_err() { + return format!("error: invalid path {p:?}"); + } + paths.push(p.replace('\\', "/")); + } + if !paths.is_empty() { + args.push("--".to_string()); + args.extend(paths); + } + } + match run_git_capped(workspace, &args, max_read as usize) { + Ok(s) => { + if s.trim().is_empty() { + "(no diff)".to_string() + } else { + s + } + } + Err(e) => format!("error: {e}"), + } + } + "git_log" => { + if let Err(e) = enforce_tool(enforcer, name, input) { + return format!("error: permission denied: {e}"); + } + let max_count = input + .get("max_count") 
+ .and_then(|v| v.as_u64()) + .unwrap_or(20) + .min(50); + let rev_range = input + .get("rev_range") + .and_then(|v| v.as_str()) + .map(str::trim); + if let Some(rr) = rev_range { + if !is_safe_git_rev_range(rr) { + return "error: invalid rev_range".to_string(); + } + } + let mut args: Vec = vec![ + "log".to_string(), + "--no-color".to_string(), + "--no-decorate".to_string(), + format!("--max-count={max_count}"), + "--pretty=format:%h %s".to_string(), + ]; + if let Some(rr) = rev_range { + if !rr.is_empty() { + args.push(rr.to_string()); + } + } + if let Some(arr) = input.get("paths").and_then(|v| v.as_array()) { + let mut paths: Vec = Vec::new(); + for p in arr.iter().filter_map(|v| v.as_str()) { + if validate_rel_path(p).is_err() { + return format!("error: invalid path {p:?}"); + } + paths.push(p.replace('\\', "/")); + } + if !paths.is_empty() { + args.push("--".to_string()); + args.extend(paths); + } + } + match run_git_capped(workspace, &args, max_read as usize) { + Ok(s) => { + if s.trim().is_empty() { + "(no commits)".to_string() + } else { + s + } + } + Err(e) => format!("error: {e}"), + } + } + _ => { + format!("error: unknown tool {name} (input {input})") + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::{Mutex, OnceLock}; + + fn mock_env_lock() -> std::sync::MutexGuard<'static, ()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + .lock() + .unwrap_or_else(|e| e.into_inner()) + } + + async fn mock_env_lock_async() -> tokio::sync::MutexGuard<'static, ()> { + use tokio::sync::Mutex as AsyncMutex; + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| AsyncMutex::new(())).lock().await + } + + fn git(cwd: &Path, args: &[&str]) { + let out = Command::new("git") + .args(args) + .current_dir(cwd) + .output() + .expect("git should run"); + if !out.status.success() { + panic!( + "git {:?} failed: {}", + args, + String::from_utf8_lossy(&out.stderr) + ); + } + } + + #[test] + fn 
validate_rel_rejects_dotdot() { + assert!(validate_rel_path("..\\x").is_err()); + assert!(validate_rel_path("a/../../b").is_err()); + assert!(validate_rel_path("src/main.rs").is_ok()); + } + + #[test] + fn resolve_analog_options_preset_implement_default_write() { + let file = AnalogFileConfig { + preset: Some("implement".into()), + ..Default::default() + }; + let r = resolve_analog_options(&file, &AnalogDoctorOverrides::default()); + assert_eq!(r.permission_mode, PermissionMode::WorkspaceWrite); + assert!(r.provenance.iter().any(|s| s.contains("implement"))); + } + + #[test] + fn resolve_analog_options_cli_beats_toml() { + let file = AnalogFileConfig { + model: Some("from-file".into()), + ..Default::default() + }; + let o = AnalogDoctorOverrides { + model: Some("from-cli".into()), + ..Default::default() + }; + let r = resolve_analog_options(&file, &o); + assert_eq!(r.model, "from-cli"); + assert!(r.provenance[0].contains("CLI")); + } + + #[test] + fn grep_finds_lines() { + let dir = tempfile::tempdir().unwrap(); + let f = dir.path().join("t.txt"); + std::fs::write(&f, "alpha\nbeta parity\ngamma\nparity tail\n").unwrap(); + let s = grep_in_file(&f, "parity", 4096, 10).unwrap(); + assert!(s.contains("2:")); + assert!(s.contains("4:")); + } + + #[test] + fn glob_workspace_respects_cap_and_depth() { + let dir = tempfile::tempdir().unwrap(); + let root = dir.path().canonicalize().unwrap(); + std::fs::create_dir_all(root.join("src/nested")).unwrap(); + std::fs::write(root.join("src/nested/foo.rs"), "").unwrap(); + std::fs::write(root.join("src/bar.txt"), "").unwrap(); + let (paths, trunc) = glob_workspace_collect(&root, ".", "**/*.rs", 32, 500).expect("glob"); + assert!(!trunc); + assert!(paths.iter().any(|p| p.ends_with("foo.rs"))); + let (few, trunc2) = glob_workspace_collect(&root, ".", "**/*", 32, 1).expect("glob2"); + assert!(trunc2); + assert_eq!(few.len(), 1); + } + + #[test] + fn glob_workspace_respects_gitignore_and_clawignore() { + let dir = 
/// Saves a one-message session, bootstraps from it with a new prompt, and checks that the new
/// prompt comes back as a second message with role `user`.
#[test]
fn session_save_and_resume_appends_prompt() {
    let scratch = tempfile::tempdir().unwrap();
    let session_file = scratch.path().join("sess.json");
    let workspace = scratch.path().canonicalize().unwrap().display().to_string();
    let model = "m1";

    let history = [InputMessage::user_text("first")];
    session_save(&session_file, &workspace, model, Preset::Audit, &history).expect("save");

    let resumed =
        session_bootstrap_messages(&session_file, &workspace, model, Preset::Audit, "second")
            .expect("boot");
    assert_eq!(resumed.len(), 2);

    let appended = &resumed[1];
    assert_eq!(appended.role, "user");
    // Inspect the serialized form so the check does not depend on the content block's Rust type.
    let as_json = serde_json::to_value(appended).expect("ser");
    assert_eq!(as_json["content"][0]["text"], "second");
}
/// A `rag_base_url` from the TOML config must win over the `RAG_BASE_URL` environment variable.
#[test]
fn resolve_rag_base_url_toml_beats_env() {
    // Held for the whole test; presumably serialises tests that touch the process environment.
    let _g = mock_env_lock();
    // The guard puts the previous value back (or unsets the variable) when it is dropped, so a
    // failing assertion cannot leak `RAG_BASE_URL` into other tests. It is declared after the
    // lock and therefore dropped before the lock is released.
    let _env = EnvVarGuard::set("RAG_BASE_URL", "http://from-env");

    let file = AnalogFileConfig {
        rag_base_url: Some("http://from-toml".into()),
        ..Default::default()
    };
    assert_eq!(
        resolve_rag_base_url(&file).as_deref(),
        Some("http://from-toml")
    );
}
"a!\n").expect("modify a"); + + let ws_str = root.display().to_string(); + let log_out = dispatch_tool( + "git_log", + &json!({"max_count": 5}), + root, + &ws_str, + PermissionMode::ReadOnly, + None, + 256 * 1024, + 200, + 200, + 1000, + 32, + ); + assert!(log_out.contains("initial"), "log output was: {log_out}"); + + let diff_out = dispatch_tool( + "git_diff", + &json!({}), + root, + &ws_str, + PermissionMode::ReadOnly, + None, + 256 * 1024, + 200, + 200, + 1000, + 32, + ); + assert!( + diff_out.contains("diff --git") || diff_out.contains("@@"), + "diff output was: {diff_out}" + ); + } + + #[tokio::test] + async fn mock_read_file_roundtrip() { + let _env = mock_env_lock_async().await; + use mock_anthropic_service::MockAnthropicService; + + let dir = tempfile::tempdir().unwrap(); + let root = dir.path().canonicalize().unwrap(); + std::fs::write(root.join("fixture.txt"), "hello parity fixture\n").unwrap(); + + let mock = MockAnthropicService::spawn().await.expect("mock"); + let url = mock.base_url(); + + let _g1 = EnvVarGuard::set("ANTHROPIC_API_KEY", "sk-test-mock"); + let _g2 = EnvVarGuard::set("ANTHROPIC_BASE_URL", url.as_str()); + + let config = AnalogConfig { + model: "claude-sonnet-4-6".into(), + workspace: root.clone(), + permission_mode: PermissionMode::ReadOnly, + accept_danger_non_interactive: false, + use_stream: false, + output_format: OutputFormat::Rich, + use_runtime_enforcer: true, + max_read_bytes: 1024 * 64, + max_turns: 4, + max_list_entries: 100, + grep_max_lines: 50, + glob_max_paths: 2000, + glob_max_depth: 32, + preset: Preset::None, + language: AnalogLanguage::En, + session_path: None, + session_save_path: None, + profile_hint: None, + prompt: "PARITY_SCENARIO:read_file_roundtrip summarize".into(), + rag_base_url: None, + rag_http_timeout: Duration::from_secs(30), + rag_top_k_max: 32, + }; + + let mut out = Vec::new(); + run(config, &mut out).await.expect("run"); + + let text = String::from_utf8_lossy(&out); + assert!( + 
text.contains("read_file roundtrip") || text.contains("fixture"), + "unexpected model text: {text}" + ); + } + + #[tokio::test] + async fn mock_session_save_export_without_resume_path() { + let _env = mock_env_lock_async().await; + use mock_anthropic_service::MockAnthropicService; + + let dir = tempfile::tempdir().unwrap(); + let root = dir.path().canonicalize().unwrap(); + std::fs::write(root.join("fixture.txt"), "hello parity fixture\n").unwrap(); + + let mock = MockAnthropicService::spawn().await.expect("mock"); + let url = mock.base_url(); + + let _g1 = EnvVarGuard::set("ANTHROPIC_API_KEY", "sk-test-mock"); + let _g2 = EnvVarGuard::set("ANTHROPIC_BASE_URL", url.as_str()); + + let export = dir.path().join("export-session.json"); + + let config = AnalogConfig { + model: "claude-sonnet-4-6".into(), + workspace: root, + permission_mode: PermissionMode::ReadOnly, + accept_danger_non_interactive: false, + use_stream: false, + output_format: OutputFormat::Rich, + use_runtime_enforcer: true, + max_read_bytes: 1024 * 64, + max_turns: 4, + max_list_entries: 100, + grep_max_lines: 50, + glob_max_paths: 2000, + glob_max_depth: 32, + preset: Preset::None, + language: AnalogLanguage::En, + session_path: None, + session_save_path: Some(export.clone()), + profile_hint: None, + prompt: "PARITY_SCENARIO:read_file_roundtrip summarize".into(), + rag_base_url: None, + rag_http_timeout: Duration::from_secs(30), + rag_top_k_max: 32, + }; + + let mut out = Vec::new(); + run(config, &mut out).await.expect("run"); + + let raw = std::fs::read_to_string(&export).expect("export file"); + let v: Value = serde_json::from_str(&raw).expect("session json"); + assert_eq!(v["version"], 1); + let msgs = v["messages"].as_array().expect("messages"); + assert!( + msgs.len() >= 2, + "expected user+assistant, got {}", + msgs.len() + ); + } + + #[tokio::test] + async fn mock_streaming_text_json() { + let _env = mock_env_lock_async().await; + use mock_anthropic_service::MockAnthropicService; + + let dir 
= tempfile::tempdir().unwrap(); + let root = dir.path().canonicalize().unwrap(); + + let mock = MockAnthropicService::spawn().await.expect("mock"); + let url = mock.base_url(); + + let _g1 = EnvVarGuard::set("ANTHROPIC_API_KEY", "sk-test-mock"); + let _g2 = EnvVarGuard::set("ANTHROPIC_BASE_URL", url.as_str()); + + let config = AnalogConfig { + model: "claude-sonnet-4-6".into(), + workspace: root, + permission_mode: PermissionMode::ReadOnly, + accept_danger_non_interactive: false, + use_stream: true, + output_format: OutputFormat::Json, + use_runtime_enforcer: true, + max_read_bytes: 1024 * 64, + max_turns: 2, + max_list_entries: 100, + grep_max_lines: 50, + glob_max_paths: 2000, + glob_max_depth: 32, + preset: Preset::None, + language: AnalogLanguage::En, + session_path: None, + session_save_path: None, + profile_hint: None, + prompt: "PARITY_SCENARIO:streaming_text hello".into(), + rag_base_url: None, + rag_http_timeout: Duration::from_secs(30), + rag_top_k_max: 32, + }; + + let mut buf = Vec::new(); + run(config, &mut buf).await.expect("run"); + + let s = String::from_utf8_lossy(&buf); + let lines: Vec = s + .lines() + .filter(|l| !l.is_empty()) + .map(|l| serde_json::from_str::(l).unwrap_or(Value::Null)) + .filter(|v| !v.is_null()) + .collect(); + + let types: Vec<&str> = lines + .iter() + .filter_map(|v| v.get("type").and_then(|t| t.as_str())) + .collect(); + + assert!(types.contains(&"run_start"), "types={types:?}"); + let run_start = lines + .iter() + .find(|v| v.get("type").and_then(|t| t.as_str()) == Some("run_start")) + .expect("run_start"); + assert_eq!( + run_start.get("schema").and_then(|v| v.as_str()), + Some(NDJSON_SCHEMA) + ); + assert_eq!( + run_start.get("format_version").and_then(|v| v.as_u64()), + Some(u64::from(NDJSON_FORMAT_VERSION)) + ); + assert!( + types.contains(&"assistant_text_delta"), + "expected NDJSON deltas, types={types:?}" + ); + + let turn = lines + .iter() + .find(|v| v.get("type").and_then(|t| t.as_str()) == 
/// Test helper: overrides one environment variable and restores its prior state on drop.
///
/// Bind the guard to a named local (`let _g = ...`); an unbound guard is dropped immediately and
/// the override is undone before the test body runs.
#[must_use = "the override is reverted as soon as the guard is dropped"]
struct EnvVarGuard {
    /// Name of the overridden variable.
    key: &'static str,
    /// Value the variable had before the override; `None` when it was unset.
    old: Option<std::ffi::OsString>,
}

impl EnvVarGuard {
    /// Sets `key` to `value`, remembering whatever was there before.
    fn set(key: &'static str, value: &str) -> Self {
        let old = std::env::var_os(key);
        std::env::set_var(key, value);
        Self { key, old }
    }
}

impl Drop for EnvVarGuard {
    /// Puts the previous value back, or removes the variable if it was unset before.
    fn drop(&mut self) {
        match self.old.take() {
            Some(previous) => std::env::set_var(self.key, previous),
            None => std::env::remove_var(self.key),
        }
    }
}
+ Allow, +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +enum OutputFormatArg { + Rich, + Json, +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +enum LangArg { + En, + Ru, +} + +impl From for AnalogLanguage { + fn from(a: LangArg) -> Self { + match a { + LangArg::En => AnalogLanguage::En, + LangArg::Ru => AnalogLanguage::Ru, + } + } +} + +#[derive(Copy, Clone, Debug, ValueEnum)] +enum PresetCli { + None, + /// Automatically infer a preset from the initial prompt. + Auto, + Audit, + Explain, + Implement, +} + +impl From for Preset { + fn from(p: PresetCli) -> Self { + match p { + PresetCli::None => Preset::None, + PresetCli::Auto => Preset::None, + PresetCli::Audit => Preset::Audit, + PresetCli::Explain => Preset::Explain, + PresetCli::Implement => Preset::Implement, + } + } +} + +#[derive(Parser, Debug)] +#[command( + name = "claw-analog", + version, + about = "Lean tool-agent loop (read/list/grep/write) on claw-code `api` providers" +)] +#[command(args_conflicts_with_subcommands = true)] +struct RootCli { + #[command(subcommand)] + command: Option, + #[command(flatten)] + run: RunCli, +} + +#[derive(Subcommand, Debug)] +enum Commands { + /// Verify credentials, `cargo check -p claw-analog` (or `--release-build`), config merge preview, optional `--tcp-ping`. + Doctor(doctor::DoctorCli), + Config { + #[command(subcommand)] + command: ConfigSub, + }, + /// Print shell completion script for this binary (redirect to a file or `source` it). + Complete(CompleteCli), + /// Run multiple specialized sub-agents sequentially (shared base session). + Agents(agents::AgentsCli), +} + +#[derive(Subcommand, Debug)] +enum ConfigSub { + /// Parse `.claw-analog.toml` and profile; print a merge preview (no API calls). 
// Shell selector for the `complete` subcommand; `main` maps each variant onto the matching
// `clap_complete::Shell`. Plain `//` comments are used so clap's generated help is unchanged.
#[derive(Copy, Clone, Debug, ValueEnum)]
enum ShellKind {
    Bash,
    Zsh,
    Fish,
    // Spelled `powershell` on the command line; `pwsh` is accepted as an alias.
    #[value(name = "powershell", alias = "pwsh")]
    Powershell,
}
/// Chooses the session file path.
///
/// The CLI value takes precedence over the config-file value. A relative path is resolved
/// against `workspace`; an absolute path is returned unchanged. Returns `None` when neither
/// source supplies a path.
fn resolve_session_path(
    cli: Option<PathBuf>,
    file: Option<&str>,
    workspace: &Path,
) -> Option<PathBuf> {
    let chosen = cli.or_else(|| file.map(PathBuf::from))?;
    if chosen.is_absolute() {
        Some(chosen)
    } else {
        Some(workspace.join(chosen))
    }
}
.unwrap_or_default() +} + +fn merge_preset(cli: Option, file: Option<&str>, prompt: &str) -> Preset { + if let Some(p) = cli { + return match p { + PresetCli::Auto => claw_analog::infer_preset_from_prompt(prompt), + other => Preset::from(other), + }; + } + if file.is_some_and(|s| s.trim().eq_ignore_ascii_case("auto")) { + return claw_analog::infer_preset_from_prompt(prompt); + } + if let Some(s) = file.and_then(Preset::from_toml_str) { + return s; + } + claw_analog::infer_preset_from_prompt(prompt) +} + +fn merge_permission( + cli: Option, + file_perm: Option, + preset: Preset, +) -> PermissionMode { + if let Some(p) = cli { + return match p { + PermissionArg::ReadOnly => PermissionMode::ReadOnly, + PermissionArg::WorkspaceWrite => PermissionMode::WorkspaceWrite, + PermissionArg::Prompt => PermissionMode::Prompt, + PermissionArg::DangerFullAccess => PermissionMode::DangerFullAccess, + PermissionArg::Allow => PermissionMode::Allow, + }; + } + if let Some(s) = file_perm.as_deref().and_then(permission_mode_from_toml_str) { + return s; + } + match preset { + Preset::Implement => PermissionMode::WorkspaceWrite, + _ => PermissionMode::ReadOnly, + } +} + +fn build_config( + cli: &RunCli, + file: &AnalogFileConfig, + prompt: String, + profile_hint: Option, + session_path: Option, + preset: Preset, + permission_mode: PermissionMode, +) -> AnalogConfig { + let model = cli + .model + .clone() + .or_else(|| file.model.clone()) + .unwrap_or_else(|| ANALOG_DEFAULT_MODEL.into()); + + let output_format = cli + .output_format + .map(|o| match o { + OutputFormatArg::Rich => OutputFormat::Rich, + OutputFormatArg::Json => OutputFormat::Json, + }) + .or_else(|| { + file.output_format + .as_deref() + .and_then(output_format_from_toml) + }) + .unwrap_or(OutputFormat::Rich); + + let use_stream = if cli.no_stream { + false + } else if cli.stream { + true + } else { + file.stream.unwrap_or(false) + }; + + let use_runtime_enforcer = + !cli.no_runtime_enforcer && 
!file.no_runtime_enforcer.unwrap_or(false); + + let accept_danger_non_interactive = + cli.accept_danger_non_interactive || file.accept_danger_non_interactive.unwrap_or(false); + + let max_read_bytes = cli + .max_read_bytes + .or(file.max_read_bytes) + .unwrap_or(DEF_MAX_READ); + let max_turns = cli.max_turns.or(file.max_turns).unwrap_or(DEF_MAX_TURNS); + let max_list_entries = cli + .max_list_entries + .or(file.max_list_entries) + .unwrap_or(DEF_MAX_LIST); + let grep_max_lines = cli + .grep_max_lines + .or(file.grep_max_lines) + .unwrap_or(DEF_GREP_MAX); + let glob_max_paths = cli + .glob_max_paths + .or(file.glob_max_paths) + .unwrap_or(DEF_GLOB_PATHS); + let glob_max_depth = cli + .glob_max_depth + .or(file.glob_max_depth) + .unwrap_or(DEF_GLOB_DEPTH); + + let rag_base_url = resolve_rag_base_url(file); + let rag_http_timeout = + Duration::from_secs(file.rag_timeout_secs.unwrap_or(DEF_RAG_TIMEOUT_SECS).max(1)); + let rag_top_k_max = file + .rag_top_k_max + .unwrap_or(DEF_RAG_TOP_K_MAX) + .clamp(1, RAG_TOP_K_ABS_CAP); + + let session_save_path = cli.save_session.as_ref().map(|p| { + if p.is_absolute() { + p.clone() + } else { + cli.workspace.join(p) + } + }); + + let language = merge_language(cli.lang, file.language.as_deref()); + + AnalogConfig { + model, + workspace: cli.workspace.clone(), + permission_mode, + accept_danger_non_interactive, + use_stream, + output_format, + use_runtime_enforcer, + max_read_bytes, + max_turns, + max_list_entries, + grep_max_lines, + glob_max_paths, + glob_max_depth, + preset, + language, + session_path, + session_save_path, + profile_hint, + prompt, + rag_base_url, + rag_http_timeout, + rag_top_k_max, + } +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let root = RootCli::parse(); + match root.command { + Some(Commands::Doctor(d)) => { + let code = doctor::run_doctor(d); + std::process::exit(code); + } + Some(Commands::Agents(a)) => { + let code = match agents::run_agents(a) { + Ok(()) => 0, + Err(e) => { + 
eprintln!("agents: {e}"); + 1 + } + }; + std::process::exit(code); + } + Some(Commands::Config { command }) => { + let code = match command { + ConfigSub::Validate(v) => config_cmd::run_validate(v), + }; + std::process::exit(code); + } + Some(Commands::Complete(co)) => { + let shell = match co.shell { + ShellKind::Bash => Shell::Bash, + ShellKind::Zsh => Shell::Zsh, + ShellKind::Fish => Shell::Fish, + ShellKind::Powershell => Shell::PowerShell, + }; + let mut cmd = RootCli::command(); + generate(shell, &mut cmd, "claw-analog", &mut std::io::stdout()); + return Ok(()); + } + None => {} + } + let cli = root.run; + let cfg_path = config_file_path(&cli); + let file_cfg = load_file_config(&cfg_path); + + if cli.print_tools { + let preset = merge_preset( + cli.preset, + file_cfg.preset.as_deref(), + &cli.prompt.clone().unwrap_or_default(), + ); + let permission_mode = merge_permission(cli.permission, file_cfg.permission.clone(), preset); + let use_runtime_enforcer = + !cli.no_runtime_enforcer && !file_cfg.no_runtime_enforcer.unwrap_or(false); + let rag_url = resolve_rag_base_url(&file_cfg); + print_tools_dry_run( + permission_mode, + use_runtime_enforcer, + rag_url.as_deref(), + &mut std::io::stdout(), + )?; + return Ok(()); + } + + let pre_output_format = cli + .output_format + .map(|o| match o { + OutputFormatArg::Rich => OutputFormat::Rich, + OutputFormatArg::Json => OutputFormat::Json, + }) + .or_else(|| { + file_cfg + .output_format + .as_deref() + .and_then(output_format_from_toml) + }) + .unwrap_or(OutputFormat::Rich); + + let prompt = if let Some(p) = cli.prompt.clone() { + p + } else { + use std::io::Read; + let mut buf = String::new(); + std::io::stdin().read_to_string(&mut buf)?; + if buf.trim().is_empty() { + if matches!(pre_output_format, OutputFormat::Json) { + println!( + "{}", + serde_json::json!({"type": "error", "message": "empty prompt (pass as arg or stdin)"}) + ); + } + return Err("empty prompt (pass as arg or stdin)".into()); + } + buf + }; + + let 
preset = merge_preset(cli.preset, file_cfg.preset.as_deref(), &prompt); + let permission_mode = merge_permission(cli.permission, file_cfg.permission.clone(), preset); + + let session_path = resolve_session_path( + cli.session.clone(), + file_cfg.session.as_deref(), + &cli.workspace, + ); + + let profile_path = resolve_analog_profile_path( + &cli.workspace, + cli.profile.clone(), + file_cfg.profile.as_deref(), + ); + + let profile_hint = if let Some(ref p) = profile_path { + load_profile_hint(p)? + } else { + None + }; + + let config = build_config( + &cli, + &file_cfg, + prompt, + profile_hint, + session_path, + preset, + permission_mode, + ); + let output_format = config.output_format; + + let mut out = std::io::stdout(); + if let Err(e) = claw_analog::run(config, &mut out).await { + if matches!(output_format, OutputFormat::Json) { + println!( + "{}", + serde_json::json!({"type": "error", "message": e.to_string()}) + ); + } + return Err(e); + } + Ok(()) +}