From bafc872030a7f5bef07865ea5963d4a9df8a54d9 Mon Sep 17 00:00:00 2001 From: MaksTinyWorkshop Date: Tue, 31 Mar 2026 15:57:09 +0200 Subject: [PATCH] leadtech-bmad-mcp: close lot 1 and implement lot 2 index --- knowledge/backend/patterns/prisma.md | 11 + knowledge/frontend/risques/tests.md | 11 + mcp/leadtech_bmad_mcp/README.md | 18 +- .../docs/implementation_plan.md | 32 ++- .../docs/knowledge_metadata.md | 2 + mcp/leadtech_bmad_mcp/pyproject.toml | 1 + .../src/leadtech_bmad_mcp/indexer.py | 15 ++ .../src/leadtech_bmad_mcp/knowledge.py | 209 +++++++++++++++++- mcp/leadtech_bmad_mcp/tests/test_knowledge.py | 70 ++++++ 9 files changed, 344 insertions(+), 25 deletions(-) create mode 100644 mcp/leadtech_bmad_mcp/src/leadtech_bmad_mcp/indexer.py diff --git a/knowledge/backend/patterns/prisma.md b/knowledge/backend/patterns/prisma.md index 94a2f99..8d1de96 100644 --- a/knowledge/backend/patterns/prisma.md +++ b/knowledge/backend/patterns/prisma.md @@ -1,3 +1,14 @@ +--- +title: Backend — Patterns : Prisma +domain: backend +bucket: patterns +tags: [prisma, postgres, migration, pagination, idempotency, decimal] +applies_to: [analysis, implementation, review, debug] +severity: medium +validated_on: 2026-03-23 +source_projects: [app-template-resto, app-alexandrie] +--- + # Backend — Patterns : Prisma > Extrait de la base de connaissance Lead_tech. Voir `knowledge/backend/patterns/README.md` pour l'index complet. diff --git a/knowledge/frontend/risques/tests.md b/knowledge/frontend/risques/tests.md index 40b2973..955c002 100644 --- a/knowledge/frontend/risques/tests.md +++ b/knowledge/frontend/risques/tests.md @@ -1,3 +1,14 @@ +--- +title: Frontend — Risques & vigilance : Tests +domain: frontend +bucket: risques +tags: [tests, jest, react-native, ts-jest, coverage, facade] +applies_to: [analysis, implementation, review, debug] +severity: high +validated_on: 2026-03-31 +source_projects: [app-alexandrie, app-template-resto] +--- + # Frontend — Risques & vigilance : Tests > Extrait de la base de connaissance Lead_tech. Voir `knowledge/frontend/risques/README.md` pour l'index complet. diff --git a/mcp/leadtech_bmad_mcp/README.md b/mcp/leadtech_bmad_mcp/README.md index 624b2cf..84c8e06 100644 --- a/mcp/leadtech_bmad_mcp/README.md +++ b/mcp/leadtech_bmad_mcp/README.md @@ -20,6 +20,7 @@ Documents de référence phase 1 : - exposer la base Lead_tech en `resources` MCP lisibles par un agent - retrouver les patterns/risques les plus probables pour une story +- utiliser un index local compile si disponible, avec fallback automatique sur le scan Markdown - appliquer quelques gates transverses deja stabilises dans Lead_tech - encapsuler la capitalisation dans un flux plus propre que l'edition manuelle @@ -86,6 +87,22 @@ pip install -e ".[dev]" pytest tests -q ``` +## Rebuild de l'index local + +Le MCP cherche d'abord un index JSON local a la racine de `LEADTECH_ROOT` : + +- `LEADTECH_ROOT/.leadtech_mcp_index.json` + +Pour le regenerer : + +```bash +cd /srv/helpers/_Assistant_Lead_Tech/mcp/leadtech_bmad_mcp +source .venv/bin/activate +leadtech-bmad-build-index +``` + +Si le fichier est absent, invalide ou d'une autre version, le serveur retombe automatiquement sur le scan Markdown direct. 
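For readers of this patch, a minimal sketch of driving the same rebuild programmatically rather than through the `leadtech-bmad-build-index` console script. It uses only functions this patch introduces (`get_paths`, `write_search_index`, `load_search_index`, `get_index_path`); the assertions simply restate the invariant the README describes, namely that the server picks up a freshly written index from the root of `LEADTECH_ROOT`.

```python
# Sketch: rebuild the local index from Python and verify that the
# server-side loader accepts it. Only the API added by this patch is used.
from leadtech_bmad_mcp.knowledge import (
    get_index_path,
    get_paths,
    load_search_index,
    write_search_index,
)

paths = get_paths()                      # honours LEADTECH_ROOT when set
target = write_search_index(paths)      # writes .leadtech_mcp_index.json at the root
payload = load_search_index(paths)      # returns None if missing, invalid, or wrong version

assert payload is not None              # the fresh index passes validation
assert target == get_index_path(paths)  # and sits exactly where the MCP looks for it
print(f"{len(payload['entries'])} entries indexed in {target}")
```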
+ ## Lancement (stdio) ```bash @@ -116,7 +133,6 @@ Avant de merger cette brique dans `main` : ## Upgrades conseilles -- index de recherche compile plutot qu'un scan fichier par fichier - metadonnees YAML/front matter dans `knowledge/` pour fiabiliser le ranking - schémas MCP formalises et versionnes pour chaque tool - logs d'execution par story pour auditer les gates et la decision humaine diff --git a/mcp/leadtech_bmad_mcp/docs/implementation_plan.md b/mcp/leadtech_bmad_mcp/docs/implementation_plan.md index c0a69a9..d8b46ea 100644 --- a/mcp/leadtech_bmad_mcp/docs/implementation_plan.md +++ b/mcp/leadtech_bmad_mcp/docs/implementation_plan.md @@ -16,7 +16,7 @@ Mode d'usage : | Lot | Objectif | Statut | | --- | --- | --- | | Lot 1 | Contrat MCP v1 + metadonnees `knowledge` + compatibilite loader | En cours avance | -| Lot 2 | Index compile local + branchement de la recherche MCP dessus | A faire | +| Lot 2 | Index compile local + branchement de la recherche MCP dessus | Termine | | Lot 3 | Gates configurables + packaging + rollout BMAD | A faire | --- @@ -41,6 +41,7 @@ Stabiliser le contrat du MCP et preparer un corpus `knowledge/` assez structure - [x] `knowledge/backend/patterns/auth.md` - [x] `knowledge/backend/patterns/contracts.md` - [x] `knowledge/backend/patterns/nestjs.md` +- [x] `knowledge/backend/patterns/prisma.md` - [x] `knowledge/backend/risques/auth.md` - [x] `knowledge/backend/risques/contracts.md` - [x] `knowledge/backend/risques/nestjs.md` @@ -48,12 +49,13 @@ Stabiliser le contrat du MCP et preparer un corpus `knowledge/` assez structure - [x] `knowledge/frontend/patterns/navigation.md` - [x] `knowledge/frontend/patterns/tests.md` - [x] `knowledge/frontend/risques/navigation.md` +- [x] `knowledge/frontend/risques/tests.md` - [x] `knowledge/workflow/risques/story-tracking.md` ### Reste a faire avant cloture complete du lot -- [ ] Verifier si `knowledge/backend/patterns/prisma.md` doit aussi entrer dans le noyau pilote -- [ ] Verifier si `knowledge/frontend/risques/tests.md` doit aussi entrer dans le noyau pilote +- [x] Verifier si `knowledge/backend/patterns/prisma.md` doit aussi entrer dans le noyau pilote +- [x] Verifier si `knowledge/frontend/risques/tests.md` doit aussi entrer dans le noyau pilote - [ ] Faire un commit de cloture explicite du Lot 1 ### Critere de fin @@ -72,25 +74,26 @@ Remplacer le scan Markdown a la volee par un index local plus rapide, plus fiabl ### Taches -- [ ] Definir le format de l'index (JSON d'abord) -- [ ] Creer un script de build d'index -- [ ] Indexer les docs `knowledge/*` -- [ ] Indexer les docs globaux `10_*`, `40_*`, `90_*` -- [ ] Prevoir un mode fallback si l'index n'existe pas -- [ ] Rebrancher `search_knowledge()` sur l'index -- [ ] Rebrancher `search_global_docs()` sur l'index -- [ ] Ajouter des tests d'integration sur un mini corpus indexe +- [x] Definir le format de l'index (JSON d'abord) +- [x] Creer un script de build d'index +- [x] Indexer les docs `knowledge/*` +- [x] Indexer les docs globaux `10_*`, `40_*`, `90_*` +- [x] Prevoir un mode fallback si l'index n'existe pas +- [x] Rebrancher `search_knowledge()` sur l'index +- [x] Rebrancher `search_global_docs()` sur l'index +- [x] Ajouter des tests d'integration sur un mini corpus indexe ### Livrables attendus - `src/leadtech_bmad_mcp/indexer.py` -- un artefact d'index local versionnable ou regenerable +- un artefact d'index local regenerable (`.leadtech_mcp_index.json`) - documentation de rebuild ### Critere de fin - les tools de recherche utilisent d'abord l'index - le fallback 
texte brut reste disponible pour ne pas bloquer le dev +- le rebuild est documente et teste --- @@ -134,6 +137,11 @@ Sortir les regles du code dur, rendre l'installation reproductible, puis cabler - loader front matter ajoute - `matched_docs` ajoute a `get_guidance` - noyau pilote annote sur backend, frontend et workflow +- Lot 2 implemente +- index JSON local `.leadtech_mcp_index.json` ajoute au design +- `search_knowledge()` et `search_global_docs()` relis d'abord sur l'index avec fallback scan +- script `leadtech-bmad-build-index` ajoute +- tests d'integration indexes ajoutes --- diff --git a/mcp/leadtech_bmad_mcp/docs/knowledge_metadata.md b/mcp/leadtech_bmad_mcp/docs/knowledge_metadata.md index 1185fe9..8fadb4a 100644 --- a/mcp/leadtech_bmad_mcp/docs/knowledge_metadata.md +++ b/mcp/leadtech_bmad_mcp/docs/knowledge_metadata.md @@ -92,6 +92,7 @@ Noyau pilote actuellement couvert : - `knowledge/backend/patterns/auth.md` - `knowledge/backend/patterns/contracts.md` - `knowledge/backend/patterns/nestjs.md` +- `knowledge/backend/patterns/prisma.md` - `knowledge/backend/risques/auth.md` - `knowledge/backend/risques/contracts.md` - `knowledge/backend/risques/nestjs.md` @@ -99,6 +100,7 @@ Noyau pilote actuellement couvert : - `knowledge/frontend/patterns/navigation.md` - `knowledge/frontend/patterns/tests.md` - `knowledge/frontend/risques/navigation.md` +- `knowledge/frontend/risques/tests.md` - `knowledge/workflow/risques/story-tracking.md` Phase 2 : diff --git a/mcp/leadtech_bmad_mcp/pyproject.toml b/mcp/leadtech_bmad_mcp/pyproject.toml index 2631884..cc1ec24 100644 --- a/mcp/leadtech_bmad_mcp/pyproject.toml +++ b/mcp/leadtech_bmad_mcp/pyproject.toml @@ -17,6 +17,7 @@ dev = ["pytest>=7.0"] [project.scripts] leadtech-bmad-mcp = "leadtech_bmad_mcp.server:main" +leadtech-bmad-build-index = "leadtech_bmad_mcp.indexer:main" [tool.setuptools] package-dir = {"" = "src"} diff --git a/mcp/leadtech_bmad_mcp/src/leadtech_bmad_mcp/indexer.py b/mcp/leadtech_bmad_mcp/src/leadtech_bmad_mcp/indexer.py new file mode 100644 index 0000000..55f6025 --- /dev/null +++ b/mcp/leadtech_bmad_mcp/src/leadtech_bmad_mcp/indexer.py @@ -0,0 +1,15 @@ +from __future__ import annotations + +from .knowledge import get_index_path, get_paths, write_search_index + + +def main() -> None: + paths = get_paths() + target = write_search_index(paths) + print(f"Index écrit: {target}") + print(f"Documents racine: {paths.root}") + print(f"Index attendu par le MCP: {get_index_path(paths)}") + + +if __name__ == "__main__": + main() diff --git a/mcp/leadtech_bmad_mcp/src/leadtech_bmad_mcp/knowledge.py b/mcp/leadtech_bmad_mcp/src/leadtech_bmad_mcp/knowledge.py index e5793d1..bc89b60 100644 --- a/mcp/leadtech_bmad_mcp/src/leadtech_bmad_mcp/knowledge.py +++ b/mcp/leadtech_bmad_mcp/src/leadtech_bmad_mcp/knowledge.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import os from dataclasses import dataclass from pathlib import Path @@ -8,6 +9,8 @@ from typing import Any VALID_DOMAINS = {"backend", "frontend", "ux", "n8n", "product", "workflow"} VALID_BUCKETS = {"patterns", "risques"} +INDEX_VERSION = 1 +INDEX_FILENAME = ".leadtech_mcp_index.json" @dataclass(frozen=True) @@ -26,7 +29,12 @@ class KnowledgeDocument: def get_paths() -> LeadtechPaths: - root = Path(os.getenv("LEADTECH_ROOT", "/srv/helpers/_Assistant_Lead_Tech")).resolve() + configured_root = os.getenv("LEADTECH_ROOT") + if configured_root: + root = Path(configured_root).resolve() + else: + default_root = Path("/srv/helpers/_Assistant_Lead_Tech").resolve() + root = 
default_root if default_root.exists() else Path(__file__).resolve().parents[4] return LeadtechPaths( root=root, knowledge=root / "knowledge", @@ -100,7 +108,9 @@ def parse_front_matter(content: str) -> tuple[dict[str, Any], str]: key, value = stripped.split(":", 1) metadata[key.strip()] = _coerce_metadata_scalar(value) - body = "\n".join(lines[end_idx + 1 :]).lstrip("\n") + marker = "\n---\n" + _, _, tail = content.partition(marker) + body = tail.lstrip("\n") return metadata, body @@ -110,6 +120,11 @@ def read_knowledge_document(path: Path) -> KnowledgeDocument: return KnowledgeDocument(path=path, metadata=metadata, body=body) +def get_index_path(paths: LeadtechPaths | None = None) -> Path: + resolved_paths = paths or get_paths() + return resolved_paths.root / INDEX_FILENAME + + def _extract_excerpt(content: str, tokens: list[str]) -> str: """Retourne un extrait centré sur la première occurrence d'un token, ou le début du fichier.""" low = content.lower() @@ -129,18 +144,182 @@ def _extract_excerpt(content: str, tokens: list[str]) -> str: return excerpt -def search_knowledge(domain: str, query: str, bucket: str | None = None, max_items: int = 12) -> list[dict[str, str]]: - buckets = [bucket] if bucket else ["patterns", "risques"] - tokens = [t.strip().lower() for t in query.split() if t.strip()] +def _metadata_search_text(metadata: dict[str, Any]) -> str: + parts: list[str] = [] + for value in metadata.values(): + if isinstance(value, list): + parts.extend(str(item) for item in value) + else: + parts.append(str(value)) + return " ".join(parts).lower() + + +def _score_text(content: str, tokens: list[str]) -> int: + low = content.lower() + return sum(low.count(tok) for tok in tokens) + + +def _build_knowledge_entry(paths: LeadtechPaths, file_path: Path, bucket: str, domain: str) -> dict[str, Any]: + doc = read_knowledge_document(file_path) + return { + "kind": "knowledge", + "domain": domain, + "bucket": bucket, + "relative_path": str(file_path.relative_to(paths.root)), + "title": str(doc.metadata.get("title", file_path.stem)), + "metadata": doc.metadata, + "body": doc.body, + } + + +def _build_global_entry(paths: LeadtechPaths, filename: str, label: str) -> dict[str, Any] | None: + file_path = paths.root / filename + if not file_path.exists(): + return None + return { + "kind": "global", + "label": label, + "filename": filename, + "relative_path": str(file_path.relative_to(paths.root)), + "content": read_text(file_path), + } + + +def build_search_index(paths: LeadtechPaths | None = None) -> dict[str, Any]: + resolved_paths = paths or get_paths() + entries: list[dict[str, Any]] = [] + + for domain in sorted(VALID_DOMAINS): + for bucket in sorted(VALID_BUCKETS): + for file_path in list_domain_files(domain, bucket): + entries.append(_build_knowledge_entry(resolved_paths, file_path, bucket, domain)) + + for filename, label in _GLOBAL_DOCS: + entry = _build_global_entry(resolved_paths, filename, label) + if entry is not None: + entries.append(entry) + + return { + "version": INDEX_VERSION, + "root": str(resolved_paths.root), + "entries": entries, + } + + +def write_search_index(paths: LeadtechPaths | None = None, output_path: Path | None = None) -> Path: + resolved_paths = paths or get_paths() + target = output_path or get_index_path(resolved_paths) + payload = build_search_index(resolved_paths) + target.write_text(json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") + return target + + +def load_search_index(paths: LeadtechPaths | None = None) -> dict[str, Any] | None: + 
resolved_paths = paths or get_paths() + index_path = get_index_path(resolved_paths) + if not index_path.exists(): + return None + + try: + payload = json.loads(index_path.read_text(encoding="utf-8")) + except json.JSONDecodeError: + return None + + if payload.get("version") != INDEX_VERSION: + return None + + entries = payload.get("entries") + if not isinstance(entries, list): + return None + + return payload + + +def _search_knowledge_from_entries(root: Path, entries: list[dict[str, Any]], domain: str, buckets: list[str], tokens: list[str], max_items: int) -> list[dict[str, str]]: out: list[dict[str, str]] = [] + for entry in entries: + if entry.get("kind") != "knowledge": + continue + if entry.get("domain") != domain: + continue + if entry.get("bucket") not in buckets: + continue + + metadata = entry.get("metadata", {}) + body = str(entry.get("body", "")) + score = _score_text(body, tokens) + score += _score_text(_metadata_search_text(metadata), tokens) * 3 + if score <= 0: + continue + + relative_path = Path(str(entry["relative_path"])) + out.append( + { + "path": str((root / relative_path).resolve()), + "bucket": str(entry["bucket"]), + "title": str(entry.get("title", relative_path.stem)), + "score": str(score), + "excerpt": _extract_excerpt(body, tokens), + "tags": ", ".join(metadata.get("tags", [])), + "severity": str(metadata.get("severity", "")), + "applies_to": ", ".join(metadata.get("applies_to", [])), + } + ) + + out.sort(key=lambda x: int(x["score"]), reverse=True) + return out[:max_items] + + +def _search_global_from_entries(root: Path, entries: list[dict[str, Any]], tokens: list[str], max_items: int) -> list[dict[str, str]]: + out: list[dict[str, str]] = [] + + for entry in entries: + if entry.get("kind") != "global": + continue + + content = str(entry.get("content", "")) + score = _score_text(content, tokens) + if score <= 0: + continue + + relative_path = Path(str(entry["relative_path"])) + out.append( + { + "path": str((root / relative_path).resolve()), + "bucket": "global", + "title": str(entry["label"]), + "filename": str(entry["filename"]), + "score": str(score), + "excerpt": _extract_excerpt(content, tokens), + } + ) + + out.sort(key=lambda x: int(x["score"]), reverse=True) + return out[:max_items] + + +def _validate_slug(slug: str) -> str: + path = Path(slug) + if path.name != slug or any(part in {"..", "."} for part in path.parts): + raise ValueError("Chemin hors base autorisee: slug invalide") + return slug + + +def search_knowledge(domain: str, query: str, bucket: str | None = None, max_items: int = 12) -> list[dict[str, str]]: + paths = get_paths() + buckets = [bucket] if bucket else ["patterns", "risques"] + tokens = [t.strip().lower() for t in query.split() if t.strip()] + index = load_search_index(paths) + if index is not None: + return _search_knowledge_from_entries(paths.root, index["entries"], domain, buckets, tokens, max_items) + + out: list[dict[str, str]] = [] for b in buckets: for file_path in list_domain_files(domain, b): doc = read_knowledge_document(file_path) - body_low = doc.body.lower() - metadata_text = " ".join(str(value) for value in doc.metadata.values()).lower() - score = sum(body_low.count(tok) for tok in tokens) - score += sum(metadata_text.count(tok) * 3 for tok in tokens) + score = _score_text(doc.body, tokens) + score += _score_text(_metadata_search_text(doc.metadata), tokens) * 3 if score <= 0: continue out.append( @@ -161,7 +340,8 @@ def search_knowledge(domain: str, query: str, bucket: str | None = None, max_ite def 
read_knowledge_doc(domain: str, bucket: str, slug: str) -> str: - file_path = _safe_path(get_paths().knowledge, domain, bucket, f"{slug}.md") + safe_slug = _validate_slug(slug) + file_path = _safe_path(get_paths().knowledge, domain, bucket, f"{safe_slug}.md") if not file_path.exists(): raise FileNotFoundError(f"Fichier introuvable: {file_path}") return read_text(file_path) @@ -178,7 +358,12 @@ _GLOBAL_DOCS: list[tuple[str, str]] = [ def search_global_docs(query: str, max_items: int = 4) -> list[dict[str, str]]: """Cherche dans les fichiers globaux Lead_tech (decisions, postmortems, conventions).""" tokens = [t.strip().lower() for t in query.split() if t.strip()] - root = get_paths().root + paths = get_paths() + index = load_search_index(paths) + if index is not None: + return _search_global_from_entries(paths.root, index["entries"], tokens, max_items) + + root = paths.root out: list[dict[str, str]] = [] for filename, label in _GLOBAL_DOCS: @@ -186,7 +371,7 @@ def search_global_docs(query: str, max_items: int = 4) -> list[dict[str, str]]: if not file_path.exists(): continue content = read_text(file_path) - score = sum(content.lower().count(tok) for tok in tokens) + score = _score_text(content, tokens) if score <= 0: continue out.append( diff --git a/mcp/leadtech_bmad_mcp/tests/test_knowledge.py b/mcp/leadtech_bmad_mcp/tests/test_knowledge.py index 22b5b9b..50b89ea 100644 --- a/mcp/leadtech_bmad_mcp/tests/test_knowledge.py +++ b/mcp/leadtech_bmad_mcp/tests/test_knowledge.py @@ -9,6 +9,10 @@ from leadtech_bmad_mcp.knowledge import ( list_domain_files, search_knowledge, search_global_docs, + build_search_index, + write_search_index, + load_search_index, + get_index_path, read_knowledge_doc, read_knowledge_document, _extract_excerpt, @@ -147,6 +151,56 @@ def test_search_knowledge_uses_front_matter_tags(tmp_path): assert results[0]["title"] == "Backend — Patterns : NestJS" +def test_build_search_index_includes_knowledge_and_global_docs(tmp_path): + paths = _make_knowledge(tmp_path) + _make_global_docs(tmp_path) + with patch("leadtech_bmad_mcp.knowledge.get_paths", return_value=paths): + payload = build_search_index() + + assert payload["version"] == 1 + assert any(entry["kind"] == "knowledge" for entry in payload["entries"]) + assert any(entry["kind"] == "global" for entry in payload["entries"]) + + +def test_search_knowledge_uses_index_when_present(tmp_path): + paths = _make_knowledge(tmp_path) + _make_global_docs(tmp_path) + + with patch("leadtech_bmad_mcp.knowledge.get_paths", return_value=paths): + write_search_index() + + (paths.knowledge / "backend" / "patterns" / "contracts.md").write_text( + "contenu remplace sans mot cle", + encoding="utf-8", + ) + + with patch("leadtech_bmad_mcp.knowledge.get_paths", return_value=paths): + results = search_knowledge("backend", "zod") + + assert results + assert results[0]["title"] == "contracts" + + +def test_search_knowledge_falls_back_when_index_missing(tmp_path): + paths = _make_knowledge(tmp_path) + with patch("leadtech_bmad_mcp.knowledge.get_paths", return_value=paths): + results = search_knowledge("backend", "zod contract") + index = load_search_index() + + assert results + assert index is None + + +def test_load_search_index_invalid_json_returns_none(tmp_path): + paths = _make_knowledge(tmp_path) + get_index_path(paths).write_text("{invalid", encoding="utf-8") + + with patch("leadtech_bmad_mcp.knowledge.get_paths", return_value=paths): + payload = load_search_index() + + assert payload is None + + # 
--------------------------------------------------------------------------- # read_knowledge_doc # --------------------------------------------------------------------------- @@ -290,6 +344,22 @@ def test_search_global_docs_includes_excerpt(tmp_path): assert len(results[0]["excerpt"]) > 0 +def test_search_global_docs_uses_index_when_present(tmp_path): + paths = _make_global_docs(tmp_path) + (paths.knowledge / "backend" / "patterns").mkdir(parents=True) + + with patch("leadtech_bmad_mcp.knowledge.get_paths", return_value=paths): + write_search_index() + + (tmp_path / "40_decisions_et_archi.md").write_text("contenu modifie sans postgres", encoding="utf-8") + + with patch("leadtech_bmad_mcp.knowledge.get_paths", return_value=paths): + results = search_global_docs("PostgreSQL") + + assert results + assert results[0]["title"] == "architecture" + + def test_search_global_docs_missing_file_skipped(tmp_path): # Seulement 40_ créé, les deux autres absents (tmp_path / "40_decisions_et_archi.md").write_text("PostgreSQL recommandé.")
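One path the new tests do not exercise is the `_validate_slug` guard this patch wires into `read_knowledge_doc`. A complementary test could look like the sketch below; the test name is hypothetical, and no knowledge fixture is needed because the guard raises before `get_paths()` or any file I/O is reached.

```python
# Sketch: cover the slug guard added to read_knowledge_doc by this patch.
import pytest

from leadtech_bmad_mcp.knowledge import read_knowledge_doc


def test_read_knowledge_doc_rejects_traversal_slug():
    # _validate_slug rejects any slug whose Path name differs from the slug
    # itself, so a traversal attempt fails before touching the filesystem.
    with pytest.raises(ValueError):
        read_knowledge_doc("backend", "patterns", "../patterns/contracts")
```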
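Finally, a small end-to-end sketch of the index-first behaviour this patch gives `search_knowledge()`: the same call is served from `.leadtech_mcp_index.json` while the file exists, and from the direct Markdown scan once it is removed. The root path is the default documented in the README, and the query terms come from the tags of the newly annotated `prisma.md`; writing the index assumes the process has write access to that root.

```python
# Sketch: observe the index-first search path, then the fallback scan.
import os

from leadtech_bmad_mcp.knowledge import (
    get_index_path,
    get_paths,
    search_knowledge,
    write_search_index,
)

os.environ["LEADTECH_ROOT"] = "/srv/helpers/_Assistant_Lead_Tech"
paths = get_paths()

write_search_index(paths)
hits = search_knowledge("backend", "prisma pagination", max_items=5)  # served from the index
for hit in hits:
    print(hit["score"], hit["title"], hit["path"])

get_index_path(paths).unlink()
hits = search_knowledge("backend", "prisma pagination", max_items=5)  # fallback Markdown scan
```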