From 0a07ab8593da6ce4aaa6c22f5faa4795fee0f814 Mon Sep 17 00:00:00 2001 From: Ole Date: Sun, 31 May 2026 20:25:41 +0000 Subject: [PATCH] init --- .agents/skills/code-quality/SKILL.md | 413 +++++ .../correctness-and-error-handling/SKILL.md | 364 +++++ .agents/skills/create-client-tool/SKILL.md | 122 ++ .agents/skills/dependencies-audit/SKILL.md | 306 ++++ .agents/skills/design/SKILL.md | 52 + .agents/skills/design/building-pages.md | 354 +++++ .agents/skills/design/handling-states.md | 426 ++++++ .agents/skills/design/picking-components.md | 18 + .agents/skills/design/primitive-usage.md | 343 +++++ .agents/skills/design/storybook-links.md | 133 ++ .agents/skills/design/writing-copy.md | 233 +++ .../dm-limits-and-best-practices/SKILL.md | 613 ++++++++ .agents/skills/flows-code-review/SKILL.md | 40 + .agents/skills/graph-viewer/SKILL.md | 103 ++ .../graph-viewer/code/GraphViewerCanvas.tsx | 177 +++ .../graph-viewer/code/GraphViewerLegend.tsx | 122 ++ .agents/skills/graph-viewer/code/README.md | 293 ++++ .../skills/graph-viewer/code/ZoomControls.tsx | 111 ++ .../skills/graph-viewer/code/graph-config.ts | 271 ++++ .../skills/graph-viewer/code/graph-service.ts | 855 +++++++++++ .agents/skills/graph-viewer/code/index.ts | 21 + .agents/skills/graph-viewer/code/types.ts | 431 ++++++ .../graph-viewer/code/useCanvasResize.ts | 30 + .../graph-viewer/code/useDataModelLoader.ts | 90 ++ .../graph-viewer/code/useGraphDataPipeline.ts | 101 ++ .../graph-viewer/code/useGraphSelection.ts | 98 ++ .../graph-viewer/code/useGraphViewer.tsx | 288 ++++ .../skills/graph-viewer/code/useNodeBuffer.ts | 132 ++ .../skills/graph-viewer/code/useSeedNode.ts | 113 ++ .agents/skills/integrate-atlas-chat/SKILL.md | 143 ++ .../integrate-atlas-chat/code/client.ts | 153 ++ .../skills/integrate-atlas-chat/code/index.ts | 31 + .../code/pyodide-react.ts | 146 ++ .../code/pyodide-runtime.ts | 295 ++++ .../integrate-atlas-chat/code/pyodide.ts | 22 + .../integrate-atlas-chat/code/python.ts | 45 + 
.../skills/integrate-atlas-chat/code/react.ts | 278 ++++ .../integrate-atlas-chat/code/session.test.ts | 155 ++ .../integrate-atlas-chat/code/session.ts | 325 ++++ .../skills/integrate-atlas-chat/code/types.ts | 320 ++++ .../integrate-atlas-chat/code/validation.ts | 49 + .agents/skills/integrate-file-viewer/SKILL.md | 287 ++++ .../code/CogniteFileViewer.tsx | 479 ++++++ .../code/DocumentAnnotationOverlay.tsx | 229 +++ .../code/fileResolution.ts | 132 ++ .../integrate-file-viewer/code/index.ts | 40 + .../integrate-file-viewer/code/mimeTypes.ts | 171 +++ .../integrate-file-viewer/code/types.ts | 189 +++ .../code/useDocumentAnnotations.ts | 268 ++++ .../code/useFileResolver.ts | 122 ++ .../integrate-file-viewer/code/useViewport.ts | 280 ++++ .../skills/integrate-fusion-agent/SKILL.md | 342 +++++ .agents/skills/integrate-todo-list/SKILL.md | 158 ++ .../integrate-todo-list/code/TodoContext.tsx | 19 + .../integrate-todo-list/code/TodoItemRow.tsx | 38 + .../integrate-todo-list/code/TodoPanel.tsx | 44 + .../code/TodoToolResultCard.tsx | 47 + .../integrate-todo-list/code/todoWriteTool.ts | 344 +++++ .../skills/integrate-todo-list/code/types.ts | 7 + .../integrate-todo-list/code/useTodoList.ts | 7 + .../code/useTodoWriteTool.ts | 17 + .agents/skills/migrate-app-to-flows/SKILL.md | 130 ++ .agents/skills/performance/SKILL.md | 525 +++++++ .../pull-changes-resolve-conflicts/SKILL.md | 95 ++ .../reference.md | 36 + .agents/skills/reveal-3d/SKILL.md | 162 ++ .agents/skills/reveal-3d/code/README.md | 46 + .../code/reveal/cache/AssetMappingCache.ts | 180 +++ .../code/reveal/cache/CacheProvider.tsx | 41 + .../reveal/components/Reveal3DResources.tsx | 213 +++ .../code/reveal/components/RevealCanvas.tsx | 27 + .../reveal/components/RevealKeepAlive.tsx | 84 ++ .../context/InstanceStylingProvider.tsx | 83 ++ .../code/reveal/context/RevealProvider.tsx | 63 + .../reveal/context/instanceStylingContext.ts | 14 + .../code/reveal/context/revealContext.ts | 12 + 
.../hooks/use3dDataForSelectedInstance.ts | 30 + .../code/reveal/hooks/useFdmMappings.ts | 330 ++++ .../reveal/hooks/useFindRelated3dInstances.ts | 21 + .../code/reveal/hooks/useFocusCamera.ts | 79 + .../code/reveal/hooks/useInstanceStyling.ts | 64 + .../hooks/useInstancesWithBoundingBoxes.ts | 130 ++ .../reveal-3d/code/reveal/hooks/useModels.ts | 153 ++ .../reveal/hooks/usePrefetchedFdmMappings.ts | 331 ++++ .../code/reveal/hooks/useRelatedInstances.ts | 250 ++++ .../hooks/useRemoveNonReferencedModels.ts | 45 + .../code/reveal/hooks/useRenderTarget.ts | 30 + .../reveal-3d/code/reveal/hooks/useReveal.ts | 11 + .../code/reveal/hooks/useRevealContext.ts | 13 + .agents/skills/reveal-3d/code/reveal/index.ts | 54 + .../code/reveal/settings/QualityPresets.ts | 55 + .../settings/RevealSettingsController.ts | 125 ++ .agents/skills/reveal-3d/code/reveal/types.ts | 90 ++ .../reveal-3d/code/reveal/utils/cdf-types.ts | 24 + .../code/reveal/utils/data-mapper.ts | 54 + .../code/reveal/utils/executeParallel.ts | 42 + .../reveal-3d/code/reveal/utils/views.ts | 70 + .agents/skills/reveal-3d/evals/evals.json | 201 +++ .../reveal-3d/references/implementation.md | 301 ++++ .../reveal-3d/references/vite-config.md | 136 ++ .agents/skills/security/SKILL.md | 248 +++ .agents/skills/setup-flows-auth/SKILL.md | 191 +++ .agents/skills/setup-python-tools/SKILL.md | 176 +++ .agents/skills/skill-creator/SKILL.md | 482 ++++++ .../skills/skill-creator/agents/analyzer.md | 274 ++++ .../skills/skill-creator/agents/comparator.md | 202 +++ .agents/skills/skill-creator/agents/grader.md | 223 +++ .../skill-creator/assets/eval_review.html | 146 ++ .../eval-viewer/generate_review.py | 471 ++++++ .../skill-creator/eval-viewer/viewer.html | 1325 +++++++++++++++++ .../skill-creator/references/schemas.md | 446 ++++++ .../skills/skill-creator/scripts/__init__.py | 0 .../scripts/aggregate_benchmark.py | 401 +++++ .../skill-creator/scripts/generate_report.py | 326 ++++ .../scripts/improve_description.py | 251 
++++ .../skill-creator/scripts/package_skill.py | 143 ++ .../skill-creator/scripts/quick_validate.py | 103 ++ .../skills/skill-creator/scripts/run_eval.py | 368 +++++ .../skills/skill-creator/scripts/run_loop.py | 343 +++++ .agents/skills/skill-creator/scripts/utils.py | 102 ++ .agents/skills/test-coverage/SKILL.md | 375 +++++ .agents/skills/use-topbar/IMPLEMENTATION.md | 179 +++ .agents/skills/use-topbar/INTERVIEW.md | 178 +++ .agents/skills/use-topbar/RULES.md | 205 +++ .agents/skills/use-topbar/SKILL.md | 91 ++ .claude/commands/speckit.analyze.md | 249 ++++ .claude/commands/speckit.checklist.md | 361 +++++ .claude/commands/speckit.clarify.md | 247 +++ .claude/commands/speckit.implement.md | 198 +++ .claude/commands/speckit.plan.md | 149 ++ .claude/commands/speckit.specify.md | 327 ++++ .claude/commands/speckit.tasks.md | 200 +++ .claude/skills/code-quality/SKILL.md | 413 +++++ .../correctness-and-error-handling/SKILL.md | 364 +++++ .claude/skills/create-client-tool/SKILL.md | 122 ++ .claude/skills/dependencies-audit/SKILL.md | 306 ++++ .claude/skills/design/SKILL.md | 52 + .claude/skills/design/building-pages.md | 354 +++++ .claude/skills/design/handling-states.md | 426 ++++++ .claude/skills/design/picking-components.md | 18 + .claude/skills/design/primitive-usage.md | 343 +++++ .claude/skills/design/storybook-links.md | 133 ++ .claude/skills/design/writing-copy.md | 233 +++ .../dm-limits-and-best-practices/SKILL.md | 613 ++++++++ .claude/skills/flows-code-review/SKILL.md | 40 + .claude/skills/graph-viewer/SKILL.md | 103 ++ .../graph-viewer/code/GraphViewerCanvas.tsx | 177 +++ .../graph-viewer/code/GraphViewerLegend.tsx | 122 ++ .claude/skills/graph-viewer/code/README.md | 293 ++++ .../skills/graph-viewer/code/ZoomControls.tsx | 111 ++ .../skills/graph-viewer/code/graph-config.ts | 271 ++++ .../skills/graph-viewer/code/graph-service.ts | 855 +++++++++++ .claude/skills/graph-viewer/code/index.ts | 21 + .claude/skills/graph-viewer/code/types.ts | 431 ++++++ 
.../graph-viewer/code/useCanvasResize.ts | 30 + .../graph-viewer/code/useDataModelLoader.ts | 90 ++ .../graph-viewer/code/useGraphDataPipeline.ts | 101 ++ .../graph-viewer/code/useGraphSelection.ts | 98 ++ .../graph-viewer/code/useGraphViewer.tsx | 288 ++++ .../skills/graph-viewer/code/useNodeBuffer.ts | 132 ++ .../skills/graph-viewer/code/useSeedNode.ts | 113 ++ .claude/skills/integrate-atlas-chat/SKILL.md | 143 ++ .../integrate-atlas-chat/code/client.ts | 153 ++ .../skills/integrate-atlas-chat/code/index.ts | 31 + .../code/pyodide-react.ts | 146 ++ .../code/pyodide-runtime.ts | 295 ++++ .../integrate-atlas-chat/code/pyodide.ts | 22 + .../integrate-atlas-chat/code/python.ts | 45 + .../skills/integrate-atlas-chat/code/react.ts | 278 ++++ .../integrate-atlas-chat/code/session.test.ts | 155 ++ .../integrate-atlas-chat/code/session.ts | 325 ++++ .../skills/integrate-atlas-chat/code/types.ts | 320 ++++ .../integrate-atlas-chat/code/validation.ts | 49 + .claude/skills/integrate-file-viewer/SKILL.md | 287 ++++ .../code/CogniteFileViewer.tsx | 479 ++++++ .../code/DocumentAnnotationOverlay.tsx | 229 +++ .../code/fileResolution.ts | 132 ++ .../integrate-file-viewer/code/index.ts | 40 + .../integrate-file-viewer/code/mimeTypes.ts | 171 +++ .../integrate-file-viewer/code/types.ts | 189 +++ .../code/useDocumentAnnotations.ts | 268 ++++ .../code/useFileResolver.ts | 122 ++ .../integrate-file-viewer/code/useViewport.ts | 280 ++++ .../skills/integrate-fusion-agent/SKILL.md | 342 +++++ .claude/skills/integrate-todo-list/SKILL.md | 158 ++ .../integrate-todo-list/code/TodoContext.tsx | 19 + .../integrate-todo-list/code/TodoItemRow.tsx | 38 + .../integrate-todo-list/code/TodoPanel.tsx | 44 + .../code/TodoToolResultCard.tsx | 47 + .../integrate-todo-list/code/todoWriteTool.ts | 344 +++++ .../skills/integrate-todo-list/code/types.ts | 7 + .../integrate-todo-list/code/useTodoList.ts | 7 + .../code/useTodoWriteTool.ts | 17 + .claude/skills/migrate-app-to-flows/SKILL.md | 130 ++ 
.claude/skills/performance/SKILL.md | 525 +++++++ .../pull-changes-resolve-conflicts/SKILL.md | 95 ++ .../reference.md | 36 + .claude/skills/reveal-3d/SKILL.md | 162 ++ .claude/skills/reveal-3d/code/README.md | 46 + .../code/reveal/cache/AssetMappingCache.ts | 180 +++ .../code/reveal/cache/CacheProvider.tsx | 41 + .../reveal/components/Reveal3DResources.tsx | 213 +++ .../code/reveal/components/RevealCanvas.tsx | 27 + .../reveal/components/RevealKeepAlive.tsx | 84 ++ .../context/InstanceStylingProvider.tsx | 83 ++ .../code/reveal/context/RevealProvider.tsx | 63 + .../reveal/context/instanceStylingContext.ts | 14 + .../code/reveal/context/revealContext.ts | 12 + .../hooks/use3dDataForSelectedInstance.ts | 30 + .../code/reveal/hooks/useFdmMappings.ts | 330 ++++ .../reveal/hooks/useFindRelated3dInstances.ts | 21 + .../code/reveal/hooks/useFocusCamera.ts | 79 + .../code/reveal/hooks/useInstanceStyling.ts | 64 + .../hooks/useInstancesWithBoundingBoxes.ts | 130 ++ .../reveal-3d/code/reveal/hooks/useModels.ts | 153 ++ .../reveal/hooks/usePrefetchedFdmMappings.ts | 331 ++++ .../code/reveal/hooks/useRelatedInstances.ts | 250 ++++ .../hooks/useRemoveNonReferencedModels.ts | 45 + .../code/reveal/hooks/useRenderTarget.ts | 30 + .../reveal-3d/code/reveal/hooks/useReveal.ts | 11 + .../code/reveal/hooks/useRevealContext.ts | 13 + .claude/skills/reveal-3d/code/reveal/index.ts | 54 + .../code/reveal/settings/QualityPresets.ts | 55 + .../settings/RevealSettingsController.ts | 125 ++ .claude/skills/reveal-3d/code/reveal/types.ts | 90 ++ .../reveal-3d/code/reveal/utils/cdf-types.ts | 24 + .../code/reveal/utils/data-mapper.ts | 54 + .../code/reveal/utils/executeParallel.ts | 42 + .../reveal-3d/code/reveal/utils/views.ts | 70 + .claude/skills/reveal-3d/evals/evals.json | 201 +++ .../reveal-3d/references/implementation.md | 301 ++++ .../reveal-3d/references/vite-config.md | 136 ++ .claude/skills/security/SKILL.md | 248 +++ .claude/skills/setup-flows-auth/SKILL.md | 191 +++ 
.claude/skills/setup-python-tools/SKILL.md | 176 +++ .claude/skills/skill-creator/SKILL.md | 482 ++++++ .../skills/skill-creator/agents/analyzer.md | 274 ++++ .../skills/skill-creator/agents/comparator.md | 202 +++ .claude/skills/skill-creator/agents/grader.md | 223 +++ .../skill-creator/assets/eval_review.html | 146 ++ .../eval-viewer/generate_review.py | 471 ++++++ .../skill-creator/eval-viewer/viewer.html | 1325 +++++++++++++++++ .../skill-creator/references/schemas.md | 446 ++++++ .../skills/skill-creator/scripts/__init__.py | 0 .../scripts/aggregate_benchmark.py | 401 +++++ .../skill-creator/scripts/generate_report.py | 326 ++++ .../scripts/improve_description.py | 251 ++++ .../skill-creator/scripts/package_skill.py | 143 ++ .../skill-creator/scripts/quick_validate.py | 103 ++ .../skills/skill-creator/scripts/run_eval.py | 368 +++++ .../skills/skill-creator/scripts/run_loop.py | 343 +++++ .claude/skills/skill-creator/scripts/utils.py | 102 ++ .claude/skills/test-coverage/SKILL.md | 375 +++++ .claude/skills/use-topbar/IMPLEMENTATION.md | 179 +++ .claude/skills/use-topbar/INTERVIEW.md | 178 +++ .claude/skills/use-topbar/RULES.md | 205 +++ .claude/skills/use-topbar/SKILL.md | 91 ++ .cursor/commands/speckit.analyze.md | 249 ++++ .cursor/commands/speckit.checklist.md | 361 +++++ .cursor/commands/speckit.clarify.md | 247 +++ .cursor/commands/speckit.implement.md | 198 +++ .cursor/commands/speckit.plan.md | 149 ++ .cursor/commands/speckit.specify.md | 327 ++++ .cursor/commands/speckit.tasks.md | 200 +++ .specify/init-options.json | 3 + .specify/memory/constitution.md | 298 ++++ .specify/scripts/bash/check-prerequisites.sh | 190 +++ .specify/scripts/bash/common.sh | 645 ++++++++ .specify/scripts/bash/setup-plan.sh | 75 + .specify/templates/checklist-template.md | 40 + .specify/templates/plan-template.md | 104 ++ .specify/templates/spec-template.md | 128 ++ .specify/templates/tasks-template.md | 251 ++++ AGENTS.md | 298 ++++ CLAUDE.md | 298 ++++ 275 files changed, 
52660 insertions(+) create mode 100644 .agents/skills/code-quality/SKILL.md create mode 100644 .agents/skills/correctness-and-error-handling/SKILL.md create mode 100644 .agents/skills/create-client-tool/SKILL.md create mode 100644 .agents/skills/dependencies-audit/SKILL.md create mode 100644 .agents/skills/design/SKILL.md create mode 100644 .agents/skills/design/building-pages.md create mode 100644 .agents/skills/design/handling-states.md create mode 100644 .agents/skills/design/picking-components.md create mode 100644 .agents/skills/design/primitive-usage.md create mode 100644 .agents/skills/design/storybook-links.md create mode 100644 .agents/skills/design/writing-copy.md create mode 100644 .agents/skills/dm-limits-and-best-practices/SKILL.md create mode 100644 .agents/skills/flows-code-review/SKILL.md create mode 100644 .agents/skills/graph-viewer/SKILL.md create mode 100644 .agents/skills/graph-viewer/code/GraphViewerCanvas.tsx create mode 100644 .agents/skills/graph-viewer/code/GraphViewerLegend.tsx create mode 100644 .agents/skills/graph-viewer/code/README.md create mode 100644 .agents/skills/graph-viewer/code/ZoomControls.tsx create mode 100644 .agents/skills/graph-viewer/code/graph-config.ts create mode 100644 .agents/skills/graph-viewer/code/graph-service.ts create mode 100644 .agents/skills/graph-viewer/code/index.ts create mode 100644 .agents/skills/graph-viewer/code/types.ts create mode 100644 .agents/skills/graph-viewer/code/useCanvasResize.ts create mode 100644 .agents/skills/graph-viewer/code/useDataModelLoader.ts create mode 100644 .agents/skills/graph-viewer/code/useGraphDataPipeline.ts create mode 100644 .agents/skills/graph-viewer/code/useGraphSelection.ts create mode 100644 .agents/skills/graph-viewer/code/useGraphViewer.tsx create mode 100644 .agents/skills/graph-viewer/code/useNodeBuffer.ts create mode 100644 .agents/skills/graph-viewer/code/useSeedNode.ts create mode 100644 .agents/skills/integrate-atlas-chat/SKILL.md create mode 100644 
.agents/skills/integrate-atlas-chat/code/client.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/index.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/pyodide-react.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/pyodide-runtime.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/pyodide.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/python.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/react.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/session.test.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/session.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/types.ts create mode 100644 .agents/skills/integrate-atlas-chat/code/validation.ts create mode 100644 .agents/skills/integrate-file-viewer/SKILL.md create mode 100644 .agents/skills/integrate-file-viewer/code/CogniteFileViewer.tsx create mode 100644 .agents/skills/integrate-file-viewer/code/DocumentAnnotationOverlay.tsx create mode 100644 .agents/skills/integrate-file-viewer/code/fileResolution.ts create mode 100644 .agents/skills/integrate-file-viewer/code/index.ts create mode 100644 .agents/skills/integrate-file-viewer/code/mimeTypes.ts create mode 100644 .agents/skills/integrate-file-viewer/code/types.ts create mode 100644 .agents/skills/integrate-file-viewer/code/useDocumentAnnotations.ts create mode 100644 .agents/skills/integrate-file-viewer/code/useFileResolver.ts create mode 100644 .agents/skills/integrate-file-viewer/code/useViewport.ts create mode 100644 .agents/skills/integrate-fusion-agent/SKILL.md create mode 100644 .agents/skills/integrate-todo-list/SKILL.md create mode 100644 .agents/skills/integrate-todo-list/code/TodoContext.tsx create mode 100644 .agents/skills/integrate-todo-list/code/TodoItemRow.tsx create mode 100644 .agents/skills/integrate-todo-list/code/TodoPanel.tsx create mode 100644 .agents/skills/integrate-todo-list/code/TodoToolResultCard.tsx create mode 
100644 .agents/skills/integrate-todo-list/code/todoWriteTool.ts create mode 100644 .agents/skills/integrate-todo-list/code/types.ts create mode 100644 .agents/skills/integrate-todo-list/code/useTodoList.ts create mode 100644 .agents/skills/integrate-todo-list/code/useTodoWriteTool.ts create mode 100644 .agents/skills/migrate-app-to-flows/SKILL.md create mode 100644 .agents/skills/performance/SKILL.md create mode 100644 .agents/skills/pull-changes-resolve-conflicts/SKILL.md create mode 100644 .agents/skills/pull-changes-resolve-conflicts/reference.md create mode 100644 .agents/skills/reveal-3d/SKILL.md create mode 100644 .agents/skills/reveal-3d/code/README.md create mode 100644 .agents/skills/reveal-3d/code/reveal/cache/AssetMappingCache.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/cache/CacheProvider.tsx create mode 100644 .agents/skills/reveal-3d/code/reveal/components/Reveal3DResources.tsx create mode 100644 .agents/skills/reveal-3d/code/reveal/components/RevealCanvas.tsx create mode 100644 .agents/skills/reveal-3d/code/reveal/components/RevealKeepAlive.tsx create mode 100644 .agents/skills/reveal-3d/code/reveal/context/InstanceStylingProvider.tsx create mode 100644 .agents/skills/reveal-3d/code/reveal/context/RevealProvider.tsx create mode 100644 .agents/skills/reveal-3d/code/reveal/context/instanceStylingContext.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/context/revealContext.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/use3dDataForSelectedInstance.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useFdmMappings.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useFindRelated3dInstances.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useFocusCamera.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useInstanceStyling.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useInstancesWithBoundingBoxes.ts create mode 100644 
.agents/skills/reveal-3d/code/reveal/hooks/useModels.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/usePrefetchedFdmMappings.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useRelatedInstances.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useRemoveNonReferencedModels.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useRenderTarget.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useReveal.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/hooks/useRevealContext.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/index.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/settings/QualityPresets.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/settings/RevealSettingsController.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/types.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/utils/cdf-types.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/utils/data-mapper.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/utils/executeParallel.ts create mode 100644 .agents/skills/reveal-3d/code/reveal/utils/views.ts create mode 100644 .agents/skills/reveal-3d/evals/evals.json create mode 100644 .agents/skills/reveal-3d/references/implementation.md create mode 100644 .agents/skills/reveal-3d/references/vite-config.md create mode 100644 .agents/skills/security/SKILL.md create mode 100644 .agents/skills/setup-flows-auth/SKILL.md create mode 100644 .agents/skills/setup-python-tools/SKILL.md create mode 100644 .agents/skills/skill-creator/SKILL.md create mode 100644 .agents/skills/skill-creator/agents/analyzer.md create mode 100644 .agents/skills/skill-creator/agents/comparator.md create mode 100644 .agents/skills/skill-creator/agents/grader.md create mode 100644 .agents/skills/skill-creator/assets/eval_review.html create mode 100644 .agents/skills/skill-creator/eval-viewer/generate_review.py create mode 100644 
.agents/skills/skill-creator/eval-viewer/viewer.html create mode 100644 .agents/skills/skill-creator/references/schemas.md create mode 100644 .agents/skills/skill-creator/scripts/__init__.py create mode 100755 .agents/skills/skill-creator/scripts/aggregate_benchmark.py create mode 100755 .agents/skills/skill-creator/scripts/generate_report.py create mode 100755 .agents/skills/skill-creator/scripts/improve_description.py create mode 100755 .agents/skills/skill-creator/scripts/package_skill.py create mode 100755 .agents/skills/skill-creator/scripts/quick_validate.py create mode 100755 .agents/skills/skill-creator/scripts/run_eval.py create mode 100755 .agents/skills/skill-creator/scripts/run_loop.py create mode 100644 .agents/skills/skill-creator/scripts/utils.py create mode 100644 .agents/skills/test-coverage/SKILL.md create mode 100644 .agents/skills/use-topbar/IMPLEMENTATION.md create mode 100644 .agents/skills/use-topbar/INTERVIEW.md create mode 100644 .agents/skills/use-topbar/RULES.md create mode 100644 .agents/skills/use-topbar/SKILL.md create mode 100644 .claude/commands/speckit.analyze.md create mode 100644 .claude/commands/speckit.checklist.md create mode 100644 .claude/commands/speckit.clarify.md create mode 100644 .claude/commands/speckit.implement.md create mode 100644 .claude/commands/speckit.plan.md create mode 100644 .claude/commands/speckit.specify.md create mode 100644 .claude/commands/speckit.tasks.md create mode 100644 .claude/skills/code-quality/SKILL.md create mode 100644 .claude/skills/correctness-and-error-handling/SKILL.md create mode 100644 .claude/skills/create-client-tool/SKILL.md create mode 100644 .claude/skills/dependencies-audit/SKILL.md create mode 100644 .claude/skills/design/SKILL.md create mode 100644 .claude/skills/design/building-pages.md create mode 100644 .claude/skills/design/handling-states.md create mode 100644 .claude/skills/design/picking-components.md create mode 100644 .claude/skills/design/primitive-usage.md create mode 
100644 .claude/skills/design/storybook-links.md create mode 100644 .claude/skills/design/writing-copy.md create mode 100644 .claude/skills/dm-limits-and-best-practices/SKILL.md create mode 100644 .claude/skills/flows-code-review/SKILL.md create mode 100644 .claude/skills/graph-viewer/SKILL.md create mode 100644 .claude/skills/graph-viewer/code/GraphViewerCanvas.tsx create mode 100644 .claude/skills/graph-viewer/code/GraphViewerLegend.tsx create mode 100644 .claude/skills/graph-viewer/code/README.md create mode 100644 .claude/skills/graph-viewer/code/ZoomControls.tsx create mode 100644 .claude/skills/graph-viewer/code/graph-config.ts create mode 100644 .claude/skills/graph-viewer/code/graph-service.ts create mode 100644 .claude/skills/graph-viewer/code/index.ts create mode 100644 .claude/skills/graph-viewer/code/types.ts create mode 100644 .claude/skills/graph-viewer/code/useCanvasResize.ts create mode 100644 .claude/skills/graph-viewer/code/useDataModelLoader.ts create mode 100644 .claude/skills/graph-viewer/code/useGraphDataPipeline.ts create mode 100644 .claude/skills/graph-viewer/code/useGraphSelection.ts create mode 100644 .claude/skills/graph-viewer/code/useGraphViewer.tsx create mode 100644 .claude/skills/graph-viewer/code/useNodeBuffer.ts create mode 100644 .claude/skills/graph-viewer/code/useSeedNode.ts create mode 100644 .claude/skills/integrate-atlas-chat/SKILL.md create mode 100644 .claude/skills/integrate-atlas-chat/code/client.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/index.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/pyodide-react.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/pyodide-runtime.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/pyodide.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/python.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/react.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/session.test.ts create mode 100644 
.claude/skills/integrate-atlas-chat/code/session.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/types.ts create mode 100644 .claude/skills/integrate-atlas-chat/code/validation.ts create mode 100644 .claude/skills/integrate-file-viewer/SKILL.md create mode 100644 .claude/skills/integrate-file-viewer/code/CogniteFileViewer.tsx create mode 100644 .claude/skills/integrate-file-viewer/code/DocumentAnnotationOverlay.tsx create mode 100644 .claude/skills/integrate-file-viewer/code/fileResolution.ts create mode 100644 .claude/skills/integrate-file-viewer/code/index.ts create mode 100644 .claude/skills/integrate-file-viewer/code/mimeTypes.ts create mode 100644 .claude/skills/integrate-file-viewer/code/types.ts create mode 100644 .claude/skills/integrate-file-viewer/code/useDocumentAnnotations.ts create mode 100644 .claude/skills/integrate-file-viewer/code/useFileResolver.ts create mode 100644 .claude/skills/integrate-file-viewer/code/useViewport.ts create mode 100644 .claude/skills/integrate-fusion-agent/SKILL.md create mode 100644 .claude/skills/integrate-todo-list/SKILL.md create mode 100644 .claude/skills/integrate-todo-list/code/TodoContext.tsx create mode 100644 .claude/skills/integrate-todo-list/code/TodoItemRow.tsx create mode 100644 .claude/skills/integrate-todo-list/code/TodoPanel.tsx create mode 100644 .claude/skills/integrate-todo-list/code/TodoToolResultCard.tsx create mode 100644 .claude/skills/integrate-todo-list/code/todoWriteTool.ts create mode 100644 .claude/skills/integrate-todo-list/code/types.ts create mode 100644 .claude/skills/integrate-todo-list/code/useTodoList.ts create mode 100644 .claude/skills/integrate-todo-list/code/useTodoWriteTool.ts create mode 100644 .claude/skills/migrate-app-to-flows/SKILL.md create mode 100644 .claude/skills/performance/SKILL.md create mode 100644 .claude/skills/pull-changes-resolve-conflicts/SKILL.md create mode 100644 .claude/skills/pull-changes-resolve-conflicts/reference.md create mode 100644 
.claude/skills/reveal-3d/SKILL.md create mode 100644 .claude/skills/reveal-3d/code/README.md create mode 100644 .claude/skills/reveal-3d/code/reveal/cache/AssetMappingCache.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/cache/CacheProvider.tsx create mode 100644 .claude/skills/reveal-3d/code/reveal/components/Reveal3DResources.tsx create mode 100644 .claude/skills/reveal-3d/code/reveal/components/RevealCanvas.tsx create mode 100644 .claude/skills/reveal-3d/code/reveal/components/RevealKeepAlive.tsx create mode 100644 .claude/skills/reveal-3d/code/reveal/context/InstanceStylingProvider.tsx create mode 100644 .claude/skills/reveal-3d/code/reveal/context/RevealProvider.tsx create mode 100644 .claude/skills/reveal-3d/code/reveal/context/instanceStylingContext.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/context/revealContext.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/use3dDataForSelectedInstance.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useFdmMappings.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useFindRelated3dInstances.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useFocusCamera.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useInstanceStyling.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useInstancesWithBoundingBoxes.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useModels.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/usePrefetchedFdmMappings.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useRelatedInstances.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useRemoveNonReferencedModels.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useRenderTarget.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useReveal.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/hooks/useRevealContext.ts create mode 100644 
.claude/skills/reveal-3d/code/reveal/index.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/settings/QualityPresets.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/settings/RevealSettingsController.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/types.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/utils/cdf-types.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/utils/data-mapper.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/utils/executeParallel.ts create mode 100644 .claude/skills/reveal-3d/code/reveal/utils/views.ts create mode 100644 .claude/skills/reveal-3d/evals/evals.json create mode 100644 .claude/skills/reveal-3d/references/implementation.md create mode 100644 .claude/skills/reveal-3d/references/vite-config.md create mode 100644 .claude/skills/security/SKILL.md create mode 100644 .claude/skills/setup-flows-auth/SKILL.md create mode 100644 .claude/skills/setup-python-tools/SKILL.md create mode 100644 .claude/skills/skill-creator/SKILL.md create mode 100644 .claude/skills/skill-creator/agents/analyzer.md create mode 100644 .claude/skills/skill-creator/agents/comparator.md create mode 100644 .claude/skills/skill-creator/agents/grader.md create mode 100644 .claude/skills/skill-creator/assets/eval_review.html create mode 100644 .claude/skills/skill-creator/eval-viewer/generate_review.py create mode 100644 .claude/skills/skill-creator/eval-viewer/viewer.html create mode 100644 .claude/skills/skill-creator/references/schemas.md create mode 100644 .claude/skills/skill-creator/scripts/__init__.py create mode 100755 .claude/skills/skill-creator/scripts/aggregate_benchmark.py create mode 100755 .claude/skills/skill-creator/scripts/generate_report.py create mode 100755 .claude/skills/skill-creator/scripts/improve_description.py create mode 100755 .claude/skills/skill-creator/scripts/package_skill.py create mode 100755 .claude/skills/skill-creator/scripts/quick_validate.py create mode 100755 
.claude/skills/skill-creator/scripts/run_eval.py create mode 100755 .claude/skills/skill-creator/scripts/run_loop.py create mode 100644 .claude/skills/skill-creator/scripts/utils.py create mode 100644 .claude/skills/test-coverage/SKILL.md create mode 100644 .claude/skills/use-topbar/IMPLEMENTATION.md create mode 100644 .claude/skills/use-topbar/INTERVIEW.md create mode 100644 .claude/skills/use-topbar/RULES.md create mode 100644 .claude/skills/use-topbar/SKILL.md create mode 100644 .cursor/commands/speckit.analyze.md create mode 100644 .cursor/commands/speckit.checklist.md create mode 100644 .cursor/commands/speckit.clarify.md create mode 100644 .cursor/commands/speckit.implement.md create mode 100644 .cursor/commands/speckit.plan.md create mode 100644 .cursor/commands/speckit.specify.md create mode 100644 .cursor/commands/speckit.tasks.md create mode 100644 .specify/init-options.json create mode 100644 .specify/memory/constitution.md create mode 100755 .specify/scripts/bash/check-prerequisites.sh create mode 100755 .specify/scripts/bash/common.sh create mode 100755 .specify/scripts/bash/setup-plan.sh create mode 100644 .specify/templates/checklist-template.md create mode 100644 .specify/templates/plan-template.md create mode 100644 .specify/templates/spec-template.md create mode 100644 .specify/templates/tasks-template.md create mode 100644 AGENTS.md create mode 100644 CLAUDE.md diff --git a/.agents/skills/code-quality/SKILL.md b/.agents/skills/code-quality/SKILL.md new file mode 100644 index 0000000..bd84e19 --- /dev/null +++ b/.agents/skills/code-quality/SKILL.md @@ -0,0 +1,413 @@ +--- +name: code-quality +description: "MUST be used whenever reviewing a Flows app for code quality, maintainability, or clean code issues — before a PR review, after a feature is complete, or when the user asks for a code review. Do NOT skip linting steps. 
Triggers: code quality, code review, clean code, refactor, maintainability, technical debt, any type, naming, dead code, duplication, DRY, single responsibility, component size, lint, linting, TypeScript strict, dependency injection, file structure." +allowed-tools: Read, Glob, Grep, Shell, Write +metadata: + argument-hint: "[file, directory, or PR branch to review — e.g. 'src/components/AssetPanel.tsx']" +--- + +# Code Quality Review + +Review **$ARGUMENTS** (or the whole app if no argument is given) for code quality issues. Work through every step below in order and report all findings with file paths and line numbers. + +--- + +## Step 1 — Run the linter first + +Before reading any code manually, get a baseline from the automated tools: + +```bash +pnpm run lint +``` + +List every error and warning. Fix all errors before proceeding — lint errors are not negotiable. Warnings should be reviewed and resolved unless there is a documented exception. + +Also run the TypeScript compiler in strict mode to surface any hidden type issues: + +```bash +pnpm exec tsc --noEmit +``` + +List every type error. These must be fixed. + +--- + +## Step 2 — TypeScript type safety + +### 2a — Eliminate `any` types + +Search for `any` usage across the codebase: + +```bash +grep -rn --include="*.ts" --include="*.tsx" -E ": any|as any|<any>" src/ +``` + +For each hit, replace with the correct type. Common substitutions: + +| Instead of | Use | +|------------|-----| +| `any` for unknown external data | `unknown` + type guard or Zod parse | +| `any` for event handlers | `React.ChangeEvent`, `React.MouseEvent`, etc. | +| `any` for CDF responses | The SDK's own response types (import from `@cognite/sdk`) | +| `any[]` for arrays | `T[]` with the correct generic | +| `as any` casts | Proper type narrowing or explicit overloaded function signature | + +The goal is zero `any` in `src/`. If a third-party library forces it, wrap the call in a typed adapter function so `any` does not leak into the app. 
+ +### 2b — Make impossible states unrepresentable + +Use the type system to make invalid states fail at compile time. Fewer reachable states = easier code to read and change. + +**Branded types** — brand primitives so they can't be mixed up. Validate once at the boundary; downstream code trusts the type. + +```ts +type PhoneNumber = string & { __brand: "PhoneNumber" }; + +function parsePhone(input: string): PhoneNumber { + if (!/^\+?\d{10,15}$/.test(input)) throw new Error(`Invalid: ${input}`); + return input as PhoneNumber; +} +``` + +If the project uses a library with native branded-type support (e.g. Effect), use its primitives instead of rolling your own. + +**Discriminated unions over flag bags** — replace boolean/optional combos with an exhaustive union: + +```ts +// Don't — invalid combos representable +type State = { loading: boolean; user?: User; error?: string }; + +// Do — only valid states exist +type State = + | { status: "loading" } + | { status: "success"; user: User } + | { status: "error"; error: string }; +``` + +Search for flag-bag patterns: + +```bash +grep -rn --include="*.ts" --include="*.tsx" -E "loading\?|isLoading.*isError|isSuccess.*isError" src/ +``` + +Flag every type that combines boolean flags where only certain combos are valid. These should be discriminated unions. + +### 2c — Let types flow end-to-end + +DB schema → server → client should share types without manual duplication. Don't restate types you can derive — reach for `Pick`, `Omit`, `Parameters`, `ReturnType`, `Awaited`, `typeof` before writing a new interface. + +```ts +// Don't — duplicate shape, drifts when the row changes +type UserSummary = { id: string; email: Email }; +function renderUser(u: UserSummary) { /* ... */ } + +// Do — derive from the source of truth +type User = Awaited<ReturnType<typeof getUser>>; +function renderUser(u: Pick<User, "id" | "email">) { /* ... 
*/ } +``` + +```bash +# Find manually duplicated type shapes +grep -rn --include="*.ts" --include="*.tsx" -E "^(export )?type \w+Summary|^(export )?interface \w+DTO" src/ +``` + +Flag interfaces that manually restate fields already present on an SDK or DB type — these should use `Pick`/`Omit` instead. + +### 2d — Pass objects, not positional arguments + +Functions with two or more parameters of the same primitive type should receive a named-property object so callers can't silently swap arguments. + +```ts +// Don't — swap two args, still compiles +sendEmail("Welcome!", "Hi there"); + +// Do — order-independent, self-documenting +sendEmail({ to: "alice@x.com", subject: "Welcome!", body: "Hi there" }); +``` + +```bash +# Find functions with multiple string/number parameters (potential swap bugs) +grep -rn --include="*.ts" --include="*.tsx" -E "^\s*(export\s+)?(function|const)\s+\w+\s*\([^)]*string[^)]*string" src/ +``` + +--- + +## Step 3 — Check component size and single responsibility + +List all `.tsx` files with their line counts: + +```bash +node -e "const fs=require('fs'),path=require('path');function walk(d){return fs.readdirSync(d,{withFileTypes:true}).flatMap(e=>{const p=path.join(d,e.name);return e.isDirectory()?walk(p):p.endsWith('.tsx')?[p]:[]})}walk('src').map(p=>({p,l:fs.readFileSync(p,'utf8').split('\n').length})).sort((a,b)=>b.l-a.l).forEach(({l,p})=>console.log(l,p))" +``` + +Flag every component file over **150 lines**. For each, read it and check: + +- Does it do more than one thing? (fetch data AND render UI AND handle form state) +- Can the fetch logic move to a custom hook (`useAssetData`)? +- Can sub-sections be extracted as named sub-components? + +Apply the split only when it creates a genuinely cleaner separation — do not split for the sake of line count alone. A well-named 200-line component is better than three poorly-named 60-line ones. 
+ +--- + +## Step 4 — Find and remove duplicate logic (DRY) + +Search for copy-pasted patterns across hooks, utilities, and components: + +```bash +# Find repeated fetch patterns +grep -rn --include="*.ts" --include="*.tsx" -E "sdk\.(assets|timeseries|events|files)\.(list|retrieve)" src/ + +# Find repeated formatting functions +grep -rn --include="*.ts" --include="*.tsx" -E "toLocaleDateString|toLocaleString|new Date\(" src/ + +# Find repeated className strings longer than 40 chars +grep -rn --include="*.tsx" -E 'className="[^"]{40,}"' src/ +``` + +For each set of duplicates: +- Extract to `src/utils/` if it is a pure function +- Extract to `src/hooks/` if it contains React state or effects +- Extract to a shared component if it is JSX + +--- + +## Step 5 — Enforce dependency injection for external calls + +Components and hooks must not import the CDF client directly. The SDK client must be obtained from context (via `useCogniteClient()` or a prop) so the component is testable in isolation. + +```bash +grep -rn --include="*.ts" --include="*.tsx" -E "new CogniteClient|createCogniteClient" src/ +``` + +Flag any direct client construction outside of the app's bootstrap / auth setup file. The pattern should always be: + +```ts +// GOOD — client comes from context +export function useMyData() { + const sdk = useCogniteClient(); // from Flows auth context + // ... +} + +// BAD — direct construction inside a hook or component +const sdk = new CogniteClient({ project: "my-project", ... }); +``` + +Similarly, Atlas tools should receive their dependencies via `execute`'s closure over a hook-provided ref, not by importing a global singleton. + +--- + +## Step 6 — Verify coding patterns and testability + +Check that the codebase follows the three core patterns required by the Flows app review process. These patterns keep code testable, maintainable, and consistent. 
+ +### 6a — Dependency injection via React context + +Hooks must declare their dependencies through a context type and consume them via `useContext`, not by importing them directly. This enables testing without module-level mocks. + +```bash +# Find hooks that import other hooks/services directly (potential DI violation) +grep -rn --include="*.ts" --include="*.tsx" -E "^import.*from\s+['\"]\.\./" src/hooks/ + +# Find hooks that use useContext for dependency injection (good pattern) +grep -rn --include="*.ts" --include="*.tsx" "useContext" src/hooks/ +``` + +The preferred pattern: + +```typescript +// GOOD — injectable via context +const defaultDependencies = { useDataSource, useAnalytics }; +export type UseMyHookContextType = typeof defaultDependencies; +export const UseMyHookContext = createContext<UseMyHookContextType>(defaultDependencies); +export function useMyHook() { + const { useDataSource } = useContext(UseMyHookContext); +} + +// BAD — hard-coded import, requires vi.mock to test +import { useDataSource } from '../data/useDataSource'; +export function useMyHook() { const data = useDataSource(); } +``` + +For non-React code (utilities, services), use **factory functions with partial dependency overrides**: + +```typescript +type Deps = { serviceFactory: () => SomeService }; +const defaultDeps: Deps = { serviceFactory: () => new SomeServiceImpl() }; +export const doSomething = async (props: Props, depOverrides?: Partial<Deps>) => { + const deps = { ...defaultDeps, ...depOverrides }; + const service = deps.serviceFactory(); +}; +``` + +Flag every hook that imports dependencies directly instead of receiving them through context. These are testability concerns even if tests exist today. + +### 6b — Interface-based services + +Service classes must implement explicit TypeScript interfaces. This keeps production code substitutable and test doubles type-safe. 
+ +```bash +# Find service/class definitions and check for interface implementations +grep -rn --include="*.ts" --include="*.tsx" -E "class\s+\w+(Service|Client|Repository|Manager)" src/ + +# Find unsafe casts in production AND test code +grep -rn --include="*.ts" --include="*.tsx" "as unknown as" src/ +``` + +Flag: +- Service classes that do not implement an explicit interface +- `as unknown as T` casts in either production or test code — this signals poor interface design + +### 6c — ViewModel pattern + +Page-level hooks (`useSomethingViewModel`) must separate business logic from presentation. UI components receive data and callbacks only; they contain no data-fetching, side-effect logic, or direct SDK calls. + +```bash +# Find page/view components +grep -rn --include="*.tsx" --include="*.ts" -l "useQuery\|useMutation\|sdk\.\|client\." src/pages/ src/views/ 2>/dev/null + +# Find ViewModel hooks +grep -rn --include="*.ts" --include="*.tsx" -l "ViewModel" src/hooks/ 2>/dev/null +``` + +Flag: +- Page components that contain `useQuery`, `useMutation`, or direct SDK calls — this logic should be in a ViewModel hook +- Missing ViewModel hooks for pages with non-trivial data logic + +### 6d — Test mock quality + +```bash +# Find vi.mock usage — each should have a comment justifying why context injection wasn't used +grep -rn --include="*.ts" --include="*.tsx" "vi\.mock" src/ + +# Find unsafe test casts +grep -rn --include="*.ts" --include="*.tsx" "as unknown as" src/ | grep -E "\.test\.|\.spec\." 
+``` + +Flag: +- `vi.mock` usage without a justification comment explaining why context injection was not possible +- `as unknown as T` casts in test files — signals poor interface design in the production code + +--- + +## Step 7 — Check naming conventions + +Read a representative sample of files and verify: + +| Artifact | Convention | Examples | +|----------|-----------|---------| +| Files & directories | `kebab-case` | `asset-panel.tsx`, `use-asset-data.ts` | +| React components | `PascalCase` | `AssetPanel`, `NavigationBar` | +| Variables, functions, hooks | `camelCase` | `isLoading`, `fetchAssets`, `useAssetData` | +| Constants (module-level) | `SCREAMING_SNAKE_CASE` | `MAX_ITEMS`, `AGENT_EXTERNAL_ID` | +| TypeScript types & interfaces | `PascalCase` | `AssetNode`, `ChartConfig` | +| Boolean variables | Auxiliary verb prefix | `isLoading`, `hasError`, `canEdit` | + +Search for common violations: + +```bash +# TSX components not in PascalCase (filename starts with lowercase) +node -e "const fs=require('fs'),path=require('path');function walk(d){return fs.readdirSync(d,{withFileTypes:true}).flatMap(e=>{const p=path.join(d,e.name);return e.isDirectory()?walk(p):p.endsWith('.tsx')?[p]:[]})}walk('src').filter(p=>/^[a-z]/.test(path.basename(p))).forEach(p=>console.log(p))" + +# Hook files not prefixed with "use" +node -e "const fs=require('fs');fs.readdirSync('src/hooks').filter(f=>f.endsWith('.ts')&&!f.startsWith('use')).forEach(f=>console.log('src/hooks/'+f))" +``` + +--- + +## Step 8 — Remove dead code + +```powershell +# Find commented-out code blocks (3+ consecutive commented lines) +Get-ChildItem -Recurse -Include "*.ts","*.tsx" src | ForEach-Object { + $file = $_; $lines = Get-Content $file.FullName + $count = 0; $startLine = 0 + for ($i = 0; $i -lt $lines.Count; $i++) { + if ($lines[$i] -match '^\s*//') { + if ($count -eq 0) { $startLine = $i + 1 } + $count++ + } else { + if ($count -ge 3) { "$($file.FullName):$startLine — $count consecutive comment lines" 
} + $count = 0 + } + } + if ($count -ge 3) { "$($file.FullName):$startLine — $count consecutive comment lines" } +} + +# Find console.log/debug statements +grep -rn --include="*.tsx" --include="*.ts" -E "console\.(log|debug|warn|error|info)" src/ + +# Find TODO/FIXME/HACK comments +grep -rn --include="*.tsx" --include="*.ts" -E "(TODO|FIXME|HACK|XXX):" src/ +``` + +Search for unreachable pages (routes defined in the router but whose component is never imported or rendered) and entirely unused files: + +```bash +# Find all .ts/.tsx files and check if they are imported anywhere +for file in $(find src -name "*.ts" -o -name "*.tsx" | grep -v ".test." | grep -v ".spec." | grep -v "node_modules"); do + basename=$(basename "$file" | sed 's/\.[^.]*$//') + imports=$(grep -rn --include="*.ts" --include="*.tsx" "$basename" src/ | grep -v "$file" | wc -l) + if [ "$imports" -eq 0 ]; then + echo "UNUSED: $file" + fi +done + +# Find route definitions and verify their components are imported +grep -rn --include="*.tsx" --include="*.ts" -E "path:\s*['\"]|<Route" src/ +``` + +--- + +## Step 9 — Check file and folder structure + +Verify that the project follows a consistent layout: + +``` +src/ +├── components/ +│ └── <ComponentName>/ +│ ├── <ComponentName>.tsx +│ └── index.ts # re-exports the public API +├── hooks/ # Custom hooks (each file = one hook) +├── utils/ # Pure utility functions (no React) +├── contexts/ # React context providers +├── pages/ or views/ # Route-level components +└── types/ # Shared TypeScript types +``` + +Flag: +- Business logic sitting directly in page components (should be in hooks) +- Utility functions living inside component files (should be in `utils/`) +- Types defined inline in component files when they are used across multiple files (should be in `types/`) +- Missing `index.ts` barrel files for component directories (makes imports verbose) + +--- + +## Step 10 — Report findings + +Produce a structured report grouped by category: + +| Category | File | Line | Issue | Recommendation | +|----------|------|------|-------|----------------| +| TypeScript | `src/hooks/useData.ts` | 18 | `response as any` cast | Import and use `NodeItem` type from 
`@cognite/sdk` | +| Size | `src/components/Dashboard.tsx` | — | 340 lines, mixes fetch and render logic | Extract `useDashboardData` hook (~120 lines) | +| DRY | `src/components/A.tsx`, `src/components/B.tsx` | 45, 62 | Identical date formatter | Extract to `src/utils/formatDate.ts` | +| Naming | `src/hooks/data.ts` | — | File name does not start with `use` | Rename to `useData.ts` | +| Dead code | `src/App.tsx` | 88 | `console.log("debug response", data)` | Remove | + +If no issues are found in a step, state "No issues found" for that step. Do not skip steps silently. + +--- + +## Done + +Summarize the total number of findings by category and list the highest-impact items to address first. Any `any` type and lint error must be treated as blocking — list these separately. diff --git a/.agents/skills/correctness-and-error-handling/SKILL.md b/.agents/skills/correctness-and-error-handling/SKILL.md new file mode 100644 index 0000000..7c8d873 --- /dev/null +++ b/.agents/skills/correctness-and-error-handling/SKILL.md @@ -0,0 +1,364 @@ +--- +name: correctness-and-error-handling +description: "MUST be used whenever fixing correctness and error handling issues in a Flows app. This skill finds AND fixes bugs, missing error states, unhandled rejections, and edge-case failures — it does not just report them. Triggers: correctness, error handling, bug fix, edge case, crash, unhandled, null, undefined, empty state, loading state, error boundary, try catch, async error, useEffect cleanup, type guard, runtime error, robustness." +allowed-tools: Read, Glob, Grep, Shell, Write +metadata: + argument-hint: "[file or directory to fix, or leave blank to fix the whole app]" +--- + +# Correctness & Error Handling Fix + +Find and fix correctness issues and missing error handling in **$ARGUMENTS** (or the whole app if no argument is given). Work through every step below. Each step searches for problems and then **fixes them in place**. Only report issues that cannot be auto-fixed. 
+ +--- + +## Step 1 — Map data flows and fix known defects + +Read these files before checking anything: + +- `src/main.tsx` / `src/App.tsx` — top-level error boundaries and auth flow +- All files matching `**/hooks/*.ts`, `**/contexts/*.tsx` — shared async state +- All files matching `**/api/*.ts`, `**/services/*.ts` — CDF SDK call sites + +For each async data source, note: +- What happens when the request fails (network error, CDF 403, timeout)? +- What does the UI show while loading? +- What does the UI show if the result is empty? + +### Find and fix known defects in critical paths + +```bash +# Find TODO/FIXME/HACK in critical code paths (not test files) +grep -rn --include="*.ts" --include="*.tsx" -E "(TODO|FIXME|HACK|XXX):" src/ | grep -v ".test." | grep -v ".spec." + +# Find "fix" or "broken" or "workaround" markers +grep -rn --include="*.ts" --include="*.tsx" -i -E "(TODO.*fix|workaround|broken|known.?bug|temporary.?hack)" src/ +``` + +For each match in a critical path (data fetching, rendering, auth, navigation): + +1. **Read the surrounding code** to understand the incomplete/broken behavior. +2. **Fix the underlying issue** — implement the missing logic, correct the broken behavior, or add proper error handling. +3. If the fix requires significant architectural changes beyond this skill's scope, **replace the TODO with a safe failure mode**: graceful error handling, a sensible fallback value, or an explicit user-facing message explaining degraded functionality. +4. **Remove the TODO/FIXME/HACK comment** after fixing. The code should speak for itself. + +Do not leave TODOs in critical paths. Every one must be resolved or converted to a safe fallback. + +--- + +## Step 2 — Add top-level error boundary + +Every Flows app must have at least one React Error Boundary wrapping the main content so that an unexpected render-time exception shows a user-friendly message instead of a blank screen. 
+ +```bash +grep -rn --include="*.tsx" --include="*.ts" -E "ErrorBoundary|componentDidCatch|getDerivedStateFromError" src/ +``` + +If no error boundary exists, **create the ErrorFallback component and add the ErrorBoundary wrapper** to `App.tsx`. Install `react-error-boundary` if not present: + +```bash +pnpm add react-error-boundary +``` + +Then add to `App.tsx`: + +```tsx +import { ErrorBoundary } from "react-error-boundary"; + +function ErrorFallback({ error }: { error: Error }) { + return ( +
<div role="alert"> + <h2>Something went wrong</h2> + <pre>{error.message}</pre> + </div>
+ ); +} + +// Wrap the main content: +<ErrorBoundary FallbackComponent={ErrorFallback}> + <MainContent /> +</ErrorBoundary> +``` + +Write the updated `App.tsx` with the ErrorBoundary in place. Do not just suggest it — make the edit. + +--- + +## Step 3 — Wrap unhandled async functions in try/catch + +Search for every `async` function and `Promise` chain that does not have error handling: + +```bash +# Find async functions +grep -rn --include="*.ts" --include="*.tsx" -E "async\s+function|async\s+\(" src/ + +# Find .then() without .catch() +grep -rn --include="*.ts" --include="*.tsx" -E "\.then\(" src/ | grep -v "\.catch\(" +``` + +**Fix each one:** + +- For bare `async` functions that lack try/catch: **wrap the function body** in try/catch. Log the error with context and re-throw so callers/query layers can handle it: + +```ts +async function fetchAssets(sdk: CogniteClient) { + try { + const result = await sdk.assets.list({ limit: 100 }); + return result.items; + } catch (error) { + console.error("Failed to fetch assets:", error); + throw error; + } +} +``` + +- For `.then()` without `.catch()`: **add `.catch()`** to the chain: + +```ts +somePromise.then(handleResult).catch((error) => { + console.error("Operation failed:", error); +}); +``` + +- For TanStack Query consumers (`useQuery`/`useMutation`) missing `isError` handling: **add the error check and error UI** to the component: + +```tsx +const { data, isLoading, isError, error } = useQuery({ + queryKey: ["assets"], + queryFn: () => fetchAssets(sdk), +}); + +if (isError) return <ErrorMessage error={error} />; +``` + +Read each file, make the edit, and write it back. 
+ +--- + +## Step 4 — Add missing loading, error, and empty states to components + +For each component that fetches data, it must have three distinct UI states: + +| State | Required UI | +|-------|-------------| +| Loading | Spinner, skeleton, or loading indicator | +| Error | User-readable message (not a raw error object or blank space) | +| Empty | "No results" / "Nothing here yet" message (not a blank list) | + +Search for components that render data without checking loading state: + +```bash +grep -rn --include="*.tsx" -E "\.(map|filter|find)\(" src/ | grep -v "isLoading\|isPending\|skeleton\|Skeleton" +``` + +For each hit, read the component and **add the missing states directly**: + +- **Missing loading state** — add before the data render: +```tsx +if (isLoading) { + return
<div role="status" aria-busy="true">Loading…</div>; +} +``` + +- **Missing error state** — add after the loading check: +```tsx +if (isError) { + return ( +
<div role="alert"> + <p>Failed to load data. Please try again.</p> + </div>
+ ); +} +``` + +- **Missing empty state** — add after the error check, before the `.map()`: +```tsx +if (!data || data.length === 0) { + return ( +
<div> + <p>No results found.</p> + </div>
+ ); +} +``` + +Insert these checks in the correct order (loading, then error, then empty) above the existing data render. Write each fixed file. + +--- + +## Step 5 — Add type narrowing for external data + +External data (CDF responses, URL params, `localStorage`, `JSON.parse`) must be validated before use. TypeScript types alone are not runtime guarantees. + +```bash +# Find JSON.parse without validation +grep -rn --include="*.ts" --include="*.tsx" -E "JSON\.parse\(" src/ + +# Find localStorage reads +grep -rn --include="*.ts" --include="*.tsx" -E "localStorage\.(get|set)Item" src/ + +# Find useSearchParams usage +grep -rn --include="*.ts" --include="*.tsx" -E "useSearchParams|searchParams\.get" src/ +``` + +**Fix each one:** + +- **`JSON.parse(x) as T`** — replace with Zod safeParse: +```ts +import { z } from "zod"; + +const MySchema = z.object({ /* fields */ }); +const parseResult = MySchema.safeParse(JSON.parse(raw)); +if (!parseResult.success) { + console.warn("Invalid stored data, using defaults:", parseResult.error); + return defaultValue; +} +const validated = parseResult.data; +``` + +- **`searchParams.get("id")`** without null check — add nullish fallback: +```ts +const id = searchParams.get("id") ?? defaultId; +``` + +- **`localStorage.getItem(key)`** used directly — add type guard and fallback: +```ts +const raw = localStorage.getItem(key); +if (raw === null) return defaultValue; +try { + const parsed = JSON.parse(raw); + // validate parsed shape + return isValidShape(parsed) ? parsed : defaultValue; +} catch { + return defaultValue; +} +``` + +Do not cast external data with `as MyType` — that bypasses runtime safety. Read, fix, and write each file. + +--- + +## Step 6 — Fix null, undefined, and unsafe array access + +Read every component that accesses properties of data returned from CDF or passed via props. + +```bash +grep -rn --include="*.tsx" --include="*.ts" -E "\w+\[0\]\." 
src/ +``` + +**Fix each unsafe pattern found:** + +- **Unsafe nested property access** — add optional chaining and nullish coalescing: +```tsx +// Before: asset.properties.space.Asset.name +// After: +const name = asset.properties?.["my-space"]?.["Asset"]?.name ?? "Unknown"; +``` + +- **Unguarded `.map()` on possibly-undefined array** — add nullish fallback: +```tsx +// Before: items.map(renderItem) +// After: +(items ?? []).map(renderItem) +``` + +- **Unsafe array index access** — use `.at()` with optional chaining: +```tsx +// Before: items[0].name +// After: +const first = items.at(0)?.name ?? "—"; +``` + +Read each file with a match, apply the fix, and write it back. + +--- + +## Step 7 — Add useEffect cleanup functions + +Every `useEffect` that sets up a subscription, timer, event listener, or async operation that can outlive the component must return a cleanup function. + +```bash +grep -rn --include="*.tsx" --include="*.ts" -B 2 -A 15 "useEffect" src/ +``` + +For each `useEffect`, check whether cleanup is needed and **add the cleanup function** if missing: + +| Pattern | Fix to add | +|---------|-----------| +| `addEventListener` | Add `return () => removeEventListener(...)` | +| `setInterval` / `setTimeout` | Add `return () => clearInterval(id)` / `clearTimeout(id)` | +| CDF streaming / SSE | Add `return () => stream.close()` | +| `fetch` / CDF SDK call | Add AbortController: `const controller = new AbortController()` at the top, pass `controller.signal` to fetch, add `return () => controller.abort()`, and guard state updates with `if (!controller.signal.aborted)` | +| Zustand / event emitter subscription | Add `return () => unsubscribe()` | + +Reference pattern for async effects: + +```ts +useEffect(() => { + const controller = new AbortController(); + + async function load() { + try { + const data = await fetchWithSignal(controller.signal); + if (!controller.signal.aborted) setState(data); + } catch (err) { + if (err instanceof Error && err.name !== 
"AbortError") { + setError(err); + } + } + } + + load(); + return () => controller.abort(); +}, [id]); +``` + +Read each effect, add the missing cleanup, and write the file. + +--- + +## Step 8 — Add edge case guards + +For each feature, check and **add guards** for: + +- **Empty data**: If zero-item lists are not handled, add an empty state check before rendering. +- **Single item**: If list rendering has off-by-one bugs with a single entry, fix the logic. +- **Maximum data / pagination**: If CDF returns the full `limit` and there are more pages, ensure pagination is communicated to the user. Add a "Load more" or pagination indicator if missing. +- **Concurrent requests / stale results**: If the user can trigger a new request before the previous completes, add stale request cancellation (AbortController or a request ID check). +- **Network offline**: If the app silently fails when offline, add a meaningful error message. + +For Atlas tool `execute` functions, **add argument validation** at the top of every execute function: + +```ts +execute: async (args) => { + if (!args.assetId || typeof args.assetId !== "string") { + return { output: "Missing or invalid assetId", details: null }; + } + // ... safe to proceed +} +``` + +Search for `execute` functions, read each one, add the validation, and write the file. + +--- + +## Step 9 — Report remaining findings + +Produce a structured report covering: + +1. **What was fixed in each step** — summarize the changes made (files edited, patterns fixed). +2. **Remaining issues** — only list issues that could not be auto-fixed (e.g., require architectural changes, need product decisions, or are outside the scope of this skill). 
+ +| Severity | File | Line | Issue | Status | +|----------|------|------|-------|--------| +| HIGH | `src/hooks/useAssets.ts` | 34 | Unhandled promise rejection | FIXED — wrapped in try/catch | +| MEDIUM | `src/components/AssetList.tsx` | 12 | No empty state | FIXED — added empty state check | +| MEDIUM | `src/auth/flow.ts` | 45 | Auth error handling needs product decision | UNFIXED — requires team input | + +If no issues are found in a step, state "No issues found" for that step. Do not skip steps silently. + +--- + +## Done + +Summarize what was fixed by severity. Flag any remaining HIGH issues that could cause data loss, crashes in production, or misleading UI states, and list them first for immediate attention. diff --git a/.agents/skills/create-client-tool/SKILL.md b/.agents/skills/create-client-tool/SKILL.md new file mode 100644 index 0000000..1160f59 --- /dev/null +++ b/.agents/skills/create-client-tool/SKILL.md @@ -0,0 +1,122 @@ +--- +name: create-client-tool +description: "MUST be used whenever creating an AtlasTool (client-side tool) for an Atlas agent. Do NOT manually write AtlasTool definitions or wire them into useAtlasChat — this skill handles the TypeBox schema, execute function, and hook wiring. Prerequisite: integrate-atlas-chat (vendored src/atlas-agent + TypeBox/AJV deps). This includes tools that fetch data, render UI, call APIs, show charts, query local state, or perform any browser-side action. Triggers: AtlasTool, client tool, add tool, create tool, new tool, tool definition, agent tool." +allowed-tools: Read, Glob, Grep, Edit, Write +metadata: + argument-hint: "[tool-name] [brief description of what it does]" +--- + +# Create a Client Tool + +Scaffold a new `AtlasTool` named **$ARGUMENTS** and wire it into the app. 
+ +## Prerequisite + +**`integrate-atlas-chat`** must already be complete: the app should vend the atlas-agent sources under `src/atlas-agent/` (including `react.ts`) and have `@sinclair/typebox`, `ajv`, and `ajv-formats` installed as in that skill. + +## Background + +Client tools let the Atlas Agent invoke logic that runs in the browser — rendering charts, +querying local state, showing UI panels, triggering navigation, etc. The agent decides when +to call the tool; the app executes it and returns a result. + +The flow is: +1. Agent responds with a `clientTool` action +2. The library validates the arguments against the TypeBox schema +3. `execute()` runs in the browser and returns `{ output, details }` +4. `output` (string) is sent back to the agent as the tool result +5. `details` (any shape) is available on `message.toolCalls` for the UI to render + +--- + +## Step 1 — Understand the codebase + +Before writing anything, read: + +- The file where `useAtlasChat` is called (often `src/App.tsx` or a chat hook) to find where `tools` is passed — imports are typically from `./atlas-agent/react` after **`integrate-atlas-chat`** +- Any existing tool definitions to match the file/naming conventions + +--- + +## Step 2 — Define the tool + +Create the tool as a typed constant. Use `Type` from `@sinclair/typebox` to define the parameters schema — this gives both compile-time types and runtime validation (same stack as the vendored atlas-agent from **`integrate-atlas-chat`**). + +```ts +import { Type } from "@sinclair/typebox"; +import type { AtlasTool } from "./atlas-agent/types"; + +export const myTool: AtlasTool = { + name: "my_tool", // snake_case — this is what the agent uses to invoke it + description: + "One sentence describing what this tool does and when the agent should call it.", + parameters: Type.Object({ + exampleParam: Type.String({ description: "What this param is for" }), + optionalNum: Type.Optional(Type.Number({ description: "..." 
})), + }), + execute: async (args) => { + // args is fully typed from the schema above + // Do the work here — call APIs, update state, render UI, etc. + return { + output: "Plain text summary sent back to the agent", + details: { + // Any structured data you want available in the UI via message.toolCalls + }, + }; + }, +}; +``` + +Adjust the `./atlas-agent/...` path if the tool file is not directly under `src/` next to the `atlas-agent` folder (for example `../atlas-agent/types` from `src/tools/`). + +### TypeBox quick reference + +| Schema | Usage | +|---|---| +| `Type.String()` | string | +| `Type.Number()` | number | +| `Type.Boolean()` | boolean | +| `Type.Literal("foo")` | exact value | +| `Type.Union([Type.Literal("a"), Type.Literal("b")])` | enum | +| `Type.Array(Type.String())` | string[] | +| `Type.Object({ ... })` | object | +| `Type.Optional(...)` | mark any field optional | + +Always add a `description` to each field — the agent uses these to understand what to pass. + +--- + +## Step 3 — Wire into useAtlasChat + +Find the `useAtlasChat` call and add the tool to the `tools` array: + +```ts +const { messages, send, ... } = useAtlasChat({ + client: isLoading ? null : sdk, + agentExternalId: AGENT_EXTERNAL_ID, + tools: [myTool], // add here +}); +``` + +--- + +## Step 4 — Render tool results (if needed) + +If the tool returns structured `details`, render them in the message list. +`message.toolCalls` is a `ToolCall[]` — one entry per tool call (client-side and server-side) in call order. + +```tsx +{msg.toolCalls?.map((tc, i) => ( + // tc.name — tool name + // tc.output — the string sent back to the agent + // tc.details — your structured data (cast to your known shape) + <MyToolResult key={i} details={tc.details} /> +))} +``` + +--- + +## Done + +The agent can now invoke `$ARGUMENTS`. Describe what it does clearly in the `description` +field — the agent relies on that string to decide when and how to call the tool. 
diff --git a/.agents/skills/dependencies-audit/SKILL.md b/.agents/skills/dependencies-audit/SKILL.md new file mode 100644 index 0000000..719257a --- /dev/null +++ b/.agents/skills/dependencies-audit/SKILL.md @@ -0,0 +1,306 @@ +--- +name: dependencies-audit +description: "MUST be used whenever fixing dependency issues in a Flows app. This skill finds AND fixes vulnerabilities, outdated packages, deprecated dependencies, and license issues — it does not just report them. Triggers: dependencies, packages, fix dependencies, update packages, fix vulnerabilities, npm audit fix, pnpm audit fix, CVE fix, outdated, deprecated, supply chain, license." +allowed-tools: Read, Glob, Grep, Shell, Write +metadata: + argument-hint: "[path to package.json, or leave blank to audit the root package.json]" +--- + +# Dependencies Fix + +Find and fix all dependency issues in **$ARGUMENTS** (or the root `package.json` if no argument is given) — vulnerabilities, outdated packages, deprecated dependencies, license problems, and supply-chain risks. This skill produces the `review-packages.md` artifact required by the Flows app review process. + +--- + +## Step 1 — Read and list all dependencies + +```bash +# List all dependencies and devDependencies +node -e " + const pkg = require('./package.json'); + console.log('=== Dependencies ==='); + Object.entries(pkg.dependencies || {}).forEach(([name, ver]) => console.log(name + ' @ ' + ver)); + console.log('\\n=== Dev Dependencies ==='); + Object.entries(pkg.devDependencies || {}).forEach(([name, ver]) => console.log(name + ' @ ' + ver)); +" +``` + +Record the total count of dependencies and devDependencies. 
+ +--- + +## Step 2 — Look up npm metadata and update outdated packages + +For each package, gather: +- **Latest version** on npm +- **Weekly downloads** +- **Last publish date** +- **Deprecated** flag + +```bash +# Batch lookup — run for each package (example for a single package) +npm view <package> --json 2>/dev/null | node -e " + const data = JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')); + console.log(JSON.stringify({ + name: data.name, + latest: data['dist-tags']?.latest, + modified: data.time?.modified, + deprecated: data.deprecated || false, + })); +" + +# For weekly downloads, use the npm API +curl -s "https://api.npmjs.org/downloads/point/last-week/<package>" | node -e " + const data = JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')); + console.log(data.downloads); +" +``` + +For efficiency, batch multiple lookups. If the project has many dependencies, use a script: + +```bash +node -e " + const { execSync } = require('child_process'); + const pkg = require('./package.json'); + const allDeps = { ...pkg.dependencies, ...pkg.devDependencies }; + + for (const [name, usedVersion] of Object.entries(allDeps)) { + try { + const info = JSON.parse(execSync('npm view ' + name + ' --json 2>/dev/null', { encoding: 'utf8' })); + const latest = info['dist-tags']?.latest || 'unknown'; + const modified = info.time?.modified || 'unknown'; + const deprecated = info.deprecated ? 'YES' : 'No'; + console.log([name, usedVersion, latest, modified, deprecated].join(' | ')); + } catch { + console.log(name + ' | ' + usedVersion + ' | LOOKUP FAILED'); + } + } +" +``` + +### Fix: Update outdated packages + +For each package that is >1 major version behind, update it: + +```bash +pnpm update <package>@latest +``` + +For packages that are 1+ minor versions behind, update to latest minor: + +```bash +pnpm update <package> +``` + +After updating, run `pnpm install` and `pnpm run build` to verify nothing breaks. 
If a major update breaks the build, revert that specific update and note it as a manual-fix item. + +--- + +## Step 3 — Run security audit and fix vulnerabilities + +```bash +# Run audit with the project's package manager +pnpm audit --json 2>/dev/null || npm audit --json 2>/dev/null + +# Also run production-only audit (what ships to users) +pnpm audit --prod --json 2>/dev/null || npm audit --production --json 2>/dev/null +``` + +Parse the JSON output for: +- Severity counts (critical, high, moderate, low) +- Per-vulnerability details (package, severity, title, patched version, advisory URL) + +Any package with a known CVE is an automatic **Fail** in the health column. + +### Fix: Resolve vulnerabilities + +Run `pnpm audit --fix` to auto-fix what's possible (it adds overrides to `package.json` that force non-vulnerable versions), then run `pnpm install` to apply them. For remaining high/critical CVEs that can't be auto-fixed, manually update the vulnerable package in `package.json` to the patched version and run `pnpm install`. If the patched version has breaking changes, apply the minimum code changes needed to adapt. If a vulnerability is in a transitive dependency, use the `pnpm.overrides` field in `package.json` to force the patched version: + +```json +{ + "pnpm": { + "overrides": { + "vulnerable-package": ">=2.1.0" + } + } +} +``` + +After applying fixes, re-run `pnpm audit` to confirm the vulnerabilities are resolved. Run `pnpm run build` to verify nothing breaks. 
+ +--- + +## Step 4 — Assign health scores and fix Fail-scored packages + +For each package, assign a health indicator: + +| Health | Criteria | +|--------|----------| +| **Pass** | >100k weekly downloads AND updated within last 12 months AND not deprecated AND version is current or near-current (within 1 major) | +| **Warn** | 10k–100k weekly downloads OR >12 months since last publish OR >1 major version behind | +| **Fail** | <10k weekly downloads OR no update in 2+ years OR deprecated OR known CVE | + +Edge cases: +- `@cognite/*` packages: trust Cognite-internal packages even if download counts are low +- `@types/*` packages: trust DefinitelyTyped packages; focus on whether the version matches the main package +- Newly published packages (<6 months old): flag as **Warn** for review, not auto-Fail on low downloads + +### Fix: Replace Fail-scored packages + +For each Fail-scored package: + +- **If deprecated:** find and install the recommended replacement. Update all imports across the codebase. +- **If unmaintained (2+ years):** find an actively maintained alternative with equivalent functionality. Replace it. +- **If low downloads and not `@cognite/*`:** evaluate whether it's truly needed. If a native JS/TS equivalent exists or the functionality is simple, remove the dependency and implement inline. + +After each replacement, run `pnpm install` and `pnpm run build` to verify the replacement works. 
+ +--- + +## Step 5 — Check for supply-chain risks and mitigate + +```bash +# Check for install scripts (preinstall, install, postinstall) +node -e " + const { execSync } = require('child_process'); + const pkg = require('./package.json'); + const allDeps = Object.keys({ ...pkg.dependencies, ...pkg.devDependencies }); + + for (const name of allDeps) { + try { + const info = JSON.parse(execSync('npm view ' + name + ' --json 2>/dev/null', { encoding: 'utf8' })); + const scripts = info.scripts || {}; + const risky = ['preinstall', 'install', 'postinstall'].filter(s => scripts[s]); + if (risky.length > 0) { + console.log('INSTALL SCRIPT: ' + name + ' — ' + risky.join(', ')); + } + } catch {} + } +" + +# Check for packages with very few maintainers (single point of failure) +# This is informational, not blocking +``` + +### Fix: Evaluate and mitigate install script risks + +For each dependency with install scripts, determine if the script is legitimate (e.g., native module compilation for `sharp`, `esbuild`, `better-sqlite3`). Known build tools and native module packages are expected to have install scripts. + +If the package is not a known build tool and has suspicious install scripts, replace it with a safer alternative. After replacement, run `pnpm install` and `pnpm run build` to verify. 
+ +--- + +## Step 6 — Check license compatibility and replace problematic packages + +```bash +# List all licenses +npx license-checker --summary 2>/dev/null || node -e " + const { execSync } = require('child_process'); + const pkg = require('./package.json'); + const allDeps = Object.keys({ ...pkg.dependencies, ...pkg.devDependencies }); + + for (const name of allDeps) { + try { + const info = JSON.parse(execSync('npm view ' + name + ' --json 2>/dev/null', { encoding: 'utf8' })); + console.log(name + ': ' + (info.license || 'UNKNOWN')); + } catch {} + } +" +``` + +Acceptable licenses for Flows apps (commercial distribution): +- MIT, Apache-2.0, BSD-2-Clause, BSD-3-Clause, ISC, 0BSD, Unlicense, CC0-1.0 + +Licenses that need legal review: +- GPL-2.0, GPL-3.0, LGPL-2.1, LGPL-3.0, AGPL-3.0, MPL-2.0, EUPL-1.1 +- Any "UNKNOWN" or missing license + +### Fix: Replace packages with problematic licenses + +For each package with a copyleft license (GPL, AGPL) or unknown license in **production dependencies**, find an MIT/Apache-2.0 licensed alternative and replace it. Update all imports across the codebase. + +For **devDependencies** with copyleft licenses, these are lower risk but still flag for awareness. + +After each replacement, run `pnpm install` and `pnpm run build` to verify. + +--- + +## Step 7 — Generate the review-packages.md artifact (post-fix state) + +Re-run the metadata lookups after all fixes have been applied to capture the post-fix state. 
Then produce the output in the format required by the Flows app review process: + +```markdown +## Package audit: [app name] + +### Dependencies + +| Package | Used version | Latest | Weekly downloads | Last published | Deprecated | CVEs | Health | +| ------- | ------------ | ------ | ---------------- | -------------- | ---------- | ---- | ------ | +| react | ^18.2.0 | 18.3.1 | 25M | 2024-04-26 | No | 0 | Pass | +| some-old-lib | ^1.0.0 | 1.0.3 | 5k | 2021-03-15 | No | 0 | Fail | + +### Dev Dependencies + +| Package | Used version | Latest | Weekly downloads | Last published | Deprecated | CVEs | Health | +| ------- | ------------ | ------ | ---------------- | -------------- | ---------- | ---- | ------ | +| vitest | ^1.6.0 | 2.0.1 | 8M | 2024-07-01 | No | 0 | Pass | + +### Security audit + +| Severity | Count | +| -------- | ----- | +| Critical | 0 | +| High | 0 | +| Moderate | 0 | +| Low | 0 | + +#### Vulnerabilities + +| Package | Severity | Title | Patched in | Advisory | +| ------- | -------- | ----- | ---------- | -------- | +| (none found) | — | — | — | — | + +### License summary + +| License | Count | Packages | +| ------- | ----- | -------- | +| MIT | 45 | react, react-dom, ... | +| Apache-2.0 | 3 | ... 
| + +### Supply-chain flags + +| Package | Risk | Details | +| ------- | ---- | ------- | +| (none found) | — | — | +``` + +--- + +## Step 8 — Report remaining issues + +Summarize what was fixed and what remains: + +### Fixed + +| Category | Count | Details | +|----------|-------|---------| +| Packages updated | N | list of packages and version changes | +| CVEs resolved | N | list of CVEs fixed | +| Deprecated deps replaced | N | old package -> new package | +| License issues resolved | N | old package -> new package | + +### Remaining (could not auto-fix) + +List only issues that could not be automatically fixed: +- Breaking changes from major updates that need manual code adaptation +- Licenses that need legal review (e.g., LGPL in transitive dependencies) +- Packages with no maintained alternative available +- Vulnerabilities with no patched version available yet + +For each remaining item, explain why it could not be auto-fixed and what the app author needs to do. + +--- + +## Done + +State the overall health verdict: how many Pass/Warn/Fail after fixes, how many issues were resolved, and any remaining items that need manual attention from the app author. diff --git a/.agents/skills/design/SKILL.md b/.agents/skills/design/SKILL.md new file mode 100644 index 0000000..ca52ac5 --- /dev/null +++ b/.agents/skills/design/SKILL.md @@ -0,0 +1,52 @@ +--- +name: design +description: Simplified Aura guidance for selecting primitives, keeping token usage consistent, and applying reliable layout/copy/state patterns in Flows and Fusion apps. +allowed-tools: Read, Glob, Grep, Edit, Write +--- + +## Role + +Use Aura as the default UI system for customer-facing product work. Prefer decision-level guidance over exhaustive rules: +- choose the right primitive first, +- apply semantic tokens (no raw values), +- keep layouts and UX states consistent, +- write concise, action-oriented copy. + +Use Storybook for component APIs and exact props. 
Use this skill for "what to choose and when." + + + +Consult this skill whenever you are: + +- Creating or migrating interactive UI, forms, tables, navigation, or data display +- Writing or modifying styles, colors, spacing, or typography +- Choosing components, tokens, or layout patterns +- Creating or restructuring pages and responsive layouts +- Writing or editing any user-facing text +- Building forms, handling API responses, async actions, confirmations, or dynamic content +- Implementing accessibility (keyboard, focus, headings, ARIA, alt text) +- Applying Aura correctly in a Flows or React app + + + + + +| If you are… | Open | +|-------------|------| +| Choosing primitives and deciding what to use when | `primitive-usage.md` | +| Where to look for Storybook, docs, and Figma (router) | `picking-components.md` | +| Structuring a page or choosing a layout pattern | `building-pages.md` | +| Writing any user-facing text | `writing-copy.md` | +| Forms, loading, errors, confirmations, or page-level accessibility | `handling-states.md` | +| Looking up Storybook URLs for foundations or components | `storybook-links.md` | + + + +## Operating principles + +1. Use Aura primitives before custom UI. +2. Follow foundations through semantic tokens and Aura defaults; do not hardcode raw values. +3. If a primitive almost fits, do not override visuals to force it; check variants/props first, then document the gap. +4. Keep behavior predictable and accessible: keyboard support, visible focus, and clear feedback for loading/success/error. +5. Use `storybook-links.md` for canonical component/foundation URLs. +6. Use publicly reachable links — Aura design system docs (Mintlify), Fusion preview Storybook, and Figma as documented in `primitive-usage.md` and `picking-components.md`. 
diff --git a/.agents/skills/design/building-pages.md b/.agents/skills/design/building-pages.md new file mode 100644 index 0000000..30bea7d --- /dev/null +++ b/.agents/skills/design/building-pages.md @@ -0,0 +1,354 @@ +# Building pages and layouts + +## Role + +You are structuring pages for a customer-facing application. Consistent layouts across apps are essential. Every page must use an approved pattern. + +The Aura system uses Tailwind CSS for layout. All layouts use Tailwind flex/grid utilities with Aura spacing tokens. The sidebar component uses Aura's sidebar tokens (bg-sidebar, text-sidebar-foreground, etc.). + +For all Storybook URLs, see `./storybook-links.md`. + + +Source of truth for layout foundations: +https://cognitedata.github.io/aura/storybook/?path=/docs/foundations-layout--docs + +Cross-reference these Storybook stories when implementing any layout pattern (full URLs): + +| Story | URL | Use for | +|-------|-----|---------| +| Breakpoints | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--breakpoints | Official breakpoint values | +| Container Queries | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--container-queries | Responsive within components | +| Column Spans | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--column-spans | 2-col, 3-col, asymmetric splits | +| Layout Compositions | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--compositions | Combining layout parts | +| Sidebar Left | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--sidebar-left-layout | Sidebar implementation | +| Card Grid | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--card-grid-layout | Card grid layout | +| Dashboard | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--dashboard-layout | Dashboard with metrics | +| Comprehensive Dashboard | 
https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--comprehensive-dashboard | Full dashboard | +| Grid Patterns | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--grid-patterns-reference | Grid configuration catalog | +| Code Examples | https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--code-examples | Copy-paste Tailwind code | + +Base: https://cognitedata.github.io/aura/storybook/ + + + +Standard layout primitives used across all patterns: + +CONTENT MAX-WIDTHS: +- max-w-7xl — dashboards, full-width layouts +- max-w-4xl — detail pages +- max-w-2xl — forms, wizard step content +- max-w-sm — search inputs, narrow controls + +SECTION SPACING: +- space-y-8 — between major page sections (e.g. form groups) +- space-y-6 — between sections within a page +- space-y-4 — between items within a section +- space-y-2 — between label and field, tight groupings + +GRID GAPS: +- gap-6 — dashboard grids, chart grids, panel gaps +- gap-4 — card grids, metric grids +- gap-3 — toolbar items, button groups + +PAGE PADDING: +- px-6 py-8 — standard content area (desktop) +- px-4 py-6 — mobile content area +- p-4 — card/panel internal padding +- p-6 — larger card internal padding + + + +The Aura system has dedicated sidebar tokens that differ from the main content area: + +| Token | Purpose | Light value | Dark value | +|-------|---------|-------------|------------| +| bg-sidebar | Sidebar background | mountain-900 | mountain-900 | +| text-sidebar-foreground | Sidebar text | mountain-100 | mountain-100 | +| text-sidebar-primary | Sidebar primary | mountain-600 | mountain-600 | +| text-sidebar-primary-foreground | Active item text | white | white | +| bg-sidebar-accent | Active/hover bg | mountain-700 | mountain-700 | +| text-sidebar-accent-foreground | Active text | white | white | +| border-sidebar-border | Sidebar borders | mountain-800 | mountain-800 | + +Note: The sidebar is ALWAYS dark-themed, even in 
light mode. + + + + + + +3+ top-level sections. Persistent navigation needed. +Most common for multi-page apps. + + + +┌──────────┬─────────────────────────────┐ +│ │ Page Header / Breadcrumb │ +│ Sidebar │─────────────────────────────│ +│ Nav │ │ +│ (dark) │ Main Content Area │ +│ │ (bg-background) │ +│ │ │ +└──────────┴─────────────────────────────┘ + + + +Desktop (1440px+): Sidebar 240px, content fills rest. +Tablet (768px-1439px): Sidebar collapsible via hamburger. +Mobile (below 768px): Sidebar hidden. Hamburger menu. + Consider bottom nav for 3-5 primary sections. + + + +Implement using Storybook **Example: Sidebar Left**: +https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--sidebar-left-layout + + + + + +Data visualizations, metrics, monitoring. Maximum horizontal space needed. + + + +┌─────────────────────────────────────────┐ +│ Top Navigation Bar │ +├─────────────────────────────────────────┤ +│ Page Header + Filters │ +├─────────────────────────────────────────┤ +│ ┌───────┐ ┌───────┐ ┌───────┐ │ +│ │Metric │ │Metric │ │Metric │ │ +│ └───────┘ └───────┘ └───────┘ │ +├─────────────────────────────────────────┤ +│ Charts / Visualizations │ +├─────────────────────────────────────────┤ +│ Data Table │ +└─────────────────────────────────────────┘ + + + +Desktop: Multi-column grid (grid-cols-3 or grid-cols-4). +Tablet: 2-column grid. Charts stack. +Mobile: Single column. Metrics as horizontal scroll. + + + +Implement using Storybook **Example: Dashboard** and **Example: Comprehensive Dashboard**: +- https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--dashboard-layout +- https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--comprehensive-dashboard + + + + + +Data entry, creation flows, configuration, settings with form fields. 
+ + + +┌─────────────────────────────────────────┐ +│ Page Header + Back navigation │ +├─────────────────────────────────────────┤ +│ ┌───────────────────────────────┐ │ +│ │ Form Section 1 (heading) │ │ +│ │ [fields] │ │ +│ ├───────────────────────────────┤ │ +│ │ Form Section 2 (heading) │ │ +│ │ [fields] │ │ +│ └───────────────────────────────┘ │ +├─────────────────────────────────────────┤ +│ Sticky footer: [Cancel] [Save action] │ +└─────────────────────────────────────────┘ + + + +Desktop: Form centered, max-w-2xl (672px) or max-w-3xl. +Tablet: Form fills width with px-6 padding. +Mobile: Full width. Sticky footer stays. Fields stack. + + + +Use centered content (max-w-2xl) and Tailwind patterns from Storybook **Code Examples**: +https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--code-examples + + + + + +Viewing a single record: report details, user profile, item information with related data. + + + +┌─────────────────────────────────────────┐ +│ Breadcrumb: Reports > Q2 Summary │ +├─────────────────────────────────────────┤ +│ Record Header [Title, status, actions] │ +├─────────────────────────────────────────┤ +│ ┌─────────────────┬───────────────┐ │ +│ │ Main Content │ Sidebar │ │ +│ │ (2/3 width) │ (1/3 width) │ │ +│ └─────────────────┴───────────────┘ │ +└─────────────────────────────────────────┘ + + + +Desktop: Two-column (grid-cols-3, main span-2, sidebar span-1). +Tablet: Sidebar below main content. +Mobile: Single column. Sidebar collapses. + + + +Use asymmetric columns from Storybook **Pattern: Column Spans** and composition patterns from **Code Examples**: +- https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--column-spans +- https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--code-examples + + + + + +App preferences, account settings, notification config. 
+ + + +┌─────────────────────────────────────────┐ +│ Page Header: Settings │ +├───────────┬─────────────────────────────┤ +│ Settings │ Section Content │ +│ Nav │ [Form fields / toggles] │ +└───────────┴─────────────────────────────┘ + + + +Desktop: Left nav + content area. +Tablet: Top tabs replacing left nav. +Mobile: Category list → tap opens section full-screen. + + + +Implement using Storybook **Pattern: Layout Compositions**: +https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--compositions + + + + + +Comparison views, editor + preview, master-detail with equal emphasis on both sides. + + + +┌─────────────────────┬─────────────────────┐ +│ │ │ +│ Panel Left │ Panel Right │ +│ (1/2 width) │ (1/2 width) │ +│ │ │ +└─────────────────────┴─────────────────────┘ + + + +Desktop: grid-cols-2, equal columns. +Tablet: grid-cols-2 with narrower gap. +Mobile: Stack vertically (grid-cols-1), or use Segmented Control to switch between panels. + + + +Implement using Storybook **Pattern: Column Spans**: +https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--column-spans + + + + + +Navigation + content + properties panel. IDE-style layouts. Complex editing workflows with context panels. + + + +┌──────────┬───────────────────┬──────────┐ +│ │ │ │ +│ Nav/ │ Main Content │ Props/ │ +│ Tree │ (flexible) │ Detail │ +│ (fixed) │ │ (fixed) │ +│ │ │ │ +└──────────┴───────────────────┴──────────┘ + + + +Desktop (1440px+): All 3 panels visible. +Tablet (768-1439px): Hide right panel, toggle via button. +Mobile (below 768px): Single panel with navigation as Drawer, right panel as bottom sheet or separate route. + + + +Implement using Storybook **Pattern: Layout Compositions**: +https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--compositions + + + + + +Browsing collections — reports, users, assets, items. The most common page type in data-heavy applications. 
+ + + +┌──────────────────────────────────────────┐ +│ Page Header [Title] [Create button] │ +├──────────────────────────────────────────┤ +│ Filters toolbar [Search] [Filters] │ +├──────────────────────────────────────────┤ +│ Table / List │ +│ (with empty state when no data) │ +├──────────────────────────────────────────┤ +│ Pagination │ +└──────────────────────────────────────────┘ + + + +Desktop: Full table with all columns visible. +Tablet: Hide non-essential columns, allow horizontal scroll. +Mobile: Switch to card/list view with stackable filters. + + + +Use Storybook **Example: Card Grid** for card-style list variants and **Grid Patterns** for table/grid configuration: +- https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--card-grid-layout +- https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--grid-patterns-reference + + + + + +Multi-step creation flows, onboarding, configuration wizards, setup processes. + + + +┌──────────────────────────────────────────┐ +│ Step indicator (1 — 2 — 3 — 4) │ +├──────────────────────────────────────────┤ +│ │ +│ Step Content Area │ +│ (centered, max-w-2xl) │ +│ │ +├──────────────────────────────────────────┤ +│ [Back] [Next/Submit] │ +└──────────────────────────────────────────┘ + + + +Desktop: Centered content, horizontal numbered step indicator. +Tablet: Same layout with px-6 padding. +Mobile: Step indicator becomes compact ("Step 2 of 4"), content fills width. + + + +Center step content with max-w-2xl; use Storybook **Code Examples** for step and footer patterns: +https://cognitedata.github.io/aura/storybook/?path=/story/foundations-layout--code-examples + + + + + + +1. Only 1-2 pages? — Sidebar still works, or use top nav. +2. Layout not listed? — Compose from these patterns. Add a short code comment if the composition is genuinely new. +3. Both data display and entry? — Choose by primary purpose. +4. Need resizable panels? 
— Start with split-screen or three-panel, add resize handles as needed. +5. Very wide content (data tables)? — Use full-width-dashboard without max-w constraint, or list-page with horizontal scroll. + diff --git a/.agents/skills/design/handling-states.md b/.agents/skills/design/handling-states.md new file mode 100644 index 0000000..1ae4e32 --- /dev/null +++ b/.agents/skills/design/handling-states.md @@ -0,0 +1,426 @@ +# Handling states, validation, and accessibility + +## Role + +You are implementing how the application responds to user actions and ensuring page-level accessibility. These patterns determine whether users trust the application. Follow them for every form, API call, and user action. + +Aura components handle many accessibility concerns automatically; you are responsible for composition, copy, focus, and page structure. Aura's focus system uses `shadow-focus-ring` (custom shadow token). Never remove or override it. + +All UI elements use Aura components and tokens. Error states use the destructive token family, warnings use warning tokens, success uses success tokens. + +For message wording patterns, see `writing-copy.md`. + +For all Storybook URLs, see `./storybook-links.md`. + + +What Aura components handle automatically: + +| Concern | Aura handles | You verify | +|---------|-------------|-----------| +| Focus indicators | shadow-focus-ring on interactive elements | Not hidden by overflow or z-index | +| Keyboard activation | Button: Enter/Space. Input: standard keys | Custom elements also respond | +| ARIA roles | Correct roles on Dialog, Segmented Control (Tabs ARIA pattern), etc. 
| Custom components have roles | +| Color contrast | Token pairs designed for AA compliance | Page backgrounds don't reduce contrast | +| Dark mode | Semantic tokens adapt automatically | Custom colors also work in dark mode | +| Disabled states | Communicated via aria-disabled | Reason for disabled is accessible | +| Focus trapping | Dialog traps focus when open | You return focus to trigger on close | + + + + + + +- Validate on blur, not on every keystroke +- Show errors inline, adjacent to the field +- Preserve user input on failure (never clear the form) +- Move focus to first error field on submission failure +- Announce errors to screen readers via aria-live + + + +| State | Background | Text | Border | +|-------|-----------|------|--------| +| Error | bg-destructive | text-destructive-foreground | border-destructive | +| Warning | bg-warning | text-warning-foreground | — | +| Success | bg-success | text-success-foreground | — | +| Disabled | bg-disabled | text-disabled-foreground | — | + + + +| Field type | Correct message | Incorrect message | +|-----------|----------------|-------------------| +| Required | "Report name is required." | "Required" | +| Email | "Email must include an @ symbol." | "Invalid" | +| Password | "At least 8 characters." | "Too short" | +| Number | "Value must be between 1 and 100." | "Invalid" | +| Date | "End date must be after start date." | "Invalid date" | + +See `writing-copy.md` for full message patterns. + + + +Every form field must support the validation states applicable to its type. 
Use this table to determine which states to implement: + +| Field type | required | format | length | range | uniqueness | +|-----------|----------|--------|--------|-------|------------| +| Text Input | yes | — | optional | — | optional | +| Email Input | yes | yes | — | — | optional | +| Password | yes | yes | yes | — | — | +| Number Input | yes | — | — | yes | — | +| Date Picker | yes | — | — | yes | — | +| Textarea | yes | — | yes | — | — | +| Select | yes | — | — | — | — | +| Combobox | yes | — | — | — | — | +| Checkbox | — | — | — | — | — | +| File Upload | yes | yes | — | yes (size) | — | + +"yes" = must implement. "optional" = implement if relevant. + + + +A complete form field with all states (default, focused, error, success, disabled): + +import { Input } from '@/components/ui/input'; +import { Label } from '@/components/ui/label'; +import { HelperText } from '@/components/ui/helper-text'; + +{/* Default / Focused state */} +
+ + setName(e.target.value)} + onBlur={validateName} + aria-describedby="report-name-helper" + aria-invalid={!!nameError} + disabled={isSubmitting} + /> + {nameError ? ( + + {nameError} + + ) : ( + + A descriptive name for your report. + + )} +
+ +Key implementation details: +- Label with required indicator (asterisk in text-destructive) +- Input with aria-describedby linking to HelperText +- Input with aria-invalid reflecting error state +- Validation on blur via onBlur handler +- HelperText swaps between hint (default) and error message +- Disabled state during form submission +
+
+ + + +Any action taking more than 300ms must show a loading indicator using Aura components. + + + +| Context | Pattern | Aura component | +|---------|---------|---------------| +| Page load | Skeleton screen | Skeleton | +| Button action | Button disabled + spinner | Button loading state | +| Data refresh | Overlay spinner | Spinner on existing content | +| Long operation | Progress bar + message | Progress | + + + +{/* Button loading during async action */} + + +{/* Skeleton while page loads */} +{isLoading ? ( +
+ + + +
+) : ( + +)} +
+ + +{/* No loading state */} + + +{/* Blank screen while loading */} +{isLoading ? null : } + +
+ + + +Every API failure must show a user-facing message using Aura Alert component. Never fail silently. +See `writing-copy.md` for message wording. + + + +import { Alert, AlertDescription } from '@/components/ui/alert'; +import { AlertCircle } from 'lucide-react'; + +{error && ( + + + + {error} + + + +)} + + + + + +Use Sonner toast for brief confirmations. + + + +import { toast } from 'sonner'; + +// After save +toast.success('Report saved successfully.'); + +// After delete +toast.success('Report deleted.'); + +// After bulk action +toast.success(`${count} items archived.`); + + + +// No feedback +await saveReport(data); +navigate('/reports'); + +// Vague +toast.success('Done!'); + + + + + +Destructive actions must show Dialog with specific action verb. See `writing-copy.md` for copy. + + + +import { + Dialog, DialogContent, DialogDescription, + DialogFooter, DialogHeader, DialogTitle, +} from '@/components/ui/dialog'; + + + + + Delete this report? + + This will permanently remove "{report.name}" and + all associated data. This cannot be undone. + + + + + + + + + + + +{/* Yes/No, no description, wrong variant */} + + + Confirm + Are you sure? + + + + + + + + + +
+ + + + + +- Tab order follows visual reading order +- Every interactive element reachable via Tab +- No keyboard traps +- Skip-to-content link on pages with complex nav + + + + + Skip to main content + + +
+

Reports

+ {/* Content in logical tab order */} +
+
+
+ + + +Use heading levels (H1–H6) in strict sequential order. +- One H1 per page (the page title) +- Never skip levels (H1 directly to H3) +- Never use heading tags for visual sizing — use text-* + classes from the Typography foundation instead + +Aura applies text-foreground to headings by default. + + + + + +Every image needs alt. Icons in buttons need aria-label. + + + +| Type | Approach | Example | +|------|----------|---------| +| Informational | Describe content | alt="Chart: output up 20%" | +| Decorative | Empty | alt="" | +| Icon button | aria-label on parent | aria-label="Delete report" | +| Icon with label | Hide icon | aria-hidden="true" on icon | + + + +{/* Icon + text: hide icon from screen reader */} + + +{/* Icon only: label on button */} + + + + + + +| Scenario | Method | +|----------|--------| +| Search results update | aria-live="polite" | +| Form error | aria-live="assertive" | +| Toast | Sonner handles this | +| Dialog opens | Focus moves to dialog (Aura handles) | +| Dialog closes | Return focus to trigger | + + + +{/* Screen reader announcement for filtered results */} +
+ {results.length} results found for "{query}" +
+ +
+
+ + + +Never use color alone to convey meaning. + + + +{/* Status with text + color */} + + + + + +{/* Color only — invisible to colorblind users */} + + + + +
+ + +Short sentences and simple grammar translate more reliably. Plan for text expansion in localized UIs (for example, German often adds 30–40% length); allow flexible button and title widths. + +For automated checks, use WAVE, axe DevTools, or Lighthouse in Chrome DevTools. For manual verification, unplug the mouse and complete primary tasks with keyboard only; spot-check with VoiceOver (Mac) or NVDA (Windows) for critical flows. + + + +Before submitting any page: +- [ ] Tab through all interactive elements in logical order? +- [ ] Every button/link works with Enter/Space? +- [ ] Every dialog opens/closes with keyboard? +- [ ] Escape closes dialogs, popovers, dropdowns? +- [ ] Every image has appropriate alt text? +- [ ] Every form field has a visible label? +- [ ] Non-color indicator for every status? +- [ ] Headings follow H1 → H2 → H3? +- [ ] Dynamic updates announced to screen readers? +- [ ] Focus ring (shadow-focus-ring) visible on all interactive elements? + + + +**Forms and async** +1. Destructive action with undo? — Still confirm. Mention undo in body: "You can undo within 30 seconds." +2. Bulk delete? — One confirmation: "Delete 12 reports?" +3. Auto-save? — Subtle "Saved" indicator, not toast each time. +4. Error in multi-step flow? — Don't lose progress. Show error on current step. Let user retry. + +**Accessibility** +1. Complex data viz? — Text summary via alt or sr-only text. +2. Drag-and-drop? — Keyboard alternative required. +3. Real-time dashboard? — aria-live="polite", not "assertive". +4. Third-party embed? — iframe with descriptive title. + diff --git a/.agents/skills/design/picking-components.md b/.agents/skills/design/picking-components.md new file mode 100644 index 0000000..d8645a0 --- /dev/null +++ b/.agents/skills/design/picking-components.md @@ -0,0 +1,18 @@ +# Picking components and applying tokens + +This file is a **pointer**. Day-to-day primitive decisions live in **`primitive-usage.md`**. Canonical Storybook URLs in this repo live in **`storybook-links.md`**. 
+ +Published Aura resources (outside this repo): [Aura design system docs](https://cognite-dune-docs.mintlify.app/aura-design-system/index), [Aura Storybook (Fusion preview)](https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/), [Figma library](https://www.figma.com/design/pMnMQvfErZDJzWgrcWCIwZ/WIP---Aura-library). + +## When to open what + +| Need | File | +|------|------| +| Which primitive, when to use it, dos and don'ts | `primitive-usage.md` | +| Page layouts and approved patterns | `building-pages.md` | +| Interface copy | `writing-copy.md` | +| Forms, loading, errors, confirmations, page a11y | `handling-states.md` | +| Storybook URLs for foundations and components | `storybook-links.md` | +| Component props, variants, and foundation token reference | Storybook and [Aura design system docs](https://cognite-dune-docs.mintlify.app/aura-design-system/index) (this file does not duplicate them) | + +Use Storybook and [Aura design system docs](https://cognite-dune-docs.mintlify.app/aura-design-system/index) for props, variants, and examples; use this skill for composition choices and consistency with Fusion and Flows apps. diff --git a/.agents/skills/design/primitive-usage.md b/.agents/skills/design/primitive-usage.md new file mode 100644 index 0000000..c1f9181 --- /dev/null +++ b/.agents/skills/design/primitive-usage.md @@ -0,0 +1,343 @@ +# Aura primitive usage guidance (simplified) + +## Purpose + +Use this file for primitive-level decisions when building Flows and Fusion apps. +It captures usage guidance that is typically missing from component specs and prop tables. + +## Resources + +Links below must stay usable without Cognite VPN or internal auth. Do not use Cognite-internal short URL domains in this skill. 
+ +- Figma library: https://www.figma.com/design/pMnMQvfErZDJzWgrcWCIwZ/WIP---Aura-library +- Aura design system docs: https://cognite-dune-docs.mintlify.app/aura-design-system/index +- Aura Storybook (Fusion preview): https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/ +- Storybook path index in this repo (same `/docs/...` paths; hosts may differ): `./storybook-links.md` + +## What Aura is + +Aura is Cognite's AI-native design system. It provides: +- visual language, +- primitive library, +- usage conventions for app UX. + +Always prefer an Aura primitive before building custom UI. + +## Guidance tiers + +- Foundations: non-negotiable style decisions; use tokens and do not override with raw values. +- Primitives: default building blocks; use these unless there is a clear product reason not to. +- Patterns: repeatable workflows and compositions; use established patterns for consistency across apps. + +## Global primitive rules + +1. Prefer primitives over custom components. +2. Keep behavior accessible (keyboard activation, focus visibility, and clear state changes). +3. Do not hide critical information if users need fast comparison or repeated switching. +4. When selection is required before action, prefer contextual actions tied to that selection. +5. Use Storybook for exact variants, props, and implementation details. + +## Primitive guidance + +Storybook links below use the Fusion preview host; paths match `./storybook-links.md`. + +### Accordion + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-accordion--docs + +**Definition** +Accordion reveals and hides grouped content sections to reduce cognitive load and page density. + +**Use when** +- Grouping settings in side/config panels. +- Breaking long forms into manageable sections. +- Organizing docs/FAQ/help content. +- Showing nested information hierarchies. 
+ +**Use something else when** +- All content must stay visible for comparison/scanning. +- Content is short and easy to read without progressive disclosure. +- Users are making high-stakes or multi-step decisions where hidden content can cause errors. + +**Dos and don'ts** +- Do use clear, specific section titles. +- Do keep icon and heading behavior consistent. +- Do not use for very short/simple content. +- Do not nest accordions. + +**Behavior** +- Header controls expand/collapse via click/tap/Enter/Space. +- Support multi-expand unless product pattern requires single-expand. +- Keep expanded content available to assistive tech. + +**Often used with** +- `Separator`, section headings, and form controls inside panel content. + +### Action toolbar + +**Storybook:** coming soon + +**Definition** +Action toolbar is a transient bottom-aligned action row that appears when users select items (for example in data-heavy views). + +**Use when** +- Actions apply only to selected items. +- You need to reduce persistent toolbar clutter in tables/lists/cards. +- The workflow depends on selected state before next actions are valid. + +**Use something else when** +- Actions are page-level and do not require selection first (use a standard toolbar/page actions). + +**Dos and don'ts** +- Do keep actions contextual to the current selection. +- Do keep the set focused (use overflow when needed). +- Do center it in the container/page scope. +- Do not make it draggable. + +**Behavior** +- Hidden by default; appears after selection. +- Anchored to bottom area; remains until selection clears, action completes, or user navigates away. +- If no reload occurs, it exits after action completion. + +**Often used with** +- Selection patterns in data views, `Checkbox`, `Button`, `Menu`, and `Tooltip` for icon-only actions. 
+ +### Avatar + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-avatar--docs + +**Definition** +Avatar visually represents a user, team, or concept and helps recognition in collaborative UI. + +**Use when** +- Showing people in comments, chat, sharing, or collaborators. +- Representing accounts, teams, or organizations. +- Displaying AI/agent identities in conversational interfaces. + +**Behavior** +- Choose size based on context density. +- Use overflow patterns for constrained spaces (for example +N with menu). +- Can be informational or interactive based on context. +- Can include status badges/dots. + +**Often used with** +- `Badge`, `Tooltip`, `Menu`. + +### Alert + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-alert--docs + +**Definition** +Alert communicates contextual, medium-emphasis information inside page/task flow. It is not a blocking modal. + +**Use when** +- Providing inline guidance/recommendations in the current task. +- Calling attention to warnings/issues that need awareness but are not blocking. +- Offering direct actions that resolve the issue in context. + +**Dos and don'ts** +- Do include action buttons only when actions are directly related to resolving/dismissing the alert. +- Do evaluate simpler feedback methods first (for example field-level validation). +- Do not attach unrelated actions. + +**Placement** +- Align with surrounding content; do not pin flush against dividers. +- Use card style for wrapped content in constrained areas. +- Use strip style for short messages in wider areas. + +**Behavior** +- Inline with page flow (not full-screen blocking). +- Dismissal removes/hides alert per variant. +- Action path should be clear and minimal. + +**Often used with** +- `Button` for direct resolution actions. 
+ +### Badge + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-badge--docs + +**Definition** +Compact label for status, category, or metadata. + +**Use when** +- Surfacing state at a glance (for example active, draft, error). +- Tagging items without taking primary focus from the page. + +**Use something else when** +- The message needs explanation or recovery steps (consider `Alert` or inline text). +- You need a primary action (use `Button`). + +**Often used with** +- `Avatar`, tables and lists, filter chips. + +### Banner + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-banner--docs + +**Definition** +Persistent or dismissible message scoped at page or section level — stronger than inline helper text, broader than a single-field `Alert` in some layouts. + +**Use when** +- Announcing environment or product state (maintenance, trial, feature preview). +- Page-wide outcomes that should stay visible while the user continues. + +**Use something else when** +- Task-specific guidance inside a flow (`Alert`). +- Brief confirmation after an action (`Sonner Toast`). + +### Button + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-button--docs + +**Definition** +Primary control for discrete actions. + +**Use when** +- Committing, navigating a clear next step, or triggering destructive work (with confirmation pattern). + +**Dos and don'ts** +- One primary action per logical section when possible. +- Match variant to risk: destructive actions use destructive variant and confirmation. +- Label with verb + object (see `writing-copy.md`). +- Icon-only actions need an accessible name (`aria-label`). + +**Often used with** +- `Button Group`, `Dialog` / `Alert Dialog`, forms. 
+ +### Dialog and Alert Dialog + +**Storybook:** [Dialog](https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-dialog--docs) · [Alert Dialog](https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-alert-dialog--docs) + +**Definition** +- **Alert Dialog** — short, focused confirmation or acknowledgment; interrupts for a clear binary or limited choice. +- **Dialog** — richer content: forms, multi-field flows, or explanations that do not fit a strip or inline pattern. + +**Use Alert Dialog when** +- Confirming destructive or irreversible actions. +- Blocking until the user chooses a small set of options. + +**Use Dialog when** +- Collecting input or showing structured content that needs focus without leaving the page. + +**Use something else when** +- Inline persistence is enough (`Alert`). +- Only a quick acknowledgement is needed (`Sonner Toast`). + +### Drawer + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-drawer--docs + +**Definition** +Secondary surface that slides in for filters, detail, or medium-length tasks without a full page change. + +**Use when** +- Supporting the main view (filters, record details, auxiliary forms). + +**Use something else when** +- The task needs full attention or multi-step wizard treatment (full page or `Dialog`). +- Content is very short (consider `Popover` or inline). + +### Empty State + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-empty-state--docs + +**Definition** +Placeholder when there is no data yet or results are empty. + +**Use when** +- Lists, tables, charts, or artifacts have zero rows/points. + +**Dos and don'ts** +- Explain what will appear and how to get started. +- Include a single clear CTA when creation/import applies. 
+ +### Segmented Control + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-segmented-control--docs + +**Definition** +Switches between a small number of peer views or modes on the same page. + +**Use when** +- Users switch between a small number of comparable sections (for example, overview vs details vs activity). + +**Use something else when** +- Content is hierarchical or lengthy and users must open multiple sections at once (consider `Accordion` or visible sections). +- Navigating separate routes (tabs/sidebar patterns — see `building-pages.md`). + +**Relationship to Accordion** +- Segmented control swaps visibility of peer panels; accordion stacks expandable sections. Prefer segmented control when users switch modes frequently; accordion when progressive disclosure matters. + +### Sonner Toast + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-sonner-toast--docs + +**Definition** +Lightweight, auto-dismiss feedback for outcomes that do not need a blocking surface. + +**Use when** +- Confirming save, delete, or background completion. +- Non-critical notices the user can miss without breaking a workflow. + +**Use something else when** +- User must read and act before continuing (`Alert Dialog`, `Dialog`, or persistent `Alert` / `Banner`). + +### Table + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-table--docs + +**Definition** +Dense, scannable display of rows and columns with optional selection and actions. + +**Use when** +- Comparing rows, scanning many attributes, or operating on multiple items. + +**Use something else when** +- A simple fixed list of links or single-column items (`List`). +- A primary chart or narrative view (`Card`, charts — see Storybook). 
+ +**Often used with** +- Selection + **Action toolbar** (when selection-gated actions apply), `Pagination`, `Empty State`, row `Checkbox`, `Dropdown Menu` for row actions. + +### Toolbar + +**Storybook:** https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-toolbar--docs + +**Definition** +Persistent strip of primary tools or filters for a page or region — available without selecting rows first. + +**Use when** +- Page-level create/filter/export actions. +- Tools that apply to the whole view or the current query. + +**Use something else when** +- Actions apply only after row/item selection (use **Action toolbar** pattern). + +### Tooltip, Popover, and Hover Card + +**Storybook:** [Tooltip](https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-tooltip--docs) · [Popover](https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-popover--docs) · [Hover Card](https://storybook-aura-22829.fusion-preview.preview.cogniteapp.com/?path=/docs/primitives-hover-card--docs) + +**Definition** +- **Tooltip** — short hint on hover/focus; no heavy interaction inside. +- **Popover** — click-triggered panel for interactive or structured supplemental content. +- **Hover Card** — richer preview on hover for entities (profiles, references). + +**Use Tooltip when** +- Clarifying a control or icon in one line or sentence. + +**Use Popover when** +- User picks options, fills short fields, or reads formatted content on demand. + +**Use Hover Card when** +- Previewing related metadata without leaving context. + +**Use something else when** +- Content is essential to the task — surface it inline or in `Dialog` / `Drawer`. + +## Escalation guidance + +If a primitive does not fit: +1. Check Storybook variants/props first. +2. Compose with existing primitives. +3. If still blocked, note the gap and keep implementation consistent with Aura foundations. 
diff --git a/.agents/skills/design/storybook-links.md b/.agents/skills/design/storybook-links.md new file mode 100644 index 0000000..ace2d69 --- /dev/null +++ b/.agents/skills/design/storybook-links.md @@ -0,0 +1,133 @@ +# Aura Storybook Links + +Canonical reference for all Aura Storybook URLs. If the Storybook +domain or path structure changes, update this file and propagate +to individual skills. + +Base URL: `https://cognitedata.github.io/aura/storybook/` + +Last verified against Storybook: 2026-03-13 + +## Foundations + +| Foundation | URL | +|-----------|-----| +| Colors | https://cognitedata.github.io/aura/storybook/?path=/docs/foundations-colors--docs | +| Effects | https://cognitedata.github.io/aura/storybook/?path=/docs/foundations-effects--docs | +| Layout | https://cognitedata.github.io/aura/storybook/?path=/docs/foundations-layout--docs | +| Typography | https://cognitedata.github.io/aura/storybook/?path=/docs/foundations-typography--docs | + +## Components — Actions and Inputs + +| Component | URL | +|-----------|-----| +| Button | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-button--docs | +| Button Group | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-button-group--docs | +| Input | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-input--docs | +| Input Group | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-input-group--docs | +| Textarea | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-textarea--docs | +| Select | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-select--docs | +| Combobox | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-combobox--docs | +| Checkbox | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-checkbox--docs | +| Radio Group | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-radio-group--docs | +| Switch | 
https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-switch--docs | +| Toggle | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-toggle--docs | +| Slider | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-slider--docs | +| Date Picker | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-date-picker--docs | +| Time Input | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-time-input--docs | +| Calendar | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-calendar--docs | + +## Components — Form Support + +| Component | URL | +|-----------|-----| +| Label | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-label--docs | +| Helper Text | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-helper-text--docs | + +## Components — Layout and Containers + +| Component | URL | +|-----------|-----| +| Card | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-card--docs | +| Accordion | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-accordion--docs | +| Collapsible | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-collapsible--docs | +| Separator | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-separator--docs | +| Swap Slot | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-swap-slot--docs | +| Empty State | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-empty-state--docs | + +## Components — Navigation + +| Component | URL | +|-----------|-----| +| Sidebar | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-sidebar--docs | +| Topbar | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-topbar--docs | +| Breadcrumb | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-breadcrumb--docs | +| Segmented Control | 
https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-segmented-control--docs | +| Menubar | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-menubar--docs | +| Pagination | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-pagination--docs | +| Toolbar | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-toolbar--docs | + +## Components — Data Display + +| Component | URL | +|-----------|-----| +| Table | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-table--docs | +| List | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-list--docs | +| Badge | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-badge--docs | +| Avatar | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-avatar--docs | +| Progress | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-progress--docs | +| Skeleton | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-skeleton--docs | +| Kbd | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-kbd--docs | +| TreeView | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-treeview--docs | + +## Components — Artifact (Domain-Specific) + +All Artifact sub-variants live on the same docs page (`primitives-artifact--docs`). +Chart and Count are also available as standalone components with their own docs pages. 
+ +| Component | Sub-variant | URL | +|-----------|-------------|-----| +| Artifact (Count) | Metric/count display inside an Artifact | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-artifact--docs | +| Artifact (List) | List display inside an Artifact | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-artifact--docs | +| Artifact (Progress) | Progress tracking inside an Artifact | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-artifact--docs | +| Artifact (Alert) | Alert/status inside an Artifact | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-artifact--docs | +| Artifact (Tree View) | Hierarchical data inside an Artifact | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-artifact--docs | +| Artifact (Chart) | Data visualization inside an Artifact | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-artifact--docs | +| Chart (standalone) | Data visualization outside an Artifact | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-chart--docs | +| Count (standalone) | Metric/count display outside an Artifact | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-count--docs | + +## Components — Feedback and Overlays + +| Component | URL | +|-----------|-----| +| Alert | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-alert--docs | +| Alert Dialog | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-alert-dialog--docs | +| Banner | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-banner--docs | +| Dialog | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-dialog--docs | +| Drawer | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-drawer--docs | +| Sonner Toast | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-sonner-toast--docs | +| Tooltip | 
https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-tooltip--docs | +| Popover | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-popover--docs | +| Hover Card | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-hover-card--docs | +| Context Menu | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-context-menu--docs | +| Dropdown Menu | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-dropdown-menu--docs | +| Command | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-command--docs | +| Page Loader | https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-page-loader--docs | + +## Components — AI + +| Component | URL | +|-----------|-----| +| Chain of Thought | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-chain-of-thought--docs | +| Code Block | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-code-block--docs | +| Confirmation | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-confirmation--docs | +| Conversation | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-conversation--docs | +| Inline Citation | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-inline-citation--docs | +| Loader | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-loader--docs | +| Message | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-message--docs | +| Prompt Input | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-prompt-input--docs | +| Reasoning | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-reasoning--docs | +| Shimmer | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-shimmer--docs | +| Sources | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-sources--docs | +| Suggestion | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-suggestion--docs | +| Tool | https://cognitedata.github.io/aura/storybook/?path=/docs/ai-tool--docs | 
diff --git a/.agents/skills/design/writing-copy.md b/.agents/skills/design/writing-copy.md new file mode 100644 index 0000000..90b9600 --- /dev/null +++ b/.agents/skills/design/writing-copy.md @@ -0,0 +1,233 @@ +# Writing copy + +## Role + +You are writing interface copy for Cognite customer-facing applications. Every piece of UX text must be purposeful, concise, conversational, and clear. Always identify the target audience persona before writing — the persona determines reading level, technical vocabulary, and tone. + +For code-level accessibility (keyboard navigation, ARIA, focus, headings, live regions), see `handling-states.md`. + +## Audience personas + +Canonical persona definitions live in the cogdocs repository (`cogdocs/cogdocs-metadata.mdx`, **Audience** section). This summary covers what matters for microcopy decisions. + +| Persona | Technical level | UX copy implication | +|---|---|---| +| `businessUser` | Low | Plain language; outcomes over features; domain terms OK, avoid platform jargon | +| `businessDecisionMaker` | Low | Plain language; ROI, business value, strategic impact; minimal technical detail | +| `appMaker` | Mid | Configuration, automation, outcomes; avoid deep code/API detail | +| `dataAnalyst` | Mid | Analytics, insights, dashboards; data terms OK, keep explanations clear | +| `partner` | Mid–high | Precise; balance technical accuracy with clarity | +| `administrator` | High | Technical terms OK; reliability, security, compliance, access; be precise | +| `dataEngineer` | High | Technical terms OK; pipelines, ingestion, transformation | +| `developer` | High | Technical terms OK; APIs, SDKs, integrations; precise and concise | +| `aiEngineer` | High | Technical terms OK; ML/AI, models, automation | +| `dataScientist` | High | Technical terms OK; experiments, models, analytics | +| `securityEngineer` | High | Technical terms OK; IAM, threats, compliance | +| `solutionArchitect` | High | Technical terms OK; integration, strategy, best 
practices | +| `internal` | Varies | Can use Cognite-internal jargon; match internal conventions | + +**Reading level:** Low = 7th–8th grade; Mid = 9th–10th grade; High = 10th–11th grade. + +When the persona is unknown, default to plain language and outcomes. + +## Voice and tone + +Voice is consistent; tone adapts to the user's emotional state. + +| Scenario | Tone | Example | +|---|---|---| +| First-time onboarding | Friendly, welcoming | "Let's get started — Cognite Data Fusion is ready when you are." | +| Technical documentation | Clear, direct, supportive | "Configure your endpoint and authenticate using your API key." | +| Error messages | Empathetic, constructive | "Something went wrong. Try refreshing, or check your connection." | +| Success states | Encouraging, concise | "Your data is now flowing." | +| Product tours / help | Conversational, helpful | "Want a quick tour? We'll walk you through the essentials in under 2 minutes." | +| High-stakes actions | Serious, transparent | "Delete pipeline? All history will be permanently removed." | + +## Grammar and style + +### Language and capitalization + +- **American English**: color, center, organization, modeling +- **Sentence case everywhere**: "Create data model" — not "Create Data Model". No exceptions for UI text. Only proper nouns and product names are capitalized: Cognite Data Fusion, OPC-UA, Aura. +- **No all-caps** +- **No "CDF" in UI copy** — customers may white-label the platform; use product or feature names instead + +### Numbers and units + +- **Numerals for all numbers**, including those under 10: "6 queries", "3 items", "1 result" +- Non-breaking space between number and unit: "50 Mbps" +- Don't use "(s)" or "(es)" — choose singular or plural based on context + +### Abbreviations and punctuation + +- No Latin abbreviations: use "for example" not "e.g.", "and more" not "etc." 
+- Define acronyms and technical terms when first used (unless writing for technical personas) +- No ampersands (&): use "and" — including in headings +- **Oxford comma**: "apples, oranges, and pears" +- No exclamation marks in UI copy +- No period after labels, tooltip text, or single-sentence bulleted list items; use periods for multiple/complex sentences +- Ellipsis (…): only for ongoing processes or truncated text — use sparingly + +### Pronouns + +- Don't mix "my" and "your" in the same context +- **"My [resource]"** for app-owned items: "My data", "My assets" +- Minimize "I" and "we" representing the application; focus on the user's perspective +- Avoid ambiguous pronouns ("this", "that") without an explicit referent — name the thing + +## Action labels + +Use sentence case with an object: "Edit model", "Delete asset". + +### Approved labels + +| Label | Use when | +|---|---| +| Add | Taking an existing object into a new context ("Add to canvas") | +| Apply | Setting filtered values that affect subsequent system behavior | +| Approve | User agrees; initiates next step in a business process | +| Back | Returning to the previous step in a sequence or hierarchy | +| Cancel | Stopping the current action or closing a modal — warn of data loss | +| Clear | Clearing all fields/selections; restores defaults | +| Close | Closing a page, panel, or secondary window — often icon-only | +| Copy | Copying an object to the clipboard | +| Create | Making a new object from scratch | +| Delete | Permanently destroying an object | +| Discard | Discarding unsaved changes during create/edit | +| Download | Transferring a file from remote to local | +| Duplicate | Creating a copy in the same location as the original | +| Edit | Changing data/values of an existing object | +| Export | Saving data in an external format; typically opens a dialog | +| Import | Bringing data from an external source; typically opens a dialog | +| Next | Advancing to the next step in a wizard | +| Finish 
| Completing a multi-step wizard | +| Open | Opening a drawer, modal, or new page within current context | +| Publish | Making content available to intended users | +| Refresh | Reloading a view that is out of sync with the source | +| Register | Creating a new user account | +| Remove | Removing an object from the current context without destroying it | +| Reset | Reverting to last saved or default state | +| Save | Saving pending changes without closing the window/panel | +| Search | Goal-oriented action to find precise information | +| Select | Choosing one or more options from a list | +| Show / Hide | Revealing or removing an element from view without deleting — use as a pair | +| Sign in / Sign out | Entering or exiting the application | +| Undo / Redo | Reversing or re-applying the most recent action | +| Upload | Transferring a file from local to remote | +| View | Presenting additional information or properties for an object | + +### Labels to avoid + +| Avoid | Use instead | Reason | +|---|---|---| +| Confirm | The specific action verb ("Delete", "Send") | Too vague | +| Log in / Log out | Sign in / Sign out | "Log" is technical jargon | +| Sign up | Register | Avoids confusion with "Sign in" | +| Submit, OK, Yes | The specific outcome verb | Generic; tell users what happens | +| Click here, Read more | Descriptive link text | Inaccessible; not input-agnostic | + +## UI text patterns + +### Titles +Noun phrases, sentence case. Examples: "Asset overview", "Pipeline runs", "Configure integration" + +### Buttons and CTAs +Active imperative verb + object. 2–4 words target, 6 max. Examples: "Save changes", "Delete pipeline", "View details" + +### Error messages +Pattern: `[What failed]. [Why/context if known]. [What to do].` +Examples: +- "Ingestion failed. Check your extractor configuration and try again." +- "Couldn't save changes. Connection lost. Reconnect and retry." 
+Avoid: blame language, dead ends with no recovery path + +### Success messages +Past tense, specific, brief. Pattern: `[Action] [result]` +Avoid "successfully"; that's implied in the pattern +Examples: "Changes saved", "Pipeline started", "Integration configured" + +### Empty states +Explanation + CTA. Example: "No assets yet. Connect a data source to start exploring." + +### Tooltips +One to two sentences, present tense. Pattern: `[What it is]. [What it does or why it matters].` +Examples: +- "Asset ID. The unique identifier for this asset in Cognite Data Fusion." +- "Time granularity. Controls how data points are aggregated in the chart." +Never repeat the label. Never write more than 2 sentences. + +### Confirmation dialogs +State the consequence, not just the action. Pattern: `[What will be lost or affected]. [Reversibility]. [Specific action].` +- Primary CTA: match the specific action ("Delete pipeline", not "Confirm") +- Secondary CTA: always provide a clear exit ("Cancel") +Examples: +- "Delete pipeline? All runs and history will be permanently removed. This can't be undone." +- "Remove team member? They'll lose access to all shared resources immediately." +Avoid: "Are you sure?", manipulative phrasing + +### Form fields +- **Labels**: Clear noun phrases ("Time series ID", "Email address") +- **Placeholder text**: Use sparingly, only for standard formats like "name@example.com" +- **Helper text**: Verb-first; explain why the information is needed + +### Notifications +Verb-first title + contextual description. 10–15 words total. +Example: "Extractor disconnected. Check your network and reconnect." 
+ +## Accessibility + +- Use **"Select"** not "Click" — input-agnostic: mouse, keyboard, touch, voice +- Avoid ambiguous pronouns — screen readers lose surrounding context +- Write descriptive link text: "Read pricing details" not "Click here" +- Alt text by image type: + - Icon → describes function: "Download PDF" not "download icon" + - Link image → describes destination: "Contact support" not "question mark" + - Chart/diagram → summarizes meaning: "Bar chart showing pipeline throughput declining 20% in Q3" + - Decorative image → empty alt text (`alt=""`) + - Never write "image of" or "photo of" +- For charts and metrics, describe key trends or values in adjacent text — don't rely on visual encoding alone +- Target 8–14 words per sentence (8 = 100% comprehension, 14 = 90%) +- Pair visual indicators with text: "Error: field required" alongside a red icon + +## Date and time formatting + +- **Prefer written dates**: "2 January 2023" not "02/01/2023" +- **Relative vs absolute**: ≤24 h from now → relative ("32 min ago"); >24 h → absolute ("2 Jan 2023") +- Always include the year unless obvious from context +- No ordinal numbers: "2 January" not "2nd January" +- Separate date and time with "at": "2 Jan 2023 at 10:00 AM" — no comma +- **12-hour time**: uppercase AM/PM, no periods, space before: "10:00 AM" +- **Time zone**: UTC only; spell out "UTC" in text-only contexts +- Never make the user convert time zones — handle in code +- Ranges: consistent format across start and end; for ongoing processes use absolute start + "ongoing" until complete +- Duration: no comma between units ("10 minutes 3 seconds"); space between number and unit in running text ("3 min"); no space in controls ("3min") + +**Time unit abbreviations** (no periods; same form singular/plural): +ms, s, min, hr, d, wk, mo, yr + +**Day abbreviations** (3 chars for i18n): +Mon, Tue, Wed, Thu, Fri, Sat, Sun + +**Month abbreviations** (4 chars for i18n): +Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Oct, 
Nov, Dec + +## Localization + +- Keep sentences short with the subject near the start — compound clauses increase translation cost +- Maintain consistent terminology and capitalization across strings (critical for translation memory) +- No Latin abbreviations in translatable strings: "for example" not "e.g.", "and more" not "etc." +- Avoid idioms and cultural references +- No ampersands: use "and" +- Small words (a, the, that, is): include in prose; may omit only in space-constrained labels and CTAs + +## Benchmarks + +| Element | Target | Maximum | +|---|---|---| +| Buttons / CTAs | 2–4 words | 6 words | +| Titles | 3–6 words, 40 characters | — | +| Tooltips | 10–20 words | 2 sentences | +| Error messages | 12–18 words | — | +| Instructions | 14 words | 20 words | +| Notifications | 10–15 words total | — | +| Line length | 40–60 characters | 70 characters | diff --git a/.agents/skills/dm-limits-and-best-practices/SKILL.md b/.agents/skills/dm-limits-and-best-practices/SKILL.md new file mode 100644 index 0000000..2b1ad86 --- /dev/null +++ b/.agents/skills/dm-limits-and-best-practices/SKILL.md @@ -0,0 +1,613 @@ +--- +name: dm-limits-and-best-practices +description: "Reference skill for CDF Data Modeling API best practices. Covers concurrency limits (avoiding 429s), pagination patterns for instances.list and instances.query, batching write operations, search vs filter guidance, and the QueuedTaskRunner (Semaphore) utility for controlling concurrent requests. Triggers: DMS limits, 429 error, rate limit, pagination, cursor, nextCursor, batching, semaphore, QueuedTaskRunner, cdfTaskRunner, instances.search, instances.list, instances.query, instances.upsert, concurrency, deadlock." +allowed-tools: Read, Glob, Grep, Edit, Write +metadata: + argument-hint: "" +--- + +# CDF Data Modeling: Limits, Concurrency & Best Practices + +This is a reference skill. When writing or reviewing code that calls CDF Data Modeling APIs, apply the patterns below. 
+ +--- + +## DMS Limits Reference + +For the latest concurrency limits, resource limits, and property value limits, see the official documentation: +**https://docs.cognite.com/cdf/dm/dm_reference/dm_limits_and_restrictions** + +Key things to be aware of: +- Instance **apply**, **delete**, and **query** operations each have their own concurrent request limits +- Exceeding these limits returns **429 Too Many Requests** +- Transformations consume a large portion of the concurrency budget, leaving less for other clients +- `instances.list` has a max page size (use pagination for complete results) +- `instances.query` table expressions each have their own item limit +- `instances.upsert` accepts up to 1000 items per call +- `in` filters accept at most 1000 values per expression; larger sets must be split into batches + +--- + +## Search vs Filter: When to Use Which + +### `instances.search` — Free-text search on text properties + +Use `instances.search` when you need fuzzy/text matching on string fields (names, descriptions, etc.). It supports an `operator` parameter: + +- **`AND`** (default) — Narrow search. All terms must match. Use when the user provides a specific query. +- **`OR`** — Broad "shotgun" search. Any term can match. Use for exploratory/typeahead search where you want maximum recall. 
+ +```typescript +// Narrow search: find a specific cell by name (AND — all terms must match) +const exactResults = await client.instances.search({ + view: { type: 'view', ...PROCESS_CELL_VIEW }, + query: 'reactor tank A', + properties: ['name'], + operator: 'AND', + limit: 10, +}); + +// Broad search: typeahead/autocomplete (OR — any term can match) +const broadResults = await client.instances.search({ + view: { type: 'view', ...BATCH_VIEW }, + query: 'BUDE completed', + properties: ['name', 'description', 'batchStatus'], + operator: 'OR', + limit: 10, +}); +``` + +You can combine `search` with `filter` to further constrain results with exact-match conditions: + +```typescript +// Text search + exact filter: search for "pump" but only in active nodes +const filtered = await client.instances.search({ + view: { type: 'view', ...PROCESS_CELL_VIEW }, + query: 'pump', + properties: ['name', 'description'], + filter: { + equals: { + property: getContainerProperty(MY_CONTAINER, 'status'), + value: 'active', + }, + }, + limit: 20, +}); +``` + +### `instances.list` / `instances.query` with `filter` — Exact-match filtering + +Use `filter` when you need precise, deterministic matching (equals, range, in, hasData, etc.). No fuzzy matching — values must match exactly. 
+ +```typescript +// Exact match: get all completed batches +const completedBatches = await client.instances.list({ + instanceType: 'node', + sources: [{ source: { type: 'view', ...BATCH_VIEW } }], + filter: { + equals: { + property: getContainerProperty(BATCH_CONTAINER, 'batchStatus'), + value: 'completed', + }, + }, + limit: 1000, +}); +``` + +### Decision Guide + +| Need | Use | +| ----------------------------------- | ----------------------------- | +| User typing in a search box | `instances.search` with `OR` | +| Find a specific item by name | `instances.search` with `AND` | +| Filter by status, date range, enums | `filter` on list/query | +| Text search + exact constraints | `instances.search` + `filter` | + +### `in` filter value limit (1000) and batching + +CDF `in` filters support a maximum of 1000 values in a single filter expression. If you need to filter against more than 1000 IDs, split values into chunks and issue multiple requests, then merge results. + +```typescript +const IN_FILTER_BATCH_SIZE = 1000; +// Reuse the Chunking Utility defined in the Batching Write Operations section. + +async function listByExternalIds( + client: CogniteClient, + externalIds: string[], +): Promise<CDFNodeResponse[]> { + const idBatches = chunk(externalIds, IN_FILTER_BATCH_SIZE); + const responses = await Promise.all( + idBatches.map((batch) => + cdfTaskRunner.schedule(() => + client.instances.list({ + instanceType: 'node', + sources: [{ source: { type: 'view', ...MY_VIEW } }], + filter: { + in: { + property: ['node', 'externalId'], + values: batch, + }, + }, + limit: 1000, + }) + ) + ) + ); + + return responses.flatMap((r) => r.items); +} +``` + +--- + +## QueuedTaskRunner (Semaphore) + +**Always use the global `cdfTaskRunner`** to wrap CDF API calls. It limits concurrent requests and prevents 429 errors and deadlocks. 
+ +### Source Code + +If the project does not already have a semaphore utility, create `src/shared/utils/semaphore.ts` with this implementation: + +```typescript +/** + * AbortError thrown when a queued task is cancelled + */ +export class AbortError extends Error { + public constructor(message: string = 'Aborted') { + super(message); + this.name = 'AbortError'; + } +} + +/** + * A queued task together with the settle callbacks of the promise returned by schedule(). + */ +type PendingTask<AsyncFn, AsyncFnResult> = { + resolve: (result: AsyncFnResult) => void; + reject: (error: unknown) => void; + fn: AsyncFn; + key?: string; +}; + +const DEFAULT_MAX_CONCURRENT_TASKS = 15; + +/** + * QueuedTaskRunner for controlling concurrent operations + * Used to limit concurrent CDF API requests to avoid rate limiting and deadlocks + * Essentially a semaphore that allows a limited number of tasks to run at once. + */ +export default class QueuedTaskRunner< + // `any` lets one shared, un-parameterized runner schedule calls with different result types + AsyncFn extends () => Promise<any>, + AsyncFnResult = Awaited<ReturnType<AsyncFn>>, +> { + // Tasks waiting for a free slot, in scheduling order + private pendingTasks: PendingTask<AsyncFn, AsyncFnResult>[] = []; + // Number of tasks currently running (started but not yet settled) + private currentPendingTasks: number = 0; + private readonly maxConcurrentTasks: number = 1; + + public constructor( + maxConcurrentTasks: number = DEFAULT_MAX_CONCURRENT_TASKS + ) { + this.maxConcurrentTasks = maxConcurrentTasks; + } + + /** + * Queue `fn` and run it as soon as a slot is free. + * When `options.key` is given, queued (not yet started) tasks with the same key are rejected with AbortError. + * @returns a promise that settles with the outcome of `fn` + */ + public schedule( + fn: AsyncFn, + options: { key?: string } = {} + ): Promise<AsyncFnResult> { + this.startTrackingTime(); + + return new Promise((resolve, reject) => { + if (options.key !== undefined) { + // Cancel existing tasks with the same key (deduplication) + this.pendingTasks + .filter((task) => task.key === options.key) + .forEach((task) => task.reject(new AbortError())); + + this.pendingTasks = this.pendingTasks.filter( + (task) => task.key !== options.key + ); + } + + this.pendingTasks.push({ + resolve, + reject, + fn, + key: options.key, + }); + + this.attemptConsumingNextTask(); + }); + } + + /** + * Start the next queued task if a slot is free; when it settles, free the slot and try again. + */ + public async attemptConsumingNextTask(): Promise<void> { + if (this.pendingTasks.length === 0) return; + if (this.currentPendingTasks >= this.maxConcurrentTasks) return; + + const pendingTask = this.pendingTasks.shift(); + if 
(pendingTask === undefined) { + throw new Error('pendingTask is undefined, this should never happen'); + } + + this.currentPendingTasks++; + const { fn, resolve, reject } = pendingTask; + + try { + const result = await fn(); + resolve(result); + } catch (e) { + reject(e); + } finally { + this.currentPendingTasks--; + this.tick(); + this.attemptConsumingNextTask(); + } + } + + /** + * Drop every queued (not yet started) task. + * Their promises are rejected with AbortError so callers awaiting schedule() do not hang forever. + */ + public clearQueue = (): void => { + const droppedTasks = this.pendingTasks; + this.pendingTasks = []; + droppedTasks.forEach((task) => task.reject(new AbortError())); + }; + + private startTime: number | null = null; + + private startTrackingTime = (): void => { + if (this.startTime === null) { + this.startTime = performance.now(); + } + }; + + private tick = (): void => { + if (this.pendingTasks.length === 0) { + this.startTime = null; + } + }; +} + +/** + * Global task runner for CDF API requests + * Limits concurrent requests to avoid 429 rate limiting and deadlocks + */ +export const cdfTaskRunner = new QueuedTaskRunner(DEFAULT_MAX_CONCURRENT_TASKS); +``` + +### Usage Pattern + +Always wrap CDF calls with `cdfTaskRunner.schedule()`: + +```typescript +import { cdfTaskRunner } from '../../../../shared/utils/semaphore'; + +// Single query +export async function fetchBatches(client: CogniteClient): Promise<CDFBatch[]> { + return cdfTaskRunner.schedule(async () => { + const response = await client.instances.query({ + with: { /* ... */ }, + select: { /* ... 
*/ }, + }); + return response.items?.nodes || []; + }); +} + +// Multiple parallel queries (safe — the semaphore limits concurrency) +export async function enrichBatch( + client: CogniteClient, + batch: CDFBatch +): Promise { + const [currentOp, lastOp, cells, material] = await Promise.all([ + fetchCurrentOperation(client, batch.space, batch.externalId), + fetchLastCompletedOperation(client, batch.space, batch.externalId), + fetchProcessCells(client, batch.space, batch.externalId), + fetchMaterial(client, batch.space, batch.externalId), + ]); + return { currentOp, lastOp, cells, material }; +} + +// Each of the above functions internally uses cdfTaskRunner.schedule(), +// so Promise.all is safe — the semaphore prevents exceeding concurrency limits +``` + +### Deduplication with Keys + +Use the `key` option to cancel stale requests when the same query is triggered again (e.g., user changes filters quickly): + +```typescript +const result = await cdfTaskRunner.schedule( + async () => client.instances.query({ /* ... */ }), + { key: `batch-flow-${batchId}` } +); +// If another call with the same key arrives before this completes, +// the previous pending call is rejected with AbortError +``` + +--- + +## Pagination + +DMS `instances.list` returns at most `limit` items and a `nextCursor` for the next page. +DMS `instances.query` uses a `cursors` object keyed by table expression name. 
+ +### instances.list Pagination + +```typescript +async function fetchAllNodes(client: CogniteClient): Promise<CDFNodeResponse[]> { + const allItems: CDFNodeResponse[] = []; + let cursor: string | undefined = undefined; + + do { + const response = await client.instances.list({ + instanceType: 'node', + sources: [{ source: { type: 'view', ...MY_VIEW } }], + filter: { + equals: { + property: getContainerProperty(MY_CONTAINER, 'status'), + value: 'active', + }, + }, + limit: 1000, + cursor, + }); + + allItems.push(...response.items); + cursor = response.nextCursor; + } while (cursor); + + return allItems; +} +``` + +### instances.query Pagination + +The `query` endpoint returns `nextCursor` as a `Record<string, string>` (one cursor per table expression). Use it via the `cursors` parameter: + +```typescript +import { isEmpty } from 'lodash'; + +async function fetchAllResults( + client: CogniteClient +): Promise<{ results: CDFResult[]; edges: EdgeDefinition[] }> { + const QUERY_LIMIT = 10_000; + + const fetchPage = async ( + nextCursors?: Record<string, string> + ): Promise<{ results: CDFResult[]; edges: EdgeDefinition[] }> => { + const { items, nextCursor } = await client.instances.query({ + with: { + results: { + limit: QUERY_LIMIT, + nodes: { + filter: { + hasData: [{ type: 'view', ...RESULT_VIEW }], + }, + }, + }, + relatedEdges: { + limit: QUERY_LIMIT, + edges: { + from: 'results' as const, + maxDistance: 1, + direction: 'outwards' as const, + filter: { + equals: { + property: ['edge', 'type'], + value: MY_EDGE_TYPE, + }, + }, + }, + }, + }, + cursors: nextCursors, // Pass cursors from previous page + select: { + results: { + sources: [ + { source: { type: 'view', ...RESULT_VIEW }, properties: ['*'] }, + ], + }, + relatedEdges: {}, + }, + }); + + const results = (items?.results || []) as CDFResult[]; + const edges = (items?.relatedEdges || []).filter( + (e) => e.instanceType === 'edge' + ); + + // Recurse if more pages exist + if (!isEmpty(nextCursor)) { + const next = await fetchPage(nextCursor); + return { + 
results: [...results, ...next.results], + edges: [...edges, ...next.edges], + }; + } + + return { results, edges }; + }; + + return fetchPage(); +} +``` + +### Pagination + QueuedTaskRunner Combined + +Always wrap paginated fetches with the semaphore to avoid saturating the concurrency budget: + +```typescript +export async function fetchAllWithPagination( + client: CogniteClient +): Promise<CDFNodeResponse[]> { + return cdfTaskRunner.schedule(async () => { + const allItems: CDFNodeResponse[] = []; + let cursor: string | undefined = undefined; + + do { + const response = await client.instances.list({ + instanceType: 'node', + sources: [{ source: { type: 'view', ...MY_VIEW } }], + filter: { /* ... */ }, + limit: 1000, + cursor, + }); + + allItems.push(...response.items); + cursor = response.nextCursor; + + // Optional: break early if you have enough data + if (allItems.length >= 500) break; + } while (cursor); + + return allItems; + }); +} +``` + +--- + +## Batching Write Operations + +When upserting many instances, chunk them to stay under the per-call item limit, and send the chunks through the semaphore to stay under the apply concurrency limit. Each `instances.upsert` call accepts up to 1000 items. + +### Chunking Utility + +```typescript +function chunk<T>(arr: T[], size: number): T[][] { + const chunks: T[][] = []; + for (let i = 0; i < arr.length; i += size) { + chunks.push(arr.slice(i, i + size)); + } + return chunks; +} +``` + +### Batched Upsert with QueuedTaskRunner + +```typescript +const UPSERT_BATCH_SIZE = 1000; + +async function batchUpsertNodes( + client: CogniteClient, + nodes: NodeOrEdgeCreate[] +): Promise<void> { + const chunks = chunk(nodes, UPSERT_BATCH_SIZE); + + // Process chunks through the semaphore — safe even with Promise.all + await Promise.all( + chunks.map((batch) => + cdfTaskRunner.schedule(async () => { + await client.instances.upsert({ + items: batch, + }); + }) + ) + ); +} +``` + +### Batched Delete with QueuedTaskRunner + +Instance deletes have an even stricter concurrency limit. 
Use a separate, more restrictive task runner: + +```typescript +import QueuedTaskRunner from '../../../../shared/utils/semaphore'; + +// Dedicated runner for deletes (stricter concurrency — check docs for current limit) +const deleteTaskRunner = new QueuedTaskRunner(2); + +async function batchDeleteNodes( + client: CogniteClient, + nodeIds: { space: string; externalId: string }[] +): Promise<void> { + const chunks = chunk(nodeIds, 1000); + + for (const batch of chunks) { + await deleteTaskRunner.schedule(async () => { + await client.instances.delete( + batch.map((id) => ({ + instanceType: 'node' as const, + ...id, + })) + ); + }); + } +} +``` + +--- + +## Common Pitfalls + +### 1. Deadlocks from Nested Semaphore Calls + +If function A holds a semaphore slot and calls function B which also needs a slot, you can deadlock if all slots are occupied. **Acquire the semaphore at exactly one level of a call chain**: either only in the outermost function, or only in the inner functions that issue the requests (as in the GOOD example below). Never let a task that holds a slot wait on another call that goes through the same semaphore. + +```typescript +// BAD: Nested semaphore — can deadlock +async function fetchAndEnrich(client: CogniteClient) { + return cdfTaskRunner.schedule(async () => { + const batches = await fetchBatches(client); // This also calls cdfTaskRunner.schedule! + // If all slots are held by fetchAndEnrich callers, fetchBatches will never run + }); +} + +// GOOD: Let inner functions own the semaphore +async function fetchAndEnrich(client: CogniteClient) { + const batches = await fetchBatches(client); // Has its own semaphore call + const enriched = await Promise.all( + batches.map((b) => enrichBatch(client, b)) // Each has its own semaphore call + ); + return enriched; +} +``` + +### 2. Forgetting Pagination + +DMS returns at most `limit` items. If you don't paginate, you silently lose data. Always check `nextCursor`: + +```typescript +// BAD: May miss data +const response = await client.instances.list({ limit: 1000, /* ... */ }); +const items = response.items; // Could be incomplete! 
+ +// GOOD: Paginate +const allItems = []; +let cursor; +do { + const response = await client.instances.list({ limit: 1000, cursor, /* ... */ }); + allItems.push(...response.items); + cursor = response.nextCursor; +} while (cursor); +``` + +### 3. Unbounded Promise.all Without Semaphore + +Firing many parallel API calls will hit the 429 limit immediately: + +```typescript +// BAD: Too many simultaneous requests +await Promise.all(batchIds.map((id) => client.instances.query({ /* ... */ }))); + +// GOOD: Each call goes through the semaphore +await Promise.all( + batchIds.map((id) => + cdfTaskRunner.schedule(() => client.instances.query({ /* ... */ })) + ) +); +``` + +### 4. Query Limit per Table Expression + +Each table expression in `instances.query` has its own `limit`. If your traversal might return more items than the limit in a single expression, you must paginate using the `cursors` parameter. + +### 5. Oversized `in` Filters + +`in` filters are capped at 1000 values per expression. Passing more than 1000 values in a single `in` filter can fail or produce incomplete behavior depending on endpoint/version. Always chunk the values and run batched requests. 
+ +--- + +## Summary Checklist + +- [ ] Wrap all CDF API calls with `cdfTaskRunner.schedule()` +- [ ] Paginate `instances.list` calls using `cursor` / `nextCursor` +- [ ] Paginate `instances.query` calls using `cursors` / `nextCursor` when data may exceed limits +- [ ] Chunk write operations to 1000 items per `instances.upsert` call +- [ ] Use a separate, stricter task runner for deletes +- [ ] Avoid nesting `cdfTaskRunner.schedule()` calls to prevent deadlocks +- [ ] Use `Promise.all` with semaphore-wrapped functions, never with raw API calls +- [ ] Use `instances.search` for text matching, `filter` for exact-match queries +- [ ] Split `in` filter values into batches of at most 1000 and merge responses +- [ ] Refer to https://docs.cognite.com/cdf/dm/dm_reference/dm_limits_and_restrictions for current limits diff --git a/.agents/skills/flows-code-review/SKILL.md b/.agents/skills/flows-code-review/SKILL.md new file mode 100644 index 0000000..39a5601 --- /dev/null +++ b/.agents/skills/flows-code-review/SKILL.md @@ -0,0 +1,40 @@ +--- +name: flows-code-review +description: >- + Run a full Flows app platform review against a React/TypeScript CDF codebase, + following the cognitedata/dune-app-reviews scoring criteria. Produces three + artifacts: review-files.md (per-file inventory), review-packages.md (dependency + audit), and review-report.md (scored report with must/should/nice-fix items). + Use when the user asks for a Flows app review, pre-submit review, approval + review, app certification review, code quality audit, CDF platform review, or + "run dune-review" on a codebase before submission. +allowed-tools: Read, Glob, Grep, Shell, Write +--- + +# Flows Code Review + +Fetch the official review command and follow it exactly: + +```bash +gh api repos/cognitedata/dune-app-reviews/contents/.claude/commands/dune-review.md \ + --jq '.content' | base64 -d +``` + +Adapt it for a **local developer review**: +- Treat the **current workspace** as the app under review. 
+- Skip all ticket, PR, overview, submodule, and `reviews/<ticket>/...` setup steps. +- If the upstream command asks for Jira ticket or PR input, ignore that requirement and continue with the local codebase. +- Use `reviews/flows-code-review/feedback-round-<N>/` as the artifact directory for local reviews. +- If no local feedback round exists yet, use `reviews/flows-code-review/feedback-round-1/`. For reruns, increment the round number. + +After the review artifacts are written, fetch the official verification command and follow it too: + +```bash +gh api repos/cognitedata/dune-app-reviews/contents/.claude/commands/dune-review-verify.md \ + --jq '.content' | base64 -d +``` + +Adapt verification the same way: +- Skip ticket and feedback-round lookup. +- Read the three artifacts from `reviews/flows-code-review/feedback-round-<N>/` instead of `reviews/<ticket>/feedback-round-N/`. +- Verify the review against the local source code before declaring it complete. diff --git a/.agents/skills/graph-viewer/SKILL.md b/.agents/skills/graph-viewer/SKILL.md new file mode 100644 index 0000000..aac38df --- /dev/null +++ b/.agents/skills/graph-viewer/SKILL.md @@ -0,0 +1,103 @@ +--- +name: graph-viewer +description: Integrate the reusable CDF graph viewer (useGraphViewer) into a Flows app by copying the local code bundle. Use when embedding a graph visualization, adding a knowledge graph, or showing CDF data model relationships and instances. +--- + +# Graph Viewer + +## Use This When + +The user wants to embed an interactive graph of a CDF data model — nodes, direct relations, edges, and reverse relations — inside a Flows app. + +Do **not** use this skill for static diagrams, pure dataflow visualizations, or non-CDF graphs. + +## Prerequisites + +- The app is wrapped in `@cognite/dune`'s provider component so `useDune()` returns an authenticated SDK. +- The target data model exists in CDF and you know its `space`, `externalId`, and `version`. +- The app uses React 18+ and TypeScript. 
+ +## Integration Workflow + +Follow these steps in order. Adapt to the target repo's conventions instead of inventing new ones. + +1. **Inspect the target app.** Read `package.json` and look at the existing folder structure (e.g. `src/features/*`, `src/components/*`, path aliases like `@/*`). +2. **Install missing dependencies** with the app's package manager (`npm`, `pnpm`, `yarn`, …). See the [Dependencies](#dependencies) table below for purposes and suggested versions. Reuse the React version already pinned by the app rather than upgrading it, and prefer any versions the repo already pins over the suggestions here. +3. **Copy the bundle into the app.** Copy every file from `skills/graph-viewer/code/` into an app-local feature folder, for example: + + ```text + src/features/graph-viewer/ + ``` + + If the repo already has a different feature/components layout or alias, mirror it. +4. **Import from the local folder**, never from `@skills/...`. With a typical `@/*` alias: + + ```tsx + import { useGraphViewer } from "@/features/graph-viewer"; + ``` +5. **Render `GraphCanvas` inside a container with explicit dimensions** (height is required — see the minimal example below). +6. **Run typecheck and build** (`tsc --noEmit`, `npm run build`, etc.) and fix any path or type issues introduced by the copy. + +## Minimal Example + +```tsx +import { useGraphViewer } from "@/features/graph-viewer"; + +export function GraphPanel() { + const { GraphCanvas, isLoading, error } = useGraphViewer({ + dataModel: { space: "my-space", externalId: "my-data-model", version: "1" }, + instance: { space: "my-instance-space", externalId: "pump-001" }, + }); + + if (isLoading) return <div>Loading graph…</div>; + if (error) return <div>Error: {error}</div>; + + return ( + <div style={{ height: 600 }}> + <GraphCanvas /> + </div> + ); +} +``` 
+ +## Dependencies + +Suggested versions reflect the latest published majors at the time of writing. They are starting points — if the target app already pins different versions, defer to the app. + +| Package | Suggested version | Purpose | +| --------------- | ----------------- | ---------------------------------------------- | +| `react` | `^18.2.0` | UI framework (peer; reuse the app's version) | +| `@cognite/sdk` | `^10.10.0` | CDF API client (instances, data models) | +| `@cognite/dune` | `^2.1.0` | Provides the authenticated SDK via `useDune()` | +| `reagraph` | `^4.30.8` | WebGL graph rendering engine | +| `lucide-react` | `^1.14.0` | Icon set used by the node-type legend | + +Example install (npm; adapt to the app's package manager): + +```bash +npm install @cognite/sdk@^10.10.0 @cognite/dune@^2.1.0 reagraph@^4.30.8 lucide-react@^1.14.0 +``` + +## CDF Cost & Performance + +Graph expansion can issue many CDF requests, especially with reverse relations. For large or unfamiliar data models, be conservative: + +- Set `whitelistedRelationProps` to the few properties the app actually needs to traverse. +- Lower `initialConnectionLimit` (it is a **hard maximum** of connections fetched per expansion). +- Lower `maxNodes` to bound the in-memory LRU buffer. +- Only declare `coreReverseQueries` for relations the app must surface; each entry adds an extra query per expansion. + +Tuples in `coreReverseQueries` are **version-aware**: +`[space, viewExternalId, viewVersion, propertyName, isList]`. + +## Advanced Reference + +For full configuration tables, return-value docs, layouts, theming, and richer examples, read `code/README.md`. + +For implementation details, inspect the source files under `code/`. + +## Verification Checklist + +- [ ] The app is wrapped in the `@cognite/dune` provider component. +- [ ] All files from `skills/graph-viewer/code/` were copied into an app-local folder. +- [ ] Imports point to the app-local folder (e.g. `@/features/graph-viewer`), not `@skills/...`. 
+- [ ] `@cognite/dune`, `@cognite/sdk`, `reagraph`, and `lucide-react` are present in `package.json`. +- [ ] The container that renders `<GraphCanvas />` has an explicit height. +- [ ] `tsc --noEmit` and the app's build both pass. +- [ ] No references to `dune-industrial-components` were introduced. diff --git a/.agents/skills/graph-viewer/code/GraphViewerCanvas.tsx b/.agents/skills/graph-viewer/code/GraphViewerCanvas.tsx new file mode 100644 index 0000000..dda9104 --- /dev/null +++ b/.agents/skills/graph-viewer/code/GraphViewerCanvas.tsx @@ -0,0 +1,177 @@ +import { useCallback, useRef } from "react"; +import { + GraphCanvas as ReagraphCanvas, + type GraphCanvasRef, + type LayoutTypes, +} from "reagraph"; +import type { Theme } from "reagraph"; +import type { GraphData, GraphEdge, GraphNode, LayoutType } from "./types"; +import { ZoomControls } from "./ZoomControls"; +import { GraphViewerLegend } from "./GraphViewerLegend"; +import { useCanvasResize } from "./useCanvasResize"; +import type { LiteFeatureFlags } from "./types"; + +// Maps the viewer's LayoutType values to reagraph layout names (identity for every key listed here). +const LAYOUT_MAP: Record<LayoutType, LayoutTypes> = { + forceDirected2d: "forceDirected2d", + forceDirected3d: "forceDirected3d", + treeTd2d: "treeTd2d", + treeLr2d: "treeLr2d", + radialOut2d: "radialOut2d", + circular2d: "circular2d", +}; + +// Two clicks on the same node within this many milliseconds count as a double click. +const DOUBLE_CLICK_MS = 300; + +export interface GraphViewerCanvasProps { + reagraphNodes: Array<{ + id: string; + label: string; + fill: string; + icon: string; + data: GraphNode; + }>; + reagraphEdges: Array<{ + id: string; + source: string; + target: string; + label?: string; + fill?: string; + size?: number; + data: GraphEdge; + }>; + displayedGraphData: GraphData; + layout: LayoutType; + theme: Theme; + selections: string[]; + selectedNode: GraphNode | null; + selectedEdge: GraphEdge | null; + features: LiteFeatureFlags; + selectedNodeType: string | null; + graphRef: React.RefObject<GraphCanvasRef | null>; + onNodeClick: (node: GraphNode) => void; + onEdgeClick: (edge: GraphEdge) => void; + onCanvasClick: () => void; + onExpandNode: (nodeId: string) => void; 
+ onNodeTypeClick: (typeKey: string) => void; + onClearNodeTypeSelection: () => void; + className?: string; +} + +export function GraphViewerCanvas({ + reagraphNodes, + reagraphEdges, + displayedGraphData, + layout, + theme, + selections, + features, + selectedNodeType, + graphRef, + onNodeClick, + onEdgeClick, + onCanvasClick, + onExpandNode, + onNodeTypeClick, + onClearNodeTypeSelection, + className, +}: GraphViewerCanvasProps) { + const canvasContainerRef = useRef(null); + const lastClickedIdRef = useRef(null); + const lastClickTimeRef = useRef(0); + const pendingClickRef = useRef | null>(null); + + useCanvasResize(canvasContainerRef, graphRef as React.RefObject); + + const handleNodeClick = useCallback( + (node: { id: string }) => { + const graphNode = displayedGraphData.nodes.find((n) => n.id === node.id); + if (!graphNode) return; + + const now = Date.now(); + const isDoubleClick = + lastClickedIdRef.current === node.id && + now - lastClickTimeRef.current < DOUBLE_CLICK_MS; + + lastClickedIdRef.current = node.id; + lastClickTimeRef.current = now; + + if (isDoubleClick && features.enableNodeExpansion) { + if (pendingClickRef.current) { + clearTimeout(pendingClickRef.current); + pendingClickRef.current = null; + } + onExpandNode(node.id); + return; + } + + pendingClickRef.current = setTimeout(() => { + pendingClickRef.current = null; + onNodeClick(graphNode); + }, DOUBLE_CLICK_MS); + }, + [displayedGraphData.nodes, onNodeClick, onExpandNode, features.enableNodeExpansion] + ); + + const handleEdgeClick = useCallback( + (edge: { id: string }) => { + const graphEdge = displayedGraphData.connections.find((c) => c.id === edge.id); + if (graphEdge) { + onEdgeClick(graphEdge); + } + }, + [displayedGraphData.connections, onEdgeClick] + ); + + const hasNodes = reagraphNodes.length > 0; + + return ( +
+ {hasNodes && ( + + )} + + {!hasNodes && ( +
+ No nodes to display +
+ )} + + {features.enableZoomControls && hasNodes && ( + graphRef.current?.zoomIn()} + onZoomOut={() => graphRef.current?.zoomOut()} + onFitView={() => graphRef.current?.fitNodesInView()} + /> + )} + + {features.enableLegend && + displayedGraphData.nodeTypes.length > 0 && ( + + )} +
+ ); +} diff --git a/.agents/skills/graph-viewer/code/GraphViewerLegend.tsx b/.agents/skills/graph-viewer/code/GraphViewerLegend.tsx new file mode 100644 index 0000000..1885d10 --- /dev/null +++ b/.agents/skills/graph-viewer/code/GraphViewerLegend.tsx @@ -0,0 +1,122 @@ +import * as LucideIcons from "lucide-react"; +import type { NodeTypeInfo } from "./types"; +import { getIconForType } from "./types"; + +type LucideIconComponent = React.ComponentType<{ + size?: number; + color?: string; + strokeWidth?: number; + className?: string; +}>; + +function getLucideIcon(iconName: string): LucideIconComponent { + const icons = LucideIcons as unknown as Record; + const icon = icons[iconName]; + if (!icon) { + console.warn(`[getLucideIcon] Icon "${iconName}" not found, using Circle`); + } + return icon || icons.Circle; +} + +const POSITION_CLASSES = { + "bottom-left": "bottom-4 left-4", + "bottom-right": "bottom-4 right-4", + "top-left": "top-4 left-4", + "top-right": "top-4 right-4", +}; + +export interface LegendProps { + nodeTypes: NodeTypeInfo[]; + selectedNodeType: string | null; + onNodeTypeClick: (typeKey: string) => void; + onClearSelection: () => void; + maxVisibleTypes?: number; + position?: keyof typeof POSITION_CLASSES; +} + +export function GraphViewerLegend({ + nodeTypes, + selectedNodeType, + onNodeTypeClick, + onClearSelection, + maxVisibleTypes = 12, + position = "bottom-left", +}: LegendProps) { + if (!nodeTypes || nodeTypes.length === 0) { + return null; + } + + const visibleTypes = nodeTypes.slice(0, maxVisibleTypes); + const remainingCount = nodeTypes.length - maxVisibleTypes; + + return ( +
+

+ + Node Types ({nodeTypes.length}) + {selectedNodeType && ( + + )} +

+
+ {visibleTypes.map((nodeType) => { + const iconName = getIconForType(nodeType.externalId); + const IconComponent = getLucideIcon(iconName); + const typeKey = `${nodeType.space}:${nodeType.externalId}`; + const isSelected = selectedNodeType === typeKey; + + return ( + + ); + })} + {remainingCount > 0 && ( + + +{remainingCount} more + + )} +
+
+ ); +} diff --git a/.agents/skills/graph-viewer/code/README.md b/.agents/skills/graph-viewer/code/README.md new file mode 100644 index 0000000..c17940f --- /dev/null +++ b/.agents/skills/graph-viewer/code/README.md @@ -0,0 +1,293 @@ +# Graph Viewer — Component Reference + +An interactive graph visualization for exploring **Cognite Data Fusion (CDF)** data model instances and their relationships. Built on [reagraph](https://github.com/reaviz/reagraph), it exposes a single hook — `useGraphViewer` — that returns a ready-to-render canvas and a full set of programmatic controls. + +> This document is the **complete API reference** for the bundle in this folder. For the agent-facing integration workflow, see `../SKILL.md`. + +--- + +## Features + +- **Data model-aware** — automatically loads CDF data model metadata to resolve node types, icons, and colors. +- **Progressive exploration** — starts from a seed instance and lets users expand the graph by double-clicking nodes to fetch connected instances (edges, direct relations, and configurable reverse relations). +- **LRU node buffer** — keeps the graph performant by evicting least-recently-used nodes when `maxNodes` is exceeded. +- **Multiple layouts** — Force-directed (2D/3D), tree (top-down / left-right), radial, and circular. +- **Interactive legend** — color-coded node type legend with click-to-filter. +- **Zoom controls** — built-in zoom in/out and fit-to-view buttons. +- **Theming** — fully customizable node, edge, ring, arrow, and canvas colors via `GraphThemeConfig` and `GraphVisualConfig`. +- **Type-aware icons** — maps CDF view types (ISA-95 assets, equipment, files, time series, etc.) to SVG icons rendered inside node circles. 
+ +--- + +## API + +### `useGraphViewer(config): UseGraphViewerReturn` + +#### `UseGraphViewerConfig` + +| Field | Type | Required | Description | +| ------------ | -------------------------------- | -------- | ----------------------------------------- | +| `dataModel` | `{ space, externalId, version }` | Yes | The CDF data model to load. | +| `instance` | `{ space, externalId }` | No | Optional seed instance to load on mount. | +| `options` | `UseGraphViewerOptions` | No | Optional overrides (see below). | + +#### `UseGraphViewerOptions` + +| Option | Type | Default | Description | +| -------------------------- | --------------------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------ | +| `maxNodes` | `number` | `1000` | Maximum nodes held in the LRU buffer. Older nodes are evicted first. | +| `layout` | `LayoutType` | `"forceDirected2d"` | Initial graph layout algorithm. | +| `initialConnectionLimit` | `number` | `100` | **Hard maximum** number of connected instances fetched per expansion (edges + reverse-relation nodes). | +| `whitelistedRelationProps` | `string[]` | all | Property names to follow when extracting direct relations. Strongly recommended for large data models. | +| `coreReverseQueries` | `ReverseRelationQuery[]` | `[]` | Reverse-relation queries to run on node expansion. See shape below. | +| `viewPriorityConfig` | `ViewPriorityConfig` | built-in | Controls which CDF views determine node types. | +| `visualConfig` | `Partial<GraphVisualConfig>` | defaults | Node colors, palette, icon size, path highlight. | +| `themeConfig` | `Partial<GraphThemeConfig>` | defaults | Full reagraph theme overrides. | +| `features` | `Partial<LiteFeatureFlags>` | all enabled | Toggle legend, zoom controls, and node expansion. 
| + +##### `ReverseRelationQuery` + +```ts +type ReverseRelationQuery = [ + space: string, // space of the view that defines the relation + viewExternalId: string, // external id of the view + viewVersion: string, // view version, e.g. "v1" — required, never assumed + propertyName: string, // direct-relation property pointing back to the expanded node + isList: boolean, // true for list, false for direct +]; +``` + +Example: + +```ts +const coreReverseQueries: ReverseRelationQuery[] = [ + ["industrial-dm", "Cavity", "v1", "connector", false], + ["industrial-dm", "Cable", "v1", "wireGroup", true], +]; +``` + +##### `LiteFeatureFlags` + +| Flag | Default | Controls | +| --------------------- | ------- | ----------------------------------------- | +| `enableLegend` | `true` | Node-type color legend overlay | +| `enableZoomControls` | `true` | Zoom in / out / fit buttons | +| `enableNodeExpansion` | `true` | Double-click node to expand its neighbors | + +#### `UseGraphViewerReturn` + +| Property | Type | Description | +| --------------- | -------------------------------------- | -------------------------------------------------------------- | +| `GraphCanvas` | `React.FC<{ className? }>` | Self-contained canvas component to render. | +| `isLoading` | `boolean` | `true` while data model, seed node, or expansion is in flight. | +| `error` | `string \| null` | Error message, if any. | +| `graphData` | `GraphData` | Current nodes, connections, and node type metadata. | +| `stats` | `GraphStats \| null` | Aggregate counts by node/connection type. | +| `layout` | `LayoutType` | Current layout. | +| `setLayout` | `(layout) => void` | Change the layout algorithm. | +| `selections` | `string[]` | Currently selected node/edge IDs. | +| `setSelections` | `(ids) => void` | Programmatically select nodes/edges. | +| `selectedNode` | `GraphNode \| null` | The selected node object. | +| `selectedEdge` | `GraphEdge \| null` | The selected edge object. 
| +| `expandNode` | `(nodeId) => Promise<void>` | Fetch and add connected instances for a node. | +| `loadInstance` | `(space, externalId) => Promise<void>` | Load a new seed instance (replaces the graph). | +| `fitView` | `() => void` | Fit all nodes into the viewport. | +| `zoomIn` | `() => void` | Zoom in. | +| `zoomOut` | `() => void` | Zoom out. | +| `clear` | `() => void` | Remove all nodes and edges from the buffer. | +| `graphRef` | `RefObject<GraphCanvasRef>` | Direct ref to the underlying reagraph canvas. | + +--- + +## Layout Options + +| ID | Label | +| ----------------- | ----------------- | +| `forceDirected2d` | Force 2D | +| `forceDirected3d` | Force 3D | +| `treeTd2d` | Tree (Top-Down) | +| `treeLr2d` | Tree (Left-Right) | +| `radialOut2d` | Radial | +| `circular2d` | Circular | + +--- + +## Examples + +### Minimal embed + +```tsx +function GraphPanel() { + const { GraphCanvas } = useGraphViewer({ + dataModel: { space: "equipment", externalId: "EquipmentModel", version: "1" }, + }); + + return <GraphCanvas />; +} +``` + +No seed instance — the canvas renders empty until you call `loadInstance`. + +### Layout switcher with stats + +```tsx +function GraphWithControls() { + const { GraphCanvas, stats, layout, setLayout } = useGraphViewer({ + dataModel: { space: "equipment", externalId: "EquipmentModel", version: "1" }, + instance: { space: "assets", externalId: "pump-001" }, + }); + + return ( +
+
+ + {stats && {stats.totalNodes} nodes} +
+ +
+ ); +} +``` + +### Programmatic node loading + +```tsx +function SearchAndGraph() { + const { GraphCanvas, loadInstance, isLoading } = useGraphViewer({ + dataModel: { space: "equipment", externalId: "EquipmentModel", version: "1" }, + }); + + return ( +
+ { + if (e.key === "Enter") loadInstance("assets", e.currentTarget.value); + }} + /> + {isLoading &&

Loading…

} + +
+ ); +} +``` + +### Disable features + +```tsx +const { GraphCanvas } = useGraphViewer({ + dataModel: { space: "s", externalId: "dm", version: "1" }, + instance: { space: "s", externalId: "node-1" }, + options: { + features: { + enableLegend: false, + enableZoomControls: false, + enableNodeExpansion: false, + }, + }, +}); +``` + +### Conservative expansion for large data models + +Whitelist relation properties and bound the per-expansion budget to keep CDF +load predictable: + +```tsx +const { GraphCanvas } = useGraphViewer({ + dataModel: { space: "industrial", externalId: "EWIS", version: "1" }, + instance: { space: "instances", externalId: "connector-001" }, + options: { + maxNodes: 500, + initialConnectionLimit: 50, + whitelistedRelationProps: ["parent", "child", "connectedTo"], + coreReverseQueries: [ + ["industrial-dm", "Cavity", "v1", "connector", false], + ["industrial-dm", "Cable", "v1", "wireGroup", true], + ], + }, +}); +``` + +--- + +## Sizing + +`<GraphCanvas />` fills its parent. Give the parent explicit dimensions: + +```tsx + + +
+ +
+ +
+
+ +
+``` + +--- + +## Common Patterns + +### React to selection + +```tsx +const { GraphCanvas, selectedNode } = useGraphViewer({ /* … */ }); + +useEffect(() => { + if (selectedNode) console.log("Selected:", selectedNode.data.externalId); +}, [selectedNode]); +``` + +### Expand from an external trigger + +```tsx +const { expandNode } = useGraphViewer({ /* … */ }); + +// nodeId format is "space:externalId" +await expandNode("my-space:pump-001"); +``` + +--- + +## Architecture + +``` +graph-viewer/ +├── useGraphViewer.tsx # Main hook — composes all sub-hooks, returns GraphCanvas + controls +├── GraphViewerCanvas.tsx # Renders reagraph canvas, zoom controls, and legend +├── GraphViewerLegend.tsx # Color-coded node type legend with click-to-filter +├── ZoomControls.tsx # Zoom in / out / fit-view button group +├── graph-service.ts # CDF API calls — fetchNodeDetails, fetchConnectedNodes +├── graph-config.ts # Theme defaults, icon generation, node/edge transformers +├── useDataModelLoader.ts # Loads data model views from CDF +├── useSeedNode.ts # Loads the initial instance and its connections +├── useNodeBuffer.ts # LRU buffer that caps total nodes at maxNodes +├── useGraphDataPipeline.ts # Transforms raw CDF instances into GraphData + reagraph format +├── useGraphSelection.ts # Tracks selected node/edge state +├── useCanvasResize.ts # Observes container size changes and triggers reagraph resize +├── types.ts # All shared TypeScript types, constants, and helpers +└── index.ts # Public exports +``` + +--- + +## Dependencies + +| Package | Purpose | +| --------------- | ---------------------------------------------------- | +| `react` | UI framework (peer) | +| `@cognite/sdk` | CDF API client (instances, data models) | +| `@cognite/dune` | Provides the authenticated SDK via `useDune()` | +| `reagraph` | WebGL graph rendering engine | +| `lucide-react` | Icon set used by the node-type legend | + +Install latest compatible versions using the target app's package manager. 
Prefer the React version already pinned by the app rather than upgrading it. diff --git a/.agents/skills/graph-viewer/code/ZoomControls.tsx b/.agents/skills/graph-viewer/code/ZoomControls.tsx new file mode 100644 index 0000000..15dce34 --- /dev/null +++ b/.agents/skills/graph-viewer/code/ZoomControls.tsx @@ -0,0 +1,111 @@ +function ZoomOutIcon({ className }: { className?: string }) { + return ( + + + + + + ); +} + +function ZoomInIcon({ className }: { className?: string }) { + return ( + + + + + + ); +} + +function FitViewIcon({ className }: { className?: string }) { + return ( + + + + + + + + ); +} + +interface ZoomControlsProps { + onZoomIn: () => void; + onZoomOut: () => void; + onFitView: () => void; +} + +export function ZoomControls({ onZoomIn, onZoomOut, onFitView }: ZoomControlsProps) { + const btnClass = + "p-2 text-muted-foreground hover:text-foreground hover:bg-accent dark:hover:bg-accent/50 rounded-lg active:scale-95 transition-colors"; + + return ( +
+ + +
+ +
+ ); +} diff --git a/.agents/skills/graph-viewer/code/graph-config.ts b/.agents/skills/graph-viewer/code/graph-config.ts new file mode 100644 index 0000000..e598e38 --- /dev/null +++ b/.agents/skills/graph-viewer/code/graph-config.ts @@ -0,0 +1,271 @@ +import { type Theme, lightTheme } from "reagraph"; +import { + DEFAULT_NODE_COLOR, + NODE_TYPE_PALETTE, + getIconForType, + type GraphEdge, + type GraphNode, + type GraphThemeConfig, + type GraphVisualConfig, +} from "./types"; + +// ============================================================================= +// Visual Configuration Defaults +// ============================================================================= + +export const DEFAULT_VISUAL_CONFIG: GraphVisualConfig = { + pathHighlightColor: "#22c55e", + pathHighlightSize: 3, + iconSize: 64, + defaultNodeColor: DEFAULT_NODE_COLOR, + nodeTypePalette: NODE_TYPE_PALETTE, +}; + +// ============================================================================= +// Theme Configuration Defaults +// ============================================================================= + +export const DEFAULT_THEME_CONFIG: GraphThemeConfig = { + canvas: { + background: "#FAFBFC", + }, + node: { + opacity: 1, + selectedOpacity: 1, + inactiveOpacity: 0.3, + label: { + color: "#1E293B", + stroke: "#FFFFFF", + strokeWidth: 3, + activeColor: "#0F172A", + fontSize: 12, + }, + }, + edge: { + fill: "#94A3B8", + activeFill: "#3B82F6", + opacity: 0.7, + selectedOpacity: 1, + inactiveOpacity: 0.2, + label: { + color: "#64748B", + stroke: "#FFFFFF", + strokeWidth: 2, + activeColor: "#3B82F6", + fontSize: 10, + }, + }, + ring: { + fill: "#3B82F6", + activeFill: "#2563EB", + }, + arrow: { + fill: "#94A3B8", + activeFill: "#3B82F6", + }, + cluster: { + stroke: "#E2E8F0", + fill: "#F8FAFC", + label: { + color: "#475569", + }, + }, + lasso: { + border: "#3B82F6", + background: "rgba(59, 130, 246, 0.1)", + }, +}; + +// 
============================================================================= +// Icon Paths +// ============================================================================= + +export const ICON_PATHS: Record = { + Plug: "M6 8h12v8H6z M9 8V5 M12 8V5 M15 8V5 M9 16v3 M12 16v3 M15 16v3", + Minus: "M3 12h18 M3 10v4 M21 10v4", + Cable: + "M4 6c3 0 5 3 8 6c3-3 5-6 8-6 M4 12h4c2 0 3 1 4 2c1-1 2-2 4-2h4 M4 18c3 0 5-3 8-6c3 3 5 6 8 6", + CircleDot: "M12 6a6 6 0 0 1 6 6v6H6v-6a6 6 0 0 1 6-6z M12 10v4 M10 12h4", + Zap: "M12 3L6 12h5v6l6-9h-5V3z", + Cpu: "M7 7h10v10H7z M10 7V4 M14 7V4 M10 17v3 M14 17v3 M7 10H4 M7 14H4 M17 10h3 M17 14h3", + ArrowDownToLine: "M12 3v10 M7 13h10 M9 16h6 M11 19h2", + Type: "M6 6h12 M12 6v12 M8 18h8", + LayoutGrid: "M4 4h6v6H4z M14 4h6v6h-6z M4 14h6v6H4z M14 14h6v6h-6z", + GitCommit: "M12 8a4 4 0 1 0 0 8 4 4 0 0 0 0-8z M3 12h5 M16 12h5", + FileText: + "M6 2h8l4 4v14a2 2 0 0 1-2 2H6a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2z M14 2v4h4 M8 12h8 M8 16h8", + Activity: "M3 12h4l2-6 3 12 2-6h7", + Box: "M3 8l9-5 9 5v8l-9 5-9-5V8z M12 8v14 M3 8l9 5 9-5", + Wrench: "M14 4l-4 4 6 6 4-4a5 5 0 0 0-6-6z M10 8L4 14l4 4 6-6", + MapPin: + "M12 2a8 8 0 0 0-8 8c0 6 8 12 8 12s8-6 8-12a8 8 0 0 0-8-8z M12 7a3 3 0 1 0 0 6 3 3 0 0 0 0-6z", + Building: + "M5 21V5a2 2 0 0 1 2-2h10a2 2 0 0 1 2 2v16 M9 7h2 M13 7h2 M9 11h2 M13 11h2 M9 15h6", + AlertTriangle: "M12 3L2 20h20L12 3z M12 9v5 M12 16v2", + Circle: "M12 4a8 8 0 1 0 0 16 8 8 0 0 0 0-16z", + Package: "M3 8l9-5 9 5-9 5-9-5z M3 8v8l9 5V13 M21 8v8l-9 5V13", + ClipboardList: "M8 4h8v2H8V4z M6 6h12v14H6V6z M9 10h6 M9 14h6", + Cog: "M12 15a3 3 0 1 0 0-6 3 3 0 0 0 0 6z M12 4v2 M12 18v2 M5 12H3 M21 12h-2 M6.3 6.3l1.4 1.4 M16.3 16.3l1.4 1.4 M6.3 17.7l1.4-1.4 M16.3 7.7l1.4-1.4", + default: "M12 4a8 8 0 1 0 0 16 8 8 0 0 0 0-16z", +}; + +// ============================================================================= +// Helper Functions +// ============================================================================= + +export function 
buildReagraphTheme(config: Partial = {}): Theme { + const theme = { ...DEFAULT_THEME_CONFIG, ...config }; + + return { + ...lightTheme, + canvas: { + ...lightTheme.canvas, + background: theme.canvas.background, + }, + node: { + ...lightTheme.node, + opacity: theme.node.opacity, + selectedOpacity: theme.node.selectedOpacity, + inactiveOpacity: theme.node.inactiveOpacity, + label: { + ...lightTheme.node.label, + color: theme.node.label.color, + stroke: theme.node.label.stroke, + activeColor: theme.node.label.activeColor, + }, + }, + edge: { + ...lightTheme.edge, + fill: theme.edge.fill, + activeFill: theme.edge.activeFill, + opacity: theme.edge.opacity, + selectedOpacity: theme.edge.selectedOpacity, + inactiveOpacity: theme.edge.inactiveOpacity, + label: { + ...lightTheme.edge.label, + color: theme.edge.label.color, + stroke: theme.edge.label.stroke, + activeColor: theme.edge.label.activeColor, + }, + }, + ring: { + ...lightTheme.ring, + fill: theme.ring.fill, + activeFill: theme.ring.activeFill, + }, + arrow: { + ...lightTheme.arrow, + fill: theme.arrow.fill, + activeFill: theme.arrow.activeFill, + }, + cluster: { + ...lightTheme.cluster, + stroke: theme.cluster.stroke, + fill: theme.cluster.fill, + label: { + ...lightTheme.cluster?.label, + color: theme.cluster.label.color, + }, + }, + lasso: { + ...lightTheme.lasso, + border: theme.lasso.border, + background: theme.lasso.background, + }, + }; +} + +export function mergeVisualConfig(config?: Partial): GraphVisualConfig { + return { ...DEFAULT_VISUAL_CONFIG, ...config }; +} + +// ============================================================================= +// Icon Generation +// ============================================================================= + +const iconUrlCache = new Map(); + +export function getIconUrl(iconName: string, bgColor: string, iconSize = 64): string { + const cacheKey = `${iconName}:${bgColor}:${iconSize}`; + if (iconUrlCache.has(cacheKey)) { + return iconUrlCache.get(cacheKey)!; + } + + 
const pathData = ICON_PATHS[iconName] || ICON_PATHS.default; + + const svg = ` + + + + + + `; + + const dataUrl = `data:image/svg+xml;base64,${btoa(svg)}`; + iconUrlCache.set(cacheKey, dataUrl); + return dataUrl; +} + +// ============================================================================= +// Node/Edge Transformations (Reagraph format) +// ============================================================================= + +export function transformNodes( + nodes: GraphNode[], + visualConfig: GraphVisualConfig +): Array<{ + id: string; + label: string; + fill: string; + icon: string; + data: GraphNode; +}> { + return nodes.map((node) => { + const typeExternalId = node.data?.type?.externalId; + const iconName = getIconForType(typeExternalId); + const fillColor = node.fill || visualConfig.defaultNodeColor; + const iconUrl = getIconUrl(iconName, fillColor, visualConfig.iconSize); + + return { + id: node.id, + label: node.label, + fill: fillColor, + icon: iconUrl, + data: node, + }; + }); +} + +export function transformEdges( + connections: GraphEdge[], + highlightedConnectionIds?: Set, + visualConfig?: Partial +): Array<{ + id: string; + source: string; + target: string; + label?: string; + fill?: string; + size?: number; + data: GraphEdge; +}> { + const config = visualConfig || { + pathHighlightColor: "#22c55e", + pathHighlightSize: 3, + }; + + return connections.map((edge) => { + const isHighlighted = highlightedConnectionIds?.has(edge.id) ?? 
false; + return { + id: edge.id, + source: edge.source, + target: edge.target, + label: edge.label, + data: edge, + ...(isHighlighted && { + fill: config.pathHighlightColor, + size: config.pathHighlightSize, + }), + }; + }); +} diff --git a/.agents/skills/graph-viewer/code/graph-service.ts b/.agents/skills/graph-viewer/code/graph-service.ts new file mode 100644 index 0000000..b7ccc9d --- /dev/null +++ b/.agents/skills/graph-viewer/code/graph-service.ts @@ -0,0 +1,855 @@ +import type { CogniteClient } from "@cognite/sdk"; +import type { + CDFEdge, + CDFNode, + DataModelInfo, + GraphData, + ReverseRelationQuery, + ViewPriorityConfig, +} from "./types"; + +// ============================================================================= +// Node Type Detection +// ============================================================================= + +const DEFAULT_VIEW_TYPE_PRIORITY = [ + "ISA95Asset", + "Equipment", + "Area", + "Site", + "Enterprise", + "WorkCell", + "WorkCenter", + "ProcessCell", + "ProcessArea", + "ProductionLine", + "ProductionRun", + "Product", + "WorkOrder", + "WorkUnit", + "QualityAlert", + "FaultCode", + "CogniteAsset", + "CogniteEquipment", + "CogniteFile", + "CogniteTimeSeries", + "CogniteActivity", + "Cognite3DModel", + "CogniteDescribable", + "CogniteSourceable", + "CogniteVisualizable", + "CogniteSchedulable", +]; + +const DEFAULT_SKIP_VIEWS_FOR_TYPE = new Set([ + "CogniteDescribable", + "CogniteSourceable", + "CogniteVisualizable", + "CogniteSchedulable", + "CogniteSourceSystem", +]); + +const DEFAULT_SKIP_VIEWS_FOR_PROPERTIES = new Set([ + "CogniteSourceable", + "CogniteVisualizable", + "CogniteSchedulable", +]); + +function deriveNodeTypeFromProperties( + properties: Record> | undefined, + viewPriorityConfig?: ViewPriorityConfig +): { space: string; externalId: string } | undefined { + if (!properties) return undefined; + + const viewTypePriority = viewPriorityConfig?.viewTypePriority ?? 
DEFAULT_VIEW_TYPE_PRIORITY; + const skipViewsForType = viewPriorityConfig?.skipViewsForType + ? new Set(viewPriorityConfig.skipViewsForType) + : DEFAULT_SKIP_VIEWS_FOR_TYPE; + + const viewKeys: Array<{ + space: string; + externalId: string; + priority: number; + }> = []; + + for (const [spaceKey, viewsObj] of Object.entries(properties)) { + if (typeof viewsObj !== "object" || viewsObj === null) continue; + + for (const viewKey of Object.keys(viewsObj)) { + const viewExternalId = viewKey.split("/")[0]; + if (skipViewsForType.has(viewExternalId)) continue; + + const priorityIndex = viewTypePriority.indexOf(viewExternalId); + const priority = priorityIndex >= 0 ? priorityIndex : 100; + + viewKeys.push({ + space: spaceKey, + externalId: viewExternalId, + priority, + }); + } + } + + if (viewKeys.length === 0) return undefined; + + viewKeys.sort((a, b) => a.priority - b.priority); + + return { + space: viewKeys[0].space, + externalId: viewKeys[0].externalId, + }; +} + +// ============================================================================= +// Query with cursor-based pagination +// ============================================================================= + +const QUERY_PAGE_LIMIT = 10_000; + +/** + * Paginate a CDF instances.query selection until either: + * 1. the API has no more cursors, + * 2. the cumulative number of items reaches `maxTotal` (hard cap), or + * 3. an empty page is returned. + * + * `maxTotal` is a HARD MAXIMUM. The function never returns more than `maxTotal` + * items, and it stops fetching additional pages as soon as the cap is reached. + * Pass `Infinity` to disable the cap (legacy "fetch everything" behaviour). 
+ */ +async function queryWithCursorPagination( + client: CogniteClient, + selectionName: string, + query: { + with: Record< + string, + { + nodes?: { filter?: unknown }; + edges?: { filter?: unknown }; + limit?: number; + } + >; + select: Record; + includeTyping?: boolean; + }, + limitPerPage: number = QUERY_PAGE_LIMIT, + maxTotal: number = Infinity +): Promise { + const results: T[] = []; + let cursors: Record | undefined; + + while (results.length < maxTotal) { + const remaining = maxTotal - results.length; + const pageLimit = Math.max(1, Math.min(limitPerPage, remaining)); + + const withWithLimit = { ...query.with }; + const firstKey = Object.keys(withWithLimit)[0]; + if (firstKey && withWithLimit[firstKey]) { + withWithLimit[firstKey] = { + ...withWithLimit[firstKey], + limit: pageLimit, + }; + } + const request = { + ...query, + with: withWithLimit, + cursors, + }; + const res = await client.instances.query( + request as Parameters[0] + ); + const chunk = (res.items[selectionName] ?? []) as T[]; + results.push(...chunk); + cursors = + res.nextCursor && Object.keys(res.nextCursor).length > 0 + ? (res.nextCursor as Record) + : undefined; + if (chunk.length === 0 || !cursors?.[selectionName]) break; + } + + return results.length > maxTotal ? 
results.slice(0, maxTotal) : results; +} + +// ============================================================================= +// fetchNodeDetails +// ============================================================================= + +export async function fetchNodeDetails( + client: CogniteClient | null, + space: string, + externalId: string +): Promise { + if (!client) { + throw new Error("CDF client is not available"); + } + + try { + const inspectResult = await client.instances.inspect({ + items: [ + { + instanceType: "node" as const, + space, + externalId, + }, + ], + inspectionOperations: { + involvedViews: { + allVersions: false, + }, + }, + }); + + const involvedViews = inspectResult.items?.[0]?.inspectionResults?.involvedViews || []; + + const sources = involvedViews.slice(0, 10).map((view) => ({ + source: { + type: "view" as const, + space: view.space, + externalId: view.externalId, + version: view.version ?? "latest", + }, + })); + + const response = await client.instances.retrieve({ + items: [ + { + instanceType: "node" as const, + space, + externalId, + }, + ], + includeTyping: true, + sources: sources.length > 0 ? 
sources : undefined, + }); + + if (response.items.length === 0) { + return null; + } + + const item = response.items[0]; + const properties = item.properties as Record>; + const derivedType = deriveNodeTypeFromProperties(properties); + + return { + space: item.space, + externalId: item.externalId, + instanceType: "node" as const, + version: item.version, + createdTime: item.createdTime, + lastUpdatedTime: item.lastUpdatedTime, + type: item.type || derivedType, + properties, + }; + } catch (error) { + console.error("[GraphViewer] Error fetching node details:", error); + return null; + } +} + +// ============================================================================= +// fetchConnectedNodes +// ============================================================================= + +export interface ExpandNodeResult { + newNodes: CDFNode[]; + newEdges: CDFEdge[]; + connectedNodeIds: string[]; +} + +export async function fetchConnectedNodes( + client: CogniteClient | null, + nodeSpace: string, + nodeExternalId: string, + existingNodeIds: Set, + _dataModel?: DataModelInfo, + limit = 100, + whitelistedRelationProps?: Set, + coreReverseQueries?: ReverseRelationQuery[], + viewPriorityConfig?: ViewPriorityConfig +): Promise { + if (!client) { + throw new Error("CDF client is not available. 
Please ensure you are authenticated."); + } + + const extractDirectRelations = ( + properties: Record> | undefined + ): Array<{ space: string; externalId: string }> => { + if (!properties) return []; + + const refs: Array<{ space: string; externalId: string }> = []; + const seen = new Set(); + + const processValue = (val: unknown) => { + if (val && typeof val === "object" && "space" in val && "externalId" in val) { + const ref = val as { space: string; externalId: string }; + if (typeof ref.space === "string" && typeof ref.externalId === "string") { + const key = `${ref.space}:${ref.externalId}`; + if (!seen.has(key)) { + seen.add(key); + refs.push({ space: ref.space, externalId: ref.externalId }); + } + } + } else if (Array.isArray(val)) { + val.forEach(processValue); + } + }; + + for (const spaceObj of Object.values(properties)) { + if (typeof spaceObj !== "object" || spaceObj === null) continue; + for (const viewObj of Object.values(spaceObj)) { + if (typeof viewObj !== "object" || viewObj === null) continue; + for (const [propName, propVal] of Object.entries(viewObj as Record)) { + if (!whitelistedRelationProps || whitelistedRelationProps.has(propName)) { + processValue(propVal); + } + } + } + } + + return refs; + }; + + const [sourceNodeRefs, edgeResponse] = await Promise.all([ + (async () => { + try { + const sourceInspect = await client.instances.inspect({ + items: [ + { + instanceType: "node" as const, + space: nodeSpace, + externalId: nodeExternalId, + }, + ], + inspectionOperations: { involvedViews: { allVersions: false } }, + }); + + if (sourceInspect.items.length > 0) { + const involvedViews = + ( + sourceInspect.items[0] as { + inspectionResults?: { + involvedViews?: Array<{ + space: string; + externalId: string; + version: string; + }>; + }; + } + ).inspectionResults?.involvedViews || []; + + const sources = involvedViews.slice(0, 10).map((v) => ({ + source: { + type: "view" as const, + space: v.space, + externalId: v.externalId, + version: v.version, 
+ }, + })); + + if (sources.length > 0) { + const sourceNodeResponse = await client.instances.retrieve({ + items: [ + { + instanceType: "node" as const, + space: nodeSpace, + externalId: nodeExternalId, + }, + ], + sources, + includeTyping: true, + }); + + if (sourceNodeResponse.items.length > 0) { + const props = sourceNodeResponse.items[0].properties as Record< + string, + Record + >; + return extractDirectRelations(props); + } + } + } + return []; + } catch { + return []; + } + })(), + + (async () => { + type EdgeItem = { + space: string; + externalId: string; + version: number; + createdTime: number; + lastUpdatedTime: number; + type: { space: string; externalId: string }; + startNode: { space: string; externalId: string }; + endNode: { space: string; externalId: string }; + properties?: Record>; + }; + const items = await queryWithCursorPagination( + client, + "edges", + { + with: { + edges: { + edges: { + filter: { + or: [ + { + equals: { + property: ["edge", "startNode"], + value: { space: nodeSpace, externalId: nodeExternalId }, + }, + }, + { + equals: { + property: ["edge", "endNode"], + value: { space: nodeSpace, externalId: nodeExternalId }, + }, + }, + ], + }, + }, + }, + }, + select: { edges: {} }, + includeTyping: true, + }, + Math.min(limit * 2, QUERY_PAGE_LIMIT), + // Hard cap: never return more than `limit` edges per expansion. This + // upper bound protects the consumer from runaway pagination and matches + // the documented contract of `initialConnectionLimit`. 
+ limit + ); + return { items }; + })(), + ]); + + const newEdges: CDFEdge[] = edgeResponse.items.map((edgeItem) => { + const item = edgeItem as { + space: string; + externalId: string; + version: number; + createdTime: number; + lastUpdatedTime: number; + type: { space: string; externalId: string }; + startNode: { space: string; externalId: string }; + endNode: { space: string; externalId: string }; + properties?: Record>; + }; + + return { + space: item.space, + externalId: item.externalId, + instanceType: "edge" as const, + version: item.version, + createdTime: item.createdTime, + lastUpdatedTime: item.lastUpdatedTime, + type: item.type, + startNode: item.startNode, + endNode: item.endNode, + properties: item.properties, + }; + }); + + const connectedNodeRefs = new Map(); + + for (const edge of newEdges) { + const startKey = `${edge.startNode.space}:${edge.startNode.externalId}`; + const endKey = `${edge.endNode.space}:${edge.endNode.externalId}`; + + if (!existingNodeIds.has(startKey)) { + connectedNodeRefs.set(startKey, edge.startNode); + } + if (!existingNodeIds.has(endKey)) { + connectedNodeRefs.set(endKey, edge.endNode); + } + } + + const sourceNodeKey = `${nodeSpace}:${nodeExternalId}`; + const directRelationEdges: CDFEdge[] = []; + + for (const ref of sourceNodeRefs) { + const refKey = `${ref.space}:${ref.externalId}`; + if ( + refKey !== sourceNodeKey && + !existingNodeIds.has(refKey) && + !connectedNodeRefs.has(refKey) + ) { + connectedNodeRefs.set(refKey, ref); + + directRelationEdges.push({ + space: nodeSpace, + externalId: `synthetic:${nodeExternalId}->direct->${ref.externalId}`, + instanceType: "edge" as const, + version: 1, + createdTime: Date.now(), + lastUpdatedTime: Date.now(), + type: { space: "synthetic", externalId: "direct-relation" }, + startNode: { space: nodeSpace, externalId: nodeExternalId }, + endNode: { space: ref.space, externalId: ref.externalId }, + properties: {}, + }); + } + } + + newEdges.push(...directRelationEdges); + + // 
Reverse relation queries + const reverseRelationRefs: Array<{ space: string; externalId: string }> = []; + const CORE_REVERSE_QUERIES = coreReverseQueries ?? []; + + try { + const nodeRef = { space: nodeSpace, externalId: nodeExternalId }; + + // Spread the per-expansion budget across all configured reverse queries so + // the total number of nodes contributed by reverse relations stays within + // `limit`. Each query gets at least 1 row. + const perQueryLimit = + CORE_REVERSE_QUERIES.length > 0 + ? Math.max(1, Math.ceil(limit / CORE_REVERSE_QUERIES.length)) + : limit; + + const queryPromises = CORE_REVERSE_QUERIES.map( + async ([viewSpace, viewExternalId, viewVersion, propertyName, isList]) => { + try { + const propertyPath = [ + viewSpace, + `${viewExternalId}/${viewVersion}`, + propertyName, + ]; + + const filter = isList + ? { + containsAny: { + property: propertyPath, + values: [nodeRef], + }, + } + : { + equals: { + property: propertyPath, + value: nodeRef, + }, + }; + + const items = await queryWithCursorPagination<{ + space: string; + externalId: string; + }>( + client, + "nodes", + { + with: { + nodes: { + nodes: { filter }, + }, + }, + select: { nodes: {} }, + includeTyping: false, + }, + Math.min(50, perQueryLimit), + perQueryLimit + ); + return items.map((item) => ({ + space: item.space, + externalId: item.externalId, + })); + } catch { + return []; + } + } + ); + + const results = await Promise.all(queryPromises); + for (const refs of results) { + reverseRelationRefs.push(...refs); + } + } catch { + // Silently ignore reverse relation query failures + } + + const syntheticEdges: CDFEdge[] = []; + + for (const ref of reverseRelationRefs) { + const refKey = `${ref.space}:${ref.externalId}`; + if ( + refKey !== sourceNodeKey && + !existingNodeIds.has(refKey) && + !connectedNodeRefs.has(refKey) + ) { + connectedNodeRefs.set(refKey, ref); + + syntheticEdges.push({ + space: ref.space, + externalId: `synthetic:${ref.externalId}->assets->${nodeExternalId}`, + 
instanceType: "edge" as const, + version: 1, + createdTime: Date.now(), + lastUpdatedTime: Date.now(), + type: { space: "cdf_cdm", externalId: "references-asset" }, + startNode: { space: ref.space, externalId: ref.externalId }, + endNode: { space: nodeSpace, externalId: nodeExternalId }, + properties: {}, + }); + } + } + + newEdges.push(...syntheticEdges); + + const connectedNodeIds = newEdges.flatMap((edge) => [ + `${edge.startNode.space}:${edge.startNode.externalId}`, + `${edge.endNode.space}:${edge.endNode.externalId}`, + ]); + + const nodesToFetch = Array.from(connectedNodeRefs.values()).slice(0, limit); + + let newNodes: CDFNode[] = []; + + if (nodesToFetch.length > 0) { + const inspectResponse = await client.instances.inspect({ + items: nodesToFetch.map((ref) => ({ + instanceType: "node" as const, + space: ref.space, + externalId: ref.externalId, + })), + inspectionOperations: { involvedViews: {} }, + }); + + const nodeTypeMap = new Map(); + const allViews = new Map(); + + for (const inspectItem of inspectResponse.items) { + const item = inspectItem as { + space: string; + externalId: string; + inspectionResults?: { + involvedViews?: Array<{ + space: string; + externalId: string; + version: string; + }>; + }; + }; + const key = `${item.space}:${item.externalId}`; + const views = item.inspectionResults?.involvedViews || []; + + const skipViewsForProperties = viewPriorityConfig?.skipViewsForProperties + ? new Set(viewPriorityConfig.skipViewsForProperties) + : DEFAULT_SKIP_VIEWS_FOR_PROPERTIES; + const skipViewsForType = viewPriorityConfig?.skipViewsForType + ? 
new Set(viewPriorityConfig.skipViewsForType) + : DEFAULT_SKIP_VIEWS_FOR_TYPE; + + for (const view of views) { + if (!skipViewsForProperties.has(view.externalId)) { + const viewKey = `${view.space}:${view.externalId}`; + if (!allViews.has(viewKey)) { + allViews.set(viewKey, view); + } + } + } + + const domainView = views.find( + (v) => !v.space.startsWith("cdf_cdm") && !skipViewsForType.has(v.externalId) + ); + const cdmView = views.find( + (v) => v.space.startsWith("cdf_cdm") && !skipViewsForType.has(v.externalId) + ); + const bestView = domainView || cdmView; + + if (bestView) { + nodeTypeMap.set(key, { + space: bestView.space, + externalId: bestView.externalId, + }); + } + } + + const sources = [ + { + source: { + type: "view" as const, + space: "cdf_cdm", + externalId: "CogniteDescribable", + version: "v1", + }, + }, + ...Array.from(allViews.values()).map((v) => ({ + source: { + type: "view" as const, + space: v.space, + externalId: v.externalId, + version: v.version, + }, + })), + ]; + + const retrieveItems = nodesToFetch.map((ref) => ({ + instanceType: "node" as const, + space: ref.space, + externalId: ref.externalId, + })); + + let nodeResponse: Awaited>; + try { + nodeResponse = await client.instances.retrieve({ + items: retrieveItems, + includeTyping: true, + }); + } catch { + nodeResponse = { items: [] }; + } + + const propertiesMap = new Map>>(); + if (sources.length > 0 && nodeResponse.items.length > 0) { + try { + const propsResponse = await client.instances.retrieve({ + items: retrieveItems, + sources, + includeTyping: true, + }); + + for (const item of propsResponse.items) { + const key = `${item.space}:${item.externalId}`; + if (item.properties) { + propertiesMap.set(key, item.properties as Record>); + } + } + } catch { + // Continue without additional properties + } + } + + const nodesWithoutProps = retrieveItems.filter((item) => { + const key = `${item.space}:${item.externalId}`; + return !propertiesMap.has(key); + }); + + if 
(nodesWithoutProps.length > 0) { + const individualFetches = nodesWithoutProps.map(async (nodeRef) => { + const key = `${nodeRef.space}:${nodeRef.externalId}`; + try { + const inspectResult = await client.instances.inspect({ + items: [nodeRef], + inspectionOperations: { involvedViews: { allVersions: false } }, + }); + + const views = + ( + inspectResult.items?.[0] as { + inspectionResults?: { + involvedViews?: Array<{ + space: string; + externalId: string; + version: string; + }>; + }; + } + )?.inspectionResults?.involvedViews || []; + + if (views.length > 0) { + const nodeSources = views.slice(0, 10).map((v) => ({ + source: { + type: "view" as const, + space: v.space, + externalId: v.externalId, + version: v.version, + }, + })); + + const propsResp = await client.instances.retrieve({ + items: [nodeRef], + sources: nodeSources, + includeTyping: true, + }); + + if (propsResp.items.length > 0 && propsResp.items[0].properties) { + propertiesMap.set( + key, + propsResp.items[0].properties as Record> + ); + } + } + } catch { + // Silently ignore individual fetch failures + } + }); + + await Promise.all(individualFetches); + } + + newNodes = nodeResponse.items + .filter( + (item): item is typeof item & { instanceType: "node" } => + (item as { instanceType?: string }).instanceType === "node" || !("instanceType" in item) + ) + .map((item) => { + const key = `${item.space}:${item.externalId}`; + const inspectType = nodeTypeMap.get(key); + const mergedProps = + propertiesMap.get(key) || (item.properties as Record>); + const derivedType = + deriveNodeTypeFromProperties(mergedProps, viewPriorityConfig) || inspectType; + + return { + space: item.space, + externalId: item.externalId, + instanceType: "node" as const, + version: item.version, + createdTime: item.createdTime, + lastUpdatedTime: item.lastUpdatedTime, + type: item.type || derivedType, + properties: mergedProps, + }; + }); + } + + return { newNodes, newEdges, connectedNodeIds }; +} + +// 
=============================================================================
+// Graph utility functions (pure, no API calls)
+// =============================================================================
+
+/**
+ * Summarise a graph: total node/connection counts plus a per-type histogram.
+ *
+ * Histogram keys are the `externalId` of each item's type. Nodes without a
+ * type (or with an empty type id) and connections without a type are counted
+ * under "Unknown".
+ *
+ * @param graphData - Graph to summarise; it is only read, never mutated.
+ * @returns Totals plus `{ [typeExternalId]: count }` records for nodes and
+ *   connections.
+ */
+export function getGraphStats(graphData: GraphData) {
+  const nodeTypes = new Map<string, number>();
+  const connectionTypes = new Map<string, number>();
+
+  for (const node of graphData.nodes) {
+    const type = node.data.type?.externalId || "Unknown";
+    nodeTypes.set(type, (nodeTypes.get(type) ?? 0) + 1);
+  }
+
+  for (const connection of graphData.connections) {
+    // `type` is declared as required on edges, but guard it like the node case
+    // so a single edge without a type cannot make the whole computation throw.
+    const type = connection.data.type?.externalId ?? "Unknown";
+    connectionTypes.set(type, (connectionTypes.get(type) ?? 0) + 1);
+  }
+
+  return {
+    totalNodes: graphData.nodes.length,
+    totalConnections: graphData.connections.length,
+    nodeTypes: Object.fromEntries(nodeTypes),
+    connectionTypes: Object.fromEntries(connectionTypes),
+  };
+}
+
+/**
+ * Return every node that shares at least one connection with `nodeId`,
+ * regardless of the connection's direction.
+ *
+ * @param graphData - Graph to search.
+ * @param nodeId - Id of the node whose neighbours are wanted.
+ * @returns Neighbouring nodes in the order they appear in `graphData.nodes`;
+ *   ids referenced by a connection but absent from `graphData.nodes` are dropped.
+ */
+export function getConnectedNodes(graphData: GraphData, nodeId: string) {
+  const connectedNodeIds = new Set<string>();
+
+  for (const connection of graphData.connections) {
+    if (connection.source === nodeId) {
+      connectedNodeIds.add(connection.target);
+    }
+    // Not an `else`: a self-loop matches both ends.
+    if (connection.target === nodeId) {
+      connectedNodeIds.add(connection.source);
+    }
+  }
+
+  return graphData.nodes.filter((node) => connectedNodeIds.has(node.id));
+}
+
+/**
+ * Return every connection that starts or ends at `nodeId`.
+ *
+ * @param graphData - Graph to search.
+ * @param nodeId - Id of the node whose incident connections are wanted.
+ */
+export function getNodeEdges(graphData: GraphData, nodeId: string) {
+  return graphData.connections.filter(
+    (connection) => connection.source === nodeId || connection.target === nodeId
+  );
+}
diff --git a/.agents/skills/graph-viewer/code/index.ts b/.agents/skills/graph-viewer/code/index.ts new file mode 100644 index 0000000..ce3497b --- /dev/null +++ b/.agents/skills/graph-viewer/code/index.ts @@ -0,0 +1,21 @@ +export { useGraphViewer } from "./useGraphViewer"; +export type { + UseGraphViewerConfig, + UseGraphViewerOptions, + UseGraphViewerReturn, + LiteFeatureFlags, + GraphStats, + LayoutType, + GraphData, + GraphNode, + GraphEdge, + NodeTypeInfo, + CDFNode, + CDFEdge, + DataModelInfo, + 
ReverseRelationQuery, + ViewReference, + ViewPriorityConfig, + GraphThemeConfig, + GraphVisualConfig, +} from "./types"; diff --git a/.agents/skills/graph-viewer/code/types.ts b/.agents/skills/graph-viewer/code/types.ts new file mode 100644 index 0000000..96a9a8d --- /dev/null +++ b/.agents/skills/graph-viewer/code/types.ts @@ -0,0 +1,431 @@ +import type { GraphCanvasRef } from "reagraph"; + +// ============================================================================= +// Layout +// ============================================================================= + +export type LayoutType = + | "forceDirected2d" + | "forceDirected3d" + | "treeTd2d" + | "treeLr2d" + | "radialOut2d" + | "circular2d"; + +export interface LayoutOption { + id: LayoutType; + label: string; +} + +export const LAYOUT_OPTIONS: LayoutOption[] = [ + { id: "forceDirected2d", label: "Force 2D" }, + { id: "forceDirected3d", label: "Force 3D" }, + { id: "treeTd2d", label: "Tree (Top-Down)" }, + { id: "treeLr2d", label: "Tree (Left-Right)" }, + { id: "radialOut2d", label: "Radial" }, + { id: "circular2d", label: "Circular" }, +]; + +// ============================================================================= +// CDF Data Model Types +// ============================================================================= + +export interface ViewReference { + space: string; + externalId: string; + version?: string; +} + +export interface DataModelInfo { + space: string; + externalId: string; + name?: string; + description?: string; + version?: string; + views: ViewReference[]; +} + +/** + * Tuple describing a reverse-relation query to run on node expansion. + * + * `[space, viewExternalId, viewVersion, propertyName, isList]` + * + * - `space` - space of the view that defines the relation. + * - `viewExternalId` - external id of the view that defines the relation. + * - `viewVersion` - version of the view (e.g. `"v1"`, `"1"`). Required so the + * lookup is not pinned to any specific version. 
+ * - `propertyName` - direct relation property on the view that points back to + * the node being expanded. + * - `isList` - `true` when the relation is `list`, otherwise `false`. + */ +export type ReverseRelationQuery = [ + space: string, + viewExternalId: string, + viewVersion: string, + propertyName: string, + isList: boolean, +]; + +export interface ViewPriorityConfig { + viewTypePriority?: string[]; + priorityViewNames?: string[]; + skipViewsForType?: string[]; + skipViewsForProperties?: string[]; +} + +// ============================================================================= +// CDF Instance Types +// ============================================================================= + +export interface CDFNode { + instanceType: "node"; + space: string; + externalId: string; + version?: number; + createdTime?: number; + lastUpdatedTime?: number; + type?: { space: string; externalId: string }; + properties?: Record; +} + +export interface CDFEdge { + instanceType: "edge"; + space: string; + externalId: string; + version?: number; + createdTime?: number; + lastUpdatedTime?: number; + type: { space: string; externalId: string }; + startNode: { space: string; externalId: string }; + endNode: { space: string; externalId: string }; + properties?: Record; +} + +// ============================================================================= +// Graph Data Types +// ============================================================================= + +export interface GraphNode { + id: string; + label: string; + fill?: string; + data: CDFNode; +} + +export interface GraphEdge { + id: string; + source: string; + target: string; + label?: string; + fill?: string; + size?: number; + data: CDFEdge; +} + +export interface NodeTypeInfo { + externalId: string; + space: string; + color: string; + count: number; +} + +export interface GraphData { + nodes: GraphNode[]; + connections: GraphEdge[]; + nodeTypes: NodeTypeInfo[]; +} + +export interface GraphSelection { + type: "node" | 
"edge" | null; + id: string | null; + node?: GraphNode; + edge?: GraphEdge; +} + +// ============================================================================= +// Visual / Theme Configuration +// ============================================================================= + +export interface GraphThemeConfig { + canvas: { + background: string; + }; + node: { + opacity: number; + selectedOpacity: number; + inactiveOpacity: number; + label: { + color: string; + stroke: string; + strokeWidth: number; + activeColor: string; + fontSize: number; + }; + }; + edge: { + fill: string; + activeFill: string; + opacity: number; + selectedOpacity: number; + inactiveOpacity: number; + label: { + color: string; + stroke: string; + strokeWidth: number; + activeColor: string; + fontSize: number; + }; + }; + ring: { + fill: string; + activeFill: string; + }; + arrow: { + fill: string; + activeFill: string; + }; + cluster: { + stroke: string; + fill: string; + label: { + color: string; + }; + }; + lasso: { + border: string; + background: string; + }; +} + +export interface GraphVisualConfig { + pathHighlightColor: string; + pathHighlightSize: number; + iconSize: number; + defaultNodeColor: string; + nodeTypePalette: string[]; +} + +// ============================================================================= +// Node Color Palette & Icons +// ============================================================================= + +export const NODE_TYPE_PALETTE = [ + "#3b82f6", + "#22c55e", + "#ef4444", + "#a855f7", + "#f59e0b", + "#06b6d4", + "#e11d48", + "#0ea5e9", + "#8b5cf6", + "#f97316", + "#14b8a6", + "#f43f5e", +]; + +export const DEFAULT_NODE_COLOR = "#94a3b8"; + +export const NODE_TYPE_ICONS: Record = { + Connector: "Plug", + Wire: "Minus", + Cable: "Cable", + Cavity: "CircleDot", + Shunt: "Zap", + ShuntCollection: "LayoutGrid", + GroundReference: "ArrowDownToLine", + HardwareOccurence: "Cpu", + TextElement: "Type", + WireExtermity: "GitCommitHorizontal", + CogniteFile: 
"FileText", + CogniteFileCategory: "FolderOpen", + CogniteTimeSeries: "Activity", + CogniteDatapoint: "TrendingUp", + CogniteAsset: "Box", + CogniteEquipment: "Wrench", + CogniteEquipmentType: "Settings", + CogniteAssetClass: "Layers", + CogniteAssetType: "Tag", + ISA95Asset: "Factory", + Enterprise: "Building2", + Site: "Building", + Area: "MapPin", + ProcessCell: "Grid3X3", + ProcessArea: "LayoutGrid", + ProductionLine: "ArrowRightLeft", + ProductionUnit: "Cpu", + Equipment: "Cog", + EquipmentModule: "CircuitBoard", + WorkCell: "Workflow", + WorkCenter: "Server", + WorkUnit: "Puzzle", + WorkOrder: "ClipboardList", + CogniteActivity: "Calendar", + MaintenanceOrder: "Hammer", + FaultCode: "AlertTriangle", + QualityAlert: "ShieldAlert", + Product: "Package", + ProductComponent: "Component", + ProductNode: "Boxes", + Batch: "Beaker", + Cognite3DModel: "Box", + CogniteCADModel: "Box", + CogniteCADNode: "Shapes", + Cognite360Image: "Image", + CognitePointCloudModel: "Scan", + CogniteAnnotation: "MessageSquare", + CogniteDiagramAnnotation: "StickyNote", + CogniteSourceSystem: "Database", + default: "Circle", +};
+
+/**
+ * Pick the icon name for a node type.
+ *
+ * Resolution order:
+ *   1. exact (case-sensitive) key in `NODE_TYPE_ICONS`;
+ *   2. the first matching keyword rule below (case-insensitive substring
+ *      match; the order of the rules is significant);
+ *   3. `NODE_TYPE_ICONS.default`.
+ *
+ * @param typeExternalId - External id of the node's type; may be undefined.
+ * @returns An icon name string.
+ */
+export function getIconForType(typeExternalId: string | undefined): string {
+  if (!typeExternalId) return NODE_TYPE_ICONS.default;
+
+  // Own-property check: a bare index lookup on a plain object also resolves
+  // inherited Object.prototype members (e.g. "constructor", "toString") and
+  // would return a function instead of an icon name.
+  const exactMatch = Object.prototype.hasOwnProperty.call(NODE_TYPE_ICONS, typeExternalId)
+    ? NODE_TYPE_ICONS[typeExternalId]
+    : undefined;
+  if (exactMatch) {
+    return exactMatch;
+  }
+
+  const lower = typeExternalId.toLowerCase();
+
+  if (lower.includes("connector") || lower.includes("plug")) return "Plug";
+  if (lower.includes("wire")) return "Minus";
+  if (lower.includes("cable")) return "Cable";
+  if (lower.includes("cavity")) return "CircleDot";
+  // Must precede the plain "shunt" rule, which would otherwise shadow it.
+  if (lower.includes("shunt") && lower.includes("collection")) return "LayoutGrid";
+  if (lower.includes("shunt")) return "Zap";
+  if (lower.includes("ground")) return "ArrowDownToLine";
+  if (lower.includes("hardware")) return "Cpu";
+  if (lower.includes("file") || lower.includes("document")) return "FileText";
+  if (lower.includes("timeseries") || lower.includes("series")) return "Activity";
+  if (lower.includes("asset")) return "Box";
+  if (lower.includes("equipment")) return "Wrench";
+  if (lower.includes("work") || lower.includes("order") || lower.includes("maintenance"))
+    return "ClipboardList";
+  if (lower.includes("product")) return "Package";
+  if (lower.includes("area") || lower.includes("location")) return "MapPin";
+  if (lower.includes("site") || lower.includes("building")) return "Building";
+  if (lower.includes("3d") || lower.includes("model") || lower.includes("cad")) return "Box";
+  if (lower.includes("image") || lower.includes("photo")) return "Image";
+  if (lower.includes("batch")) return "Beaker";
+  if (lower.includes("alert") || lower.includes("fault")) return "AlertTriangle";
+
+  return NODE_TYPE_ICONS.default;
+}
+
+// =============================================================================
+// Instance ID Helpers
+// =============================================================================
+
+/**
+ * Build the `"<space>:<externalId>"` id string for an instance.
+ */
+export function createInstanceId(space: string, externalId: string) {
+  return `${space}:${externalId}`;
+}
+
+/**
+ * Inverse of `createInstanceId`: split an id at its FIRST colon.
+ *
+ * Everything after the first colon is the external id, so external ids that
+ * themselves contain colons survive a round trip. An id without any colon
+ * yields `externalId === ""`.
+ */
+export function parseInstanceId(id: string) {
+  const [space, ...rest] = id.split(":");
+  return { space, externalId: rest.join(":") };
+}
+
+// =============================================================================
+// Node Label
+// =============================================================================
+
+/**
+ * Depth-first search for the first non-blank string `name` property.
+ *
+ * The object's own `name` is checked first; otherwise every object-valued
+ * property (arrays included) is searched recursively in `Object.values` order.
+ *
+ * @returns The name as stored (not trimmed), or `undefined` when none is found.
+ */
+function findNameInObject(obj: Record<string, unknown> | unknown): string | undefined {
+  if (!obj || typeof obj !== "object") return undefined;
+  const objRecord = obj as Record<string, unknown>;
+  if (typeof objRecord.name === "string" && objRecord.name.trim().length > 0) return objRecord.name;
+  for (const value of Object.values(objRecord)) {
+    if (value && typeof value === "object") {
+      const nested = findNameInObject(value);
+      if (nested) return nested;
+    }
+  }
+  return undefined;
+}
+
+export function getNodeLabel(node: CDFNode): string { + if (node.properties && typeof node.properties === "object") { + for (const 
viewObj of Object.values(node.properties)) { + if (viewObj && typeof viewObj === "object") { + const maybe = findNameInObject(viewObj); + if (maybe) return maybe; + } + } + } + if (node.type?.externalId) { + return node.type.externalId; + } + return node.externalId; +} + +// ============================================================================= +// Lite Hook API Types +// ============================================================================= + +/** + * CDF-friendly input configuration for the `useGraphViewer` hook. + */ +export interface UseGraphViewerConfig { + dataModel: { + space: string; + externalId: string; + version: string; + }; + instance?: { + space: string; + externalId: string; + }; + options?: UseGraphViewerOptions; +} + +export interface UseGraphViewerOptions { + maxNodes?: number; + layout?: LayoutType; + whitelistedRelationProps?: string[]; + coreReverseQueries?: ReverseRelationQuery[]; + viewPriorityConfig?: ViewPriorityConfig; + initialConnectionLimit?: number; + visualConfig?: Partial; + themeConfig?: Partial; + features?: Partial; +} + +export interface LiteFeatureFlags { + enableLegend: boolean; + enableZoomControls: boolean; + enableNodeExpansion: boolean; +} + +export const DEFAULT_LITE_FEATURES: LiteFeatureFlags = { + enableLegend: true, + enableZoomControls: true, + enableNodeExpansion: true, +}; + +export interface GraphStats { + totalNodes: number; + totalConnections: number; + nodeTypes: Record; + connectionTypes: Record; +} + +export interface UseGraphViewerReturn { + GraphCanvas: React.FC<{ className?: string }>; + isLoading: boolean; + error: string | null; + graphData: GraphData; + stats: GraphStats | null; + layout: LayoutType; + setLayout: (layout: LayoutType) => void; + selections: string[]; + setSelections: (ids: string[]) => void; + selectedNode: GraphNode | null; + selectedEdge: GraphEdge | null; + expandNode: (nodeId: string) => Promise; + loadInstance: (space: string, externalId: string) => Promise; + fitView: 
() => void; + zoomIn: () => void; + zoomOut: () => void; + clear: () => void; + graphRef: React.RefObject; +}
diff --git a/.agents/skills/graph-viewer/code/useCanvasResize.ts b/.agents/skills/graph-viewer/code/useCanvasResize.ts new file mode 100644 index 0000000..41cb82a --- /dev/null +++ b/.agents/skills/graph-viewer/code/useCanvasResize.ts @@ -0,0 +1,30 @@
+import { type RefObject, useEffect } from "react";
+import type { GraphCanvasRef } from "reagraph";
+
+/**
+ * Dispatch a synthetic window "resize" event whenever the container element
+ * changes size, and once ~100 ms after the effect runs.
+ *
+ * Nothing is dispatched while `graphRef.current` is empty.
+ * NOTE(review): presumably the graph canvas re-measures itself on window
+ * "resize" - confirm against the canvas implementation.
+ *
+ * @param containerRef - Ref to the element observed with a ResizeObserver.
+ * @param graphRef - Ref to the graph canvas; only checked for being set.
+ */
+export function useCanvasResize(
+  containerRef: RefObject<HTMLElement | null>,
+  graphRef: RefObject<GraphCanvasRef | null>
+) {
+  useEffect(() => {
+    const container = containerRef.current;
+    if (!container) return;
+
+    // At most one animation frame is pending at a time; its id is kept so the
+    // cleanup can cancel it and no event is dispatched after teardown.
+    let pendingFrame: number | null = null;
+
+    const triggerResize = () => {
+      if (!graphRef.current || pendingFrame !== null) return;
+      pendingFrame = requestAnimationFrame(() => {
+        pendingFrame = null;
+        window.dispatchEvent(new Event("resize"));
+      });
+    };
+
+    const resizeObserver = new ResizeObserver(triggerResize);
+    resizeObserver.observe(container);
+
+    // One delayed trigger in addition to the observer notifications.
+    const timeoutId = setTimeout(triggerResize, 100);
+
+    return () => {
+      resizeObserver.disconnect();
+      clearTimeout(timeoutId);
+      if (pendingFrame !== null) {
+        cancelAnimationFrame(pendingFrame);
+        pendingFrame = null;
+      }
+    };
+  }, [containerRef, graphRef]);
+}
diff --git a/.agents/skills/graph-viewer/code/useDataModelLoader.ts b/.agents/skills/graph-viewer/code/useDataModelLoader.ts new file mode 100644 index 0000000..f0be000 --- /dev/null +++ b/.agents/skills/graph-viewer/code/useDataModelLoader.ts @@ -0,0 +1,90 @@ +import { useDune } from "@cognite/dune"; +import { useEffect, useState } from "react"; +import type { DataModelInfo, ViewReference } from "./types"; + +interface UseDataModelLoaderConfig { + space: string; + externalId: string; + version: string; +} + +interface UseDataModelLoaderReturn { + dataModel: DataModelInfo | null; + isLoading: boolean; + error: string | null; +} + +export function useDataModelLoader( + config: UseDataModelLoaderConfig +): UseDataModelLoaderReturn { + const { sdk, isLoading: isAuthLoading } = useDune(); + + const [dataModel, setDataModel] = useState(null); + const [isLoading, setIsLoading] = useState(true); + const [error, setError] = 
useState(null); + + useEffect(() => { + if (!sdk || isAuthLoading) return; + + let cancelled = false; + + async function load() { + try { + setIsLoading(true); + setError(null); + + const response = await sdk.dataModels.retrieve([ + { + space: config.space, + externalId: config.externalId, + version: config.version, + }, + ]); + + if (cancelled) return; + + if (response.items.length === 0) { + throw new Error( + `Data model not found: ${config.space}/${config.externalId} v${config.version}` + ); + } + + const model = response.items[0]; + const views: ViewReference[] = (model.views || []).map( + (v: { space: string; externalId: string; version: string }) => ({ + space: v.space, + externalId: v.externalId, + version: v.version, + }) + ); + + setDataModel({ + space: model.space, + externalId: model.externalId, + name: model.name, + description: model.description, + version: model.version, + views, + }); + } catch (err) { + if (!cancelled) { + setError( + err instanceof Error ? err.message : "Failed to load data model" + ); + } + } finally { + if (!cancelled) { + setIsLoading(false); + } + } + } + + load(); + + return () => { + cancelled = true; + }; + }, [sdk, isAuthLoading, config.space, config.externalId, config.version]); + + return { dataModel, isLoading, error }; +}
diff --git a/.agents/skills/graph-viewer/code/useGraphDataPipeline.ts b/.agents/skills/graph-viewer/code/useGraphDataPipeline.ts new file mode 100644 index 0000000..f1c2c39 --- /dev/null +++ b/.agents/skills/graph-viewer/code/useGraphDataPipeline.ts @@ -0,0 +1,101 @@
+import { useMemo } from "react";
+import type {
+  CDFEdge,
+  CDFNode,
+  GraphData,
+  GraphEdge,
+  GraphNode,
+  GraphVisualConfig,
+  NodeTypeInfo,
+} from "./types";
+import { createInstanceId, getNodeLabel } from "./types";
+import { getGraphStats } from "./graph-service";
+import { transformEdges, transformNodes } from "./graph-config";
+
+interface UseGraphDataPipelineParams {
+  bufferNodes: CDFNode[];
+  bufferConnections: CDFEdge[];
+  visualConfig: GraphVisualConfig;
+}
+
+/**
+ * Turn buffered CDF nodes/edges into everything the canvas needs: the
+ * `GraphData` model, the transformed node/edge arrays and summary stats.
+ * Each stage is memoised on its own inputs.
+ *
+ * Colours are handed out per distinct node type (`space:externalId`) in
+ * first-seen order, cycling through `visualConfig.nodeTypePalette`; untyped
+ * nodes get `visualConfig.defaultNodeColor`.
+ */
+export function useGraphDataPipeline({
+  bufferNodes,
+  bufferConnections,
+  visualConfig,
+}: UseGraphDataPipelineParams) {
+  const graphData: GraphData = useMemo(() => {
+    const { nodeTypePalette, defaultNodeColor } = visualConfig;
+
+    // Insertion-ordered registry of the node types seen so far. Its size at
+    // the moment a type is first seen doubles as that type's palette index.
+    const typeRegistry = new Map<string, NodeTypeInfo>();
+
+    // Registers/counts the node's type as a side effect and returns its colour.
+    const fillFor = (node: CDFNode): string => {
+      const nodeType = node.type;
+      if (!nodeType) return defaultNodeColor;
+
+      const registryKey = `${nodeType.space}:${nodeType.externalId}`;
+      const known = typeRegistry.get(registryKey);
+      if (known) {
+        known.count += 1;
+        return known.color;
+      }
+
+      // An empty palette yields `undefined` here, hence the fallback.
+      const color =
+        nodeTypePalette[typeRegistry.size % nodeTypePalette.length] || defaultNodeColor;
+      typeRegistry.set(registryKey, {
+        externalId: nodeType.externalId,
+        space: nodeType.space,
+        color,
+        count: 1,
+      });
+      return color;
+    };
+
+    const nodes: GraphNode[] = bufferNodes.map((node) => {
+      const fill = fillFor(node);
+      return {
+        id: createInstanceId(node.space, node.externalId),
+        label: getNodeLabel(node),
+        fill,
+        data: node,
+      };
+    });
+
+    const connections: GraphEdge[] = bufferConnections.map((edge) => {
+      const { startNode, endNode } = edge;
+      return {
+        id: createInstanceId(edge.space, edge.externalId),
+        source: createInstanceId(startNode.space, startNode.externalId),
+        target: createInstanceId(endNode.space, endNode.externalId),
+        label: edge.type?.externalId || "",
+        data: edge,
+      };
+    });
+
+    return { nodes, connections, nodeTypes: [...typeRegistry.values()] };
+  }, [bufferNodes, bufferConnections, visualConfig]);
+
+  const reagraphNodes = useMemo(
+    () => transformNodes(graphData.nodes, visualConfig),
+    [graphData.nodes, visualConfig]
+  );
+
+  // Stable empty set so the edge memo below is not invalidated on every render.
+  const emptyHighlights = useMemo(() => new Set<string>(), []);
+
+  const reagraphEdges = useMemo(
+    () => transformEdges(graphData.connections, emptyHighlights, visualConfig),
+    [graphData.connections, emptyHighlights, visualConfig]
+  );
+
+  const displayedStats = useMemo(() => getGraphStats(graphData), [graphData]);
+
+  return {
+    graphData,
+    displayedGraphData: graphData,
+    reagraphNodes,
+    reagraphEdges,
+    displayedStats,
+  };
+}
diff --git a/.agents/skills/graph-viewer/code/useGraphSelection.ts b/.agents/skills/graph-viewer/code/useGraphSelection.ts new file mode 100644 index 0000000..e34068d --- /dev/null +++ b/.agents/skills/graph-viewer/code/useGraphSelection.ts @@ -0,0 +1,98 @@
+import { useCallback, useMemo, useState } from "react";
+import type { GraphEdge, GraphNode, GraphSelection } from "./types";
+
+export interface UseGraphSelectionReturn {
+  selection: GraphSelection;
+  selectedNode: GraphNode | null;
+  selectedEdge: GraphEdge | null;
+  selectNode: (node: GraphNode | null) => void;
+  selectEdge: (edge: GraphEdge | null) => void;
+  clearSelection: () => void;
+  isNodeSelected: (nodeId: string) => boolean;
+  isEdgeSelected: (edgeId: string) => boolean;
+}
+
+// Returns a fresh "nothing selected" value on every call, so each reset
+// stores a new object in state.
+function emptySelection(): GraphSelection {
+  return { type: null, id: null, node: undefined, edge: undefined };
+}
+
+/**
+ * Single-selection state for the graph: at most one node OR one edge is
+ * selected at a time. Passing `null` to `selectNode` / `selectEdge` clears
+ * the selection, exactly like `clearSelection`.
+ */
+export function useGraphSelection(): UseGraphSelectionReturn {
+  const [selection, setSelection] = useState<GraphSelection>(emptySelection);
+
+  const selectNode = useCallback((node: GraphNode | null) => {
+    setSelection(
+      node ? { type: "node", id: node.id, node, edge: undefined } : emptySelection()
+    );
+  }, []);
+
+  const selectEdge = useCallback((edge: GraphEdge | null) => {
+    setSelection(
+      edge ? { type: "edge", id: edge.id, node: undefined, edge } : emptySelection()
+    );
+  }, []);
+
+  const clearSelection = useCallback(() => {
+    setSelection(emptySelection());
+  }, []);
+
+  const isNodeSelected = useCallback(
+    (nodeId: string) => selection.type === "node" && selection.id === nodeId,
+    [selection]
+  );
+
+  const isEdgeSelected = useCallback(
+    (edgeId: string) => selection.type === "edge" && selection.id === edgeId,
+    [selection]
+  );
+
+  const selectedNode = useMemo(() => {
+    if (selection.type !== "node") return null;
+    return selection.node ?? null;
+  }, [selection]);
+
+  const selectedEdge = useMemo(() => {
+    if (selection.type !== "edge") return null;
+    return selection.edge ?? null;
+  }, [selection]);
+
+  return {
+    selection,
+    selectedNode,
+    selectedEdge,
+    selectNode,
+    selectEdge,
+    clearSelection,
+    isNodeSelected,
+    isEdgeSelected,
+  };
+}
diff --git a/.agents/skills/graph-viewer/code/useGraphViewer.tsx b/.agents/skills/graph-viewer/code/useGraphViewer.tsx new file mode 100644 index 0000000..e0d516a --- /dev/null +++ b/.agents/skills/graph-viewer/code/useGraphViewer.tsx @@ -0,0 +1,288 @@ +import { useDune } from "@cognite/dune"; +import { useCallback, useMemo, useRef, useState } from "react"; +import type { GraphCanvasRef } from "reagraph"; + +import { useGraphSelection } from "./useGraphSelection"; +import { useNodeBuffer } from "./useNodeBuffer"; +import { useGraphDataPipeline } from "./useGraphDataPipeline"; +import { buildReagraphTheme, mergeVisualConfig } from "./graph-config"; +import { fetchConnectedNodes } from "./graph-service"; +import type { GraphNode, GraphEdge, LayoutType } from "./types"; +import { createInstanceId, parseInstanceId } from "./types"; + +import { useDataModelLoader } from "./useDataModelLoader"; +import { useSeedNode } from "./useSeedNode"; +import { GraphViewerCanvas } from "./GraphViewerCanvas"; +import { + DEFAULT_LITE_FEATURES, + type LiteFeatureFlags, + type UseGraphViewerConfig, + type UseGraphViewerReturn, +} from "./types"; + +/** + * `useGraphViewer` -- the single entry point for embedding a CDF graph viewer. + * + * Returns a self-contained `` component plus state and controls. 
+ * + * @example + * ```tsx + * const { GraphCanvas, isLoading, error } = useGraphViewer({ + * dataModel: { space: "my-space", externalId: "my-dm", version: "1" }, + * instance: { space: "my-inst-space", externalId: "pump-001" }, + * }); + * + * return ; + * ``` + */ +export function useGraphViewer(config: UseGraphViewerConfig): UseGraphViewerReturn { + const { sdk } = useDune(); + + const opts = config.options ?? {}; + const maxNodes = opts.maxNodes ?? 1000; + const initialConnectionLimit = opts.initialConnectionLimit ?? 100; + const features: LiteFeatureFlags = { ...DEFAULT_LITE_FEATURES, ...opts.features }; + + const whitelistedRelationProps = useMemo( + () => + opts.whitelistedRelationProps + ? new Set(opts.whitelistedRelationProps) + : undefined, + [JSON.stringify(opts.whitelistedRelationProps)] + ); + + const visualConfig = useMemo( + () => mergeVisualConfig(opts.visualConfig), + [opts.visualConfig] + ); + + const themeConfig = useMemo( + () => buildReagraphTheme(opts.themeConfig), + [opts.themeConfig] + ); + + const [layout, setLayout] = useState(opts.layout ?? 
"forceDirected2d"); + + const [selections, setSelections] = useState([]); + const [selectedNodeType, setSelectedNodeType] = useState(null); + const { + selectedNode, + selectedEdge, + selectNode, + selectEdge, + clearSelection, + } = useGraphSelection(); + + const { + nodes: bufferNodes, + edges: bufferEdges, + addNodes, + addEdges, + touchNode, + clear: clearBuffer, + } = useNodeBuffer(maxNodes); + + const { + dataModel, + isLoading: isDataModelLoading, + error: dataModelError, + } = useDataModelLoader(config.dataModel); + + const { + isLoading: isSeedLoading, + error: seedError, + loadInstance, + } = useSeedNode({ + dataModel, + initialInstance: config.instance, + addNodes, + addEdges, + clearBuffer, + whitelistedRelationProps, + coreReverseQueries: opts.coreReverseQueries, + viewPriorityConfig: opts.viewPriorityConfig, + initialConnectionLimit, + }); + + const graphRef = useRef(null); + + const { + graphData, + displayedGraphData, + reagraphNodes, + reagraphEdges, + displayedStats, + } = useGraphDataPipeline({ + bufferNodes, + bufferConnections: bufferEdges, + visualConfig, + }); + + const [isExpanding, setIsExpanding] = useState(false); + + const expandNode = useCallback( + async (nodeId: string) => { + if (!sdk || !dataModel) return; + + try { + setIsExpanding(true); + const { space, externalId } = parseInstanceId(nodeId); + const existingIds = new Set( + bufferNodes.map((n) => createInstanceId(n.space, n.externalId)) + ); + + const result = await fetchConnectedNodes( + sdk, + space, + externalId, + existingIds, + dataModel, + initialConnectionLimit, + whitelistedRelationProps, + opts.coreReverseQueries, + opts.viewPriorityConfig + ); + + addNodes(result.newNodes); + addEdges(result.newEdges); + touchNode(nodeId); + } finally { + setIsExpanding(false); + } + }, + [ + sdk, + dataModel, + bufferNodes, + addNodes, + addEdges, + touchNode, + initialConnectionLimit, + whitelistedRelationProps, + opts.coreReverseQueries, + opts.viewPriorityConfig, + ] + ); + + const 
handleNodeClick = useCallback( + (node: GraphNode) => { + selectNode(node); + setSelections([node.id]); + setSelectedNodeType(null); + touchNode(node.id); + }, + [selectNode, setSelections, touchNode] + ); + + const handleEdgeClick = useCallback( + (edge: GraphEdge) => { + selectEdge(edge); + setSelections([edge.id]); + setSelectedNodeType(null); + }, + [selectEdge, setSelections] + ); + + const handleCanvasClick = useCallback(() => { + clearSelection(); + setSelections([]); + setSelectedNodeType(null); + }, [clearSelection]); + + const handleNodeTypeClick = useCallback( + (typeKey: string) => { + if (selectedNodeType === typeKey) { + setSelectedNodeType(null); + setSelections([]); + clearSelection(); + } else { + const nodeIds = displayedGraphData.nodes + .filter((n) => { + const key = n.data?.type + ? `${n.data.type.space}:${n.data.type.externalId}` + : "unknown"; + return key === typeKey; + }) + .map((n) => n.id); + setSelectedNodeType(typeKey); + setSelections(nodeIds); + clearSelection(); + } + }, + [selectedNodeType, displayedGraphData.nodes, clearSelection] + ); + + const handleClearNodeTypeSelection = useCallback(() => { + setSelectedNodeType(null); + setSelections([]); + }, []); + + const GraphCanvasComponent = useMemo(() => { + const Component: React.FC<{ className?: string }> = ({ className }) => ( + + ); + Component.displayName = "GraphViewerCanvas"; + return Component; + }, [ + reagraphNodes, + reagraphEdges, + displayedGraphData, + layout, + themeConfig, + selections, + selectedNode, + selectedEdge, + features, + selectedNodeType, + graphRef, + handleNodeClick, + handleEdgeClick, + handleCanvasClick, + expandNode, + handleNodeTypeClick, + handleClearNodeTypeSelection, + ]); + + const isLoading = isDataModelLoading || isSeedLoading || isExpanding; + const error = dataModelError || seedError; + + return { + GraphCanvas: GraphCanvasComponent, + isLoading, + error, + graphData, + stats: displayedStats, + layout, + setLayout, + selections, + setSelections, 
import { useCallback, useEffect, useMemo, useState } from "react";
import { type CDFEdge, type CDFNode, createInstanceId } from "./types";

/** A node held in the buffer plus the time (ms since epoch) it was last added or touched. */
type BufferedNode = {
  node: CDFNode;
  lastAccessed: number;
};

/** Buffer contents: nodes keyed by instance id, and the edges between them. */
type BufferState = {
  nodes: Map<string, BufferedNode>;
  connections: CDFEdge[];
};

/**
 * Keeps only the connections whose start AND end node are both present in `nodes`.
 * Always returns a new array; the inputs are not modified.
 */
function pruneConnections(
  connections: CDFEdge[],
  nodes: Map<string, BufferedNode>
): CDFEdge[] {
  return connections.filter((connection) => {
    const startId = createInstanceId(connection.startNode.space, connection.startNode.externalId);
    const endId = createInstanceId(connection.endNode.space, connection.endNode.externalId);
    return nodes.has(startId) && nodes.has(endId);
  });
}

/**
 * Drops the least-recently-accessed nodes until at most `maxSize` remain, then
 * prunes connections that lost an endpoint.
 *
 * The incoming state is never mutated: when eviction is needed a copy of the
 * node map is modified and returned, so React sees a new `nodes` reference and
 * memoized consumers refresh. Surviving nodes keep their insertion order.
 */
function evictIfNeeded(state: BufferState, maxSize: number): BufferState {
  const limit = Math.max(0, maxSize);

  if (state.nodes.size <= limit) {
    return {
      nodes: state.nodes,
      connections: pruneConnections(state.connections, state.nodes),
    };
  }

  // Oldest first, so the head of the list is what gets evicted.
  const oldestFirst = Array.from(state.nodes.entries()).sort(
    (a, b) => a[1].lastAccessed - b[1].lastAccessed
  );
  const toRemove = oldestFirst.length - limit;

  const nextNodes = new Map(state.nodes);
  for (const [key] of oldestFirst.slice(0, toRemove)) {
    nextNodes.delete(key);
  }

  return {
    nodes: nextNodes,
    connections: pruneConnections(state.connections, nextNodes),
  };
}

/**
 * Size-bounded buffer of graph nodes and edges with least-recently-accessed eviction.
 *
 * All returned callbacks have stable identities (they change only when
 * `maxSize` changes), so they are safe to list in dependency arrays.
 *
 * @param initialMaxSize initial node capacity; change it later via `setMaxSize`.
 * @returns the buffered nodes/edges and the functions that modify the buffer.
 */
export function useNodeBuffer(initialMaxSize = 1000) {
  const [maxSize, setMaxSize] = useState(initialMaxSize);
  const [state, setState] = useState<BufferState>({
    nodes: new Map(),
    connections: [],
  });

  // Re-apply the capacity limit whenever it changes.
  useEffect(() => {
    setState((prev) => evictIfNeeded(prev, maxSize));
  }, [maxSize]);

  /** Inserts or replaces nodes, stamping them as accessed now, then evicts if over capacity. */
  const addNodes = useCallback(
    (nodes: CDFNode[]) => {
      const now = Date.now();
      setState((prev) => {
        const nextNodes = new Map(prev.nodes);
        nodes.forEach((node) => {
          const key = createInstanceId(node.space, node.externalId);
          nextNodes.set(key, { node, lastAccessed: now });
        });
        return evictIfNeeded({ nodes: nextNodes, connections: prev.connections }, maxSize);
      });
    },
    [maxSize]
  );

  /** Appends edges not already buffered; edges with a missing endpoint are dropped. */
  const addEdges = useCallback((connections: CDFEdge[]) => {
    setState((prev) => {
      const seenIds = new Set(
        prev.connections.map((c) => createInstanceId(c.space, c.externalId))
      );
      const merged = [...prev.connections];
      connections.forEach((connection) => {
        const id = createInstanceId(connection.space, connection.externalId);
        if (!seenIds.has(id)) {
          // Record the id so a duplicate later in the same batch is skipped too.
          seenIds.add(id);
          merged.push(connection);
        }
      });
      return {
        nodes: prev.nodes,
        connections: pruneConnections(merged, prev.nodes),
      };
    });
  }, []);

  /** Marks a node as recently used so it is evicted later. Unknown ids are ignored. */
  const touchNode = useCallback((nodeId: string) => {
    setState((prev) => {
      const buffered = prev.nodes.get(nodeId);
      if (!buffered) return prev; // nothing to update; avoid a needless re-render
      const nextNodes = new Map(prev.nodes);
      nextNodes.set(nodeId, { ...buffered, lastAccessed: Date.now() });
      return { nodes: nextNodes, connections: prev.connections };
    });
  }, []);

  /** Empties the buffer. */
  const clear = useCallback(() => {
    setState({
      nodes: new Map(),
      connections: [],
    });
  }, []);

  /** Replaces the whole buffer with the given nodes and edges, applying the capacity limit. */
  const setBuffer = useCallback(
    (nodes: CDFNode[], connections: CDFEdge[]) => {
      const now = Date.now();
      const nodesMap = new Map<string, BufferedNode>();
      nodes.forEach((node) => {
        const key = createInstanceId(node.space, node.externalId);
        nodesMap.set(key, { node, lastAccessed: now });
      });
      const pruned = pruneConnections(connections, nodesMap);
      setState(evictIfNeeded({ nodes: nodesMap, connections: pruned }, maxSize));
    },
    [maxSize]
  );

  const bufferedNodes = useMemo(
    () => Array.from(state.nodes.values()).map((entry) => entry.node),
    [state.nodes]
  );

  return {
    nodes: bufferedNodes,
    edges: state.connections,
    addNodes,
    addEdges,
    touchNode,
    clear,
    setBuffer,
    maxSize,
    setMaxSize,
  };
}
seedId = createInstanceId(space, externalId); + const existingIds = new Set([seedId]); + + const result = await fetchConnectedNodes( + sdk, + space, + externalId, + existingIds, + dataModel, + initialConnectionLimit, + whitelistedRelationProps, + coreReverseQueries, + viewPriorityConfig + ); + + addNodes(result.newNodes); + addEdges(result.newEdges); + } catch (err) { + setError( + err instanceof Error + ? err.message + : "Failed to load instance" + ); + } finally { + setIsLoading(false); + } + }, + [ + sdk, + dataModel, + addNodes, + addEdges, + clearBuffer, + whitelistedRelationProps, + coreReverseQueries, + viewPriorityConfig, + initialConnectionLimit, + ] + ); + + useEffect(() => { + if (loadedRef.current || !dataModel || !initialInstance || !sdk) return; + loadedRef.current = true; + loadInstance(initialInstance.space, initialInstance.externalId); + }, [dataModel, initialInstance, sdk, loadInstance]); + + return { isLoading, error, loadInstance }; +} diff --git a/.agents/skills/integrate-atlas-chat/SKILL.md b/.agents/skills/integrate-atlas-chat/SKILL.md new file mode 100644 index 0000000..0db7c60 --- /dev/null +++ b/.agents/skills/integrate-atlas-chat/SKILL.md @@ -0,0 +1,143 @@ +--- +name: integrate-atlas-chat +description: "MUST be used whenever building a chat UI with Atlas agents in a Flows app. Do NOT manually write useAtlasChat integration code — this skill handles installation, component structure, and hook wiring. Triggers: useAtlasChat, atlas chat, streaming chat, agent chat, chat interface, chat component, chat UI. For a full chat app, run skills in order: (1) integrate-atlas-chat, (2) create-client-tool (per tool), (3) setup-python-tools (if Python tools needed)." +allowed-tools: Read, Glob, Grep, Edit, Write, Bash +metadata: + argument-hint: "[agent-external-id]" +--- + +# Integrate Atlas Agent Chat + +Add a streaming Atlas Agent chat UI to this Flows app. 
+ +Agent external ID: **$ARGUMENTS** + +## Dependencies + +The atlas-agent library files (copied in Step 2) require these npm packages: + +| Package | Version | +|---|---| +| `@sinclair/typebox` | `^0.33.0` | +| `ajv` | `^8.17.1` | +| `ajv-formats` | `^2.1.1` | + +`@cognite/sdk` is assumed to already be present in Flows apps. + +--- + +## Your job + +Complete these steps in order. Read each file before modifying it. + +--- + +## Step 1 — Understand the app + +Read these files before touching anything: + +- `package.json` — detect package manager (`packageManager` field or lock file) and existing deps +- `src/App.tsx` (or equivalent entry component) — understand current structure + +--- + +## Step 2 — Copy the atlas-agent source files + +The atlas-agent library lives in the `code/` directory next to this skill file. Read and copy +the following files into `src/atlas-agent/` inside the app: + +- `code/types.ts` +- `code/validation.ts` +- `code/client.ts` +- `code/session.ts` +- `code/react.ts` + +> The Python-related files (`python.ts`, `pyodide.ts`, `pyodide-react.ts`, `pyodide-runtime.ts`) +> are only needed if the agent uses Python tools. The `setup-python-tools` skill copies those. + +--- + +## Step 3 — Install dependencies + +Install the required peer packages (see **Dependencies** above) using the app's package manager: + +- pnpm → `pnpm add @sinclair/typebox@^0.33.0 ajv@^8.17.1 ajv-formats@^2.1.1` +- npm → `npm install @sinclair/typebox@^0.33.0 ajv@^8.17.1 ajv-formats@^2.1.1` +- yarn → `yarn add @sinclair/typebox@^0.33.0 ajv@^8.17.1 ajv-formats@^2.1.1` + +--- + +## Step 4 — Build the chat component + +Replace (or create) the main `App.tsx` with a full chat UI. The component must: + +1. **Import** `useAtlasChat` and `ChatMessage` from `./atlas-agent/react` (relative to the component) +2. **Get the SDK** via `useDune()` from `@cognite/dune` +3. **Pass `null` while loading** — `client: isLoading ? null : sdk` +4. 
**Show streaming text** in real time using `msg.isStreaming` with a blinking cursor +5. **Show tool call events** — when `progress.startsWith("Executing:")`, render it distinctly + (e.g. a ⚙ icon + monospace tool name) so tool calls are clearly visible +6. **Show tool calls** — each assistant `message.toolCalls` (after streaming completes) + should appear as expandable cards beneath the message +7. **Abort button** — show a "Stop" button while `isStreaming`, wired to `abort()` +8. **Reset button** — "New chat" button wired to `reset()` +9. **Auto-scroll** — scroll to bottom on new messages and progress updates +10. **Auto-resize textarea** — expand up to ~120px, submit on Enter, newline on Shift+Enter + +### Key hook API + +```ts +import { useAtlasChat } from "./atlas-agent/react"; +import type { ChatMessage } from "./atlas-agent/react"; + +const { messages, send, isStreaming, progress, error, reset, abort } = useAtlasChat({ + client: isLoading ? null : sdk, // null-safe — hook waits for a real client + agentExternalId: "...", + tools?: AtlasTool[], // optional client-side tools +}); + +// messages[n].role — "user" | "assistant" +// messages[n].text — full text (streams chunk-by-chunk via isStreaming) +// messages[n].isStreaming — true while this message is being written +// messages[n].toolCalls — ToolCall[] once response is complete (client + server-side, in call order) +// progress — e.g. "Agent thinking" or "Executing: get_timeseries" +// isStreaming — true for the entire duration of a response +``` + +### Tool call display pattern + +```tsx +// During streaming — show as a distinct "tool call" bubble above the message +{isStreaming && progress?.startsWith("Executing:") && ( +
+  <div className="tool-call">⚙ <code>{progress}</code></div>
+)} + +// After response — show tool calls on the assistant message +{msg.toolCalls?.map((tc, i) => ( + +))} +``` + +--- + +## Step 5 — Python tools (optional) + +If the agent has Python tools (type `runPythonCode` in its CDF config), run the +`setup-python-tools` skill to add Pyodide-based client-side execution: + +``` +/setup-python-tools $ARGUMENTS +``` + +That skill copies the Python-related source files from `@skills/integrate-atlas-chat/code`, +installs `pyodide`, sets up `usePyodideRuntime`, and wires the runtime into +`useAtlasChat` via `pythonRuntime`. The library fetches Python tool code from the agent +config automatically — no `PythonToolConfig` entries needed. + +You don't need this if the agent only uses built-in or regular client tools. + +--- + +## Done + +Start the app and you should see a streaming chat UI connected to Atlas Agent `$ARGUMENTS`. diff --git a/.agents/skills/integrate-atlas-chat/code/client.ts b/.agents/skills/integrate-atlas-chat/code/client.ts new file mode 100644 index 0000000..f479618 --- /dev/null +++ b/.agents/skills/integrate-atlas-chat/code/client.ts @@ -0,0 +1,153 @@ +/** + * AtlasClient — stateless HTTP/SSE transport layer. + * + * Single responsibility: send chat payloads to the Cognite AI agent API + * and parse the response (JSON or Server-Sent Events). 
import type { CogniteClient } from '@cognite/sdk';
import type { Agent, ChatPayload, RawAgentResponse, StreamCallbacks } from './types';

const CDF_API_VERSION = 'alpha';
const AGENTS_API_VERSION = 'beta';

/**
 * Stateless HTTP/SSE transport for the Cognite AI agent API.
 *
 * Wraps a `CogniteClient` and exposes agent lookup plus a `post` method that
 * sends a chat payload and returns the parsed response (plain JSON or a
 * Server-Sent Events stream).
 */
export class AtlasClient {
  private readonly client: CogniteClient;

  constructor(client: CogniteClient) {
    this.client = client;
  }

  /** Lists the agents in the client's project. */
  async listAgents(): Promise<Agent[]> {
    const { data } = await this.client.get<{ items: Agent[] }>(
      `/api/v1/projects/${this.client.project}/ai/agents`,
      { headers: { 'cdf-version': AGENTS_API_VERSION } },
    );
    return data.items;
  }

  /** Fetches one agent by external id; resolves to `null` when the response has no items. */
  async getAgentById(externalId: string): Promise<Agent | null> {
    const { data } = await this.client.post<{ items: Agent[] }>(
      `/api/v1/projects/${this.client.project}/ai/agents/byids`,
      { data: { items: [{ externalId }] }, headers: { 'cdf-version': AGENTS_API_VERSION } },
    );
    return data.items[0] ?? null;
  }

  /**
   * Post a chat payload and parse the response (JSON or SSE).
   *
   * @param payload — request body, sent as JSON.
   * @param agentExternalId — used as a fallback identifier when the SSE result event omits agent IDs.
   * @param callbacks — optional progress / chunk listeners, invoked while streaming.
   * @param signal — optional abort signal forwarded to `fetch`.
   * @throws Error when the HTTP status is not OK, or when a streamed response
   *         ends without a `result` event.
   */
  async post(
    payload: ChatPayload,
    agentExternalId: string,
    callbacks?: StreamCallbacks,
    signal?: AbortSignal,
  ): Promise<RawAgentResponse> {
    const url = `${this.client.getBaseUrl()}/api/v1/projects/${this.client.project}/ai/internal/agents/chat`;

    const response = await fetch(url, {
      method: 'POST',
      headers: {
        ...this.client.getDefaultRequestHeaders(),
        'Content-Type': 'application/json',
        'cdf-version': CDF_API_VERSION,
      },
      body: JSON.stringify(payload),
      signal,
    });

    if (!response.ok) {
      // The error body is best-effort: fall back to {} when it is not JSON.
      const errorData = await response.json().catch(() => ({}));
      throw new Error(
        `Agent chat API error: ${response.status} - ${JSON.stringify(errorData)}`,
      );
    }

    const contentType = response.headers.get('content-type') || '';

    if (
      contentType.includes('text/event-stream') ||
      contentType.includes('text/plain')
    ) {
      return this.parseSSE(response, agentExternalId, callbacks);
    }

    return (await response.json()) as RawAgentResponse;
  }

  /**
   * Parse a Server-Sent Events streaming response.
   *
   * Reads the body line by line, forwards `progress` and `responseChunk`
   * events to the callbacks, and returns the payload of the `result` event.
   * Reading stops at a `[DONE]` sentinel or at end of stream; a final line
   * that is not newline-terminated is still processed.
   *
   * @throws Error when the body is not readable or no `result` event arrived.
   */
  private async parseSSE(
    response: Response,
    agentExternalId: string,
    callbacks?: StreamCallbacks,
  ): Promise<RawAgentResponse> {
    const reader = response.body?.getReader();
    if (!reader) {
      throw new Error('Response body is not readable');
    }

    const decoder = new TextDecoder();
    let buffer = '';
    // Cast keeps the declared union type; the value is assigned inside a closure.
    let finalResponse = null as RawAgentResponse | null;

    /** Handles one SSE line; returns true when it is the `[DONE]` sentinel. */
    const processLine = (line: string): boolean => {
      // The space after "data:" is optional in the SSE format.
      if (!line.startsWith('data:')) return false;

      const dataStr = line.slice(5).trim();
      if (dataStr === '[DONE]') return true;

      try {
        const data = JSON.parse(dataStr);
        const sseResponse = data.response;

        if (!sseResponse) return false;

        if (sseResponse.type === 'progress' && sseResponse.content) {
          callbacks?.onProgress?.(sseResponse.content);
        } else if (
          sseResponse.type === 'responseChunk' &&
          sseResponse.content
        ) {
          callbacks?.onChunk?.(sseResponse.content);
        } else if (sseResponse.type === 'result') {
          finalResponse = {
            agentId: data.agentId || agentExternalId,
            agentExternalId: data.agentExternalId || agentExternalId,
            response: {
              type: 'result',
              cursor: sseResponse.cursor,
              messages: sseResponse.messages || [],
            },
          };
        }
      } catch {
        // Skip unparseable SSE lines
      }
      return false;
    };

    try {
      let sawDone = false;

      while (!sawDone) {
        const { done, value } = await reader.read();

        if (done) {
          // Flush bytes held back by the streaming decoder, then handle a
          // trailing line that was not terminated by a newline.
          buffer += decoder.decode();
          if (buffer) processLine(buffer);
          break;
        }

        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split('\n');
        buffer = lines.pop() ?? '';

        sawDone = lines.some((line) => processLine(line));
      }

      if (sawDone) {
        // Stopping early: tell the source we are no longer interested.
        await reader.cancel().catch(() => undefined);
      }
    } finally {
      reader.releaseLock();
    }

    if (!finalResponse) {
      throw new Error('No result response received from streaming API');
    }

    return finalResponse;
  }
}
+ * + * Core library for communicating with Cognite Atlas AI agents. + * Self-contained, zero imports from outside this directory (except external packages). + * + * React hook is a separate import path: + * import { useAtlasChat } from '@cognite/dune-utils/atlas-agent/react'; + */ + +// Core +export { AtlasSession } from './session'; +export { AtlasClient } from './client'; + +// TypeBox re-exports for convenience +export { Type } from '@sinclair/typebox'; +export type { Static, TSchema } from '@sinclair/typebox'; + +// Types +export type { + Agent, + AgentToolConfig, + AtlasTool, + AtlasToolResult, + AtlasResponse, + AtlasSessionConfig, + ToolCall, + StreamCallbacks, + ApiToolDefinition, + PythonRuntime, +} from './types'; diff --git a/.agents/skills/integrate-atlas-chat/code/pyodide-react.ts b/.agents/skills/integrate-atlas-chat/code/pyodide-react.ts new file mode 100644 index 0000000..0adbeb0 --- /dev/null +++ b/.agents/skills/integrate-atlas-chat/code/pyodide-react.ts @@ -0,0 +1,146 @@ +/** + * usePyodideRuntime — React hook for managing PyodideRuntime lifecycle. + * + * Separate entry point so the core atlas-agent bundle stays Pyodide-free. + */ + +import { useState, useRef, useEffect } from 'react'; +import type { CogniteClient } from '@cognite/sdk'; +import { getGlobalPyodideRuntime } from './pyodide-runtime'; +import type { PyodideRuntimeConfig } from './pyodide-runtime'; +import type { PythonRuntime } from './types'; + +// ============================================================================ +// Types +// ============================================================================ + +export interface PyodideProgress { + stage: string; + percent: number; +} + +export interface UsePyodideRuntimeOptions { + /** The `loadPyodide` function from the `pyodide` package. */ + loadPyodide: PyodideRuntimeConfig['loadPyodide']; + /** CogniteClient for SDK credential injection. `null` disables initialization. 
*/ + client: CogniteClient | null; + /** Additional Python packages to install via micropip. */ + requirements?: string[]; + /** CDN URL for Pyodide files. */ + cdnUrl?: string; +} + +export interface UsePyodideRuntimeReturn { + /** The initialized runtime, or undefined if not yet ready. */ + runtime: PythonRuntime | undefined; + /** True while Pyodide is loading / initializing. */ + loading: boolean; + /** Error message if initialization failed. */ + error: string | null; + /** Current initialization progress. */ + progress: PyodideProgress; + /** Convenience: true when runtime is ready to use. */ + isReady: boolean; +} + +// ============================================================================ +// Hook +// ============================================================================ + +const DEFAULT_BASE_URL = 'https://api.cognitedata.com'; + +/** + * Manages PyodideRuntime initialization lifecycle. + * + * Loads Pyodide, installs packages, injects Cognite SDK credentials, + * and returns a ready-to-use `PythonRuntime` with loading/error state. 
/**
 * React hook that drives the PyodideRuntime initialization lifecycle.
 *
 * Obtains the shared runtime via `getGlobalPyodideRuntime`, initializes it with
 * the project, base URL and bearer token taken from `client` (unless it is
 * already initialized), and reports loading, error and progress state.
 * Initialization re-runs only when `client` changes; the other options are
 * read from refs at that moment. When `client` is `null`, nothing is
 * initialized and `loading` is set to `false`.
 *
 * ```tsx
 * import { loadPyodide } from 'pyodide';
 *
 * const { runtime, loading, progress, isReady } = usePyodideRuntime({
 *   loadPyodide,
 *   client: sdk,
 *   requirements: ['pandas', 'numpy'],
 * });
 * ```
 */
export function usePyodideRuntime(
  options: UsePyodideRuntimeOptions,
): UsePyodideRuntimeReturn {
  const { client } = options;

  const [runtime, setRuntime] = useState<PythonRuntime | undefined>();
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [progress, setProgress] = useState<PyodideProgress>({ stage: '', percent: 0 });

  // Latest option values, kept in refs so changing them does not re-run the effect.
  const loadPyodideRef = useRef(options.loadPyodide);
  const requirementsRef = useRef(options.requirements);
  const cdnUrlRef = useRef(options.cdnUrl);
  loadPyodideRef.current = options.loadPyodide;
  requirementsRef.current = options.requirements;
  cdnUrlRef.current = options.cdnUrl;

  useEffect(() => {
    if (!client) {
      setLoading(false);
      return;
    }

    // Flipped by the cleanup function; guards every state update below.
    let cancelled = false;

    const initialise = async (): Promise<void> => {
      try {
        setLoading(true);
        setError(null);

        const instance = getGlobalPyodideRuntime({
          loadPyodide: loadPyodideRef.current,
          requirements: requirementsRef.current,
          cdnUrl: cdnUrlRef.current,
          onProgress: (stage, percent) => {
            if (cancelled) return;
            setProgress({ stage, percent });
          },
        });

        if (!instance.isInitialized) {
          if (!cancelled) {
            setProgress({ stage: 'Initializing...', percent: 0 });
          }

          // Second whitespace-separated part of the Authorization header, or '' when absent.
          const authHeader = client.getDefaultRequestHeaders().Authorization;
          const [, token = ''] = authHeader?.split(' ') ?? [];

          await instance.initialize({
            project: client.project,
            baseUrl: client.getBaseUrl?.() ?? DEFAULT_BASE_URL,
            token,
          });
        }

        if (!cancelled) {
          setRuntime(instance);
          setProgress({ stage: 'Ready', percent: 100 });
        }
      } catch (err) {
        if (!cancelled) {
          const message = err instanceof Error ? err.message : String(err);
          setError(message);
        }
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    initialise();

    return () => {
      cancelled = true;
    };
  }, [client]);

  const isReady = !(loading || error || runtime === undefined);

  return {
    runtime,
    loading,
    error,
    progress,
    isReady,
  };
}
*/ +export interface PyodideInstance { + loadPackage(packages: string[]): Promise; + runPython(code: string): unknown; + runPythonAsync(code: string): Promise; + globals: PyodideGlobals; + pyimport(name: string): unknown; +} + +interface Micropip { + install(packages: string | string[]): Promise; +} + +// ============================================================================ +// Constants +// ============================================================================ + +const DEFAULT_CDN_URL = 'https://cdn.jsdelivr.net/pyodide/v0.29.3/full/'; +const CACHE_KEY = 'dune_pyodide_initialized'; +const CACHE_VERSION = 'v1'; + +// ============================================================================ +// Config Types +// ============================================================================ + +export interface PyodideRuntimeConfig { + /** The `loadPyodide` function from the `pyodide` package. */ + loadPyodide: (options: { indexURL: string }) => Promise; + /** CDN URL for Pyodide files. Defaults to jsDelivr v0.29.3. */ + cdnUrl?: string; + /** Additional Python packages to install via micropip. */ + requirements?: string[]; + /** Progress callback for initialization stages. 
*/ + onProgress?: (stage: string, percent: number) => void; +} + +export interface PyodideSDKConfig { + project: string; + baseUrl: string; + token: string; +} + +// ============================================================================ +// Python Utility Code (injected at init) +// ============================================================================ + +const PYTHON_UTILS = ` +import json + +def _serialize_cognite_object(obj, depth=0): + if depth > 10: return str(obj) + for attr in ('dump', 'as_dict'): + fn = getattr(obj, attr, None) + if fn: + try: return fn() + except: pass + if isinstance(obj, dict): + return {k: _serialize_cognite_object(v, depth+1) for k, v in obj.items()} + if isinstance(obj, (list, tuple)): + return [_serialize_cognite_object(i, depth+1) for i in obj] + if isinstance(obj, (str, int, float, bool, type(None))): + return obj + d = getattr(obj, '__dict__', None) + if d is not None: + try: return _serialize_cognite_object(d, depth+1) + except: pass + return str(obj) + +def as_json_string(value): + return json.dumps(_serialize_cognite_object(value)) +`; + +// ============================================================================ +// Cache Helpers +// ============================================================================ + +function isCacheValid(): boolean { + try { + return localStorage.getItem(CACHE_KEY) === CACHE_VERSION; + } catch { + return false; + } +} + +function markCacheValid(): void { + try { + localStorage.setItem(CACHE_KEY, CACHE_VERSION); + } catch { + /* localStorage unavailable */ + } +} + +/** Clear the Pyodide package cache — forces re-download on next init. 
*/
+export function clearPyodideCache(): void {
+  try {
+    localStorage.removeItem(CACHE_KEY);
+  } catch {
+    /* localStorage unavailable */
+  }
+}
+
+// ============================================================================
+// PyProxy Detection (structural — avoids importing from 'pyodide')
+// ============================================================================
+
+/**
+ * Structural PyProxy check: any object or function exposing a `destroy()` method.
+ * Callable PyProxies (Python functions/classes) report `typeof === 'function'`,
+ * so both kinds must be accepted or they are never destroyed.
+ */
+function isPyProxy(value: unknown): value is { destroy(): void } {
+  return (
+    value != null &&
+    (typeof value === 'object' || typeof value === 'function') &&
+    'destroy' in value &&
+    typeof (value as Record<string, unknown>).destroy === 'function'
+  );
+}
+
+function destroyIfPyProxy(value: unknown): void {
+  if (isPyProxy(value)) {
+    value.destroy();
+  }
+}
+
+/**
+ * Render a JS string as a Python string literal (quotes included).
+ * JSON string escapes (\", \\, \n, \uXXXX, ...) are also valid Python escapes,
+ * so the value can never terminate the literal or inject code.
+ */
+function toPythonStringLiteral(value: string): string {
+  return JSON.stringify(String(value));
+}
+
+// ============================================================================
+// PyodideRuntime Class
+// ============================================================================
+
+/**
+ * PythonRuntime backed by Pyodide. Handles loading, package installation,
+ * Cognite SDK credential injection, caching, and PyProxy conversion.
+ */
+export class PyodideRuntime implements PythonRuntime {
+  private pyodide?: PyodideInstance;
+  private micropip?: Micropip;
+  private _initialized = false;
+  /** In-flight initialization, shared by concurrent initialize() callers. */
+  private initPromise?: Promise<void>;
+  private readonly config: PyodideRuntimeConfig;
+
+  constructor(config: PyodideRuntimeConfig) {
+    this.config = config;
+  }
+
+  get isInitialized(): boolean {
+    return this._initialized;
+  }
+
+  /**
+   * Load Pyodide, install packages, and set up the Cognite SDK.
+   * Safe to call multiple times — calls after a successful init are no-ops,
+   * and concurrent calls share a single initialization (the `sdk` of the
+   * first caller wins). A failed initialization can be retried.
+   */
+  async initialize(sdk: PyodideSDKConfig): Promise<void> {
+    if (this._initialized) return;
+
+    if (!this.initPromise) {
+      this.initPromise = this.doInitialize(sdk).finally(() => {
+        // Clear so a failed attempt can be retried by a later call.
+        this.initPromise = undefined;
+      });
+    }
+    await this.initPromise;
+  }
+
+  /** The actual initialization sequence; only ever run once at a time. */
+  private async doInitialize(sdk: PyodideSDKConfig): Promise<void> {
+    const report = this.config.onProgress ?? (() => {});
+    const cdnUrl = this.config.cdnUrl ?? DEFAULT_CDN_URL;
+
+    // 1. Load Pyodide
+    report('Loading Pyodide...', 10);
+    const pyodide = await this.config.loadPyodide({ indexURL: cdnUrl });
+    this.pyodide = pyodide;
+    report('Pyodide loaded', 30);
+
+    // 2. Core packages (micropip + HTTP patching)
+    report('Loading core packages...', 40);
+    await pyodide.loadPackage(['micropip', 'pyodide-http']);
+    await pyodide.runPythonAsync(`
+try:
+    import pyodide_http
+    pyodide_http.patch_all()
+except Exception:
+    pass
+`);
+    const micropip = pyodide.pyimport('micropip') as Micropip;
+    this.micropip = micropip;
+
+    // 3. Cognite SDK
+    const verb = isCacheValid() ? 'Loading' : 'Downloading';
+    report(`${verb} cognite-sdk...`, 60);
+    await micropip.install('cognite-sdk');
+    if (!isCacheValid()) markCacheValid();
+    report('cognite-sdk ready', 80);
+
+    // 4. Additional packages
+    const reqs = this.config.requirements ?? [];
+    if (reqs.length > 0) {
+      report('Installing packages...', 85);
+      await micropip.install(reqs);
+    }
+
+    // 5. Utility functions + Cognite client
+    report('Setting up environment...', 90);
+    pyodide.runPython(PYTHON_UTILS);
+
+    // Credentials are emitted as escaped Python literals, never raw text:
+    // a quote or backslash in a token must not alter the generated code.
+    report('Initializing Cognite client...', 95);
+    pyodide.runPython(`
+import os
+os.environ["COGNITE_PROJECT"] = ${toPythonStringLiteral(sdk.project)}
+os.environ["COGNITE_BASE_URL"] = ${toPythonStringLiteral(sdk.baseUrl)}
+os.environ["COGNITE_TOKEN"] = ${toPythonStringLiteral(sdk.token)}
+os.environ["COGNITE_FUSION_NOTEBOOK"] = "1"
+os.environ["MPLBACKEND"] = "AGG"
+from cognite.client import CogniteClient
+client = CogniteClient()
+`);
+
+    this._initialized = true;
+    report('Ready', 100);
+  }
+
+  /** Execute Python code asynchronously. PyProxy results are converted to JSON-safe values. */
+  async runCodeAsync(code: string): Promise<unknown> {
+    const pyodide = this.requirePyodide();
+    const raw = await pyodide.runPythonAsync(code);
+    return this.toJsonSafe(raw);
+  }
+
+  /** Refresh the Cognite SDK token (e.g. after token rotation). */
+  refreshToken(token: string): void {
+    this.requirePyodide().runPython(
+      `import os; os.environ["COGNITE_TOKEN"] = ${toPythonStringLiteral(token)}`,
+    );
+  }
+
+  /** Return the loaded Pyodide instance or throw if initialize() has not loaded it yet. */
+  private requirePyodide(): PyodideInstance {
+    if (!this.pyodide) {
+      throw new Error(
+        'PyodideRuntime not initialized — call initialize() first',
+      );
+    }
+    return this.pyodide;
+  }
+
+  /**
+   * Convert a Pyodide result to a JSON-safe JS value.
+   * Non-proxy values pass through; proxies are serialized by the Python-side
+   * `as_json_string` helper and then destroyed, along with the helper's proxy.
+   */
+  private toJsonSafe(value: unknown): unknown {
+    if (value == null) return undefined;
+    if (!isPyProxy(value)) return value;
+
+    const pyodide = this.requirePyodide();
+    const converter = pyodide.globals.get('as_json_string') as
+      | ((obj: unknown) => string)
+      | undefined;
+
+    if (!converter) {
+      throw new Error(
+        'as_json_string not available — was initialize() called?',
+      );
+    }
+
+    try {
+      return JSON.parse(converter(value));
+    } finally {
+      destroyIfPyProxy(converter);
+      destroyIfPyProxy(value);
+    }
+  }
+}
+
+// ============================================================================
+// Singleton
+// ============================================================================
+
+let globalInstance: PyodideRuntime | undefined;
+
+/**
+ * Get or create the global PyodideRuntime singleton.
+ * Config is only used on first call — subsequent calls return the existing instance.
+ */
+export function getGlobalPyodideRuntime(
+  config: PyodideRuntimeConfig,
+): PyodideRuntime {
+  if (!globalInstance) {
+    globalInstance = new PyodideRuntime(config);
+  }
+  return globalInstance;
+}
+
+/** Reset the global runtime (e.g. on logout). */
+export function resetGlobalPyodideRuntime(): void {
+  globalInstance = undefined;
+}
diff --git a/.agents/skills/integrate-atlas-chat/code/pyodide.ts b/.agents/skills/integrate-atlas-chat/code/pyodide.ts
new file mode 100644
index 0000000..7b59c78
--- /dev/null
+++ b/.agents/skills/integrate-atlas-chat/code/pyodide.ts
@@ -0,0 +1,22 @@
+/**
+ * Pyodide entry point — browser-based Python execution for Atlas agents.
+ *
+ * Import path: @cognite/dune-utils/atlas-agent/pyodide
+ */
+
+// Runtime
+export {
+  PyodideRuntime,
+  getGlobalPyodideRuntime,
+  resetGlobalPyodideRuntime,
+  clearPyodideCache,
+} from './pyodide-runtime';
+export type { PyodideRuntimeConfig, PyodideSDKConfig, PyodideInstance } from './pyodide-runtime';
+
+// React hook
+export { usePyodideRuntime } from './pyodide-react';
+export type {
+  PyodideProgress,
+  UsePyodideRuntimeOptions,
+  UsePyodideRuntimeReturn,
+} from './pyodide-react';
diff --git a/.agents/skills/integrate-atlas-chat/code/python.ts b/.agents/skills/integrate-atlas-chat/code/python.ts
new file mode 100644
index 0000000..1b8417d
--- /dev/null
+++ b/.agents/skills/integrate-atlas-chat/code/python.ts
@@ -0,0 +1,45 @@
+/**
+ * Python execution primitives for Pyodide-based tool execution.
+ *
+ * buildWrapper — combines tool code + args into a single runnable Python snippet
+ * formatOutput — serialises the Pyodide result into a string for the agent
+ */
+
+/**
+ * Unicode-safe base64 encoding.
+ * Encodes to UTF-8 bytes first so non-ASCII characters survive round-tripping.
+ */
+function toBase64(str: string): string {
+  const bytes = new TextEncoder().encode(str);
+  let binary = '';
+  for (const b of bytes) {
+    binary += String.fromCharCode(b);
+  }
+  return btoa(binary);
+}
+
+/**
+ * Build a Python wrapper that loads the tool code, parses base64-encoded args,
+ * and calls handle(**args). Supports both sync and async handle functions.
+ *
+ * The snippet first drops any `handle` left in the interpreter globals by a
+ * previously executed tool. Without that, tool code that defines no `handle`
+ * would silently run the previous tool's function instead of reporting
+ * "No handle() function found".
+ *
+ * @param code     Python source expected to define `handle(...)`.
+ * @param argsJson JSON text of the keyword arguments passed to `handle`.
+ * @returns Python source whose last expression is the tool output
+ *          (a string, JSON text, or None).
+ */
+export function buildWrapper(code: string, argsJson: string): string {
+  const encoded = toBase64(argsJson);
+  return `
+import json, base64, inspect
+globals().pop("handle", None)
+_args = json.loads(base64.b64decode("${encoded}").decode("utf-8"))
+${code}
+async def _exec():
+    if "handle" not in globals():
+        return {"_error": "No handle() function found in tool code"}
+    return await handle(**_args) if inspect.iscoroutinefunction(handle) else handle(**_args)
+_r = await _exec()
+json.dumps(_r) if _r is not None and not isinstance(_r, str) else _r
+`.trimStart();
+}
+
+/**
+ * Stringify Python result into tool output text.
+ * null/undefined become '', strings pass through, everything else is JSON.
+ * JSON.stringify returns undefined for functions and symbols, so those fall
+ * back to String() to keep the declared `string` return type honest.
+ */
+export function formatOutput(raw: unknown): string {
+  if (raw == null) return '';
+  if (typeof raw === 'string') return raw;
+  return JSON.stringify(raw) ?? String(raw);
+}
diff --git a/.agents/skills/integrate-atlas-chat/code/react.ts b/.agents/skills/integrate-atlas-chat/code/react.ts
new file mode 100644
index 0000000..6d75483
--- /dev/null
+++ b/.agents/skills/integrate-atlas-chat/code/react.ts
@@ -0,0 +1,278 @@
+/**
+ * useAtlasChat — plug-and-play React hook for Atlas agent conversations.
+ *
+ * Manages session lifecycle, message state, streaming, and abort support.
+ * Separate entry point from core for tree-shaking.
+ */
+
+import { useState, useCallback, useRef, useEffect, useMemo } from 'react';
+import type { CogniteClient } from '@cognite/sdk';
+import { AtlasSession } from './session';
+import type { AtlasTool, AtlasResponse, PythonRuntime, ToolCall } from './types';
+
+// ============================================================================
+// Types
+// ============================================================================
+
+export interface ChatMessage<TContext = unknown> {
+  id: string;
+  role: 'user' | 'assistant';
+  text: string;
+  timestamp: Date;
+  isStreaming?: boolean;
+  /** Tool calls (client-side and server-side) attached to this message */
+  toolCalls?: ToolCall[];
+  /** App-specific context data, populated via onResponse */
+  context?: TContext;
+}
+
+export interface UseAtlasChatOptions<TContext = unknown> {
+  client: CogniteClient | null;
+  agentExternalId: string;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  tools?: AtlasTool<any, any>[];
+  /** Opt-in Python runtime (e.g. Pyodide) — only needed for agents that use Python tools. */
+  pythonRuntime?: PythonRuntime;
+  /** Messages to show on initial render (e.g. welcome message) */
+  initialMessages?: ChatMessage<TContext>[];
+  /** Called when a full response is received. Return context to merge into the assistant message. */
+  onResponse?: (response: AtlasResponse) => TContext | void;
+  /** Called before each send to inject app-level context (e.g. current todo state) into the request. */
+  getAppContext?: () => string | undefined;
+}
+
+export interface UseAtlasChatReturn<TContext = unknown> {
+  /** All messages in the conversation */
+  messages: ChatMessage<TContext>[];
+  /** Send a user message — automatically creates user + assistant messages, handles streaming */
+  send: (text: string) => Promise<void>;
+  /** True while the agent is responding */
+  isStreaming: boolean;
+  /** Current progress text (e.g. "Agent thinking", "Executing: render_widget") */
+  progress: string | null;
+  /** Error message if last send failed */
+  error: string | null;
+  /** Clear all messages and reset the session */
+  reset: () => void;
+  /** Cancel the current streaming response */
+  abort: () => void;
+  /** Replace messages (e.g. loading conversation history) */
+  setMessages: (messages: ChatMessage<TContext>[]) => void;
+}
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+let messageCounter = 0;
+
+/** Unique-per-page message id: timestamp plus a module-level counter. */
+function generateId(): string {
+  return `msg-${Date.now()}-${++messageCounter}`;
+}
+
+// ============================================================================
+// Hook
+// ============================================================================
+
+/**
+ * React hook that owns one AtlasSession and the message list rendered for it.
+ *
+ * - `send` appends the user message, streams the assistant reply into a single
+ *   assistant message, and is a no-op while another send is in flight.
+ * - The session is rebuilt when `client` or `agentExternalId` changes; when only
+ *   the client changed the conversation cursor is carried over.
+ * - `reset` aborts any in-flight request and starts a fresh session.
+ */
+export function useAtlasChat<TContext = unknown>(
+  options: UseAtlasChatOptions<TContext>,
+): UseAtlasChatReturn<TContext> {
+  const { client, agentExternalId, tools, pythonRuntime, initialMessages, onResponse, getAppContext } = options;
+
+  const [messages, setMessages] = useState<ChatMessage<TContext>[]>(initialMessages ?? []);
+  const [isStreaming, setIsStreaming] = useState(false);
+  const [progress, setProgress] = useState<string | null>(null);
+  const [error, setError] = useState<string | null>(null);
+
+  const sessionRef = useRef<AtlasSession | null>(null);
+  const abortRef = useRef<AbortController | null>(null);
+  // Synchronous re-entrancy guard. The `isStreaming` state captured by a memoized
+  // `send` can be stale, which let two sends issued in the same tick both run.
+  const inFlightRef = useRef(false);
+  const clientRef = useRef(client);
+  const agentExternalIdRef = useRef(agentExternalId);
+  const toolsRef = useRef(tools);
+  const pythonRuntimeRef = useRef(pythonRuntime);
+  const getAppContextRef = useRef(getAppContext);
+
+  // Keep refs updated (array/object identity may change between renders)
+  toolsRef.current = tools;
+  pythonRuntimeRef.current = pythonRuntime;
+  getAppContextRef.current = getAppContext;
+
+  // Stable wrapper — always delegates to the latest getAppContext via ref.
+  // Passed to AtlasSession once at creation so the session is never stale.
+  const stableGetAppContext = useMemo(
+    () => () => getAppContextRef.current?.(),
+    [],
+  );
+
+  // Cleanup on unmount
+  useEffect(() => {
+    return () => {
+      abortRef.current?.abort();
+    };
+  }, []);
+
+  const getSession = useCallback((): AtlasSession | null => {
+    if (!client) return null;
+
+    const previous = sessionRef.current;
+    const agentChanged = agentExternalIdRef.current !== agentExternalId;
+    const clientChanged = clientRef.current !== client;
+
+    if (!previous || agentChanged || clientChanged) {
+      const next = new AtlasSession({
+        client,
+        agentExternalId,
+        tools: toolsRef.current,
+        pythonRuntime: pythonRuntimeRef.current,
+        getAppContext: stableGetAppContext,
+      });
+      // Same agent, new client instance (e.g. re-authenticated): keep the conversation.
+      const cursor = previous && !agentChanged ? previous.getCursor() : undefined;
+      if (cursor) next.setCursor(cursor);
+
+      sessionRef.current = next;
+      clientRef.current = client;
+      agentExternalIdRef.current = agentExternalId;
+    }
+
+    return sessionRef.current;
+  }, [client, agentExternalId, stableGetAppContext]);
+
+  const send = useCallback(
+    async (text: string) => {
+      const session = getSession();
+      if (!session || inFlightRef.current) return;
+      inFlightRef.current = true;
+
+      setError(null);
+      setIsStreaming(true);
+      setProgress('Agent thinking');
+
+      // Add user message
+      const userMessage: ChatMessage<TContext> = {
+        id: generateId(),
+        role: 'user',
+        text,
+        timestamp: new Date(),
+      };
+
+      const assistantId = generateId();
+      let accumulatedText = '';
+      let assistantCreated = false;
+
+      setMessages((prev) => [...prev, userMessage]);
+
+      const abortController = new AbortController();
+      abortRef.current = abortController;
+
+      // ---- Helpers scoped to this send() call ----
+
+      /** Update a single message by id */
+      const updateMsg = (id: string, updates: Partial<ChatMessage<TContext>>) => {
+        setMessages((prev) =>
+          prev.map((m) => (m.id === id ? { ...m, ...updates } : m)),
+        );
+      };
+
+      /** Finalize the assistant message — update if already created, otherwise add a new one */
+      const finalizeAssistant = (fields: Partial<ChatMessage<TContext>>) => {
+        if (assistantCreated) {
+          updateMsg(assistantId, { isStreaming: false, ...fields });
+        } else {
+          setMessages((prev) => [
+            ...prev,
+            {
+              id: assistantId,
+              role: 'assistant' as const,
+              timestamp: new Date(),
+              text: '',
+              isStreaming: false,
+              ...fields,
+            },
+          ]);
+        }
+      };
+
+      try {
+        const response = await session.send(
+          text,
+          {
+            onProgress: (progressText) => {
+              setProgress(progressText);
+            },
+            onChunk: (chunk) => {
+              if (!assistantCreated) {
+                assistantCreated = true;
+                setMessages((prev) => [
+                  ...prev,
+                  {
+                    id: assistantId,
+                    role: 'assistant' as const,
+                    text: chunk,
+                    timestamp: new Date(),
+                    isStreaming: true,
+                  },
+                ]);
+              }
+              accumulatedText += chunk;
+              updateMsg(assistantId, { text: accumulatedText });
+            },
+            onToolStart: (toolName) => {
+              setProgress(`Executing: ${toolName}`);
+            },
+          },
+          abortController.signal,
+        );
+
+        // Finalize assistant message.
+        // When the final payload carries no text, keep what was streamed: spreading
+        // `text: undefined` over the message would erase the streamed text.
+        finalizeAssistant({
+          text:
+            response.text ||
+            (assistantCreated
+              ? accumulatedText
+              : "I apologize, but I couldn't generate a response. Please try again."),
+          toolCalls:
+            response.toolCalls.length > 0
+              ? response.toolCalls
+              : undefined,
+        });
+
+        // Let the app attach context (e.g. applications) to the message
+        const ctx = onResponse?.(response);
+        if (ctx !== undefined) {
+          updateMsg(assistantId, { context: ctx });
+        }
+      } catch (err) {
+        // Read `name` defensively: a rejection value is not guaranteed to be an Error.
+        const isAbort =
+          (err as { name?: unknown } | null | undefined)?.name === 'AbortError';
+        if (isAbort) {
+          // Cancelled by user — finalize any in-progress message
+          if (assistantCreated) {
+            updateMsg(assistantId, { isStreaming: false });
+          }
+        } else {
+          const errorText =
+            err instanceof Error ? err.message : 'Unknown error';
+          setError(errorText);
+          finalizeAssistant({ text: `Error: ${errorText}` });
+        }
+      } finally {
+        // Tear down only if this call still owns the request. After reset() a newer
+        // send() may already be running and must not be marked as finished.
+        if (abortRef.current === abortController) {
+          abortRef.current = null;
+          inFlightRef.current = false;
+          setIsStreaming(false);
+          setProgress(null);
+        }
+      }
+    },
+    [getSession, onResponse],
+  );
+
+  const reset = useCallback(() => {
+    abortRef.current?.abort();
+    // Release ownership so the aborted send's `finally` does not touch the new state.
+    abortRef.current = null;
+    inFlightRef.current = false;
+    setMessages(initialMessages ?? []);
+    setIsStreaming(false);
+    setProgress(null);
+    setError(null);
+    sessionRef.current = null;
+  }, [initialMessages]);
+
+  const abort = useCallback(() => {
+    abortRef.current?.abort();
+  }, []);
+
+  return {
+    messages,
+    send,
+    isStreaming,
+    progress,
+    error,
+    reset,
+    abort,
+    setMessages,
+  };
+}
+
diff --git a/.agents/skills/integrate-atlas-chat/code/session.test.ts b/.agents/skills/integrate-atlas-chat/code/session.test.ts
new file mode 100644
index 0000000..139d97d
--- /dev/null
+++ b/.agents/skills/integrate-atlas-chat/code/session.test.ts
@@ -0,0 +1,155 @@
+import { describe, it, expect, vi } from 'vitest';
+import { AtlasSession } from './session';
+import type { AtlasSessionConfig, RawAgentResponse } from './types';
+
+/**
+ * Minimal mock that satisfies CogniteClient just enough for AtlasSession.
+ * AtlasClient.post is the only call path we exercise, so we stub it via the
+ * prototype after construction.
+ */
+function createMockConfig(
+  overrides?: Partial<AtlasSessionConfig>,
+): AtlasSessionConfig {
+  return {
+    client: {} as AtlasSessionConfig['client'],
+    agentExternalId: 'test-agent',
+    ...overrides,
+  };
+}
+
+/** Build a raw response with a tool action that requests a client tool call.
*/
+function responseWithToolAction(
+  actionId: string,
+  toolName: string,
+  args: Record<string, unknown>,
+): RawAgentResponse {
+  return {
+    agentId: 'test-agent',
+    agentExternalId: 'test-agent',
+    response: {
+      type: 'result',
+      cursor: 'cursor-1',
+      messages: [
+        {
+          role: 'assistant',
+          actions: [
+            {
+              type: 'clientTool',
+              actionId,
+              clientTool: { name: toolName, arguments: args },
+            },
+          ],
+        },
+      ],
+    },
+  };
+}
+
+/** Build a terminal response (no actions). */
+function terminalResponse(text: string): RawAgentResponse {
+  return {
+    agentId: 'test-agent',
+    agentExternalId: 'test-agent',
+    response: {
+      type: 'result',
+      cursor: 'cursor-2',
+      messages: [{ role: 'assistant', content: { type: 'text', text } }],
+    },
+  };
+}
+
+describe(AtlasSession.name, () => {
+  // Re-assigned by every createSession() call; each test builds its own session.
+  let postSpy: ReturnType<typeof vi.fn>;
+
+  /** Build a session whose internal AtlasClient is replaced by a `post` spy. */
+  function createSession(config?: Partial<AtlasSessionConfig>): AtlasSession {
+    const session = new AtlasSession(createMockConfig(config));
+
+    // Stub the internal client.post so we never hit the network.
+    postSpy = vi.fn();
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (session as any).client = { post: postSpy };
+
+    return session;
+  }
+
+  describe('appContext in continuation turns', () => {
+    it('includes contextInformation on the initial user message', async () => {
+      const session = createSession({
+        getAppContext: () => 'todo state here',
+      });
+
+      postSpy.mockResolvedValueOnce(terminalResponse('Done'));
+
+      await session.send('hello');
+
+      const payload = postSpy.mock.calls[0][0];
+      expect(payload.contextInformation).toEqual({
+        appContext: 'todo state here',
+      });
+    });
+
+    it('includes contextInformation on continuation turns after tool execution', async () => {
+      // getAppContext is evaluated once per request, so the counter doubles as a turn index.
+      let callCount = 0;
+      const session = createSession({
+        getAppContext: () => {
+          callCount++;
+          return `context-v${callCount}`;
+        },
+        tools: [
+          {
+            name: 'TestTool',
+            description: 'test',
+            parameters: { type: 'object', properties: {} },
+            execute: () => ({ output: 'ok' }),
+          },
+        ],
+      });
+
+      // Turn 1: agent requests a tool call
+      postSpy.mockResolvedValueOnce(
+        responseWithToolAction('action-1', 'TestTool', {}),
+      );
+      // Turn 2: terminal response
+      postSpy.mockResolvedValueOnce(terminalResponse('All done'));
+
+      await session.send('do something');
+
+      // First call: initial payload
+      expect(postSpy).toHaveBeenCalledTimes(2);
+      const initialPayload = postSpy.mock.calls[0][0];
+      expect(initialPayload.contextInformation).toEqual({
+        appContext: 'context-v1',
+      });
+
+      // Second call: continuation payload after tool execution
+      const continuationPayload = postSpy.mock.calls[1][0];
+      expect(continuationPayload.contextInformation).toEqual({
+        appContext: 'context-v2',
+      });
+    });
+
+    it('omits contextInformation when getAppContext returns undefined', async () => {
+      const session = createSession({
+        getAppContext: () => undefined,
+      });
+
+      postSpy.mockResolvedValueOnce(terminalResponse('Done'));
+
+      await session.send('hello');
+
+      const payload = postSpy.mock.calls[0][0];
+      expect(payload.contextInformation).toBeUndefined();
+    });
+
+    it('omits contextInformation when getAppContext is not provided', async () => {
+      const session = createSession();
+
+      postSpy.mockResolvedValueOnce(terminalResponse('Done'));
+
+      await session.send('hello');
+
+      const payload = postSpy.mock.calls[0][0];
+      expect(payload.contextInformation).toBeUndefined();
+    });
+  });
+});
diff --git a/.agents/skills/integrate-atlas-chat/code/session.ts b/.agents/skills/integrate-atlas-chat/code/session.ts
new file mode 100644
index 0000000..12f77ea
--- /dev/null
+++ b/.agents/skills/integrate-atlas-chat/code/session.ts
@@ -0,0 +1,325 @@
+/**
+ * AtlasSession — stateful conversation with validated tool execution.
+ *
+ * Manages the cursor, converts AtlasTool[] to API actions format,
+ * validates tool arguments with ajv, and runs the tool execution loop.
+ */
+
+import { AtlasClient } from './client';
+import { buildWrapper, formatOutput } from './python';
+import { validateToolArguments } from './validation';
+import type {
+  AtlasTool,
+  AtlasToolResult,
+  AtlasResponse,
+  AtlasSessionConfig,
+  AgentToolConfig,
+  PythonRuntime,
+  StreamCallbacks,
+  ChatPayload,
+  ApiToolDefinition,
+  RawAction,
+  RawClientToolAction,
+  RawAgentResponse,
+  RequestMessage,
+  ClientToolActionMessage,
+  ToolCall,
+} from './types';
+
+/** Upper bound on request/response round-trips within a single send(). */
+const MAX_TURNS = 50;
+
+/**
+ * Parse raw arguments from the API (string or object) into a plain object.
+ *
+ * Always returns a non-null, non-array object. Invalid JSON, and JSON that
+ * decodes to null, an array or a primitive, all yield `{}`, because callers
+ * treat the result as a keyword-argument map.
+ */
+function parseArguments(
+  raw: string | Record<string, unknown>,
+): Record<string, unknown> {
+  let value: unknown = raw;
+  if (typeof raw === 'string') {
+    try {
+      value = JSON.parse(raw);
+    } catch {
+      return {};
+    }
+  }
+  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
+    return {};
+  }
+  return value as Record<string, unknown>;
+}
+
+/**
+ * Extract all actions from all messages in a raw response.
+ */
+function extractActions(raw: RawAgentResponse): RawAction[] {
+  return raw.response.messages.flatMap((msg) => msg.actions ?? []);
+}
+
+/**
+ * Extract server-side tool calls from reasoning blocks in a raw response.
+ * Shape: messages[n].reasoning[n].data[n].toolCall
+ */
+function extractServerToolCalls(raw: RawAgentResponse): ToolCall[] {
+  const calls: ToolCall[] = [];
+  for (const msg of raw.response.messages) {
+    for (const entry of msg.reasoning ?? []) {
+      const data = (entry as { data?: unknown[] }).data;
+      if (!Array.isArray(data)) continue;
+      for (const item of data) {
+        const tc = (item as { toolCall?: Record<string, unknown> } | null | undefined)?.toolCall;
+        if (!tc) continue;
+        const result = tc.result as Record<string, unknown> | undefined;
+        const output = result?.output;
+        calls.push({
+          name: String(tc.name ?? ''),
+          toolType: tc.toolType !== undefined ? String(tc.toolType) : undefined,
+          input: tc.input,
+          // String(obj) would produce "[object Object]"; keep structured output readable.
+          output:
+            output === undefined
+              ? undefined
+              : typeof output === 'string'
+                ? output
+                : JSON.stringify(output) ?? String(output),
+          details: result?.result,
+        });
+      }
+    }
+  }
+  return calls;
+}
+
+/**
+ * Execute a server-declared Python tool via the pythonRuntime.
+ */
+async function executePythonTool(
+  action: RawClientToolAction,
+  toolConfig: AgentToolConfig,
+  pythonRuntime: PythonRuntime | undefined,
+  callbacks?: StreamCallbacks,
+): Promise<{ result: AtlasToolResult; followup: ClientToolActionMessage }> {
+  const toolName = action.clientTool.name;
+  const pythonCode = String(toolConfig.configuration?.pythonCode ?? '').trim();
+
+  // Failures are reported back to the agent as tool output rather than thrown.
+  const fail = (msg: string) => {
+    const result: AtlasToolResult = { output: msg };
+    callbacks?.onToolEnd?.(toolName, result);
+    return { result, followup: createActionReply(action.actionId, result.output) };
+  };
+
+  if (!pythonCode) {
+    return fail(`ERROR: pythonCode is empty in tool configuration for "${toolName}"`);
+  }
+  if (!pythonRuntime) {
+    return fail(`ERROR: pythonRuntime is required to execute Python tool "${toolName}" but was not provided`);
+  }
+
+  try {
+    const argsJson = JSON.stringify(parseArguments(action.clientTool.arguments));
+    const wrapper = buildWrapper(pythonCode, argsJson);
+    const raw = await pythonRuntime.runCodeAsync(wrapper);
+    const result: AtlasToolResult = { output: formatOutput(raw) };
+    callbacks?.onToolEnd?.(toolName, result);
+    return { result, followup: createActionReply(action.actionId, result.output) };
+  } catch (err) {
+    return fail(`ERROR: ${err instanceof Error ? err.message : String(err)}`);
+  }
+}
+
+/**
+ * Execute a client tool action: validate args, run the tool, return result + followup.
+ *
+ * Dispatch order:
+ *   1. Pre-registered JS tool in the tools Map (client-declared tools)
+ *   2. Server-defined Python tool — fetch config from agent API, run via pythonRuntime
+ *
+ * Validation failures and exceptions thrown by a tool's `execute` are reported
+ * to the agent as `ERROR: …` output (matching the Python path), so one failing
+ * tool does not abort the conversation and `onToolEnd` always fires.
+ */
+async function executeClientTool(
+  action: RawClientToolAction,
+  tools: Map<string, AtlasTool>,
+  fetchToolConfig: (name: string) => Promise<AgentToolConfig | null>,
+  pythonRuntime: PythonRuntime | undefined,
+  callbacks?: StreamCallbacks,
+): Promise<{ result: AtlasToolResult; followup: ClientToolActionMessage }> {
+  const toolName = action.clientTool.name;
+
+  callbacks?.onToolStart?.(toolName);
+
+  // 1. Pre-registered JS tool
+  const tool = tools.get(toolName);
+  if (tool) {
+    const args = parseArguments(action.clientTool.arguments);
+    let result: AtlasToolResult;
+    try {
+      validateToolArguments(toolName, tool.parameters, args);
+      result = await tool.execute(args);
+    } catch (err) {
+      const errorOutput = err instanceof Error ? err.message : String(err);
+      result = { output: `ERROR: ${errorOutput}` };
+    }
+    callbacks?.onToolEnd?.(toolName, result);
+    return { result, followup: createActionReply(action.actionId, result.output) };
+  }
+
+  // 2. Server-declared tool — look up config from agent API
+  const toolConfig = await fetchToolConfig(toolName);
+  if (toolConfig?.type === 'runPythonCode') {
+    return executePythonTool(action, toolConfig, pythonRuntime, callbacks);
+  }
+
+  const result: AtlasToolResult = { output: `Unknown client tool: ${toolName}` };
+  callbacks?.onToolEnd?.(toolName, result);
+  return { result, followup: createActionReply(action.actionId, result.output) };
+}
+
+/**
+ * Text of the first response message that carries non-empty text, or ''.
+ * Looking past messages[0] avoids reporting an empty answer when a leading
+ * message has no text content.
+ */
+function extractResponseText(raw: RawAgentResponse): string {
+  for (const msg of raw.response.messages ?? []) {
+    const text = msg.content?.text;
+    if (text) return text;
+  }
+  return '';
+}
+
+/**
+ * Stateful conversation session with validated tool execution.
+ */
+export class AtlasSession {
+  private cursor?: string;
+  private readonly client: AtlasClient;
+  private readonly agentExternalId: string;
+  private readonly tools: Map<string, AtlasTool>;
+  private readonly apiActionsOrUndefined: ApiToolDefinition[] | undefined;
+  private readonly pythonRuntime: PythonRuntime | undefined;
+  private readonly getAppContext: (() => string | undefined) | undefined;
+  /** Cached tool configs fetched from the agent API (populated lazily on first Python tool call). */
+  private cachedAgentTools: AgentToolConfig[] | undefined;
+
+  constructor(config: AtlasSessionConfig) {
+    this.client = new AtlasClient(config.client);
+    this.agentExternalId = config.agentExternalId;
+    this.tools = new Map((config.tools || []).map((t) => [t.name, t]));
+    this.pythonRuntime = config.pythonRuntime;
+    this.getAppContext = config.getAppContext;
+
+    // Inline toApiToolDefinition (only used here)
+    const apiActions: ApiToolDefinition[] = (config.tools || []).map((tool) => ({
+      type: 'clientTool' as const,
+      clientTool: {
+        name: tool.name,
+        description: tool.description,
+        parameters: tool.parameters,
+      },
+    }));
+    this.apiActionsOrUndefined = apiActions.length > 0 ? apiActions : undefined;
+  }
+
+  /**
+   * Send a user message. Handles the full tool execution loop internally.
+   *
+   * Each turn posts the payload, stores the returned cursor, executes any
+   * requested client tools and posts their results, until a response contains
+   * no actions.
+   *
+   * @throws Error on a non-'result' response type or after MAX_TURNS turns.
+   */
+  async send(
+    message: string,
+    callbacks?: StreamCallbacks,
+    signal?: AbortSignal,
+  ): Promise<AtlasResponse> {
+    const allToolCalls: ToolCall[] = [];
+
+    const appContext = this.getAppContext?.();
+    let payload: ChatPayload = {
+      agentExternalId: this.agentExternalId,
+      messages: [{ role: 'user', content: { type: 'text', text: message } }],
+      actions: this.apiActionsOrUndefined,
+      stream: true,
+      ...(this.cursor && { cursor: this.cursor }),
+      ...(appContext && { contextInformation: { appContext } }),
+    };
+
+    for (let turn = 0; turn < MAX_TURNS; turn++) {
+      const raw = await this.client.post(payload, this.agentExternalId, callbacks, signal);
+      const response = raw.response;
+
+      if (response.type !== 'result') {
+        throw new Error(`Unexpected response type: ${response.type}`);
+      }
+
+      if (response.cursor) {
+        this.cursor = response.cursor;
+      }
+
+      // Collect server-side tool calls from reasoning blocks in this turn
+      allToolCalls.push(...extractServerToolCalls(raw));
+
+      const actions = extractActions(raw);
+
+      // No actions → conversation turn is done
+      if (actions.length === 0) {
+        return { text: extractResponseText(raw), cursor: this.cursor, toolCalls: allToolCalls, raw };
+      }
+
+      // Execute actions and build follow-up messages
+      const followups: RequestMessage[] = [];
+
+      for (const action of actions) {
+        if (action.type === 'clientTool') {
+          const { result, followup } = await executeClientTool(
+            action,
+            this.tools,
+            (name) => this.fetchToolConfig(name),
+            this.pythonRuntime,
+            callbacks,
+          );
+          allToolCalls.push({ name: action.clientTool.name, output: result.output, details: result.details });
+          followups.push(followup);
+        } else if (action.type === 'toolConfirmation') {
+          // NOTE(review): every confirmation request is approved automatically, without
+          // asking the user. Confirm this is intended for all agents using this session.
+          const toolName = action.toolConfirmation?.toolName;
+          if (toolName) callbacks?.onToolStart?.(toolName);
+          followups.push({
+            role: 'action',
+            type: 'toolConfirmation',
+            actionId: action.actionId,
+            status: 'ALLOW',
+          });
+        }
+      }
+
+      // Only unrecognised action types were present: nothing to reply with, so stop here.
+      if (followups.length === 0) {
+        return { text: extractResponseText(raw), cursor: this.cursor, toolCalls: allToolCalls, raw };
+      }
+
+      // Prepare the next turn — re-evaluate appContext so it reflects state changes from tool execution
+      const updatedAppContext = this.getAppContext?.();
+      payload = {
+        agentExternalId: this.agentExternalId,
+        messages: followups,
+        actions: this.apiActionsOrUndefined,
+        stream: true,
+        cursor: this.cursor,
+        ...(updatedAppContext && { contextInformation: { appContext: updatedAppContext } }),
+      };
+    }
+
+    throw new Error(`Max tool execution turns reached (${MAX_TURNS})`);
+  }
+
+  /**
+   * Fetch a tool's config from the agent API (lazy, cached per session).
+   * Used as a fallback when a clientTool action arrives for an unregistered tool.
+   */
+  private async fetchToolConfig(toolName: string): Promise<AgentToolConfig | null> {
+    if (!this.cachedAgentTools) {
+      const agent = await this.client.getAgentById(this.agentExternalId);
+      this.cachedAgentTools = agent?.tools ?? [];
+    }
+    return this.cachedAgentTools.find((t) => t.name === toolName) ?? null;
+  }
+
+  /** Reset the session cursor (start a fresh conversation). */
+  reset(): void {
+    this.cursor = undefined;
+  }
+
+  /** Get the current cursor value. */
+  getCursor(): string | undefined {
+    return this.cursor;
+  }
+
+  /** Set the cursor (e.g. when restoring a conversation). */
+  setCursor(cursor: string): void {
+    this.cursor = cursor;
+  }
+}
+
+/** Build the follow-up message that returns a client tool's text output to the agent. */
+function createActionReply(
+  actionId: string,
+  text: string,
+): ClientToolActionMessage {
+  return {
+    role: 'action',
+    type: 'clientTool',
+    actionId,
+    content: { type: 'text', text },
+    data: [],
+  };
+}
diff --git a/.agents/skills/integrate-atlas-chat/code/types.ts b/.agents/skills/integrate-atlas-chat/code/types.ts
new file mode 100644
index 0000000..c9415eb
--- /dev/null
+++ b/.agents/skills/integrate-atlas-chat/code/types.ts
@@ -0,0 +1,320 @@
+/**
+ * Core types for the Atlas Agent client library.
+ *
+ * This module is self-contained — no imports from outside the library
+ * except external packages (@sinclair/typebox, @cognite/sdk).
+ */
+
+import type { TSchema, Static } from '@sinclair/typebox';
+import type { CogniteClient } from '@cognite/sdk';
+
+// ============================================================================
+// Agent Types
+// ============================================================================
+
+/** Configuration for a tool stored in the agent's CDF config.
*/
+export interface AgentToolConfig {
+  name: string;
+  type: string;
+  configuration?: {
+    pythonCode?: string;
+    [key: string]: unknown;
+  };
+  [key: string]: unknown;
+}
+
+export interface Agent {
+  externalId: string;
+  name: string;
+  description?: string;
+  model?: string;
+  instructions?: string;
+  ownerId?: string;
+  tools?: AgentToolConfig[];
+  createdTime?: number;
+  lastUpdatedTime?: number;
+}
+
+// ============================================================================
+// Tool Types
+// ============================================================================
+
+/** Result from executing a tool */
+export interface AtlasToolResult<TDetails = unknown> {
+  /** Text sent back to the agent as tool output */
+  output: string;
+  /** Structured data for the app/UI */
+  details?: TDetails;
+}
+
+/**
+ * A client-side tool the Atlas agent can invoke.
+ * TypeBox schema for type-safe params + runtime validation via ajv.
+ */
+export interface AtlasTool<
+  TParameters extends TSchema = TSchema,
+  TDetails = unknown,
+> {
+  name: string;
+  description: string;
+  parameters: TParameters;
+  execute: (
+    args: Static<TParameters>,
+  ) => AtlasToolResult<TDetails> | Promise<AtlasToolResult<TDetails>>;
+}
+
+/** Minimal interface for executing Python code (e.g. Pyodide). */
+export interface PythonRuntime {
+  runCodeAsync(code: string): Promise<unknown>;
+}
+
+// ============================================================================
+// Callback Types
+// ============================================================================
+
+export interface StreamCallbacks {
+  onProgress?: (text: string) => void;
+  onChunk?: (text: string) => void;
+  onToolStart?: (toolName: string) => void;
+  onToolEnd?: (toolName: string, result: AtlasToolResult) => void;
+}
+
+// ============================================================================
+// Response Types (app-facing)
+// ============================================================================
+
+/** A single tool invocation (client-side or server-side), ready for the UI. */
+export interface ToolCall {
+  /** Friendly display name, e.g. "Find files" */
+  name: string;
+  /** Server-side tool type, e.g. "queryKnowledgeGraph" */
+  toolType?: string;
+  /** Raw input arguments */
+  input?: unknown;
+  /** Text returned to the agent as tool output */
+  output?: string;
+  /** Structured data for UI rendering */
+  details?: unknown;
+}
+
+export interface AtlasResponse {
+  text: string;
+  cursor?: string;
+  toolCalls: ToolCall[];
+  raw: RawAgentResponse;
+}
+
+// ============================================================================
+// Config Types
+// ============================================================================
+
+export interface AtlasSessionConfig {
+  client: CogniteClient;
+  agentExternalId: string;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  tools?: AtlasTool<any, any>[];
+  /** Opt-in Python runtime (e.g. Pyodide) — required only for agents that use Python tools. */
+  pythonRuntime?: PythonRuntime;
+  /** Called before each send to inject app-level context (e.g. current todo state) into the request. */
+  getAppContext?: () => string | undefined;
+}
+
+// ============================================================================
+// API Shared Primitives (maps to cog_ai…session.common)
+// ============================================================================
+
+/** Maps to AgentContentDTO */
+export interface AgentContent {
+  type: string;
+  text?: string;
+}
+
+/** Maps to InstanceIdDTO */
+export interface InstanceId {
+  space: string;
+  externalId: string;
+}
+
+/** Maps to ViewDTO */
+export interface View {
+  space: string;
+  externalId: string;
+  version: string;
+}
+
+/** Maps to PropertyVal type alias */
+export type PropertyVal =
+  | string
+  | number
+  | boolean
+  | null
+  | Record<string, unknown>
+  | (string | null)[]
+  | (number | null)[]
+  | (boolean | null)[]
+  | (Record<string, unknown> | null)[];
+
+// ============================================================================
+// Agent Data Types (maps to AgentDataDTO)
+// ============================================================================
+
+/** Maps to InstanceDataDTO — the only variant we narrow on in app code. */
+export interface InstanceData {
+  type: 'instance';
+  view: View;
+  instanceId: InstanceId;
+  // NOTE(review): value type assumed to be PropertyVal (declared just above) — confirm against InstanceDataDTO.
+  properties?: Record<string, PropertyVal>;
+}
+
+/**
+ * Data items attached to response messages.
+ * Only `InstanceData` is typed — other variants pass through as plain objects.
+ */ +export type AgentData = InstanceData | (Record & { type: string }); + +// ============================================================================ +// Tool Definition Types (maps to CustomClientActionDTO) +// ============================================================================ + +/** Maps to clientToolParameters */ +export interface ClientToolParameters { + type: 'object'; + description?: string; + properties?: Record>; + required?: string[]; + propertyOrdering?: string[]; +} + +/** Maps to CustomClientActionDTO */ +export interface ApiToolDefinition { + type: 'clientTool'; + clientTool: { + name: string; + description?: string; + parameters: ClientToolParameters; + }; +} + +// ============================================================================ +// Request Message Types (maps to RequestMessageDTO) +// ============================================================================ + +/** Maps to AgentChatMessageUserRequestDTO */ +export interface UserRequestMessage { + role: 'user'; + content: AgentContent; +} + +/** Maps to ClientToolCallActionMessageDTO */ +export interface ClientToolActionMessage { + role: 'action'; + type: 'clientTool'; + actionId: string; + content: AgentContent; + data: Array>; +} + +/** Maps to UserConfirmationResponseDTO */ +export interface UserConfirmationMessage { + role: 'action'; + type: 'toolConfirmation'; + actionId: string; + status: 'ALLOW' | 'DENY'; +} + +/** Maps to UserSessionResponseDTO */ +export interface UserSessionMessage { + role: 'action'; + type: 'userSession'; + actionId: string; + nonce: string; +} + +/** Maps to RequestMessageDTO (discriminated union) */ +export type RequestMessage = + | UserRequestMessage + | ClientToolActionMessage + | UserConfirmationMessage + | UserSessionMessage; + +// ============================================================================ +// Session Context (maps to AgentSessionContextDTO) +// 
============================================================================ + +export interface AgentSessionContext { + instanceSpaces?: string[]; + dataModels?: Array>; + timeZone?: string; + appContext?: string; +} + +// ============================================================================ +// Chat Payload (maps to AgentSessionRequest) +// ============================================================================ + +export interface ChatPayload { + agentExternalId?: string; + messages: RequestMessage[]; + actions?: ApiToolDefinition[]; + contextInformation?: AgentSessionContext; + cursor?: string; + stream: boolean; +} + +// ============================================================================ +// Raw Response Types +// ============================================================================ + +/** Response action: agent requests client to execute a tool */ +export interface RawClientToolAction { + type: 'clientTool'; + actionId: string; + clientTool: { + name: string; + arguments: string | Record; + }; +} + +/** Response action: agent requests user confirmation */ +export interface RawToolConfirmationAction { + type: 'toolConfirmation'; + actionId: string; + toolConfirmation?: { + toolName?: string; + toolType?: string; + toolArguments?: Record; + toolDescription?: string; + content?: AgentContent; + }; +} + +/** Response action: agent requests user session */ +export interface RawUserSessionAction { + type: 'userSession'; + actionId: string; +} + +/** Action from agent response (discriminated by `type`). Unknown types are skipped by the session loop. 
*/ +export type RawAction = + | RawClientToolAction + | RawToolConfirmationAction + | RawUserSessionAction; + +/** A message in an agent response */ +export interface RawMessage { + content?: AgentContent; + role: string; + data?: AgentData[]; + reasoning?: Array>; + actions?: RawAction[]; +} + +export interface RawAgentResponse { + agentId: string; + agentExternalId: string; + response: { + type: string; + cursor?: string; + messages: RawMessage[]; + }; +} diff --git a/.agents/skills/integrate-atlas-chat/code/validation.ts b/.agents/skills/integrate-atlas-chat/code/validation.ts new file mode 100644 index 0000000..3e3fb50 --- /dev/null +++ b/.agents/skills/integrate-atlas-chat/code/validation.ts @@ -0,0 +1,49 @@ +/** + * Runtime validation for tool arguments using ajv. + * + * Ported from pi-mono packages/ai/src/utils/validation.ts pattern: + * - Singleton ajv instance with coercion + * - Graceful degradation if ajv fails to initialise (CSP) + */ + +import Ajv from 'ajv'; +import addFormats from 'ajv-formats'; +import type { TSchema } from '@sinclair/typebox'; + +let ajvInstance: Ajv | null = null; + +function getAjv(): Ajv | null { + if (ajvInstance) return ajvInstance; + + try { + ajvInstance = new Ajv({ allErrors: true, strict: false, coerceTypes: true }); + addFormats(ajvInstance); + return ajvInstance; + } catch { + // Graceful degradation — skip validation if ajv cannot initialise + return null; + } +} + +/** + * Validate and coerce tool arguments against a TypeBox / JSON Schema. + * Throws a formatted error on validation failure. + * Gracefully skips validation when ajv is unavailable (e.g. CSP). 
+ */ +export function validateToolArguments( + toolName: string, + schema: TSchema, + args: unknown, +): void { + const ajv = getAjv(); + if (!ajv) return; + + // Ajv builds validators with `new Function` inside compile(), not in the constructor. + // Under a strict CSP (no 'unsafe-eval') that call throws EvalError here, so this is + // where the documented graceful degradation has to happen. Any other error (e.g. a + // malformed schema) is a real bug and is re-thrown. + let validate: ReturnType<Ajv['compile']>; + try { + validate = ajv.compile(schema); + } catch (err) { + if (err instanceof EvalError) return; + throw err; + } + + // With coerceTypes enabled, a successful run may have coerced `args` in place. + if (validate(args)) return; + + const errors = validate.errors + ?.map((e) => `${e.instancePath || '/'} ${e.message}`) + .join('; '); + throw new Error(`Tool "${toolName}" received invalid arguments: ${errors}`); +} diff --git a/.agents/skills/integrate-file-viewer/SKILL.md b/.agents/skills/integrate-file-viewer/SKILL.md new file mode 100644 index 0000000..313409e --- /dev/null +++ b/.agents/skills/integrate-file-viewer/SKILL.md @@ -0,0 +1,287 @@ +--- +name: integrate-file-viewer +description: "MUST be used whenever integrating CogniteFileViewer into a Flows app to preview CDF files (PDFs, images, text). Do NOT manually wire up react-pdf or file resolution — this skill handles installation, Vite config, worker setup, and component usage. Triggers: file viewer, file preview, CogniteFileViewer, PDF viewer, view CDF files, document viewer, preview file." +allowed-tools: Read, Glob, Grep, Edit, Write, Bash +--- + +# Integrate CogniteFileViewer + +Add `CogniteFileViewer` to this Flows app to preview CDF files (PDF, image, text). + +## Dependencies + +The file-viewer library files (copied in Step 2) require this npm package: + +| Package | Version | +|---|---| +| `react-pdf` | `^9.1.1` | + +`pdfjs-dist` ships as a dependency of `react-pdf` at the correct version — do not install it separately. +`react` and `@cognite/sdk` are assumed to already be present in Flows apps. + +--- + +## Your job + +Complete these steps in order. Read each file before modifying it. 
+ +--- + +## Step 1 — Understand the app + +Read these files before touching anything: + +- `package.json` — detect package manager (`packageManager` field or lock file) and existing deps +- `vite.config.ts` — understand current Vite setup +- The component where the viewer should be added + +--- + +## Step 2 — Copy the file-viewer source files + +The file-viewer library lives in the `code/` directory next to this skill file. Read and copy +**all** files from there into `src/cognite-file-viewer/` inside the app: + +- `code/types.ts` +- `code/mimeTypes.ts` +- `code/fileResolution.ts` +- `code/useViewport.ts` +- `code/useFileResolver.ts` +- `code/useDocumentAnnotations.ts` +- `code/DocumentAnnotationOverlay.tsx` +- `code/CogniteFileViewer.tsx` +- `code/index.ts` + +> The PDF.js worker is configured inside `CogniteFileViewer.tsx` — no separate consumer setup is needed. + +--- + +## Step 3 — Install dependencies + +Install `react-pdf` (see **Dependencies** above) using the app's package manager: + +- pnpm → `pnpm add react-pdf@^9.1.1` +- npm → `npm install react-pdf@^9.1.1` +- yarn → `yarn add react-pdf@^9.1.1` + +> **pnpm users:** pnpm's strict linking may prevent the browser from resolving `pdfjs-dist`. Prefer adding `public-hoist-pattern[]=pdfjs-dist` to `.npmrc`. Only if hoisting is not an option, add `pdfjs-dist` as a direct dependency pinned to the exact version that `react-pdf` depends on — an unpinned `pnpm add pdfjs-dist` can install a different version and cause the `API version does not match Worker version` error (see **Common pitfalls**). + +--- + +## Step 4 — Configure Vite + +Add `optimizeDeps.exclude: ['pdfjs-dist']` to `vite.config.ts` to prevent Vite from pre-bundling pdfjs-dist (which breaks the worker): + +```ts +export default defineConfig({ + // ... existing config ... 
+ optimizeDeps: { + exclude: ['pdfjs-dist'], + }, +}); +``` + +--- + +## Step 5 — Use the component + +Import and render `CogniteFileViewer` from the locally copied files: + +```tsx +import { CogniteFileViewer } from './cognite-file-viewer'; +``` + +Get the `sdk` from the `useDune()` hook (already available in every Flows app): + +```tsx +import { useDune } from '@cognite/dune'; +const { sdk } = useDune(); +``` + +### Supported file types + +| Type | Formats | +|---|---| +| PDF | `.pdf` — page navigation, zoom, pan, diagram annotation overlay | +| Office documents | Word, PowerPoint, Excel, ODS, ODP, ODT, RTF, TSV — converted to PDF via the CDF Document Preview API, then rendered identically to PDF | +| Image | JPEG, PNG, WebP, SVG, TIFF — zoom, pan, rotation | +| Text | `.txt`, `.csv`, `.json` — rendered as preformatted text | +| Other | Falls back to `renderUnsupported` | + +### Minimal usage + +This is all you need — zoom, pan, and touch gestures are handled internally: + +```tsx + +``` + +> **The component needs a defined height.** If the parent has no explicit height, the viewer will collapse to zero. Always set a `height` via `style`, `className`, or the parent container. + +### File source + +Pass any of three source types: + +```tsx +// By instance ID (data-modelled file — enables annotations) + + +// By CDF internal ID + + +// By direct URL + +``` + +**Prefer `instanceId` when available** — it's the only source type that enables the diagram annotation overlay. When listing files via `sdk.files.list()`, check `file.instanceId` first: + +```tsx +source={ + file.instanceId + ? 
{ type: 'instanceId', space: file.instanceId.space, externalId: file.instanceId.externalId } + : { type: 'internalId', id: file.id } +} +``` + +### Full props reference + +```tsx + setNumPages(numPages)} + + // Zoom & pan (works on PDF and images) + zoom={zoom} // 1 = 100%; Ctrl/Cmd+wheel, pinch-to-zoom, and middle-click drag built in + onZoomChange={setZoom} + minZoom={0.25} // default + maxZoom={5} // default + panOffset={pan} // controlled pan offset; resets on page change + onPanChange={setPan} + + // Fit mode + fitMode="width" // 'width' fits to container width; 'page' fits entire page in container + + // Rotation (PDFs and images) + rotation={rotation} // 0 | 90 | 180 | 270 + + // Diagram annotations (instanceId sources only) + showAnnotations={true} // default + onAnnotationClick={(annotation) => { /* annotation.linkedResource has space + externalId */ }} + onAnnotationHover={(annotation) => {}} + + // Custom annotation tooltip (replaces native tooltip) + renderAnnotationTooltip={(annotation, rect) => ( + <div style={{ + position: 'absolute', + left: rect.x + rect.width, + top: rect.y, + zIndex: 11, + }}> + {annotation.text} + </div> + )} + + // Custom overlay (SVG paths, highlights, drawings — works on PDF and images) + renderOverlay={({ width, height, originalWidth, originalHeight, pageNumber, rotation }) => ( + <svg + width={width} + height={height} + viewBox={`0 0 ${originalWidth} ${originalHeight}`} + preserveAspectRatio="none" + style={{ position: 'absolute', top: 0, left: 0, pointerEvents: 'all' }} + > + <path d="..." stroke="cyan" fill="none" /> + </svg> + )} + + // Custom renderers (all optional) + renderLoading={() => <MySpinner />} + renderError={(error) => <MyError message={error.message} />} + renderUnsupported={(mimeType) => <div>Cannot preview {mimeType}</div>} + + // Layout + className="..." 
+ style={{ width: '100%', height: '100%' }} +/> +``` + +--- + +## Tips & tricks + +**Reset page, zoom and rotation when the source changes.** +The component does not reset these automatically when you switch files — do it yourself: + +```ts +const navigateToFile = (file: FileInfo) => { + setSelectedFile(file); + setPage(1); + setZoom(1); + setRotation(0); +}; +``` + +**Gate pagination UI on `numPages > 0`.** +`onDocumentLoad` only fires for PDFs. Don't render pagination controls until you know there are pages to paginate: + +```tsx +{numPages > 0 && ( + <> + <button disabled={page <= 1} onClick={() => setPage(p => p - 1)}>‹</button> + <span>{page} / {numPages}</span> + <button disabled={page >= numPages} onClick={() => setPage(p => p + 1)}>›</button> + </> +)} +``` + +**Annotation click → navigate to linked file.** +`annotation.linkedResource` contains the `space` and `externalId` of the linked CDF instance. Match it against `file.instanceId` to navigate: + +```ts +onAnnotationClick={(annotation) => { + if (!annotation.linkedResource) return; + const { space, externalId } = annotation.linkedResource; + const linked = files.find( + f => f.instanceId?.space === space && f.instanceId?.externalId === externalId + ); + if (linked) navigateToFile(linked); +}} +``` + +**Touch support is built in.** Two-finger pinch-to-zoom and two-finger drag-to-pan work on touch devices automatically. No configuration needed. + +**Pan is middle-click drag** (when zoomed in) on desktop. Left-click remains free for annotation clicks and text selection. + +**Ctrl/Cmd + wheel zooms toward the cursor** — also built in. Wire `zoom`/`onZoomChange` if you want programmatic zoom buttons or to persist zoom state; otherwise it works fully uncontrolled. + +**`renderOverlay` receives original page dimensions** (`originalWidth`, `originalHeight`) so you can set up an SVG `viewBox` in the original coordinate space. 
Paths drawn in PDF-point or image-pixel coordinates will map correctly to the rendered page at any zoom level. + +--- + +## Common pitfalls + +| Problem | Cause | Fix | +|---|---|---| +| `Failed to resolve module specifier 'pdf.worker.mjs'` | pdfjs-dist not hoisted (pnpm) | Add `public-hoist-pattern[]=pdfjs-dist` to `.npmrc`, or `pnpm add pdfjs-dist` directly | +| `API version does not match Worker version` | `pdfjs-dist` version mismatch between app and `react-pdf` | Do not install `pdfjs-dist` separately — let `react-pdf` provide it. If already installed, remove it | +| Annotations never show | `instanceId` is `undefined` — annotation overlay is disabled without it | Use `instanceId` source, or fall back and accept no annotations for classic files | +| Annotations show but are empty | File has no `CogniteDiagramAnnotation` edges in CDF | Expected — only P&ID/diagram files synced to the data model have annotations | +| Viewer collapses to zero height | Parent has no explicit height | Set `height` via `style`, `className`, or parent CSS | diff --git a/.agents/skills/integrate-file-viewer/code/CogniteFileViewer.tsx b/.agents/skills/integrate-file-viewer/code/CogniteFileViewer.tsx new file mode 100644 index 0000000..629b205 --- /dev/null +++ b/.agents/skills/integrate-file-viewer/code/CogniteFileViewer.tsx @@ -0,0 +1,479 @@ +import React, { useState, useEffect, useRef, useCallback } from 'react'; +import { Document, Page, pdfjs } from 'react-pdf'; +import type { PageCallback } from 'react-pdf/dist/shared/types.js'; +import 'react-pdf/dist/Page/TextLayer.css'; +import 'react-pdf/dist/Page/AnnotationLayer.css'; + +pdfjs.GlobalWorkerOptions.workerSrc = new URL( + 'pdfjs-dist/build/pdf.worker.min.mjs', + import.meta.url, +).toString(); +import type { CogniteFileViewerProps } from './types'; +import { getViewerType } from './mimeTypes'; +import { useFileResolver } from './useFileResolver'; +import { useDocumentAnnotations } from './useDocumentAnnotations'; +import { 
DocumentAnnotationOverlay } from './DocumentAnnotationOverlay'; +import { useViewport, computeBaseWidth } from './useViewport'; + +// ============================================================================ +// Sub-renderers +// ============================================================================ + +function DefaultLoading() { + return <div style={{ padding: 16, color: '#666' }}>Loading file...</div>; +} + +function DefaultError({ error }: { error: Error }) { + return ( + <div style={{ padding: 16, color: '#c00' }}> + Failed to load file: {error.message} + </div> + ); +} + +function DefaultUnsupported({ mimeType }: { mimeType: string | undefined }) { + return ( + <div style={{ padding: 16, color: '#666' }}> + Unsupported file type{mimeType ? `: ${mimeType}` : ''} + </div> + ); +} + +// ---------- Shared blob fetch hook ---------- + +function useBlobUrl(url: string) { + const [blobUrl, setBlobUrl] = useState<string | null>(null); + const [error, setError] = useState<Error | null>(null); + const objectUrlRef = useRef<string | null>(null); + + useEffect(() => { + let cancelled = false; + + // Reset state for new URL + setBlobUrl(null); + setError(null); + + // Revoke previous blob URL + if (objectUrlRef.current) { + URL.revokeObjectURL(objectUrlRef.current); + objectUrlRef.current = null; + } + + fetch(url) + .then((res) => { + if (!res.ok) throw new Error(`HTTP ${res.status}`); + return res.blob(); + }) + .then((blob) => { + if (cancelled) return; + const newUrl = URL.createObjectURL(blob); + objectUrlRef.current = newUrl; + setBlobUrl(newUrl); + }) + .catch((err) => { + if (!cancelled) setError(err instanceof Error ? 
err : new Error(String(err))); + }); + + return () => { + cancelled = true; + if (objectUrlRef.current) { + URL.revokeObjectURL(objectUrlRef.current); + objectUrlRef.current = null; + } + }; + }, [url]); + + return { blobUrl, error }; +} + +// ---------- Image ---------- + +interface ImageRendererProps + extends Omit<CogniteFileViewerProps, 'source' | 'client' | 'className' | 'style'> { + url: string; +} + +function ImageRenderer(props: ImageRendererProps) { + const { url, rotation = 0, fitMode, width: explicitWidth, renderLoading, renderError, renderOverlay } = props; + const { currentZoom, effectivePan, containerDims, viewportRef, cursor, handleMouseDown } = + useViewport(props); + + const { blobUrl, error } = useBlobUrl(url); + const [naturalSize, setNaturalSize] = useState<{ width: number; height: number } | null>(null); + + // Reset natural size when URL changes + const prevUrlRef = useRef(url); + if (prevUrlRef.current !== url) { + prevUrlRef.current = url; + setNaturalSize(null); + } + + const handleLoad = useCallback((e: React.SyntheticEvent<HTMLImageElement>) => { + setNaturalSize({ width: e.currentTarget.naturalWidth, height: e.currentTarget.naturalHeight }); + }, []); + + if (error) return renderError ? renderError(error) : <DefaultError error={error} />; + if (!blobUrl) return renderLoading ? renderLoading() : <DefaultLoading />; + + const baseWidth = computeBaseWidth(fitMode, explicitWidth, containerDims, naturalSize); + const imgWidth = baseWidth ?? naturalSize?.width; + + // Until we know image dimensions, render hidden to measure + if (!imgWidth || !naturalSize) { + return ( + <div ref={viewportRef} style={{ overflow: 'hidden' }}> + {renderLoading ? 
renderLoading() : <DefaultLoading />} + <img + src={blobUrl} + alt="" + style={{ position: 'absolute', visibility: 'hidden', pointerEvents: 'none' }} + onLoad={handleLoad} + /> + </div> + ); + } + + const imgHeight = imgWidth * (naturalSize.height / naturalSize.width); + const isSwapped = rotation === 90 || rotation === 270; + const visualW = (isSwapped ? imgHeight : imgWidth) * currentZoom; + const visualH = (isSwapped ? imgWidth : imgHeight) * currentZoom; + + return ( + <div ref={viewportRef} style={{ overflow: currentZoom > 1 ? 'hidden' : 'auto', cursor }} onMouseDown={handleMouseDown}> + <div + style={{ + display: 'inline-block', + transform: + effectivePan.x !== 0 || effectivePan.y !== 0 + ? `translate(${effectivePan.x}px, ${effectivePan.y}px)` + : undefined, + }} + > + <div style={{ width: visualW, height: visualH, position: 'relative' }}> + <img + src={blobUrl} + alt="" + style={{ + position: 'absolute', + width: imgWidth * currentZoom, + top: '50%', + left: '50%', + transform: `translate(-50%, -50%) rotate(${rotation}deg)`, + }} + onLoad={handleLoad} + /> + {renderOverlay && naturalSize && ( + renderOverlay({ + width: visualW, + height: visualH, + originalWidth: naturalSize.width, + originalHeight: naturalSize.height, + pageNumber: 1, + rotation, + }) + )} + </div> + </div> + </div> + ); +} + +// ---------- Text ---------- + +/** Fetches `url` as text and renders it preformatted; shows the default loading / error views meanwhile. */ +function TextRenderer({ url }: { url: string }) { + const [content, setContent] = useState<string | null>(null); + const [error, setError] = useState<Error | null>(null); + + useEffect(() => { + let cancelled = false; + + // Reset state for the new URL (same as useBlobUrl) so a previous file's text or a + // stale fetch error is never shown for this one. + setContent(null); + setError(null); + + fetch(url) + .then((res) => { + if (!res.ok) throw new Error(`HTTP ${res.status}`); + return res.text(); + }) + .then((text) => { + if (!cancelled) setContent(text); + }) + .catch((err) => { + if (!cancelled) setError(err instanceof Error ? 
err : new Error(String(err))); + }); + return () => { + cancelled = true; + }; + }, [url]); + + if (error) return <DefaultError error={error} />; + if (content === null) return <DefaultLoading />; + + return ( + <pre + style={{ + margin: 0, + padding: 16, + overflow: 'auto', + whiteSpace: 'pre-wrap', + wordBreak: 'break-word', + fontSize: 13, + lineHeight: 1.5, + fontFamily: 'monospace', + }} + > + {content} + </pre> + ); +} + +// ============================================================================ +// PDF Renderer (with annotation overlay) +// ============================================================================ + +const PDF_LOAD_ERROR = new Error('Failed to load PDF'); + +interface PdfRendererProps + extends Omit<CogniteFileViewerProps, 'source' | 'className' | 'style' | 'renderUnsupported'> { + url: string; + instanceId?: { space: string; externalId: string }; +} + +function PdfRenderer(props: PdfRendererProps) { + const { + url, + instanceId, + client, + showAnnotations = true, + onAnnotationClick, + onAnnotationHover, + renderAnnotationTooltip, + page: controlledPage, + onPageChange, + onDocumentLoad, + width, + rotation = 0, + fitMode, + onLoadProgress, + renderLoading, + renderError, + renderOverlay, + } = props; + + // -- Viewport (zoom, pan, wheel, drag) -- + const { + currentZoom, + effectivePan, + containerDims, + viewportRef, + cursor, + handleMouseDown, + handlePanChange, + } = useViewport(props); + + // -- Page state (controlled + uncontrolled) -- + const [internalPage, setInternalPage] = useState(1); + const currentPage = controlledPage ?? 
internalPage; + + const handlePageChange = useCallback( + (newPage: number) => { + setInternalPage(newPage); + onPageChange?.(newPage); + }, + [onPageChange], + ); + + // Reset pan on page change + const handlePanChangeRef = useRef(handlePanChange); + handlePanChangeRef.current = handlePanChange; + useEffect(() => { + handlePanChangeRef.current({ x: 0, y: 0 }); + }, [currentPage]); + + // -- Page dimensions (for annotation overlay) -- + const [pageDims, setPageDims] = useState({ width: 0, height: 0 }); + const pageObserverRef = useRef<ResizeObserver | null>(null); + + const pageWrapperRef = useCallback((node: HTMLDivElement | null) => { + if (pageObserverRef.current) { + pageObserverRef.current.disconnect(); + pageObserverRef.current = null; + } + if (node) { + const measure = () => { + const w = node.clientWidth; + const h = node.clientHeight; + setPageDims((prev) => (prev.width === w && prev.height === h ? prev : { width: w, height: h })); + }; + const observer = new ResizeObserver(measure); + observer.observe(node); + measure(); + pageObserverRef.current = observer; + } + }, []); + + useEffect(() => { + return () => { + pageObserverRef.current?.disconnect(); + pageObserverRef.current = null; + }; + }, []); + + // -- Page natural dimensions (for fitMode='page') -- + const [pageNaturalSize, setPageNaturalSize] = useState<{ width: number; height: number } | null>(null); + + const handlePageLoadSuccess = useCallback((page: PageCallback) => { + const { originalWidth: w, originalHeight: h } = page; + if (w && h) setPageNaturalSize({ width: w, height: h }); + }, []); + + // -- Compute base width from fitMode -- + const baseWidth = computeBaseWidth(fitMode, width, containerDims, pageNaturalSize); + + // -- Annotations -- + const annotationsEnabled = showAnnotations && instanceId !== undefined; + + const { annotations } = useDocumentAnnotations( + client, + instanceId, + currentPage, + { enabled: annotationsEnabled }, + ); + + // -- PDF Document callbacks -- + const 
currentPageRef = useRef(currentPage); + currentPageRef.current = currentPage; + + const handleLoadSuccess = useCallback( + ({ numPages }: { numPages: number }) => { + onDocumentLoad?.({ numPages }); + if (currentPageRef.current > numPages) handlePageChange(1); + }, + [onDocumentLoad, handlePageChange], + ); + + return ( + <div + ref={viewportRef} + style={{ overflow: currentZoom > 1 ? 'hidden' : 'auto', cursor, height: '100%' }} + onMouseDown={handleMouseDown} + > + <Document + file={url} + onLoadSuccess={handleLoadSuccess} + onLoadProgress={onLoadProgress} + loading={renderLoading ? renderLoading() : <DefaultLoading />} + error={ + renderError ? ( + renderError(PDF_LOAD_ERROR) + ) : ( + <DefaultError error={PDF_LOAD_ERROR} /> + ) + } + > + <div + ref={pageWrapperRef} + style={{ + position: 'relative', + display: 'inline-block', + transform: effectivePan.x !== 0 || effectivePan.y !== 0 + ? `translate(${effectivePan.x}px, ${effectivePan.y}px)` + : undefined, + }} + > + <Page + pageNumber={currentPage} + width={baseWidth} + scale={currentZoom} + rotate={rotation} + onLoadSuccess={handlePageLoadSuccess} + /> + {annotationsEnabled && pageDims.width > 0 && annotations.length > 0 && ( + <DocumentAnnotationOverlay + annotations={annotations} + containerWidth={pageDims.width} + containerHeight={pageDims.height} + rotation={rotation} + onAnnotationClick={onAnnotationClick} + onAnnotationHover={onAnnotationHover} + renderAnnotationTooltip={renderAnnotationTooltip} + /> + )} + {renderOverlay && pageDims.width > 0 && pageDims.height > 0 && pageNaturalSize && ( + renderOverlay({ + width: pageDims.width, + height: pageDims.height, + originalWidth: pageNaturalSize.width, + originalHeight: pageNaturalSize.height, + pageNumber: currentPage, + rotation, + }) + )} + </div> + </Document> + </div> + ); +} + +// ============================================================================ +// Main Component +// ============================================================================ 
+ +export const CogniteFileViewer: React.FC<CogniteFileViewerProps> = (props) => { + const { + source, + client, + renderLoading, + renderError, + renderUnsupported, + className, + style, + } = props; + + const { + url, + mimeType, + instanceId, + isLoading, + error, + } = useFileResolver(source, client); + + const viewerType = getViewerType(mimeType); + const rotation = props.rotation ?? 0; + + // -- Loading -- + if (isLoading) { + return ( + <div className={className} style={style}> + {renderLoading ? renderLoading() : <DefaultLoading />} + </div> + ); + } + + // -- Error -- + if (error || !url) { + return ( + <div className={className} style={style}> + {renderError + ? renderError(error ?? new Error('No URL resolved')) + : <DefaultError error={error ?? new Error('No URL resolved')} />} + </div> + ); + } + + // -- Render by type -- + const renderContent = () => { + switch (viewerType) { + case 'pdf': + return <PdfRenderer {...props} url={url} instanceId={instanceId} rotation={rotation} />; + case 'image': + return <ImageRenderer {...props} url={url} rotation={rotation} />; + case 'text': + return <TextRenderer url={url} />; + default: + return renderUnsupported ? 
renderUnsupported(mimeType) : <DefaultUnsupported mimeType={mimeType} />; + } + }; + + return ( + <div className={className} style={style}> + {renderContent()} + </div> + ); +}; diff --git a/.agents/skills/integrate-file-viewer/code/DocumentAnnotationOverlay.tsx b/.agents/skills/integrate-file-viewer/code/DocumentAnnotationOverlay.tsx new file mode 100644 index 0000000..1cf0123 --- /dev/null +++ b/.agents/skills/integrate-file-viewer/code/DocumentAnnotationOverlay.tsx @@ -0,0 +1,229 @@ +import React, { useState } from 'react'; +import type { DocumentAnnotation, AnnotationResourceType, BoundingRect } from './types'; + +// ============================================================================ +// Annotation colours (matches cogs.js-v10 design tokens) +// ============================================================================ + +const ANNOTATION_COLORS: Record< + AnnotationResourceType, + { stroke: string; hoverFill: string } +> = { + asset: { + stroke: 'rgb(212, 106, 226)', + hoverFill: 'rgba(212, 106, 226, 0.15)', + }, + file: { + stroke: 'rgb(255, 135, 70)', + hoverFill: 'rgba(255, 135, 70, 0.15)', + }, + timeSeries: { + stroke: 'rgb(164, 178, 252)', + hoverFill: 'rgba(164, 178, 252, 0.15)', + }, + sequence: { + stroke: 'rgb(255, 220, 127)', + hoverFill: 'rgba(255, 220, 127, 0.15)', + }, + event: { + stroke: 'rgb(253, 81, 144)', + hoverFill: 'rgba(253, 81, 144, 0.15)', + }, + diagram: { + stroke: 'rgb(76, 175, 80)', + hoverFill: 'rgba(76, 175, 80, 0.15)', + }, + unknown: { + stroke: 'rgb(89, 89, 89)', + hoverFill: 'rgba(89, 89, 89, 0.15)', + }, +}; + +// ============================================================================ +// Types +// ============================================================================ + +export interface DocumentAnnotationOverlayProps { + /** Annotations to render (coordinates are normalised 0-1) */ + annotations: DocumentAnnotation[]; + /** Rendered page width in CSS pixels */ + containerWidth: number; + /** Rendered 
page height in CSS pixels */ + containerHeight: number; + /** Document rotation in degrees (0, 90, 180, 270) */ + rotation?: number; + /** Called when a user clicks an annotation */ + onAnnotationClick?: (annotation: DocumentAnnotation) => void; + /** Called when a user hovers over / leaves an annotation */ + onAnnotationHover?: (annotation: DocumentAnnotation | null) => void; + /** Render a custom tooltip for hovered annotations. Receives the annotation and its pixel-space bounding rect. */ + renderAnnotationTooltip?: ( + annotation: DocumentAnnotation, + rect: BoundingRect, + ) => React.ReactNode; +} + +// ============================================================================ +// Helpers +// ============================================================================ + +function getStyle( + resourceType: AnnotationResourceType, + isHovered: boolean, +) { + const colors = ANNOTATION_COLORS[resourceType] ?? ANNOTATION_COLORS.unknown; + return { + stroke: colors.stroke, + fill: isHovered ? colors.hoverFill : 'none', + strokeWidth: isHovered ? 
2 : 1.5,
+  };
+}
+
+/**
+ * Map an annotation's normalised (0-1) bounding box to pixel coordinates for a
+ * page rendered at `w` x `h` pixels.
+ *
+ * `rotation` is given in degrees and is normalised into [0, 360) first, so
+ * values such as -90 or 450 are treated like 270 and 90. Anything that is not
+ * 90, 180 or 270 after normalisation uses the unrotated mapping.
+ */
+function transformAnnotation(
+  annotation: DocumentAnnotation,
+  w: number,
+  h: number,
+  rotation: number,
+) {
+  const { x, y, width, height } = annotation;
+  // Double modulo keeps negative angles positive (-90 -> 270).
+  const normalisedRotation = ((rotation % 360) + 360) % 360;
+  switch (normalisedRotation) {
+    case 90:
+      return {
+        x: (1 - y - height) * w,
+        y: x * h,
+        width: height * w,
+        height: width * h,
+      };
+    case 180:
+      return {
+        x: (1 - x - width) * w,
+        y: (1 - y - height) * h,
+        width: width * w,
+        height: height * h,
+      };
+    case 270:
+      return {
+        x: y * w,
+        y: (1 - x - width) * h,
+        width: height * w,
+        height: width * h,
+      };
+    default:
+      return {
+        x: x * w,
+        y: y * h,
+        width: width * w,
+        height: height * h,
+      };
+  }
+}
+
+// ============================================================================
+// Component
+// ============================================================================
+
+/**
+ * Absolutely positioned SVG layer that draws one clickable rectangle per
+ * annotation. Renders nothing when there are no annotations or when either
+ * container dimension is 0. If `renderAnnotationTooltip` is supplied it is
+ * called for the hovered annotation and its output is rendered after the SVG;
+ * otherwise the annotation text is exposed through an SVG `<title>` element.
+ */
+export const DocumentAnnotationOverlay: React.FC<
+  DocumentAnnotationOverlayProps
+> = ({
+  annotations,
+  containerWidth,
+  containerHeight,
+  rotation = 0,
+  onAnnotationClick,
+  onAnnotationHover,
+  renderAnnotationTooltip,
+}) => {
+  const [hoveredId, setHoveredId] = useState<string | null>(null);
+
+  if (annotations.length === 0 || containerWidth === 0 || containerHeight === 0) {
+    return null;
+  }
+
+  const hoveredAnnotation = hoveredId
+    ? annotations.find((a) => a.id === hoveredId)
+    : null;
+  const hoveredRect = hoveredAnnotation
+    ? transformAnnotation(hoveredAnnotation, containerWidth, containerHeight, rotation)
+    : null;
+
+  return (
+    <>
+      <svg
+        style={{
+          position: 'absolute',
+          top: 0,
+          left: 0,
+          width: '100%',
+          height: '100%',
+          pointerEvents: 'none',
+          overflow: 'visible',
+          zIndex: 10,
+        }}
+        viewBox={`0 0 ${containerWidth} ${containerHeight}`}
+      >
+        {annotations.map((annotation) => {
+          const isHovered = hoveredId === annotation.id;
+          const style = getStyle(annotation.resourceType, isHovered);
+          const rect = transformAnnotation(
+            annotation,
+            containerWidth,
+            containerHeight,
+            rotation,
+          );
+
+          return (
+            <rect
+              key={annotation.id}
+              x={rect.x}
+              y={rect.y}
+              width={rect.width}
+              height={rect.height}
+              fill={style.fill}
+              stroke={style.stroke}
+              strokeWidth={style.strokeWidth}
+              rx={1}
+              ry={1}
+              style={{ pointerEvents: 'auto', cursor: 'pointer' }}
+              onMouseEnter={() => {
+                setHoveredId(annotation.id);
+                onAnnotationHover?.(annotation);
+              }}
+              onMouseLeave={() => {
+                setHoveredId(null);
+                onAnnotationHover?.(null);
+              }}
+              onClick={(e) => {
+                e.stopPropagation();
+                onAnnotationClick?.(annotation);
+              }}
+            >
+              {!renderAnnotationTooltip && annotation.text && (
+                <title>{annotation.text}</title>
+              )}
+            </rect>
+          );
+        })}
+      </svg>
+      {renderAnnotationTooltip && hoveredAnnotation && hoveredRect && (
+        renderAnnotationTooltip(hoveredAnnotation, hoveredRect)
+      )}
+    </>
+  );
+};
+
+// ============================================================================
+// Utilities
+// ============================================================================
+
+export function getAnnotationColor(
+  resourceType: AnnotationResourceType,
+): { stroke: string; hoverFill: string } {
+  return ANNOTATION_COLORS[resourceType] ??
ANNOTATION_COLORS.unknown;
+}
+
+export function getAllAnnotationColors(): Record<
+  AnnotationResourceType,
+  { stroke: string; hoverFill: string }
+> {
+  return ANNOTATION_COLORS;
+}
diff --git a/.agents/skills/integrate-file-viewer/code/fileResolution.ts b/.agents/skills/integrate-file-viewer/code/fileResolution.ts
new file mode 100644
index 0000000..c6a4d49
--- /dev/null
+++ b/.agents/skills/integrate-file-viewer/code/fileResolution.ts
@@ -0,0 +1,132 @@
+import type { CogniteClient, FileInfo } from '@cognite/sdk';
+import {
+  getComputedMimeType,
+  isNativelySupportedMimeType,
+  doesDocumentPreviewApiSupportFile,
+  DocumentMimeType,
+} from './mimeTypes';
+
+// ============================================================================
+// Cache
+// ============================================================================
+
+/** CDF URLs expire after 60 min with extendedExpiration — refresh at 59 min. */
+const URL_CACHE_EXPIRE_MS = 59 * 60 * 1000;
+const MAX_CACHE_SIZE = 200;
+
+interface CacheEntry {
+  url: string;
+  mimeType: string;
+  expiresAt: number;
+}
+
+// Keyed by `${project}:${fileId}`; a Map iterates in insertion order, which
+// the size-based eviction below relies on.
+const urlCache = new Map<string, CacheEntry>();
+
+/** Evict expired entries; if still over limit, drop oldest inserted. */
+function evictStaleEntries(): void {
+  const now = Date.now();
+  for (const [key, entry] of urlCache) {
+    if (entry.expiresAt <= now) urlCache.delete(key);
+  }
+  if (urlCache.size > MAX_CACHE_SIZE) {
+    for (const key of Array.from(urlCache.keys()).slice(0, urlCache.size - MAX_CACHE_SIZE)) {
+      urlCache.delete(key);
+    }
+  }
+}
+
+/**
+ * Drop the cached download URL for a file.
+ *
+ * Cache keys always have the form `${project}:${fileId}`. When `project` is
+ * given, only that project's entry is removed. When it is omitted, the entry
+ * for `fileId` is removed for every project (a bare `${fileId}` key is never
+ * stored, so deleting it would do nothing).
+ */
+export function clearFileCache(fileId: number, project?: string): void {
+  if (project) {
+    urlCache.delete(`${project}:${fileId}`);
+    return;
+  }
+  // The leading ':' stops id 23 from matching a key that ends in ':123'.
+  const suffix = `:${fileId}`;
+  for (const key of Array.from(urlCache.keys())) {
+    if (key.endsWith(suffix)) urlCache.delete(key);
+  }
+}
+
+/** Drop every cached download URL. */
+export function clearAllFileCache(): void {
+  urlCache.clear();
+}
+
+// ============================================================================
+// Download URL helpers
+// ============================================================================
+
+/**
+ * Get download URL with extended expiration (59 min instead of default ~30 min).
+ * The JS SDK doesn't expose `extendedExpiration`, so we call the API directly.
+ */
+async function getDownloadUrlExtended(
+  client: CogniteClient,
+  fileId: number,
+): Promise<string> {
+  const result = await client.post<{ items: Array<{ downloadUrl: string }> }>(
+    `/api/v1/projects/${client.project}/files/downloadlink`,
+    {
+      data: { items: [{ id: fileId }] },
+      params: { extendedExpiration: true },
+    },
+  );
+  // An empty `items` array is reported as an error rather than returning undefined.
+  const downloadUrl = result.data.items[0]?.downloadUrl;
+  if (!downloadUrl) throw new Error(`No download URL for file ${fileId}`);
+  return downloadUrl;
+}
+
+/**
+ * Get a temporary PDF link via the Document Preview API.
+ * Converts Office documents to PDF.
+ */
+async function getPdfTemporaryLink(
+  client: CogniteClient,
+  fileId: number,
+): Promise<string> {
+  const response = await client.documents.preview.pdfTemporaryLink(fileId);
+  return response.temporaryLink;
+}
+
+// ============================================================================
+// Main resolution function
+// ============================================================================
+
+export interface ResolvedFileConfig {
+  url: string;
+  mimeType: string;
+}
+
+/**
+ * Resolve a CDF file to a download URL and effective MIME type.
+ *
+ * Strategy:
+ * 1. Natively supported (images, PDF, text) → direct download with extended expiry
+ * 2. Office documents → PDF conversion via Document Preview API
+ * 3. Otherwise → throws
+ *
+ * Results are cached for 59 minutes.
+ */
+export async function resolveFileDownloadConfig(
+  client: CogniteClient,
+  file: FileInfo,
+): Promise<ResolvedFileConfig> {
+  // Scope the key by project so equal numeric ids in different projects never collide.
+  const cacheKey = `${client.project}:${file.id}`;
+  // Taken before any request, so the stored expiry errs on the early side.
+  const now = Date.now();
+  const cached = urlCache.get(cacheKey);
+  if (cached && cached.expiresAt > now) {
+    return { url: cached.url, mimeType: cached.mimeType };
+  }
+
+  const computedMimeType = getComputedMimeType(file);
+
+  let resolved: ResolvedFileConfig;
+
+  if (computedMimeType && isNativelySupportedMimeType(computedMimeType)) {
+    const url = await getDownloadUrlExtended(client, file.id);
+    resolved = { url, mimeType: computedMimeType };
+  } else if (doesDocumentPreviewApiSupportFile(file)) {
+    // The preview API returns a PDF rendition, so report PDF rather than the source type.
+    // NOTE(review): this link is cached with the same 59-minute TTL as direct
+    // download links — confirm that preview links stay valid that long.
+    const url = await getPdfTemporaryLink(client, file.id);
+    resolved = { url, mimeType: DocumentMimeType.PDF };
+  } else {
+    throw new Error(
+      `Unsupported file type (id: ${file.id}, name: ${file.name}, mimeType: ${file.mimeType})`,
+    );
+  }
+
+  // Failures are never cached: a throw above skips this write.
+  urlCache.set(cacheKey, { ...resolved, expiresAt: now + URL_CACHE_EXPIRE_MS });
+  evictStaleEntries();
+  return resolved;
+}
diff --git a/.agents/skills/integrate-file-viewer/code/index.ts b/.agents/skills/integrate-file-viewer/code/index.ts
new file mode 100644
index 0000000..f17ed50
--- /dev/null
+++ b/.agents/skills/integrate-file-viewer/code/index.ts
@@ -0,0 +1,40 @@
+// Component
+export { CogniteFileViewer } from './CogniteFileViewer';
+
+// Annotation overlay (for custom compositions)
+export {
+  DocumentAnnotationOverlay,
+  getAnnotationColor,
+  getAllAnnotationColors,
+} from './DocumentAnnotationOverlay';
+export type { DocumentAnnotationOverlayProps } from './DocumentAnnotationOverlay';
+
+// Hooks (for advanced / custom usage)
+export { useFileResolver } from './useFileResolver';
+export { useDocumentAnnotations, clearAnnotationCache } from './useDocumentAnnotations';
+
+// File resolution utilities
+export { resolveFileDownloadConfig, clearFileCache, clearAllFileCache } from './fileResolution';
+
+// MIME type utilities
+export {
+  getViewerType,
+
getComputedMimeType, + inferMimeTypeFromUrl, + isNativelySupportedMimeType, + doesDocumentPreviewApiSupportFile, +} from './mimeTypes'; + +// Types +export type { + FileSource, + FileViewerType, + DocumentAnnotation, + AnnotationResourceType, + BoundingRect, + OverlayRenderInfo, + ResolvedFile, + UseFileResolverResult, + UseDocumentAnnotationsResult, + CogniteFileViewerProps, +} from './types'; diff --git a/.agents/skills/integrate-file-viewer/code/mimeTypes.ts b/.agents/skills/integrate-file-viewer/code/mimeTypes.ts new file mode 100644 index 0000000..ca039fd --- /dev/null +++ b/.agents/skills/integrate-file-viewer/code/mimeTypes.ts @@ -0,0 +1,171 @@ +import type { FileViewerType } from './types'; + +// ============================================================================ +// MIME Type Constants +// ============================================================================ + +export const DocumentMimeType = { + PDF: 'application/pdf', +} as const; + +export const ImageMimeType = { + JPEG: 'image/jpeg', + PNG: 'image/png', + SVG: 'image/svg+xml', + TIFF: 'image/tiff', + WEBP: 'image/webp', +} as const; + +export const TextMimeType = { + TXT: 'text/plain', + CSV: 'text/csv', + JSON: 'application/json', +} as const; + +const NativelySupportedMimeTypes = { + ...ImageMimeType, + ...DocumentMimeType, + ...TextMimeType, +} as const; + +type NativelySupportedMimeType = + (typeof NativelySupportedMimeTypes)[keyof typeof NativelySupportedMimeTypes]; + +// Pre-computed Sets for O(1) lookups (these objects are `as const`, never mutated) +const nativelySupportedSet = new Set(Object.values(NativelySupportedMimeTypes)); +const documentMimeSet = new Set(Object.values(DocumentMimeType)); +const imageMimeSet = new Set(Object.values(ImageMimeType)); +const textMimeSet = new Set(Object.values(TextMimeType)); +// ============================================================================ +// Document Preview API Support (Office → PDF conversion) +// Source: 
https://github.com/cognitedata/document-preview +// ============================================================================ + +const documentPreviewMimeTypes = [ + // Word + 'application/msword', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'application/vnd.openxmlformats-officedocument.wordprocessingml.template', + 'application/vnd.ms-word.document.macroEnabled.12', + 'application/vnd.ms-word.template.macroEnabled.12', + 'application/rtf', + 'application/vnd.oasis.opendocument.text', + 'application/vnd.oasis.opendocument.text-template', + // PowerPoint + 'application/vnd.ms-powerpoint', + 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'application/vnd.openxmlformats-officedocument.presentationml.template', + 'application/vnd.openxmlformats-officedocument.presentationml.slideshow', + 'application/vnd.ms-powerpoint.presentation.macroEnabled.12', + 'application/vnd.ms-powerpoint.template.macroEnabled.12', + 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12', + 'application/vnd.oasis.opendocument.presentation', + 'application/vnd.oasis.opendocument.presentation-template', + // Excel + 'application/vnd.ms-excel', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.template', + 'application/vnd.ms-excel.sheet.macroEnabled.12', + 'application/vnd.ms-excel.template.macroEnabled.12', + 'application/vnd.ms-excel.sheet.binary.macroEnabled.12', + 'application/vnd.apple.numbers', + 'text/tab-separated-values', + 'application/vnd.oasis.opendocument.spreadsheet', +]; + +const documentPreviewExtensions = new Set([ + 'doc', 'dot', 'docx', 'dotx', 'docm', 'dotm', 'rtf', 'odt', 'ott', + 'ppt', 'pot', 'pps', 'pptx', 'potx', 'ppsx', 'pptm', 'potm', 'ppsm', 'odp', 'otp', + 'xls', 'xlt', 'xlsx', 'xltx', 'xlsm', 'xltm', 'xlsb', 'numbers', 'tsv', 'ods', +]); + +const documentPreviewMimeSet = new Set(documentPreviewMimeTypes); + +// 
============================================================================
+// Helpers
+// ============================================================================
+
+/**
+ * Return the lower-cased extension of the last path segment of a URL or file
+ * name, ignoring any `#fragment` and `?query`. Returns '' when there is no
+ * dot, when the only dot is the first character (e.g. `.gitignore`) or when
+ * the name ends with a dot.
+ */
+function getFileExtension(value: string): string {
+  const clean = value.split('#')[0].split('?')[0];
+  const filename = clean.split('/').pop() ?? '';
+  const lastDot = filename.lastIndexOf('.');
+  if (lastDot <= 0 || lastDot === filename.length - 1) return '';
+  return filename.slice(lastDot + 1).toLowerCase();
+}
+
+/**
+ * Map common non-standard MIME aliases to the constants used in this module.
+ * Parameters and surrounding whitespace are dropped first, so
+ * `text/plain; charset=utf-8` is treated as `text/plain`. Letter case is left
+ * untouched because the lookup sets in this module contain mixed-case entries.
+ */
+function canonicaliseMimeType(mimeType: string): string {
+  const bareType = mimeType.split(';')[0].trim();
+  switch (bareType) {
+    case 'image/jpg':
+      return ImageMimeType.JPEG;
+    case 'image/tif':
+      return ImageMimeType.TIFF;
+    case 'image/svg':
+      return ImageMimeType.SVG;
+    case 'application/txt':
+      return TextMimeType.TXT;
+    default:
+      return bareType;
+  }
+}
+
+// ============================================================================
+// Public API
+// ============================================================================
+
+/** Type guard: true when `mimeType` is exactly one of the natively rendered types. */
+export function isNativelySupportedMimeType(
+  mimeType: string | null | undefined,
+): mimeType is NativelySupportedMimeType {
+  if (!mimeType) return false;
+  return nativelySupportedSet.has(mimeType);
+}
+
+/**
+ * True when the file can be converted by the Document Preview API, judged by
+ * its MIME type first (parameters ignored) and its file-name extension second.
+ */
+export function doesDocumentPreviewApiSupportFile(file: {
+  mimeType?: string | null;
+  name?: string | null;
+}): boolean {
+  if (file.mimeType && documentPreviewMimeSet.has(canonicaliseMimeType(file.mimeType))) {
+    return true;
+  }
+  if (file.name && documentPreviewExtensions.has(getFileExtension(file.name))) return true;
+  return false;
+}
+
+const extensionToMimeType: Record<string, string> = {
+  pdf: DocumentMimeType.PDF,
+  jpg: ImageMimeType.JPEG,
+  jpeg: ImageMimeType.JPEG,
+  png: ImageMimeType.PNG,
+  svg: ImageMimeType.SVG,
+  tif: ImageMimeType.TIFF,
+  tiff: ImageMimeType.TIFF,
+  webp: ImageMimeType.WEBP,
+  txt: TextMimeType.TXT,
+  csv: TextMimeType.CSV,
+  json: TextMimeType.JSON,
+};
+
+/**
+ * Guess a MIME type from the extension of a URL or file name.
+ * Returns `undefined` when the extension is missing or not in the table.
+ */
+export function inferMimeTypeFromUrl(urlOrName: string): string | undefined {
+  const extension = getFileExtension(urlOrName);
+  // Own-property check: a plain-object lookup would otherwise resolve inherited
+  // keys, e.g. a name like `report.constructor` would return a function.
+  return Object.prototype.hasOwnProperty.call(extensionToMimeType, extension)
+    ? extensionToMimeType[extension]
+    : undefined;
+}
+ +export function getComputedMimeType(file: { + mimeType?: string | null; + name?: string | null; +}): string | undefined { + if (file.mimeType) return canonicaliseMimeType(file.mimeType); + if (file.name) return inferMimeTypeFromUrl(file.name); + return undefined; +} + +export function getViewerType(mimeType: string | undefined): FileViewerType { + if (!mimeType) return 'unsupported'; + + const canonical = canonicaliseMimeType(mimeType); + + if (documentMimeSet.has(canonical)) return 'pdf'; + if (imageMimeSet.has(canonical)) return 'image'; + if (textMimeSet.has(canonical)) return 'text'; + + // Office documents get converted to PDF + if (documentPreviewMimeSet.has(canonical)) return 'pdf'; + + return 'unsupported'; +} diff --git a/.agents/skills/integrate-file-viewer/code/types.ts b/.agents/skills/integrate-file-viewer/code/types.ts new file mode 100644 index 0000000..417f007 --- /dev/null +++ b/.agents/skills/integrate-file-viewer/code/types.ts @@ -0,0 +1,189 @@ +import type React from 'react'; +import type { CogniteClient, FileInfo } from '@cognite/sdk'; + +// ============================================================================ +// File Source (discriminated union) +// ============================================================================ + +export type FileSource = + | { type: 'instanceId'; space: string; externalId: string } + | { type: 'url'; url: string; mimeType?: string } + | { type: 'internalId'; id: number }; + +// ============================================================================ +// Viewer Types +// ============================================================================ + +export type FileViewerType = 'pdf' | 'image' | 'text' | 'unsupported'; + +// ============================================================================ +// Annotations +// ============================================================================ + +export type AnnotationResourceType = + | 'asset' + | 'file' + | 'timeSeries' + | 'sequence' + | 
'event' + | 'diagram' + | 'unknown'; + +export interface DocumentAnnotation { + id: string; + /** Normalized bounding box (0-1 range relative to page) */ + x: number; + y: number; + width: number; + height: number; + /** 1-indexed page number */ + page: number; + resourceType: AnnotationResourceType; + linkedResource?: { space: string; externalId: string }; + /** Text content (e.g. tag name) */ + text?: string; + annotationType: string; +} + +// ============================================================================ +// Resolved File +// ============================================================================ + +export interface ResolvedFile { + url: string; + mimeType: string; + fileInfo?: FileInfo; + instanceId?: { space: string; externalId: string }; +} + +// ============================================================================ +// Hook Results +// ============================================================================ + +export interface UseFileResolverResult extends Partial { + isLoading: boolean; + error: Error | null; +} + +export interface UseDocumentAnnotationsResult { + annotations: DocumentAnnotation[]; + isLoading: boolean; + error: Error | null; +} + +// ============================================================================ +// Geometry +// ============================================================================ + +export interface BoundingRect { + x: number; + y: number; + width: number; + height: number; +} + +// ============================================================================ +// Overlay +// ============================================================================ + +export interface OverlayRenderInfo { + /** Rendered page width in CSS pixels (after zoom/scale) */ + width: number; + /** Rendered page height in CSS pixels (after zoom/scale) */ + height: number; + /** Original page width before zoom (PDF points or image natural pixels) */ + originalWidth: number; + /** Original page height before zoom (PDF 
points or image natural pixels) */ + originalHeight: number; + /** Current page number (1-indexed) */ + pageNumber: number; + /** Current rotation in degrees */ + rotation: 0 | 90 | 180 | 270; +} + +// ============================================================================ +// Component Props +// ============================================================================ + +export interface CogniteFileViewerProps { + /** File source — instance ID, direct URL, or CDF internal ID */ + source: FileSource; + /** CogniteClient instance (required for instanceId and internalId sources) */ + client?: CogniteClient; + + // -- Annotations -- + /** Show diagram annotations overlay on PDFs (default: true) */ + showAnnotations?: boolean; + /** Called when a user clicks an annotation */ + onAnnotationClick?: (annotation: DocumentAnnotation) => void; + /** Called when a user hovers over / leaves an annotation */ + onAnnotationHover?: (annotation: DocumentAnnotation | null) => void; + /** Render a custom tooltip when hovering an annotation. Receives the annotation and its pixel-space bounding rect. */ + renderAnnotationTooltip?: ( + annotation: DocumentAnnotation, + rect: BoundingRect, + ) => React.ReactNode; + + // -- PDF controls -- + /** Current page (1-indexed). Uncontrolled if omitted. */ + page?: number; + /** Called when the displayed page changes */ + onPageChange?: (page: number) => void; + /** Called once the PDF document is loaded */ + onDocumentLoad?: (info: { numPages: number }) => void; + /** Desired page width in pixels */ + width?: number; + /** Page rotation in degrees */ + rotation?: 0 | 90 | 180 | 270; + + // -- Zoom & Pan -- + /** Current zoom level (1 = 100%). Supports controlled + uncontrolled. 
*/ + zoom?: number; + /** Called when zoom changes (Ctrl/Cmd+wheel or pinch) */ + onZoomChange?: (zoom: number) => void; + /** Minimum zoom level (default: 0.25) */ + minZoom?: number; + /** Maximum zoom level (default: 5) */ + maxZoom?: number; + /** Pan offset in pixels. Supports controlled + uncontrolled. Resets on page change. */ + panOffset?: { x: number; y: number }; + /** Called when pan changes (drag when zoomed in) */ + onPanChange?: (offset: { x: number; y: number }) => void; + + // -- Fit & Progress -- + /** Auto-fit mode: 'width' fits page to container width, 'page' fits entire page in container */ + fitMode?: 'width' | 'page'; + /** Called during PDF download with progress info */ + onLoadProgress?: (progress: { loaded: number; total: number }) => void; + + // -- Custom overlay -- + /** + * Render custom content (e.g. SVG paths, highlights, drawings) on top of the page. + * The overlay is absolutely positioned over the rendered page. + * + * Provides both rendered dimensions and original (unscaled) page dimensions, + * so consumers can set up an SVG `viewBox` in the original coordinate space: + * ```tsx + * renderOverlay={({ width, height, originalWidth, originalHeight }) => ( + * + * + * + * )} + * ``` + */ + renderOverlay?: (info: OverlayRenderInfo) => React.ReactNode; + + // -- Customisation -- + /** Override the default loading indicator */ + renderLoading?: () => React.ReactNode; + /** Override the default error view */ + renderError?: (error: Error) => React.ReactNode; + /** Override the default "unsupported file" view */ + renderUnsupported?: (mimeType: string | undefined) => React.ReactNode; + + className?: string; + style?: React.CSSProperties; +} diff --git a/.agents/skills/integrate-file-viewer/code/useDocumentAnnotations.ts b/.agents/skills/integrate-file-viewer/code/useDocumentAnnotations.ts new file mode 100644 index 0000000..e188f9e --- /dev/null +++ b/.agents/skills/integrate-file-viewer/code/useDocumentAnnotations.ts @@ -0,0 +1,268 @@ 
+import { useState, useEffect, useRef, useMemo } from 'react'; +import type { CogniteClient, EdgeDefinition } from '@cognite/sdk'; +import type { + DocumentAnnotation, + AnnotationResourceType, + UseDocumentAnnotationsResult, +} from './types'; + +// ============================================================================ +// CDM constants +// ============================================================================ + +const CDM_SPACE = 'cdf_cdm'; +const CDM_VERSION = 'v1'; +const DIAGRAM_ANNOTATION_VIEW = 'CogniteDiagramAnnotation'; +const QUERY_LIMIT = 10_000; + +// ============================================================================ +// Cache — stores ALL annotations for a file, filtered by page at read time +// ============================================================================ + +const STALE_TIME = 5 * 60 * 1000; // 5 minutes +const MAX_CACHE_SIZE = 50; + +interface CacheEntry { + data: DocumentAnnotation[]; + timestamp: number; +} + +const annotationCache = new Map(); + +/** Cache key scoped by project + file instance. 
*/ +function fileCacheKey(project: string, space: string, externalId: string): string { + return JSON.stringify([project, space, externalId]); +} + +function evictStaleAnnotations(): void { + const now = Date.now(); + for (const [key, entry] of annotationCache) { + if (now - entry.timestamp > STALE_TIME) annotationCache.delete(key); + } + if (annotationCache.size > MAX_CACHE_SIZE) { + for (const key of Array.from(annotationCache.keys()).slice(0, annotationCache.size - MAX_CACHE_SIZE)) { + annotationCache.delete(key); + } + } +} + +export function clearAnnotationCache(): void { + annotationCache.clear(); +} + +// ============================================================================ +// Helpers +// ============================================================================ + +interface CdmAnnotationProps { + status?: string; + startNodeText?: string; + startNodeYMax?: number; + startNodeYMin?: number; + startNodeXMax?: number; + startNodeXMin?: number; + startNodePageNumber?: number; +} + +function getResourceType(annotationType: string): AnnotationResourceType { + const lower = annotationType.toLowerCase(); + if (lower.includes('asset')) return 'asset'; + if (lower.includes('file')) return 'file'; + if (lower.includes('timeseries') || lower.includes('time_series')) + return 'timeSeries'; + if (lower.includes('sequence')) return 'sequence'; + if (lower.includes('event')) return 'event'; + if (lower.includes('diagram')) return 'diagram'; + return 'unknown'; +} + +// ============================================================================ +// Fetcher — fetches ALL annotations for a file (not per-page) +// ============================================================================ + +async function fetchAllAnnotations( + client: CogniteClient, + space: string, + externalId: string, +): Promise { + const containerId = `${space}:${externalId}`; + const propPath = `${DIAGRAM_ANNOTATION_VIEW}/${CDM_VERSION}`; + + const allEdges: EdgeDefinition[] = []; + let 
cursor: string | undefined; + + do { + const response = await client.instances.query({ + with: { + files: { + nodes: { + filter: { + and: [ + { + equals: { + property: ['node', 'externalId'], + value: externalId, + }, + }, + { + equals: { + property: ['node', 'space'], + value: space, + }, + }, + ], + }, + }, + }, + annotations: { + edges: { + from: 'files', + direction: 'outwards', + }, + limit: QUERY_LIMIT, + }, + }, + select: { + annotations: { + sources: [ + { + source: { + externalId: DIAGRAM_ANNOTATION_VIEW, + space: CDM_SPACE, + type: 'view' as const, + version: CDM_VERSION, + }, + properties: [ + 'status', + 'startNodeText', + 'startNodeYMax', + 'startNodeYMin', + 'startNodeXMax', + 'startNodeXMin', + 'startNodePageNumber', + ], + }, + ], + limit: QUERY_LIMIT, + }, + }, + cursors: cursor ? { annotations: cursor } : undefined, + }); + + const edges = (response.items?.annotations ?? []).filter( + (a) => a.instanceType === 'edge', + ); + allEdges.push(...edges); + + cursor = + edges.length < QUERY_LIMIT + ? undefined + : response.nextCursor?.annotations; + } while (cursor); + + return allEdges.flatMap((edge) => { + const props: CdmAnnotationProps | undefined = + edge.properties?.[CDM_SPACE]?.[propPath]; + if (!props) return []; + if (props.status === 'Rejected') return []; + + const xMin = Number(props.startNodeXMin ?? 0); + const xMax = Number(props.startNodeXMax ?? 0); + const yMin = Number(props.startNodeYMin ?? 0); + const yMax = Number(props.startNodeYMax ?? 0); + + const annotationType = + edge.type?.externalId ?? 'diagrams.AssetLink'; + + const annotation: DocumentAnnotation = { + id: `${containerId}-${edge.space}-${edge.externalId}`, + x: Math.min(xMin, xMax), + y: Math.min(yMin, yMax), + width: Math.abs(xMax - xMin), + height: Math.abs(yMax - yMin), + page: Number(props.startNodePageNumber ?? 1), + resourceType: getResourceType(annotationType), + linkedResource: edge.endNode + ? 
{ space: edge.endNode.space, externalId: edge.endNode.externalId }
+        : undefined,
+      text: props.startNodeText ?? undefined,
+      annotationType,
+    };
+    return [annotation];
+  });
+}
+
+// ============================================================================
+// Hook
+// ============================================================================
+
+interface AnnotationState {
+  allAnnotations: DocumentAnnotation[];
+  isLoading: boolean;
+  error: Error | null;
+}
+
+const INITIAL_STATE: AnnotationState = {
+  allAnnotations: [],
+  isLoading: false,
+  error: null,
+};
+
+/**
+ * Load the diagram annotations of a file and return those on `currentPage`.
+ *
+ * All annotations of the file are fetched once (or taken from the module cache
+ * while the entry is younger than `STALE_TIME`) and filtered by page on the
+ * client. Nothing is fetched, and an empty non-loading result is returned,
+ * while `options.enabled` is false or `client` / `instanceId` is missing.
+ *
+ * @param client      CogniteClient used for the query; its `project` scopes the cache key.
+ * @param instanceId  Space and external id of the file instance.
+ * @param currentPage 1-indexed page whose annotations are returned (default 1).
+ * @param options     `enabled` (default true) switches fetching on or off.
+ */
+export function useDocumentAnnotations(
+  client: CogniteClient | undefined,
+  instanceId: { space: string; externalId: string } | undefined,
+  currentPage: number = 1,
+  options?: { enabled?: boolean },
+): UseDocumentAnnotationsResult {
+  const enabled = options?.enabled ?? true;
+  const [state, setState] = useState<AnnotationState>(INITIAL_STATE);
+  const cancelRef = useRef(0);
+
+  const space = instanceId?.space;
+  const extId = instanceId?.externalId;
+  const project = client?.project;
+
+  // Fetch all annotations for the file (not per-page)
+  useEffect(() => {
+    // Bump the request counter before any early return, so a fetch started by
+    // a previous run can no longer write its result after the state was reset.
+    const id = ++cancelRef.current;
+    const cancelled = () => id !== cancelRef.current;
+
+    if (!enabled || !client || !space || !extId || !project) {
+      setState(INITIAL_STATE);
+      return;
+    }
+
+    const key = fileCacheKey(project, space, extId);
+    const cached = annotationCache.get(key);
+    if (cached && Date.now() - cached.timestamp < STALE_TIME) {
+      setState({ allAnnotations: cached.data, isLoading: false, error: null });
+      return;
+    }
+
+    // Clear the previous result: it may belong to a different file and must
+    // not stay on screen while the new file's annotations are loading.
+    setState({ allAnnotations: [], isLoading: true, error: null });
+
+    fetchAllAnnotations(client, space, extId)
+      .then((data) => {
+        if (cancelled()) return;
+        annotationCache.set(key, { data, timestamp: Date.now() });
+        evictStaleAnnotations();
+        setState({ allAnnotations: data, isLoading: false, error: null });
+      })
+      .catch((err) => {
+        if (cancelled()) return;
+        setState({
+          allAnnotations: [],
+          isLoading: false,
+          error: err instanceof Error ? err : new Error(String(err)),
+        });
+      });
+  }, [client, project, space, extId, enabled]);
+
+  // Filter by current page (cheap client-side filter on cached data)
+  const annotations = useMemo(
+    () => state.allAnnotations.filter((a) => a.page === currentPage),
+    [state.allAnnotations, currentPage],
+  );
+
+  return { annotations, isLoading: state.isLoading, error: state.error };
+}
diff --git a/.agents/skills/integrate-file-viewer/code/useFileResolver.ts b/.agents/skills/integrate-file-viewer/code/useFileResolver.ts
new file mode 100644
index 0000000..7a26c9f
--- /dev/null
+++ b/.agents/skills/integrate-file-viewer/code/useFileResolver.ts
@@ -0,0 +1,122 @@
+import { useState, useEffect, useRef } from 'react';
+import type { CogniteClient } from '@cognite/sdk';
+import type { FileSource, UseFileResolverResult } from './types';
+import { inferMimeTypeFromUrl } from './mimeTypes';
+import { resolveFileDownloadConfig } from './fileResolution';
+
+// ============================================================================
+// Helpers
+// ============================================================================
+
+function getSourceKey(source: FileSource): string {
+  switch (source.type) {
+    case 'instanceId':
+      return `inst:${source.space}/${source.externalId}`;
+    case 'internalId':
+      return `id:${source.id}`;
+    case 'url':
+      return `url:${source.url}\0${source.mimeType ?? ''}`;
+  }
+}
+
+const INITIAL: UseFileResolverResult = {
+  isLoading: true,
+  error: null,
+};
+
+// ============================================================================
+// Hook
+// ============================================================================
+
+/**
+ * Resolves a {@link FileSource} to a download URL and MIME type.
+ *
+ * - `url` sources are returned directly (no client needed).
+ * - `internalId` and `instanceId` sources use the CogniteClient to fetch
+ *   metadata and resolve a download URL (with caching).
+ */ +export function useFileResolver( + source: FileSource, + client?: CogniteClient, +): UseFileResolverResult { + const [result, setResult] = useState(INITIAL); + const sourceKey = getSourceKey(source); + const cancelRef = useRef(0); + + useEffect(() => { + const id = ++cancelRef.current; + const cancelled = () => id !== cancelRef.current; + + async function resolve() { + setResult(INITIAL); + + try { + // ----- URL source: no client needed ----- + if (source.type === 'url') { + const mimeType = source.mimeType ?? inferMimeTypeFromUrl(source.url); + setResult({ + url: source.url, + mimeType: mimeType ?? '', + isLoading: false, + error: null, + }); + return; + } + + // ----- CDF sources: client is required ----- + if (!client) { + throw new Error( + 'CogniteClient is required for instanceId and internalId sources', + ); + } + + // Build the lookup identifier the SDK expects + const idParam = + source.type === 'internalId' + ? { id: source.id } + : { + instanceId: { + space: source.space, + externalId: source.externalId, + }, + }; + + const [fileInfo] = await client.files.retrieve([idParam]); + if (cancelled()) return; + + const resolved = await resolveFileDownloadConfig(client, fileInfo); + if (cancelled()) return; + + // Derive instanceId — prefer the one returned by the API, + // fall back to what the caller passed for instanceId sources. + const instanceId = fileInfo.instanceId + ? { + space: fileInfo.instanceId.space, + externalId: fileInfo.instanceId.externalId, + } + : source.type === 'instanceId' + ? { space: source.space, externalId: source.externalId } + : undefined; + + setResult({ + url: resolved.url, + mimeType: resolved.mimeType, + fileInfo, + instanceId, + isLoading: false, + error: null, + }); + } catch (err) { + if (cancelled()) return; + setResult({ + isLoading: false, + error: err instanceof Error ? 
err : new Error(String(err)), + }); + } + } + + resolve(); + }, [sourceKey, client]); + + return result; +} diff --git a/.agents/skills/integrate-file-viewer/code/useViewport.ts b/.agents/skills/integrate-file-viewer/code/useViewport.ts new file mode 100644 index 0000000..8463cf8 --- /dev/null +++ b/.agents/skills/integrate-file-viewer/code/useViewport.ts @@ -0,0 +1,280 @@ +import { useState, useRef, useCallback, useEffect } from 'react'; +import type React from 'react'; + +const ZERO_PAN = { x: 0, y: 0 }; + +export interface ViewportOptions { + zoom?: number; + onZoomChange?: (zoom: number) => void; + minZoom?: number; + maxZoom?: number; + panOffset?: { x: number; y: number }; + onPanChange?: (offset: { x: number; y: number }) => void; +} + +/** Get distance between two touch points. */ +function getTouchDistance(t1: Touch, t2: Touch): number { + const dx = t1.clientX - t2.clientX; + const dy = t1.clientY - t2.clientY; + return Math.hypot(dx, dy); +} + +/** Get midpoint between two touch points. */ +function getTouchCenter(t1: Touch, t2: Touch): { x: number; y: number } { + return { + x: (t1.clientX + t2.clientX) / 2, + y: (t1.clientY + t2.clientY) / 2, + }; +} + +export function useViewport(options: ViewportOptions) { + const { + zoom: controlledZoom, + onZoomChange, + minZoom = 0.25, + maxZoom = 5, + panOffset: controlledPan, + onPanChange, + } = options; + + // -- Zoom state (controlled + uncontrolled) -- + const [internalZoom, setInternalZoom] = useState(1); + const currentZoom = controlledZoom ?? 
internalZoom; + + const clampZoom = useCallback( + (z: number) => Math.min(maxZoom, Math.max(minZoom, z)), + [minZoom, maxZoom], + ); + + const handleZoomChange = useCallback( + (newZoom: number) => { + const clamped = clampZoom(newZoom); + setInternalZoom(clamped); + onZoomChange?.(clamped); + }, + [onZoomChange, clampZoom], + ); + + // -- Pan state (controlled + uncontrolled) -- + const [internalPan, setInternalPan] = useState(ZERO_PAN); + const currentPan = controlledPan ?? internalPan; + + const handlePanChange = useCallback( + (offset: { x: number; y: number }) => { + setInternalPan(offset); + onPanChange?.(offset); + }, + [onPanChange], + ); + + const effectivePan = currentZoom <= 1 ? ZERO_PAN : currentPan; + + // -- Stable refs for event handlers -- + const currentZoomRef = useRef(currentZoom); + currentZoomRef.current = currentZoom; + const currentPanRef = useRef(currentPan); + currentPanRef.current = currentPan; + const clampZoomRef = useRef(clampZoom); + clampZoomRef.current = clampZoom; + const handleZoomChangeRef = useRef(handleZoomChange); + handleZoomChangeRef.current = handleZoomChange; + const handlePanChangeRef = useRef(handlePanChange); + handlePanChangeRef.current = handlePanChange; + + // -- Container dimensions -- + const [containerDims, setContainerDims] = useState({ width: 0, height: 0 }); + const viewportObserverRef = useRef(null); + const eventCleanupRef = useRef<(() => void) | null>(null); + + // -- Touch gesture state (stored in ref to avoid re-renders during gesture) -- + const touchStateRef = useRef<{ + initialDistance: number; + initialZoom: number; + initialPan: { x: number; y: number }; + initialCenter: { x: number; y: number }; + initialRect: DOMRect; + } | null>(null); + + const viewportRef = useCallback((node: HTMLDivElement | null) => { + eventCleanupRef.current?.(); + eventCleanupRef.current = null; + viewportObserverRef.current?.disconnect(); + viewportObserverRef.current = null; + + if (node) { + const measure = () => { + 
const w = node.clientWidth; + const h = node.clientHeight; + setContainerDims((prev) => + prev.width === w && prev.height === h ? prev : { width: w, height: h }, + ); + }; + const observer = new ResizeObserver(measure); + observer.observe(node); + measure(); + viewportObserverRef.current = observer; + + // Ctrl/Cmd + wheel → zoom toward cursor + const wheelHandler = (e: WheelEvent) => { + // Ctrl/Cmd + wheel → zoom toward cursor + if (e.ctrlKey || e.metaKey) { + e.preventDefault(); + const oldZoom = currentZoomRef.current; + const factor = e.deltaY > 0 ? 0.9 : 1.1; + const newZoom = clampZoomRef.current(oldZoom * factor); + if (newZoom === oldZoom) return; + const rect = node.getBoundingClientRect(); + const cx = e.clientX - rect.left; + const cy = e.clientY - rect.top; + const pan = currentPanRef.current; + const ratio = newZoom / oldZoom; + handleZoomChangeRef.current(newZoom); + handlePanChangeRef.current({ + x: cx - (cx - pan.x) * ratio, + y: cy - (cy - pan.y) * ratio, + }); + return; + } + + // Wheel/trackpad scroll → pan when zoomed in + if (currentZoomRef.current > 1) { + e.preventDefault(); + const pan = currentPanRef.current; + handlePanChangeRef.current({ + x: pan.x - e.deltaX, + y: pan.y - e.deltaY, + }); + } + }; + + // Touch: pinch-to-zoom + two-finger pan + const touchStartHandler = (e: TouchEvent) => { + if (e.touches.length !== 2) return; + e.preventDefault(); + const t1 = e.touches[0]; + const t2 = e.touches[1]; + touchStateRef.current = { + initialDistance: getTouchDistance(t1, t2), + initialZoom: currentZoomRef.current, + initialPan: { ...currentPanRef.current }, + initialCenter: getTouchCenter(t1, t2), + initialRect: node.getBoundingClientRect(), + }; + }; + + const touchMoveHandler = (e: TouchEvent) => { + if (e.touches.length !== 2 || !touchStateRef.current) return; + e.preventDefault(); + const t1 = e.touches[0]; + const t2 = e.touches[1]; + const { initialDistance, initialZoom, initialPan, initialCenter, initialRect } = 
touchStateRef.current; + + // Zoom + const currentDistance = getTouchDistance(t1, t2); + const scale = currentDistance / initialDistance; + const newZoom = clampZoomRef.current(initialZoom * scale); + handleZoomChangeRef.current(newZoom); + + // Pan toward pinch center (use cached rect to avoid layout thrashing) + const center = getTouchCenter(t1, t2); + const cx = initialCenter.x - initialRect.left; + const cy = initialCenter.y - initialRect.top; + const ratio = newZoom / initialZoom; + handlePanChangeRef.current({ + x: cx - (cx - initialPan.x) * ratio + (center.x - initialCenter.x), + y: cy - (cy - initialPan.y) * ratio + (center.y - initialCenter.y), + }); + }; + + const touchEndHandler = () => { + touchStateRef.current = null; + }; + + node.addEventListener('wheel', wheelHandler, { passive: false }); + node.addEventListener('touchstart', touchStartHandler, { passive: false }); + node.addEventListener('touchmove', touchMoveHandler, { passive: false }); + node.addEventListener('touchend', touchEndHandler); + node.addEventListener('touchcancel', touchEndHandler); + + eventCleanupRef.current = () => { + node.removeEventListener('wheel', wheelHandler); + node.removeEventListener('touchstart', touchStartHandler); + node.removeEventListener('touchmove', touchMoveHandler); + node.removeEventListener('touchend', touchEndHandler); + node.removeEventListener('touchcancel', touchEndHandler); + }; + } + }, []); + + useEffect(() => { + return () => { + eventCleanupRef.current?.(); + viewportObserverRef.current?.disconnect(); + }; + }, []); + + // -- Drag to pan (when zoomed in) -- + const [isDragging, setIsDragging] = useState(false); + const dragStart = useRef(ZERO_PAN); + const panStart = useRef(ZERO_PAN); + + const handleMouseDown = useCallback((e: React.MouseEvent) => { + if (currentZoomRef.current <= 1) return; + if (e.button !== 1) return; // middle-click only + e.preventDefault(); + setIsDragging(true); + dragStart.current = { x: e.clientX, y: e.clientY }; + 
panStart.current = currentPanRef.current; + }, []); + + useEffect(() => { + if (!isDragging) return; + const handleMouseMove = (e: MouseEvent) => { + handlePanChangeRef.current({ + x: panStart.current.x + (e.clientX - dragStart.current.x), + y: panStart.current.y + (e.clientY - dragStart.current.y), + }); + }; + const handleMouseUp = () => setIsDragging(false); + window.addEventListener('mousemove', handleMouseMove); + window.addEventListener('mouseup', handleMouseUp); + return () => { + window.removeEventListener('mousemove', handleMouseMove); + window.removeEventListener('mouseup', handleMouseUp); + }; + }, [isDragging]); + + const cursor = isDragging ? 'grabbing' : currentZoom > 1 ? 'grab' : 'default'; + + return { + currentZoom, + effectivePan, + containerDims, + viewportRef, + cursor, + handleMouseDown, + handleZoomChange, + handlePanChange, + }; +} + +export function computeBaseWidth( + fitMode: 'width' | 'page' | undefined, + explicitWidth: number | undefined, + containerDims: { width: number; height: number }, + naturalSize: { width: number; height: number } | null, +): number | undefined { + if (!fitMode || containerDims.width <= 0) return explicitWidth; + + if (fitMode === 'width') return containerDims.width; + + if (fitMode === 'page' && naturalSize && naturalSize.height > 0 && containerDims.height > 0) { + const aspect = naturalSize.width / naturalSize.height; + const containerAspect = containerDims.width / containerDims.height; + return containerAspect > aspect + ? containerDims.height * aspect + : containerDims.width; + } + + return explicitWidth; +} diff --git a/.agents/skills/integrate-fusion-agent/SKILL.md b/.agents/skills/integrate-fusion-agent/SKILL.md new file mode 100644 index 0000000..f685c46 --- /dev/null +++ b/.agents/skills/integrate-fusion-agent/SKILL.md @@ -0,0 +1,342 @@ +--- +name: integrate-fusion-agent +description: >- + Integrates a Flows/Dune app with the Fusion built-in PAIA agent panel using + @cognite/app-sdk. 
Use this skill whenever a developer wants to: open the + agent panel from their app, send the agent a contextual message, let the + agent read app state (resources), or let the agent call actions in the app. + Triggers: "fusion agent", "PAIA", "agent panel", "sendAgentMessage", + "sendAgentLayoutMode", "agent server", "registerAgentServer", + "connectToHostApp", "agent integration", "agent sidebar", "app-sdk agent". + Always use this skill instead of manually writing agent integration code — + it sets up the correct lifecycle, graceful fallback, and recommended file + structure. +allowed-tools: Read, Glob, Grep, Edit, Write, Bash +--- + +# Integrate Fusion Agent Panel + +Wire a Flows/Dune app into the Fusion built-in PAIA agent using `@cognite/app-sdk`. + +There are three independent capabilities — implement only the ones needed: + +1. **Open the agent panel** — a button that shows the sidebar/fullscreen agent UI +2. **Send the agent a message** — inject context into the chat (e.g. on item click) +3. **Register an agent server** — expose app state (resources) and actions the agent can call + +--- + +## Step 0 — Understand the app + +Before writing any code, read: + +- `package.json` — detect package manager and whether `@cognite/app-sdk` is already installed +- `src/App.tsx` (or main entry) — understand current structure, existing SDK usage + +Ask the user which of the three capabilities they need if it's not clear from context. + +--- + +## Step 1 — Install the SDK + +If `@cognite/app-sdk` is not already in `package.json`, install it: + +```shell +pnpm add @cognite/app-sdk # or npm/yarn depending on the app +``` + +Minimum required version: `0.3.1` + +--- + +## Step 2 — Connect to the host app + +All capabilities require a `HostAppAPI` instance. Obtain it once on mount and store it in React state or context. Always catch the rejection — the SDK throws when running outside Fusion (e.g. standalone `vite dev`). 
+ +**Pattern for React apps:** + +```typescript +// src/hooks/useHostApp.ts +import { useState, useEffect } from 'react'; +import { connectToHostApp, type HostAppAPI } from '@cognite/app-sdk'; + +export function useHostApp(): HostAppAPI | null { + const [api, setApi] = useState<HostAppAPI | null>(null); + + useEffect(() => { + connectToHostApp({ applicationName: 'my-app' }) + .then(({ api: resolvedApi }) => { + // IMPORTANT: use the updater form here. Comlink proxies are callable + // objects, so setApi(proxy) causes React to invoke the proxy as a + // state-updater function — storing a Promise instead of the proxy. + // setApi(() => proxy) returns the proxy as the new state value. + setApi(() => resolvedApi); + }) + .catch(() => { + // Running outside Fusion — agent features disabled, no-op + }); + }, []); + + return api; +} +``` + +Call `useHostApp()` at the root of your app and pass `api` down (or put it in context). When `api` is `null`, all agent UI triggers should be hidden or disabled — not shown as broken. + +--- + +## Step 3 — Opening the agent panel + +Wire a persistent toolbar button (or equivalent trigger) to `api.sendAgentLayoutMode`. + +```typescript +import { type AgentLayoutPayload } from '@cognite/app-sdk'; + +// Open as sidebar (most common) +await api.sendAgentLayoutMode({ mode: 'sidebar' }); + +// Other modes +await api.sendAgentLayoutMode({ mode: 'fullscreen' }); +await api.sendAgentLayoutMode({ mode: 'closed' }); +``` + +The button should only render when `api` is not null — agent features are unavailable outside Fusion. + +```tsx +{api && ( + +)} +``` + +--- + +## Step 4 — Sending the agent a message + +Use `sendAgentMessage` on contextual triggers (e.g. "Analyse this item" button). Always pair it with `sendAgentLayoutMode` so the panel is visible. 
+ +```typescript +// Open sidebar then inject context +await api.sendAgentLayoutMode({ mode: 'sidebar' }); +await api.sendAgentMessage({ + message: `Analyse the schedule for "${itemName}" and suggest how to reduce total duration.`, + newSession: true, // clears previous conversation — appropriate for contextual entry points +}); +``` + +Use `newSession: true` when the user is starting a new task from a specific item. Omit it when you want to continue an existing conversation. + +The message text should include relevant context the agent can act on immediately — item names, IDs, current state summary. + +--- + +## Step 5 — Registering an agent server + +An agent server exposes **resources** (read-only app state the agent can read) and **actions** (tools the agent can invoke). Register once on mount, unregister on unmount. + +### Recommended file structure + +Separate concerns so each piece is independently testable: + +``` +src/features/agent/ + agentActions.ts — pure factory: (deps) => Action[] + agentResources.ts — pure factory: (deps) => Resource[] + useAgentServer.ts — useEffect lifecycle hook; calls the factories and registers +``` + +### Resources + +Resources are the agent's window into app state. Write `description` as you would a function docstring — the agent reads it to decide when to fetch the resource. + +```typescript +// src/features/agent/agentResources.ts +import { createAgentResource } from '@cognite/app-sdk'; +import type { StorageService } from '../storage/StorageService'; + +export function buildAgentResources(storage: StorageService) { + return [ + createAgentResource({ + uri: 'my-app://current-state', + name: 'Current application state', + description: + 'The current list of items visible in the app, their statuses, and any active filters. 
Read this before answering questions about what the user is looking at.', + async read() { + const data = storage.getAll(); + return [{ type: 'json', data }]; + }, + }), + ]; +} +``` + +Each resource's `read()` returns an array of content parts: +- `{ type: 'json', data: unknown }` — structured data (preferred; agent reasons over it directly) +- `{ type: 'text', text: string }` — free-form text + +### Actions + +Actions are tools the agent can invoke. Use `snake_case` names and Zod for parameter schemas. The `.describe()` on each field is the agent's documentation. + +```typescript +// src/features/agent/agentActions.ts +import { createAgentAction } from '@cognite/app-sdk'; +import { z } from 'zod'; +import type { DataService } from '../data/DataService'; + +export function buildAgentActions(dataService: DataService) { + return [ + createAgentAction({ + name: 'get_item_details', + description: 'Retrieve full details for a specific item by ID. Returns all fields including history.', + parameters: z.object({ + item_id: z.string().describe('The ID of the item to retrieve'), + }), + async handler({ item_id }) { + const item = await dataService.getItem(item_id); + return { content: [{ type: 'json', data: item }] }; + }, + }), + ]; +} +``` + +**Mutating actions:** The agent does NOT ask the user for confirmation before calling actions — so use caution with actions that write data. Be explicit in the `description` that the action is destructive, and require the user to have approved before the agent calls it. + +```typescript +createAgentAction({ + name: 'update_item_status', + description: + 'Update the status of an item. Call this ONLY when the user has explicitly approved the change. 
The UI updates immediately.', + parameters: z.object({ + item_id: z.string().describe('The item to update'), + status: z.enum(['active', 'closed', 'pending']).describe('The new status'), + }), + async handler({ item_id, status }) { + storage.updateStatus(item_id, status); + return { content: [{ type: 'json', data: { success: true } }] }; + }, +}) +``` + +### Lifecycle hook + +```typescript +// src/features/agent/useAgentServer.ts +import { useEffect } from 'react'; +import { createAgentServer, registerAgentServer, type HostAppAPI } from '@cognite/app-sdk'; +import { buildAgentActions } from './agentActions'; +import { buildAgentResources } from './agentResources'; +import { useStorageService } from '../storage/StorageServiceContext'; +import { useDataService } from '../data/DataServiceContext'; + +export function useAgentServer(api: HostAppAPI | null): void { + const storage = useStorageService(); + const dataService = useDataService(); + + useEffect(() => { + if (!api) return; + + const server = createAgentServer({ + uri: 'my-app', // namespaced by Fusion with instance ID — no need to be globally unique + actions: buildAgentActions(dataService), + resources: buildAgentResources(storage), + }); + + void registerAgentServer(api, server).catch((err: unknown) => { + console.warn('[agent] registerAgentServer failed:', err); + }); + + return () => { + void api.unregisterAgentServer('my-app').catch((err: unknown) => { + console.warn('[agent] unregisterAgentServer failed:', err); + }); + }; + }, [api, storage, dataService]); +} +``` + +Call `useAgentServer(api)` near the root of your component tree, after `api` is available. 
+ +--- + +## Step 6 — Wire it all together + +Call `useHostApp()` at the root, pass `api` to `useAgentServer`, and thread it down to any UI triggers: + +```tsx +// src/App.tsx +function App() { + const api = useHostApp(); + useAgentServer(api); // registers resources + actions when api is ready + + return ( + + + {api && ( + api.sendAgentLayoutMode({ mode: 'sidebar' })}> + Open Assistant + + )} + + ); +} +``` + +--- + +## Dev vs. production + +| Environment | `connectToHostApp` | Effect | +|---|---|---| +| Inside Fusion | Resolves with `{ api }` | All features work | +| Standalone `vite dev` | Rejects | Agent features silently disabled | + +This is handled by the `useHostApp` hook above — no extra conditionals needed elsewhere. + +--- + +## Testing + +Because `buildAgentActions` and `buildAgentResources` are pure factories that accept services as arguments, test them directly without mounting React: + +```typescript +// agentActions.test.ts +const mockDataService = { getItem: vi.fn().mockResolvedValue({ id: '1', name: 'Test' }) }; +const [getItemAction] = buildAgentActions(mockDataService); + +const result = await getItemAction.handler({ item_id: '1' }); +expect(result.content[0].data).toEqual({ id: '1', name: 'Test' }); +``` + +--- + +## Known pitfalls + +### `setApi(resolvedApi)` stores a Promise, not the proxy + +Comlink proxies are callable objects. React's `useState` setter, when given a function, calls it as `fn(prevState)` to compute the new state. Because a Comlink proxy responds to function calls (forwarding them to the remote), `setApi(proxy)` causes React to invoke the proxy, and the resulting Promise becomes the state value. + +**Symptom:** `api` appears non-null (a Promise is truthy), but calling `api.sendAgentLayoutMode(...)` or checking `typeof api.sendAgentLayoutMode` returns nonsense. + +**Fix:** Always use the updater form: `setApi(() => resolvedApi)`. 
+ +### `typeof proxy.method === 'function'` is always `true` + +Comlink Proxy objects return `'function'` for any property access via `typeof`. This means you cannot use `typeof` guards to detect whether a method is actually supported by the host. Use `try/catch` or `.catch()` on the call instead. + +--- + +## Checklist + +- [ ] `@cognite/app-sdk@0.3.1+` installed +- [ ] `useHostApp` hook uses `setApi(() => resolvedApi)` — NOT `setApi(resolvedApi)` +- [ ] `useHostApp` hook catches rejection (outside Fusion), stores `api` in state +- [ ] Agent UI buttons only render when `api` is not null +- [ ] `useAgentServer` registered on mount, unregistered on unmount +- [ ] `registerAgentServer` and `unregisterAgentServer` calls have `.catch()` handlers +- [ ] Resource `description` fields explain what data is returned and when to read it +- [ ] Action `name` fields are `snake_case` +- [ ] Mutating actions warn in their `description` that confirmation is required +- [ ] Services injected into action/resource factories (not imported directly) — enables unit testing diff --git a/.agents/skills/integrate-todo-list/SKILL.md b/.agents/skills/integrate-todo-list/SKILL.md new file mode 100644 index 0000000..0d255f7 --- /dev/null +++ b/.agents/skills/integrate-todo-list/SKILL.md @@ -0,0 +1,158 @@ +--- +name: integrate-todo-list +description: "MUST be used whenever adding a task/todo list feature to a Flows app with Atlas chat. Do NOT manually create todo state management or tool definitions — this skill handles the full module (context, provider, tool, hooks, UI components) and all integration wiring. Prerequisite: integrate-atlas-chat must already be set up. Triggers: todo list, task list, task tracking, TodoWrite, todo panel, task panel, progress tracking, add todos, add tasks." +allowed-tools: Read, Glob, Grep, Edit, Write, Bash +--- + +# Integrate Todo List + +Add a structured task-tracking feature to this Flows app. 
The agent will use a `TodoWrite` tool +to create and update a task list as it works through multi-step queries, giving the user real-time +visibility into what the agent is doing and why. + +**Prerequisite:** **`integrate-atlas-chat`** must already be complete — `useAtlasChat` must be wired (typically from `./atlas-agent/react`), `src/atlas-agent/` must contain the vendored atlas-agent sources, and `@sinclair/typebox`, `ajv`, `ajv-formats` must be installed per that skill. + +--- + +## Step 1 — Read the app + +Before writing anything, read: + +- `package.json` — confirm `@tabler/icons-react` is installed; if not, install it with the app's package manager +- `src/App.tsx` — find where to add `TodoProvider` +- The file that calls `useAtlasChat` (likely `src/chat/useChatViewModel.ts` or `src/App.tsx`) — this is where the tool gets wired +- The chat view component that renders messages — this is where `TodoPanel` and `TodoToolResultCard` go + +--- + +## Step 2 — Create the `src/todo/` module + +Find the skill directory by running `find . -path "*/.agents/skills/integrate-todo-list/code" -type d` from the project root. + +Read each file from `/code/` and write it into `src/todo/` with the same filename: + +| File | Purpose | +|------|---------| +| `types.ts` | `TodoItem` and `TodoList` types | +| `TodoContext.tsx` | React context + `TodoProvider` | +| `useTodoList.ts` | Hook to read/write the todo list | +| `todoWriteTool.ts` | `createTodoWriteTool` factory — `AtlasTool` with full CDF task-decomposition guidance | +| `useTodoWriteTool.ts` | Hook that memoizes the tool with current state access | +| `TodoPanel.tsx` | Card UI: progress bar + task rows | +| `TodoItemRow.tsx` | Single row with animated status icons | +| `TodoToolResultCard.tsx` | Compact summary card for tool call display | + +All files use relative imports (`./types`, `./TodoContext`, etc.) — no changes needed. 
+ +--- + +## Step 3 — Wrap the app in `TodoProvider` + +In `src/App.tsx` (or the root component), wrap the existing tree with `<TodoProvider>`: + +```tsx +import { TodoProvider } from './todo/TodoContext'; // adjust path to match app conventions + +function App() { + return ( + <TodoProvider> + {/* existing children */} + </TodoProvider> + ); +} +``` + +--- + +## Step 4 — Wire the tool into `useAtlasChat` + +In the file that calls `useAtlasChat`, add the following. Adjust import paths to match the app's conventions. + +```ts +import { useRef, useCallback } from 'react'; +import { useTodoList } from './todo/useTodoList'; +import { useTodoWriteTool } from './todo/useTodoWriteTool'; + +// Inside the hook/component: +const { todos, setTodos } = useTodoList(); +const todoWriteTool = useTodoWriteTool(); + +// Keep a ref so getAppContext always reads fresh state without re-creating the callback. +const todosRef = useRef(todos); +todosRef.current = todos; + +const getAppContext = useCallback(() => { + const t = todosRef.current; + if (t.length === 0) return undefined; + const lines = t.map((item, i) => `${i + 1}. [${item.status}] ${item.content}`); + return `Current todo list:\n${lines.join('\n')}`; +}, []); + +// Add to useAtlasChat options: +const { messages, send, isStreaming, progress, error, reset, abort } = useAtlasChat({ + client: isLoading ? 
null : sdk, + agentExternalId: AGENT_EXTERNAL_ID, + tools: [todoWriteTool], // add alongside any existing tools + getAppContext, +}); + +// In the reset handler, clear the todo list: +const handleReset = useCallback(() => { + reset(); + setTodos([]); +}, [reset, setTodos]); + +// Expose todos in the return value so the view can render TodoPanel: +return { ..., todos }; +``` + +--- + +## Step 5 — Render `TodoPanel` in the chat view + +In the component that renders the chat input area, add `<TodoPanel todos={todos} />` above the input field: + +```tsx +import { TodoPanel } from './todo/TodoPanel'; // adjust path + +// In the render: +<TodoPanel todos={todos} /> + +``` + +`TodoPanel` returns `null` when the list is empty, so it's safe to always render it. + +--- + +## Step 6 — Render `TodoToolResultCard` for tool call steps + +In the component that renders per-message tool calls (typically a steps accordion or similar), branch on the tool name: + +```tsx +import { TodoToolResultCard } from './todo/TodoToolResultCard'; // adjust path + +{toolCalls.map((tc, i) => + tc.name === 'TodoWrite' ? ( + <TodoToolResultCard key={i} toolCall={tc} /> + ) : ( + + ) +)} +``` + +--- + +## Step 7 — Verify + +Run the app's type-check command (typically `pnpm tsc --noEmit`) and confirm there are no errors. +If the project has tests, run them to confirm nothing regressed. + +--- + +## Done + +The agent can now use `TodoWrite` to create and track tasks. 
It will: +- Show a task panel as soon as it starts multi-step work +- Update task status in real-time (`pending` → `in_progress` → `completed`) +- Clear the list automatically when all tasks are done +- Inject the current task list into each prompt via `getAppContext` so it knows where it left off diff --git a/.agents/skills/integrate-todo-list/code/TodoContext.tsx b/.agents/skills/integrate-todo-list/code/TodoContext.tsx new file mode 100644 index 0000000..c3d1b0c --- /dev/null +++ b/.agents/skills/integrate-todo-list/code/TodoContext.tsx @@ -0,0 +1,19 @@ +import { createContext, useState, useMemo } from 'react'; +import type { ReactNode } from 'react'; +import type { TodoList } from './types'; + +export interface TodoStoreValue { + todos: TodoList; + setTodos: (todos: TodoList) => void; +} + +export const TodoContext = createContext({ + todos: [], + setTodos: () => undefined, +}); + +export function TodoProvider({ children }: { children: ReactNode }) { + const [todos, setTodos] = useState([]); + const value = useMemo(() => ({ todos, setTodos }), [todos]); + return {children}; +} diff --git a/.agents/skills/integrate-todo-list/code/TodoItemRow.tsx b/.agents/skills/integrate-todo-list/code/TodoItemRow.tsx new file mode 100644 index 0000000..94cfdba --- /dev/null +++ b/.agents/skills/integrate-todo-list/code/TodoItemRow.tsx @@ -0,0 +1,38 @@ +import { IconCircle, IconCircleFilled, IconCircleCheckFilled } from '@tabler/icons-react'; +import type { TodoItem } from './types'; + +interface TodoItemRowProps { + item: TodoItem; +} + +const STATUS_ICONS = { + pending: , + in_progress: , + completed: , +}; + +export function TodoItemRow({ item }: TodoItemRowProps) { + const label = item.status === 'in_progress' ? item.activeForm : item.content; + const isInProgress = item.status === 'in_progress'; + const isCompleted = item.status === 'completed'; + + return ( +
+ {STATUS_ICONS[item.status]} + + {label} + +
+ ); +} diff --git a/.agents/skills/integrate-todo-list/code/TodoPanel.tsx b/.agents/skills/integrate-todo-list/code/TodoPanel.tsx new file mode 100644 index 0000000..bf1acd8 --- /dev/null +++ b/.agents/skills/integrate-todo-list/code/TodoPanel.tsx @@ -0,0 +1,44 @@ +import { Badge, Card, CardContent, CardHeader, CardHeaderRight, CardTitle } from '@cognite/aura/components'; +import { TodoItemRow } from './TodoItemRow'; +import type { TodoList } from './types'; + +interface TodoPanelProps { + todos: TodoList; +} + +export function TodoPanel({ todos }: TodoPanelProps) { + if (todos.length === 0) return null; + + const completedCount = todos.filter((t) => t.status === 'completed').length; + const progressPct = Math.round((completedCount / todos.length) * 100); + + return ( +
+ + + + Tasks + + + + {completedCount}/{todos.length} + + + +
+
+
+ + {todos.map((item, i) => ( + // Index is safe here: the agent only appends to the end and updates in place — it never reorders or inserts in the middle. + // Using content as a key would cause remounts (and animation resets) whenever the agent updates a task title with discovered node names. + + ))} + + +
+ ); +} diff --git a/.agents/skills/integrate-todo-list/code/TodoToolResultCard.tsx b/.agents/skills/integrate-todo-list/code/TodoToolResultCard.tsx new file mode 100644 index 0000000..cfca06e --- /dev/null +++ b/.agents/skills/integrate-todo-list/code/TodoToolResultCard.tsx @@ -0,0 +1,47 @@ +import { Tool, ToolContent, ToolHeader } from '@cognite/aura/components'; + +interface ToolCall { + name: string; + input?: unknown; + output?: string; + details?: unknown; +} + +interface TodoToolResultCardProps { + toolCall: ToolCall; +} + +interface TodoDetails { + completed: number; + inProgress: number; + pending: number; + newTodos: { content: string; status: string }[]; +} + +function isTodoDetails(value: unknown): value is TodoDetails { + return ( + typeof value === 'object' && + value !== null && + 'completed' in value && + 'inProgress' in value && + 'pending' in value + ); +} + +export function TodoToolResultCard({ toolCall }: TodoToolResultCardProps) { + const details = isTodoDetails(toolCall.details) ? toolCall.details : null; + const total = details ? details.completed + details.inProgress + details.pending : 0; + + const summary = details + ? `${total} task${total !== 1 ? 's' : ''}: ${details.completed} completed, ${details.inProgress} in progress, ${details.pending} pending` + : 'Todo list updated'; + + return ( + + + +

{summary}

+
+
+ ); +} diff --git a/.agents/skills/integrate-todo-list/code/todoWriteTool.ts b/.agents/skills/integrate-todo-list/code/todoWriteTool.ts new file mode 100644 index 0000000..8d9e9d8 --- /dev/null +++ b/.agents/skills/integrate-todo-list/code/todoWriteTool.ts @@ -0,0 +1,344 @@ +import { Type } from '@sinclair/typebox'; +import type { AtlasTool } from '../atlas-agent/types'; + +import type { TodoList } from './types'; + +const parameters = Type.Object({ + todos: Type.Array( + Type.Object({ + content: Type.String({ description: 'Imperative form, e.g. "Fix authentication bug"' }), + status: Type.Unsafe<'pending' | 'in_progress' | 'completed'>({ + type: 'string', + enum: ['pending', 'in_progress', 'completed'], + description: 'Task status', + }), + activeForm: Type.String({ + description: 'Present continuous form, e.g. "Fixing authentication bug"', + }), + }), + { description: 'The complete, updated todo list. Must include ALL items — do not omit any.' } + ), +}); + +const DESCRIPTION = `Use this tool to create and manage a structured task list when answering questions about industrial assets, equipment, maintenance orders, files, time series, and related data in CDF. This helps you track progress across multi-step queries, and helps the user see what you are doing and why. + +## When to Use This Tool +Use this tool proactively in these scenarios: + +1. Fetching related data across types - Any question that involves traversing from one type to another (e.g., "files for an asset", "time series for a pump", "operations on a maintenance order", "notifications for an asset") +2. Reverse list-relation traversals - Finding CogniteFile, CogniteTimeSeries, or CogniteActivity instances that reference a known asset or equipment requires a separate /search step with containsAny — this cannot be done in a single /query call +3. Multi-level traversals - When the question requires stepping through more than one relation (e.g., asset → equipment → time series) +4. 
User asks multiple questions - When the user asks for several different pieces of data at once +5. After receiving new instructions - Immediately capture user requirements as todos +6. When you start working on a task - Mark it as in_progress BEFORE beginning work. Ideally you should only have one todo as in_progress at a time +7. After completing a task - Mark it as completed and add any new follow-up tasks discovered during execution + +## When NOT to Use This Tool + +Skip using this tool when: +1. The question targets a single type with no relation traversal (e.g., "list all assets", "find the maintenance order with ID X") +2. The question is purely conversational or informational (e.g., "what is a functional location?") +3. The answer requires only one tool call +4. You are clarifying what the user wants before starting work + +NOTE that you should not use this tool if there is only one trivial step to do. In this case you are better off just doing the task directly. + +## Why Multi-Step Decomposition Is Needed for Cross-Type Queries + +In CDF Data Modeling, data is organized into separate views (CogniteAsset, CogniteFile, CogniteEquipment, CogniteTimeSeries, CogniteActivity, CogniteMaintenanceOrder, CogniteOperation, etc.). Relations between views are stored as direct relation properties on one side of the relationship. + +### Reverse Direct Relation Constraint + +A critical constraint governs how relations can be traversed: + +**Single-target relations (targetsList=false)** — the property points to exactly one instance. Reverse traversal CAN be done via /query using \`through\` + \`direction: inwards\`. 
+- CogniteAsset.children traverses backward through CogniteAsset.parent (single) — works with /query +- CogniteAsset.equipment traverses backward through CogniteEquipment.asset (single) — works with /query +- CogniteMaintenanceOrder.operations traverses backward through CogniteOperation.maintenanceOrder (single) — works with /query + +**List-target relations (targetsList=true)** — the property holds a list of references. Reverse traversal CANNOT be done via /query. You MUST use /search with a containsAny filter instead. +- CogniteAsset.files — reverse through CogniteFile.assets (list) — needs /search + containsAny +- CogniteAsset.timeSeries — reverse through CogniteTimeSeries.assets (list) — needs /search + containsAny +- CogniteAsset.activities — reverse through CogniteActivity.assets (list) — needs /search + containsAny +- CogniteEquipment.timeSeries — reverse through CogniteTimeSeries.equipment (list) — needs /search + containsAny +- CogniteEquipment.activities — reverse through CogniteActivity.equipment (list) — needs /search + containsAny +- CogniteFile.equipment — reverse through CogniteEquipment.files (list) — needs /search + containsAny + +This means that many natural user questions ("show me files for pump X") require AT LEAST two steps: (1) find the asset/equipment instance, (2) search the related type using containsAny. This is why task decomposition is essential. + +### Hierarchy / Path Interpretation + +Use the asset \`path\` property to read and interpret the ancestry of a known node. Do not infer hierarchy from alphabetical ordering or from an item's position in a result list. 
+ +- \`path[-1]\` = current node +- \`path[-2]\` = parent +- \`path[-3]\` = grandparent +- \`path[-4]\` = ancestor three levels above the current node + +Example: +- Path: \`WMT:VAL -> WMT:23 -> WMT:230900 -> WMT:23-1ST STAGE COMPRESSION-PH -> WMT:23-XX-9105\` +- \`path[-4] = WMT:23\`, \`path[-3] = WMT:230900\`, \`path[-2] = WMT:23-1ST STAGE COMPRESSION-PH\`, \`path[-1] = WMT:23-XX-9105\` + +To **find descendants** N levels below a given node, do NOT filter on the \`path\` array. Instead, traverse the \`parent\` relation one level at a time, with **one todo item per level**: +- Level 1 (children): query assets where \`parent == A\` +- Level 2 (grandchildren): query assets where \`parent\` is any of the level-1 results +- Level 3: query assets where \`parent\` is any of the level-2 results +- …and so on until the target depth + +Each level must be its own todo item — do not collapse multiple levels into a single task. + +### Subtree + Maintenance Queries + +A very common industrial pattern is "what maintenance work exists for section X and everything below it?" This combines hierarchy traversal with maintenance order (or notification) lookups and always requires multiple steps: + +1. Traverse the hierarchy level by level to collect all descendant asset IDs (one todo per level, as above) +2. Search for maintenance orders referencing those assets — use \`mainAsset\` if looking for the primary asset on the order (single field, filter directly); use \`assets\` containsAny if looking for any association (list field, needs /search) +3. Optionally fetch operations or notifications for the found maintenance orders + +Note: \`mainAsset\` and \`assets\` on \`CogniteMaintenanceOrder\` serve different purposes. \`mainAsset\` is the primary functional location the order is about; \`assets\` is the broader list of associated assets. For most "what work is planned for asset X" questions, querying \`mainAsset\` is the right starting point. 
+ +## Examples of When to Use the Todo List + + +User: Please give me all the files for 1st stage compressor +Assistant: *Creates todo list with the following items:* +1. Discover relevant views for files and assets in the data model +2. Find the asset instance for "1st stage compressor" +3. Search for files related to the identified asset (using containsAny on CogniteFile.assets) +4. Summarize the files with their names, space, and external IDs +*Begins working on the first task* + + +The assistant used the todo list because: +1. This is a cross-type query spanning CogniteAsset and CogniteFile +2. CogniteFile.assets is a list relation (targetsList=true), so reverse traversal requires a separate /search step with containsAny — it cannot be done in a single /query call +3. The asset must be identified first before files can be searched +4. Multiple sequential tool calls are needed, making task tracking valuable + + + + +User: Show me the time series and recent activities for pump P-101 +Assistant: *Searches for the equipment instance P-101* +Found equipment "P-101" in the data model. +*Creates todo list with specific items:* +1. Retrieve full details of equipment P-101 +2. Search for time series referencing P-101 (CogniteTimeSeries.equipment is a list relation — needs /search + containsAny) +3. Search for activities referencing P-101 (CogniteActivity.equipment is a list relation — needs /search + containsAny) +4. Summarize findings with time series names and activity descriptions +*Begins working on the first task* + + +The assistant used the todo list because: +1. First, the assistant identified the equipment to understand the scope +2. Both CogniteTimeSeries.equipment and CogniteActivity.equipment are list relations requiring separate /search + containsAny calls — neither can be resolved with a single /query traversal +3. 
Multiple independent search steps are needed, making task tracking valuable + + + + +User: List all child assets of the "North Platform" and their associated equipment and maintenance orders +Assistant: *Creates todo list breaking down the query:* +1. Find the "North Platform" asset instance +2. Query child assets using the parent relation (CogniteAsset.parent is a single relation — can use /query with through + direction: inwards) +3. Query equipment for each child asset (CogniteEquipment.asset is a single relation — can use /query) +4. Find maintenance orders referencing the child assets +5. Compile and present the asset hierarchy with equipment and maintenance information +*Begins working on the first task* + + +The assistant used the todo list because: +1. This involves traversing multiple levels of the data model hierarchy +2. Even though parent/child and asset/equipment are single relations (queryable via /query), the overall task still spans many sequential steps across the hierarchy +3. The user wants a comprehensive view across assets, equipment, and maintenance orders +4. Tracking progress prevents missing any branch of the hierarchy + + + + +User: What operations and notifications are there for maintenance order MO-2024-001? +Assistant: *Creates todo list:* +1. Find the maintenance order MO-2024-001 +2. Query operations for this maintenance order (CogniteOperation.maintenanceOrder is a single relation — can use /query with through + direction: inwards) +3. Search for notifications referencing this maintenance order (CogniteNotification.maintenanceOrder is a single relation — can use /query) +4. Summarize operations and notifications with their status and descriptions +*Begins working on the first task* + + +The assistant used the todo list because: +1. This spans three types: CogniteMaintenanceOrder, CogniteOperation, and CogniteNotification +2. Operations and notifications both require separate query steps after finding the maintenance order +3. 
Tracking steps ensures both relations are covered and results are organized clearly + + + + +User: What is the maintenance backlog for the entire 1st stage compression section? +Assistant: *Creates todo list:* +1. Find the asset instance for "1st stage compression" +2. Query level-1 children (assets where parent == 1st stage compression asset) +3. Query level-2 children (assets where parent is any level-1 result) +4. Query level-3 children (assets where parent is any level-2 result) +5. Search for maintenance orders where mainAsset is any of the collected asset IDs +6. Summarize maintenance orders with their status, priority, and scheduled dates +*Begins working on the first task* + + +The assistant used the todo list because: +1. "The entire section" means all descendant assets must be collected first — one todo per hierarchy level +2. Once all descendant asset IDs are known, maintenance orders are found by filtering on mainAsset (single indexed field — no containsAny needed for this use case) +3. The hierarchy traversal and the maintenance lookup are separate dependent phases, making step-by-step tracking essential + + + + +User: Give me all files for assets that are two levels below WMT:23 +Assistant: *Creates todo list:* +1. Find the asset instance WMT:23 +2. Query level-1 children of WMT:23 (assets where parent == WMT:23) +3. Query level-2 children (assets where parent is any of the level-1 results) +4. Search for files referencing the level-2 assets (CogniteFile.assets is a list relation — needs /search + containsAny) +5. Summarize files with their names and external IDs +*Begins working on the first task* + + +The assistant used the todo list because: +1. Descendant lookup must be done by stepping through the parent relation one level at a time — each level is its own todo item, never collapsed into one task +2. Two levels of traversal means two separate query steps before any file lookup can happen +3. 
The final file lookup requires a separate /search + containsAny step because CogniteFile.assets is a list relation +4. The chain of dependent steps makes task tracking essential + + + + +User: Please give me the seventh descendant of WMT:VAL +Assistant: *Creates todo list:* +1. Find the asset instance WMT:VAL +2. Query level-1 children (assets where parent == WMT:VAL) +3. Query level-2 children (assets where parent is any level-1 result) +4. Query level-3 children (assets where parent is any level-2 result) +5. Query level-4 children (assets where parent is any level-3 result) +6. Query level-5 children (assets where parent is any level-4 result) +7. Query level-6 children (assets where parent is any level-5 result) +8. Query level-7 children (assets where parent is any level-6 result) +9. Summarize the level-7 descendants with space and external ID +*Begins working on the first task* + + +The assistant used the todo list because: +1. Seven levels of parent traversal are required — each level is a separate query and gets its own todo item +2. Collapsing all traversal steps into one task would hide progress and make it impossible to track which level is being processed +3. Each step depends on the results of the previous one, making explicit tracking essential + + + +## Examples of When NOT to Use the Todo List + + +User: How many assets do we have? +Assistant: *Uses the aggregate tool on the CogniteAsset view* +Based on the aggregation, you have 1,247 assets in your data model. + + +The assistant did not use the todo list because this is a single aggregation query against one view. No cross-type traversal or multi-step decomposition is needed. + + + + +User: What is a direct relation in CDF? +Assistant: A direct relation in CDF is a property on a node that references another node by its space and externalId. It creates a typed graph connection stored directly on the source instance, without needing a separate edge object. 
+ + +The assistant did not use the todo list because this is an informational request with no data query to execute. The user is asking for an explanation, not for the assistant to retrieve data from CDF. + + + + +User: Find the asset named "Main Turbine" +Assistant: *Searches for the asset using a name filter on CogniteAsset* +Found asset "Main Turbine" (externalId: main-turbine-001, space: plant-assets). + + +The assistant did not use the todo list because this is a single search operation against one view. The result is returned directly with no additional steps needed. + + + + +User: Does equipment EP-200 exist? +Assistant: *Queries the CogniteEquipment view for EP-200* +Yes, equipment EP-200 exists in space "industrial-assets" with name "Centrifugal Pump EP-200". + + +The assistant did not use the todo list because this is a single instance lookup that can be completed in one step. No multi-step decomposition is needed. + + + +## Task States and Management + +1. **Task States**: Use these states to track progress: + - pending: Task not yet started + - in_progress: Currently working on (limit to ONE task at a time) + - completed: Task finished successfully + + **IMPORTANT**: Task descriptions must have two forms: + - content: The imperative form describing what needs to be done (e.g., "Find the asset instance for 1st stage compressor", "Search for files referencing the asset") + - activeForm: The present continuous form shown during execution (e.g., "Finding the asset instance for 1st stage compressor", "Searching for files referencing the asset") + +2. 
**Task Management**: + - Update task status in real-time as you work + - Mark tasks complete IMMEDIATELY after finishing (don't batch completions) + - Exactly ONE task must be in_progress at any time (not less, not more) + - Complete current tasks before starting new ones + - Remove tasks that are no longer relevant from the list entirely + - **Always mark the final task as completed before delivering your answer** — do not give the response and stop without updating the todo list. The last tool call before responding to the user must be a TodoWrite that marks the final task completed. + - **Update pending task titles with discovered node names**: after completing a step that returns concrete instances, rewrite the titles of downstream pending tasks to reflect what was actually found. If there are many results, use the short form: "WMT:23, WMT:24, WMT:25 … (12 total)". For example, once level-1 children are known, change "Query level-2 children (assets where parent is any level-1 result)" to "Query level-2 children of WMT:23, WMT:24 … (4 total)". + +3. **Task Completion Requirements**: + - ONLY mark a task as completed when you have FULLY accomplished it + - If a query returns errors or unexpected results, keep the task as in_progress + - When blocked, create a new task describing what needs to be resolved + - Never mark a task as completed if: + - The query returned an error + - You received partial or empty results when data was expected + - You need to retry with a different approach or filter + - Required views or instances were not found + +4. **Task Breakdown**: + - Create specific, actionable items + - Break complex queries into smaller, focused steps + - Use clear, descriptive task names + - Always provide both forms: + - content: "Search for files referencing asset X" + - activeForm: "Searching for files referencing asset X" + +When in doubt, use this tool. 
Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.`; + +export interface TodoWriteToolDeps { + getTodos: () => TodoList; + setTodos: (todos: TodoList) => void; +} + +export function createTodoWriteTool(deps: TodoWriteToolDeps): AtlasTool { + return { + name: 'TodoWrite', + description: DESCRIPTION, + parameters, + execute: (args) => { + const oldTodos = deps.getTodos(); + const allDone = args.todos.every((t) => t.status === 'completed'); + const newTodos = allDone ? [] : args.todos; + deps.setTodos(newTodos); + + const completed = args.todos.filter((t) => t.status === 'completed').length; + const inProgress = args.todos.filter((t) => t.status === 'in_progress').length; + const pending = args.todos.filter((t) => t.status === 'pending').length; + + return { + output: + 'Todos have been modified successfully. Ensure that you continue to use the todo list ' + + 'to track your progress. Please proceed with the current tasks if applicable.', + details: { oldTodos, newTodos: args.todos, completed, inProgress, pending }, + }; + }, + }; +} diff --git a/.agents/skills/integrate-todo-list/code/types.ts b/.agents/skills/integrate-todo-list/code/types.ts new file mode 100644 index 0000000..d133e9a --- /dev/null +++ b/.agents/skills/integrate-todo-list/code/types.ts @@ -0,0 +1,7 @@ +export interface TodoItem { + content: string; + status: 'pending' | 'in_progress' | 'completed'; + activeForm: string; +} + +export type TodoList = TodoItem[]; diff --git a/.agents/skills/integrate-todo-list/code/useTodoList.ts b/.agents/skills/integrate-todo-list/code/useTodoList.ts new file mode 100644 index 0000000..a83ff66 --- /dev/null +++ b/.agents/skills/integrate-todo-list/code/useTodoList.ts @@ -0,0 +1,7 @@ +import { useContext } from 'react'; +import { TodoContext } from './TodoContext'; +import type { TodoStoreValue } from './TodoContext'; + +export function useTodoList(): TodoStoreValue { + return 
useContext(TodoContext); +} diff --git a/.agents/skills/integrate-todo-list/code/useTodoWriteTool.ts b/.agents/skills/integrate-todo-list/code/useTodoWriteTool.ts new file mode 100644 index 0000000..5e56f48 --- /dev/null +++ b/.agents/skills/integrate-todo-list/code/useTodoWriteTool.ts @@ -0,0 +1,17 @@ +import { useRef, useMemo } from 'react'; +import { useTodoList } from './useTodoList'; +import { createTodoWriteTool } from './todoWriteTool'; + +export function useTodoWriteTool() { + const { todos, setTodos } = useTodoList(); + + // Keep a ref so the memoized execute closure always reads current state. + const todosRef = useRef(todos); + todosRef.current = todos; + + return useMemo( + () => createTodoWriteTool({ getTodos: () => todosRef.current, setTodos }), + // setTodos is stable (from useMemo in TodoProvider), so the tool identity is stable. + [setTodos] + ); +} diff --git a/.agents/skills/migrate-app-to-flows/SKILL.md b/.agents/skills/migrate-app-to-flows/SKILL.md new file mode 100644 index 0000000..b68f548 --- /dev/null +++ b/.agents/skills/migrate-app-to-flows/SKILL.md @@ -0,0 +1,130 @@ +--- +name: migrate-app-to-flows +description: "MUST be used when migrating a legacy Dune app to the new Flows app hosting infrastructure. Orchestrates the full migration: audits current state, updates app.json to appsApi infra, delegates auth wiring to setup-flows-auth, creates or updates manifest.json network permissions, and updates deploy scripts to @cognite/cli. Use this whenever a user says 'migrate to Flows', 'migrate to new infra', 'move from dune to flows', 'migrate legacy app', or wants to move their existing app to the new Flows app hosting." +allowed-tools: Read, Glob, Grep, Edit, Write, Bash +metadata: + argument-hint: "" +--- + +# Migrate App to Flows Infrastructure + +Orchestrates the full migration of a legacy Dune app to the new Flows app hosting (`appsApi`). Works through each area in order, skipping any already in the correct state. 
+ +## Step 1 — Audit current state + +Read `app.json`, `package.json`, `vite.config.ts`, and `manifest.json` (if present). + +Report a concise summary before making any changes: + +``` +Migration audit: +✗ app.json: missing infra field → will add "infra": "appsApi" +✗ Auth: DuneAuthProvider in use → will run setup-flows-auth +✗ manifest.json: missing → will create +✓ Deploy script: already uses @cognite/cli +``` + +Then proceed through Steps 2–6. + +--- + +## Step 2 — Update `app.json` + +If `infra` is already `"appsApi"`, skip this step. Otherwise, add or update the field: + +```json +{ + "name": "My App", + "externalId": "my-app", + "versionTag": "0.0.1", + "infra": "appsApi", + "deployments": [...] +} +``` + +--- + +## Step 3 — Set up Flows auth + +Run the `setup-flows-auth` skill now. It handles everything auth-related: package installation, Vite plugin updates, entry file changes, and wiring up `connectToHostApp`. + +--- + +## Step 4 — Create or update `manifest.json` + +The Flows host uses `manifest.json` to enforce a Content Security Policy for the app. It must exist at the repo root. + +**Create if missing:** + +```json +{ + "manifestVersion": 1, + "permissions": { + "network": [] + } +} +``` + +**Populate network permissions** by scanning for outbound calls to external domains: + +```bash +grep -rn "fetch\|axios\|new XMLHttpRequest" src/ --include="*.ts" --include="*.tsx" +``` + +For each group of external URLs found, add an entry to the `network` array using the `sources`/`directives` shape: + +```json +{ + "manifestVersion": 1, + "permissions": { + "network": [ + { + "sources": ["https://api.example.com", "https://maps.googleapis.com"], + "directives": ["connect-src"] + } + ] + } +} +``` + +Rules: +- Use the full origin (scheme + hostname, plus the port if it is non-default) in `sources`, not just the hostname. +- `"connect-src"` covers `fetch`/`XMLHttpRequest`. Use `"img-src"` for image URLs, `"font-src"` for fonts. +- The CDF cluster URL is allowed automatically; do not list it.
+- If no external calls exist, leave `"network": []`. +- Flag any dynamic URLs the user needs to verify manually. + +--- + +## Step 5 — Update deploy scripts + +Replace any `dune deploy` or `npx @cognite/dune` commands in `package.json`: + +```json +{ + "scripts": { + "deploy": "npx @cognite/cli@latest apps deploy --interactive --published", + "deploy-preview": "npx @cognite/cli@latest apps deploy --interactive" + } +} +``` + +Keep all other scripts (`start`, `build`, `test`, etc.) unchanged. + +--- + +## Step 6 — Final check + +```bash +grep -rn "DuneAuthProvider\|useDune\|@cognite/dune" src/ vite.config.ts 2>/dev/null +``` + +List any remaining hits for the user to resolve. Then report: + +``` +Migration complete: +✓ app.json: infra set to "appsApi" +✓ Auth: setup-flows-auth applied +✓ manifest.json: network permissions set +✓ Deploy scripts: updated to @cognite/cli +``` diff --git a/.agents/skills/performance/SKILL.md b/.agents/skills/performance/SKILL.md new file mode 100644 index 0000000..171886f --- /dev/null +++ b/.agents/skills/performance/SKILL.md @@ -0,0 +1,525 @@ +--- +name: performance +description: "MUST be used whenever fixing performance issues in a Flows app. This skill finds AND fixes performance problems — re-renders, inefficient queries, missing pagination, unbounded fetches, large bundles, and memory leaks. It does not just report them. Always measure before and after. Triggers: performance, slow, laggy, optimize, re-render, bundle size, load time, CDF query, large list, memory leak, debounce, virtualize, lazy load, code split." +allowed-tools: Read, Glob, Grep, Shell, Write +metadata: + argument-hint: "[file, component, or area to optimize — e.g. 'src/components/AssetTable.tsx']" +--- + +# Performance Fix + +Systematically find and fix performance issues in **$ARGUMENTS** (or the whole app if no argument is given). Always measure first — never optimize blindly. 
+ +--- + +## Step 1 — Measure baseline before touching anything + +Run the production build and capture metrics before making any changes: + +```bash +pnpm run build +pnpm run preview +``` + +Open the app in Chrome and capture: +- **Lighthouse score** (Chrome DevTools → Lighthouse tab → Analyze page load) +- **React Profiler** (React DevTools → Profiler → Record an interaction) + - Note the components with the longest render times and highest render counts + +Record baseline numbers. Every fix must be measured against these. + +--- + +## Step 2 — Find and fix unnecessary re-renders + +Read the component tree (start from `src/App.tsx`) and search for these patterns: + +```bash +grep -rn --include="*.tsx" \ + -E "value=\{\{|onClick=\{\(\)" src/ +``` + +For each instance found, **apply the fix directly**: + +**Inline object/array creation in JSX → wrap with `useMemo`:** +```tsx +// BAD — new object on every render causes children to re-render + + +// FIX — wrap with useMemo +const chartOptions = useMemo(() => ({ color: "red" }), []); + +``` + +**Event handlers recreated on every render → wrap with `useCallback`:** +```tsx +// BAD + + ))} +
+ ); +} + +// --- App --- + +export default function App() { + const { sdk: client, isLoading } = useDune(); + // Memoize on sdk.project — prevents RevealProvider from remounting on + // unrelated sdk object reference changes + const sdk = useMemo(() => client, [client.project]); + + const [selected, setSelected] = useState(null); + + // useCallback is mandatory — see ModelBrowser RULE 1 above + const handleSelect = useCallback((m: SelectedModel) => { + setSelected((prev) => (!prev || prev.modelId !== m.modelId ? m : prev)); + }, []); + + if (isLoading) return
Connecting to CDF…
; + + return ( + // CacheProvider + RevealKeepAlive always mounted → StrictMode double-invoke + // completes at startup with no viewer to dispose. + // RevealProvider conditionally mounts → finds stable RevealKeepAlive viewerRef. + + +
+ +
+ {selected && ( + + Loading viewer…
}> + + + + )} +
+
+ + + ); +} +``` + +--- + +## Pattern A (fallback) — FDM auto-discover from an asset instance + +Use only when you have a `DMInstanceRef` and the instance has `CogniteVisualizable.object3D → CogniteCADNode` linkage. Otherwise use Pattern B above. + +### src/components/ViewerContent.tsx (FDM variant) + +```tsx +import { useCallback, useMemo, useState } from 'react'; +import type { DMInstanceRef } from '@cognite/reveal'; +import { + Reveal3DResources, + RevealCanvas, + useModelsForInstanceQuery, + type AddCadResourceOptions, + type TaggedAddResourceOptions, +} from '@/features/reveal-3d'; + +function pickFirstCad(models: TaggedAddResourceOptions[]): AddCadResourceOptions | undefined { + const m = models[0]; + return m?.type === 'cad' + ? { ...m.addOptions, styling: { default: { renderGhosted: true } } } + : undefined; +} + +export function ViewerContent({ instance }: { instance: DMInstanceRef }) { + const { data: models, isLoading } = useModelsForInstanceQuery(instance); + const [loaded, setLoaded] = useState(false); + const selected = useMemo(() => pickFirstCad(models ?? []), [models]); + const resources = useMemo(() => (selected ? [selected] : []), [selected]); + const onLoaded = useCallback(() => setLoaded(true), []); + + if (isLoading) return
Loading 3D model…
; + if (!resources.length) return
No 3D data linked to this instance.
; + + return ( + + + + ); +} +``` + +### src/App.tsx (FDM variant) + +Same `CacheProvider` / `RevealKeepAlive` / `RevealProvider` structure as Pattern B. +Pass `instance: DMInstanceRef` to `ViewerContent` instead of `modelId` / `revisionId`. + +```tsx +import { useMemo, useState } from 'react'; +import * as THREE from 'three'; +import { useDune } from '@cognite/dune'; +import type { DMInstanceRef } from '@cognite/reveal'; +import { + CacheProvider, + RevealKeepAlive, + RevealProvider, + type ViewerOptions, +} from '@/features/reveal-3d'; +import { ViewerContent } from './components/ViewerContent'; + +const BG = new THREE.Color(0x1a1a2e); +const OPTS: ViewerOptions = { + loadingIndicatorStyle: { placement: 'topRight', opacity: 0.1 }, + antiAliasingHint: 'msaa2+fxaa', + ssaoQualityHint: 'medium', +}; + +export default function App() { + const { sdk: client, isLoading } = useDune(); + const sdk = useMemo(() => client, [client.project]); + // Replace with however you receive the instance ref (prop, route param, selection, etc.) + const [instance] = useState(null); + + if (isLoading) return
Connecting to CDF…
; + + return ( + + +
+ {instance && sdk.project && ( + + + + )} +
+
+
+ ); +} +``` diff --git a/.agents/skills/reveal-3d/references/vite-config.md b/.agents/skills/reveal-3d/references/vite-config.md new file mode 100644 index 0000000..37d8e0c --- /dev/null +++ b/.agents/skills/reveal-3d/references/vite-config.md @@ -0,0 +1,136 @@ +# Vite Configuration for @cognite/reveal in a Flows monorepo app + +## src/main.tsx — process polyfill must be the very first two lines + +Prepend these two lines before any other import: + +```tsx +import process from 'process'; +(window as unknown as Record<string, unknown>).process = process; + +// all other existing imports below ↓ +``` + +The rest of `main.tsx` stays as-is. Order matters — the polyfill must run before any module that reads `process`. + +--- + +## vite.config.ts — standalone config (not mergeConfig) + +Flows apps use a standalone `vite.config.ts` (not a shared base config from the monorepo root). +Replace the file entirely with the following: + +```typescript +import path from 'node:path'; + +import { fusionOpenPlugin } from '@cognite/dune/vite'; +import tailwindcss from '@tailwindcss/vite'; +import react from '@vitejs/plugin-react'; +import { defineConfig } from 'vite'; +import mkcert from 'vite-plugin-mkcert'; + +export default defineConfig({ + base: './', + plugins: [react(), mkcert(), fusionOpenPlugin(), tailwindcss()], + define: { + // Some CJS deps use `global` instead of `globalThis` + 'process.env': {}, + 'process.platform': JSON.stringify(''), + 'process.version': JSON.stringify(''), + global: 'globalThis', + }, + resolve: { + alias: { + '@': path.resolve(__dirname, './src'), + + // --- Node built-in polyfills --- + // Use explicit package aliases, not vite-plugin-node-polyfills. + // The plugin introduces transitive dep conflicts ("Could not resolve 'inherits'"). + // The `process`, `util`, and `assert` packages must be in app dependencies.
+ util: 'util/', + assert: 'assert/', + process: 'process/browser', + + // --- Single Three.js instance --- + // @cognite/reveal bundles its own Three.js copy. Without this alias, the app + // and Reveal load two separate copies → "Multiple instances of Three.js" warning + // and broken rendering. Requires `three` as a direct app dependency. + three: path.resolve(__dirname, 'node_modules/three/build/three.module.js'), + }, + + // pnpm uses a virtual store with symlinks. Packages that symlink to different + // paths can resolve to separate module instances even for the same package. + // `dedupe` forces a single physical copy for all pre-bundles. + // Missing `react`/`react-dom` here causes ReactCurrentDispatcher errors. + // Missing `three` causes "Multiple instances" warnings. + dedupe: ['react', 'react-dom', 'react/jsx-runtime', '@tanstack/react-query', 'three'], + + conditions: ['import', 'module', 'browser', 'default'], + }, + optimizeDeps: { + // Do not exclude the copied Reveal feature bundle. Let Vite pre-bundle + // React, React Query, Three.js, and Reveal dependencies as one graph. + esbuildOptions: { + define: { global: 'globalThis' }, + }, + include: [ + // Vite can't auto-discover bare polyfill imports (no source file imports them + // directly). List them explicitly so esbuild pre-bundles them. + 'process', + 'util', + 'assert', + // Heavy/complex deps — explicit listing speeds up cold starts + 'three', + '@cognite/reveal', + // React ecosystem — pre-bundling creates the CJS→ESM singleton all deps share. + // If any of these are missing, a dep that imports React raw can get a second copy. + 'react', + 'react-dom', + '@tanstack/react-query', + ], + }, + server: { + port: 3002, + }, + worker: { + // @cognite/reveal spawns ES module web workers. Without 'es' format they fail + // silently — black screen with no console error. 
+ format: 'es', + }, +}); +``` + +--- + +## Why each setting is needed + + +| Setting | Reason | +| -------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `util/`, `assert/`, `process/browser` aliases | Browser-compatible replacements for Node built-ins. Packages must be in `dependencies`. Do NOT use `vite-plugin-node-polyfills` — causes "Could not resolve 'inherits'" | +| `process` polyfill in main.tsx first | `@cognite/reveal` deps call `process.env` at **runtime** (not build-time). The `define` replacements handle build-time; the window assignment handles runtime | +| `define.global = 'globalThis'` | Some CJS deps use `global` instead of `globalThis` | +| `resolve.alias.three` | Single Three.js instance — without this, Reveal's bundled copy and the app's copy conflict | +| `resolve.dedupe` with react + react-dom + react/jsx-runtime | pnpm symlinks can create separate module instances. `dedupe` forces one copy. Critical — missing these causes `ReactCurrentDispatcher` errors | +| `resolve.dedupe` with three | Ensures the copied feature bundle and Reveal share the same Three.js | +| `optimizeDeps.include` for process/util/assert | No source file imports them, so Vite cannot auto-discover them for pre-bundling | +| `optimizeDeps.include` for react + react-dom + @tanstack/react-query | Converts CJS → ESM and creates a single shared instance. 
All pre-bundled deps that import React get the same copy | +| `worker.format: 'es'` | Reveal spawns ES module workers; Vite defaults to IIFE/UMD which breaks them | +| `conditions: ['import', 'module', 'browser', 'default']` | Ensures browser ESM variants are preferred over CJS/Node variants | + + +## Common mistakes that break the setup + + +| Mistake | Symptom | Fix | +| ---------------------------------------------------------------------------- | --------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| Installing `three` without checking `@cognite/reveal`'s peer requirement | `unmet peer three@0.180.0: found 0.177.x` warning; potential rendering bugs | After install, compare versions; `pnpm add three@^` if mismatched | +| Not adding `ajv` as a direct dependency | `unmet peer ajv@>=8: found 6.x` | `pnpm add ajv` (installs `^8`) in the app | +| Copied Reveal feature folder in `optimizeDeps.exclude` | `ReactCurrentDispatcher` undefined or `No QueryClient set` | Remove from `exclude` and let Vite pre-bundle shared deps | +| `vite-plugin-node-polyfills` instead of manual aliases | `Could not resolve "inherits"` on transitive deps | Remove the plugin; add `util`, `assert`, `process` to dependencies and use aliases | +| `RevealKeepAlive` inside conditional component | `ObjectUnsubscribedError: object unsubscribed` at model load | Move `CacheProvider` + `RevealKeepAlive` to always-mounted app/page level | +| Inline arrow as `onSelect`/`onLoad` prop | `Maximum update depth exceeded` | `useCallback` at call site; call `onSelect` from `useEffect` inside model browser, never from render | +| Model browser calls `onSelect` during render (`if (revision) onSelect(...)`) | `Maximum update depth exceeded` | Move to `useEffect([revision, onSelect])` | +| Missing `worker.format: 'es'` | Black screen, no error | Add `worker: { format: 'es' }` | +| 
`react`/`react-dom` missing from `resolve.dedupe` | `ReactCurrentDispatcher` in pnpm monorepo | Add both to `dedupe` | +| Container has no height | Canvas collapses to 0px, nothing renders | Add `height: '70vh'` (or flex/grid height) to the parent element | diff --git a/.agents/skills/security/SKILL.md b/.agents/skills/security/SKILL.md new file mode 100644 index 0000000..062116e --- /dev/null +++ b/.agents/skills/security/SKILL.md @@ -0,0 +1,248 @@ +--- +name: security +description: "MUST be used whenever fixing security issues in a Flows app, or before shipping any feature that handles credentials, user input, or external data. This skill finds AND fixes security problems — it does not just report them. Do NOT skip this when the user asks for a security fix, security hardening, or vulnerability remediation — run every step in order. Triggers: security, security fix, security hardening, vulnerability, XSS, injection, credentials, secrets, auth, authentication, authorization, token, sensitive data, input validation, CORS, CSP, dependency audit." +allowed-tools: Read, Glob, Grep, Shell, Write +metadata: + argument-hint: "[file or directory to audit, or leave blank to audit the whole app]" +--- + +# Security Fix + +Find and fix security issues in **$ARGUMENTS** (or the whole app if no argument is given). Work through every step below in order. Every step that finds an issue must also fix it. 
+ +--- + +## Step 1 — Map the attack surface + +Read these files before checking anything: + +- `src/main.tsx` / `src/App.tsx` — entry point, routing, auth gating +- `vite.config.ts` — dev server proxy, CORS, headers +- `package.json` — list of third-party dependencies +- Any file matching `**/auth*`, `**/login*`, `**/token*`, `**/credential*` + +Identify: +- All pages/routes and whether each is behind an auth guard +- All places where external data enters the app (CDF SDK calls, `fetch`, user form input) +- All places where data is written back (CDF upsert, `fetch` POST/PUT/DELETE) + +--- + +## Step 2 — Migrate all CDF access to the Cognite SDK + +All traffic to **Cognite Data Fusion (CDF)** must go through the **official Cognite SDK**. Find **any** HTTP, WebSocket, or other network call to CDF-like hosts or APIs that **bypasses** the SDK and rewrite it to use the SDK. + +### Search for raw HTTP calls + +```bash +# Find fetch, axios, XMLHttpRequest, and other HTTP client usage +grep -rn --include="*.ts" --include="*.tsx" --include="*.js" \ + -E "(fetch\(|axios\.|axios\(|XMLHttpRequest|\.ajax\(|http\.get\(|http\.post\(|request\()" src/ + +# Find raw URL construction that looks like CDF endpoints +grep -rn --include="*.ts" --include="*.tsx" \ + -E "(cognitedata\.com|cognite\.ai|/api/v1/projects|cdf\.|\.cognite\.)" src/ + +# Find custom Authorization or api-key headers +grep -rn --include="*.ts" --include="*.tsx" \ + -E "(Authorization|api-key|apikey|x-api-key)" src/ | grep -v "node_modules" +``` + +### How to fix + +For each raw CDF call found, read the surrounding code to understand what CDF resource and operation it targets, then rewrite it using the appropriate SDK method. Remove the raw HTTP client import if it's no longer used. 
+ +| Pattern | Action | +|---------|--------| +| `fetch()` or `axios` call to a CDF URL (`*.cognitedata.com`, `/api/v1/projects/*`) | **Rewrite** to use the Cognite SDK (`cognite.files.getDownloadUrls(...)`, `cognite.timeseries.retrieve(...)`, `client.instances.search(...)`, etc.) | +| Custom `Authorization` header with a CDF token | **Remove** — the SDK handles auth automatically | +| WebSocket connection to CDF endpoints | **Rewrite** to use SDK streaming methods | +| Proxy endpoint that forwards to CDF internally | **Rewrite** the proxy to use the SDK internally | +| `fetch()` to a non-CDF URL (static assets, documented third-party API) | **Leave** — but add a comment documenting why it's needed | + +After rewriting all CDF calls, remove any `axios` or `fetch`-related imports that are no longer used. + +### What is acceptable + +- All CDF reads/writes through `sdk.files.*`, `sdk.timeseries.*`, `client.instances.*`, etc. +- Non-CDF network calls that are: + - To known static asset hosts (CDNs, image services) + - To documented third-party APIs required by the product + - Explicitly noted in the app's README or architecture docs + +--- + +## Step 3 — Find and fix credential & secret hygiene + +Search for hard-coded credentials and sensitive values: + +```bash +# Look for anything that smells like a secret in source files +grep -rn --include="*.ts" --include="*.tsx" --include="*.js" \ + -E "(password|secret|apikey|api_key|token|bearer|private_key)\s*=\s*['\"]" src/ +``` + +For each hardcoded secret, replace it with an environment variable. Create or update `.env.example` with a placeholder. Add `.env` to `.gitignore` if missing. + +### How to fix + +1. **Replace each hardcoded secret** with an `import.meta.env.VITE_*` reference. For example: + - `const apiKey = "sk-abc123"` → `const apiKey = import.meta.env.VITE_API_KEY` + - `const token = "eyJhbG..."` → `const token = import.meta.env.VITE_AUTH_TOKEN` + +2. 
**Add the variable to `.env.example`** with a placeholder value (e.g., `VITE_API_KEY=your-api-key-here`). Create `.env.example` if it doesn't exist. Because Vite inlines every `VITE_*` variable into the client bundle at build time, use this only for non-sensitive configuration; a real secret must be removed from the frontend entirely and kept behind the Cognite SDK auth flow or a server-side service. + +3. **Ensure `.env` and `.env.local` are in `.gitignore`** — add them if missing. + +4. **Remove any `console.log`, `console.error`, or similar calls** that print a CDF token, user object, or API key. + +--- + +## Step 4 — Find and fix dangerous DOM APIs + +Search for patterns that allow arbitrary script execution or HTML injection: + +```bash +grep -rn --include="*.tsx" --include="*.ts" \ + -E "dangerouslySetInnerHTML|innerHTML\s*=|eval\(|new Function\(|setTimeout\(['\"]|setInterval\(['\"]" src/ +``` + +For each dangerous DOM pattern, apply the fix directly. Install DOMPurify with `pnpm add dompurify` and `pnpm add -D @types/dompurify` if needed. + +### How to fix + +- **`dangerouslySetInnerHTML`**: Wrap the value with `DOMPurify.sanitize()`. Add `import DOMPurify from 'dompurify'` to the file. Example: + ```tsx + // Before +
+ // After + import DOMPurify from 'dompurify'; +
+ ``` + +- **`eval()` / `new Function()`**: Rewrite using a data-driven approach. Use `JSON.parse()` for data parsing, or a lookup table / switch statement for dynamic logic dispatch. Never pass user-controlled strings to code evaluation. + +- **`setTimeout`/`setInterval` with a string argument**: Convert to a function reference: + ```ts + // Before + setTimeout("doSomething()", 1000) + // After + setTimeout(() => doSomething(), 1000) + ``` + +--- + +## Step 5 — Find and fix authentication & authorization gaps + +Read the auth setup (likely `src/contexts/`, `src/hooks/`, or `setup-flows-auth` output): + +- Every route that shows CDF data must be behind the Flows auth guard (`useCogniteClient` returns a non-null `sdk` before rendering). +- The CDF client must be initialized with short-lived OIDC tokens, not a static API key. +- User role/capability checks must happen server-side (CDF ACLs) — do not rely solely on hiding UI elements. + +Check the `useAtlasChat` / Atlas agent integration: +- The `agentExternalId` must not be constructed from user-supplied input. +- Tool `execute` functions must not trust `args` blindly — validate or guard before using values in CDF queries. + +### How to fix + +For each unguarded route that shows CDF data, wrap it with the auth guard component. For example, ensure the route element is wrapped in a component that checks `useCogniteClient` and renders a loading/login state when the SDK is not ready. + +For Atlas tool `execute` functions, add argument validation at the top of each function. Validate that each `args` field is the expected type and within expected bounds before using it in any CDF query. 
+ +--- + +## Step 6 — Find and fix input validation gaps + +Every value that comes from a form, URL param, or query string before it reaches a CDF call or is rendered to the DOM must be validated: + +```bash +# Find useSearchParams, URLSearchParams, and form onChange handlers +grep -rn --include="*.tsx" --include="*.ts" \ + -E "useSearchParams|URLSearchParams|searchParams\.get|e\.target\.value" src/ +``` + +For each unvalidated external input, add runtime validation. Install Zod if not present (`pnpm add zod`). Create a schema that matches the expected shape and use `.safeParse()` instead of type casts. + +### How to fix + +1. **Add Zod schemas** for URL params and form inputs. Example: + ```ts + import { z } from 'zod'; + const paramSchema = z.object({ + id: z.string().min(1), + page: z.coerce.number().int().positive().default(1), + }); + const result = paramSchema.safeParse({ id: searchParams.get('id'), page: searchParams.get('page') }); + if (!result.success) { /* handle error */ } + ``` + +2. **Replace `as MyType` casts on external data** with Zod `.safeParse()` — never trust data from URL params, form inputs, or API responses without validation. + +3. **Add nullish fallbacks for `searchParams.get()`** — always handle the case where the param is missing or empty. + +--- + +## Step 7 — Find and fix Vite / server configuration + +Read `vite.config.ts` and any `server.ts` / `express.ts` files. + +### How to fix + +Add any missing security headers to the `vite.config.ts` `server.headers` section. If the section doesn't exist, create it. 
The minimum required headers are: + +```ts +server: { + headers: { + 'Content-Security-Policy': "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; connect-src 'self' https://*.cognitedata.com", + 'X-Frame-Options': 'DENY', + 'X-Content-Type-Options': 'nosniff', + }, +} +``` + +Adjust the `Content-Security-Policy` to match the app's actual needs (e.g., adding specific CDN hosts for fonts or images). Note that `X-Frame-Options: DENY` blocks all framing; for an app that runs inside the Fusion iframe, use a CSP `frame-ancestors` directive that lists the Fusion host instead, otherwise the app cannot load in Fusion. + +Also: +- **Remove any `define` entries** in `vite.config.ts` that embed raw secrets into the bundle. Use `import.meta.env` instead. +- **Confirm the dev proxy** (`server.proxy`) does not expose internal endpoints in production builds. + +--- + +## Step 8 — Find and fix dependency vulnerabilities + +```bash +pnpm audit --audit-level=high +``` + +### How to fix + +1. Run `pnpm audit --fix` followed by `pnpm install` first to auto-fix what's possible (pnpm writes `overrides` that force non-vulnerable versions). +2. For any remaining high/critical CVEs, manually update the package version in `package.json` and run `pnpm install`. +3. If a vulnerable package has no fix available, document it as a known risk and check if there's an alternative package. + +--- + +## Step 9 — Report remaining findings + +Report only issues that could **not** be auto-fixed (e.g., architectural decisions that need human judgment, packages with no available fix, or patterns that require significant refactoring). 
+ +Summarize what was fixed in each step: + +| Step | What was fixed | Remaining issues | +|------|---------------|-----------------| +| 2 — CDF SDK | Migrated N raw calls to SDK | (any that couldn't be migrated) | +| 3 — Credentials | Replaced N hardcoded secrets with env vars | (any that need human decision) | +| 4 — DOM | Sanitized N dangerous patterns | (any that need refactoring) | +| 5 — Auth | Wrapped N unguarded routes | (any architectural gaps) | +| 6 — Validation | Added Zod schemas to N inputs | (any that need custom logic) | +| 7 — Vite config | Added N security headers | (any CSP tuning needed) | +| 8 — Dependencies | Fixed N vulnerable packages | (any with no available fix) | + +If any remaining issues require immediate action before deployment, list them explicitly. + +--- + +## Done + +State what was fixed and confirm the app is more secure. List any remaining items that require human judgment before the next deployment. diff --git a/.agents/skills/setup-flows-auth/SKILL.md b/.agents/skills/setup-flows-auth/SKILL.md new file mode 100644 index 0000000..141807e --- /dev/null +++ b/.agents/skills/setup-flows-auth/SKILL.md @@ -0,0 +1,191 @@ +--- +name: setup-flows-auth +description: "MUST be used when migrating an existing React app to Flows, or when no Flows auth is wired up. Detects classic vs Apps API flow from `app.json` `infra` field, installs the right packages, and wires up the entry file. No-op when a valid auth setup is already in place. Triggers: migrate to Flows, add Flows auth, DuneAuthProvider, AppSdkAuthProvider, connectToHostApp, useDune, Flows setup, setup auth, missing auth provider, CDF authentication, Fusion iframe auth." +allowed-tools: Read, Glob, Grep, Edit, Write, Bash +metadata: + argument-hint: "" +--- + +# Set Up Flows Authentication + +Wire a React app for Flows auth so it can talk to CDF inside Fusion. Two flows exist; pick one based on `app.json`. 
+ +## Pick the flow + +Read `app.json` if present: + +| `app.json` `infra` | Flow | Auth source | Extra package | +|---|---|---|---| +| `"appsApi"` | **Apps API** (new Fusion app host) | `connectToHostApp` from `@cognite/app-sdk` | `@cognite/app-sdk` | +| missing / other | **Classic** (legacy Files API) | `DuneAuthProvider` + `useDune()` from `@cognite/dune` | — | + +No `app.json`? Ask the user. Default to **Apps API** — it's the default for `npx @cognite/cli@latest apps create`. + +## Step 1 — Read state, decide whether to act + +Read `package.json`, `src/main.tsx` (or `src/index.tsx`), `vite.config.ts`, `app.json`. + +**A valid setup already exists if any of these is true — in which case do nothing and report no-op:** + +- **Classic**: `<DuneAuthProvider>` from `@cognite/dune` wraps `<App />` in the entry file. +- **Apps API, provider pattern**: `<CogniteSdkProvider>` from `@cognite/app-sdk/react` wraps the app (in `App.tsx` or `main.tsx`), and nested components consume the client via `useCogniteSdk()`. Requires `@cognite/app-sdk >= 0.5.1`. + +Detect the package manager from the lock file (`pnpm-lock.yaml` → pnpm, `yarn.lock` → yarn, otherwise npm). + +## Step 2 — Install missing deps + +**Classic flow:** + +| Package | Type | +|---|---| +| `@cognite/dune` | runtime | +| `@cognite/sdk` | runtime | +| `@tanstack/react-query` | runtime | +| `vite-plugin-mkcert` | dev | + +**Apps API flow:** + +| Package | Type | +|---|---| +| `@cognite/app-sdk` | runtime | +| `@cognite/sdk` | runtime | +| `@tanstack/react-query` | runtime | +| `vite-plugin-mkcert` | dev | + +Skip anything already in `package.json`. Use the detected package manager (`pnpm add`, `npm install`, `yarn add`; `-D` / `--save-dev` for dev deps). + +## Step 3 — Vite config + +Add only what's missing. Don't remove existing plugins. 
+ +### Classic flow + +```ts +import { fusionOpenPlugin } from "@cognite/dune/vite"; +import mkcert from "vite-plugin-mkcert"; + +export default defineConfig({ + base: "./", + plugins: [react(), mkcert(), fusionOpenPlugin(), /* ... */], + server: { port: 3001 }, + worker: { format: "es" }, +}); +``` + +### Apps API flow + +```ts +// or see @cognite/cli/_templates/app/new/config/vite.config.ts.ejs.t source file for newest config +import { fusionOpenPlugin, manifestCspPlugin } from "@cognite/app-sdk/vite"; +import mkcert from "vite-plugin-mkcert"; + +export default defineConfig({ + base: "./", + // manifestCspPlugin() must be first — its middleware sets the CSP header before any HTML response + plugins: [manifestCspPlugin(), react(), mkcert(), fusionOpenPlugin(), /* ... */], + server: { port: 3001 }, + worker: { format: "es" }, +}); +``` + +- `base: "./"` — required for Fusion iframe deployment. +- `mkcert()` — provides HTTPS for the dev server (the Fusion parent is HTTPS). +- `fusionOpenPlugin()` — opens the dev URL inside Fusion automatically. +- `manifestCspPlugin()` (Apps API only) — enforces the CSP declared in `manifest.json`; must be first. +- `server.port: 3001` — convention; the plugin falls back to 3001 if no port is set. 
+ +## Step 4 — Wire up the entry file and component + +### Classic flow + +`src/main.tsx`: + +```tsx +import { DuneAuthProvider } from "@cognite/dune"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import React from "react"; +import ReactDOM from "react-dom/client"; +import App from "./App.tsx"; + +const queryClient = new QueryClient({ + defaultOptions: { queries: { staleTime: 5 * 60 * 1000, gcTime: 10 * 60 * 1000 } }, +}); + +ReactDOM.createRoot(document.getElementById("root")!).render( + + + + + + + +); +``` + +In components, use `useDune()`: + +```tsx +import { useDune } from "@cognite/dune"; + +const { sdk, isLoading, error } = useDune(); +// sdk is an authenticated CogniteClient +``` + +### Apps API flow (generator default, `@cognite/app-sdk >= 0.5.1`) + +`src/main.tsx` does **not** wrap in any auth provider — auth is handled inside `App.tsx`: + +```tsx +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import React from "react"; +import ReactDOM from "react-dom/client"; +import App from "./App.tsx"; + +const queryClient = new QueryClient({ + defaultOptions: { queries: { staleTime: 5 * 60 * 1000, gcTime: 10 * 60 * 1000 } }, +}); + +ReactDOM.createRoot(document.getElementById("root")!).render( + + + + + +); +``` + +`src/App.tsx` uses `CogniteSdkProvider` from `@cognite/app-sdk/react`. The provider handles the Comlink handshake, loading, and error states internally. Nested components read the client via `useCogniteSdk()`: + +```tsx +import { CogniteSdkProvider, useCogniteSdk } from "@cognite/app-sdk/react"; + +function AppContent() { + const client = useCogniteSdk(); + // client is an authenticated CogniteClient + return
{client.project}
; +} + +function App() { + return ( + Loading...
} + errorFallback={
Failed to connect to Fusion
} + > + + + ); +} +``` + +`useCogniteSdk()` throws if called outside `CogniteSdkProvider` — always nest it inside. + +## Step 5 — Clean up superseded code + +Remove only what's now redundant: + +- Custom CDF auth providers/hooks +- Manual `CogniteClient` instantiation +- OIDC/token-management code +- CDF env vars (`VITE_CDF_PROJECT`, `VITE_CDF_CLUSTER`, etc.) — Flows/the host provide these + +If unsure, leave it and flag to the user. diff --git a/.agents/skills/setup-python-tools/SKILL.md b/.agents/skills/setup-python-tools/SKILL.md new file mode 100644 index 0000000..d552beb --- /dev/null +++ b/.agents/skills/setup-python-tools/SKILL.md @@ -0,0 +1,176 @@ +--- +name: setup-python-tools +description: "MUST be used when adding Pyodide or Python tool support to a Flows app. Do NOT manually configure usePyodideRuntime or wire pythonRuntime into useAtlasChat — this skill handles pyodide installation, hook setup, loading UI, and chat hook wiring. Prerequisite: integrate-atlas-chat (vendored src/atlas-agent + atlas chat wiring). Triggers: Pyodide, Python tools, pythonRuntime, usePyodideRuntime, runPythonCode, Python execution, client-side Python." +allowed-tools: Read, Glob, Grep, Edit, Write, Bash +metadata: + argument-hint: "[tool-names or agent-external-id]" +--- + +# Set Up Python Tool Execution + +Add client-side Python tool execution via Pyodide to this Flows app. + +Target: **$ARGUMENTS** + +## Prerequisite + +**`integrate-atlas-chat`** must already be complete: the app should have vendored atlas-agent code under `src/atlas-agent/` (including `react.ts` for `useAtlasChat`) and the peer dependencies from that skill (`@sinclair/typebox`, `ajv`, `ajv-formats`). Copy the Python-related modules from the **`integrate-atlas-chat`** skill `code/` directory into `src/atlas-agent/` when adding Pyodide (`python.ts`, `pyodide.ts`, `pyodide-react.ts`, `pyodide-runtime.ts` — see **`integrate-atlas-chat`** Step 5). 
+ +## Background + +Atlas agents can have Python tools defined in their CDF config (`type: "runPythonCode"`). +When the agent calls one, it arrives as a `toolConfirmation` (auto-allowed) followed by a +`clientTool` action. The library fetches the tool's Python code from the agent config +automatically and executes it via the provided `pythonRuntime`. + +You only need to: +1. Set up `usePyodideRuntime` to get a runtime instance +2. Pass `pythonRuntime` to `useAtlasChat` + +No `PythonToolConfig` entries — the library reads the code from the agent's CDF config. + +The flow is: +1. `usePyodideRuntime` loads Pyodide (~30MB, cached after first load), installs packages, + and injects Cognite SDK credentials into the Python environment +2. When the agent calls a Python tool, the library fetches its code from the agent's CDF + config (cached per session), wraps it, executes it in Pyodide, and returns the result + +--- + +## Step 1 — Understand the app + +Read these files before touching anything: + +- `package.json` — detect package manager and existing deps +- The component that calls `useAtlasChat` — understand current tools/config + +--- + +## Step 2 — Install Pyodide + +Install **exactly** `pyodide@0.29.3` using the app's package manager. +This version must match the CDN artifacts loaded at runtime — installing a different version will cause errors. + +- pnpm → `pnpm add pyodide@0.29.3` +- npm → `npm install pyodide@0.29.3` +- yarn → `yarn add pyodide@0.29.3` + +> **Note**: After **`integrate-atlas-chat`**, `@sinclair/typebox`, `ajv`, and `ajv-formats` should +> already be installed. If anything is missing, install the versions listed in that skill's **Dependencies** table. 
+ +--- + +## Step 3 — Set up usePyodideRuntime + +In the component that calls `useAtlasChat`, add the Pyodide runtime hook: + +```tsx +import { loadPyodide } from "pyodide"; +import { usePyodideRuntime } from "./atlas-agent/pyodide-react"; +import { useAtlasChat } from "./atlas-agent/react"; + +function MyChat() { + const { sdk, isLoading } = useDune(); + + // Initialize Python runtime (loads Pyodide, installs packages, sets up Cognite SDK) + const { + runtime: pythonRuntime, + loading: pythonLoading, + progress: pythonProgress, + error: pythonError, + isReady: pythonReady, + } = usePyodideRuntime({ + loadPyodide, + client: isLoading ? null : sdk, + requirements: ["pandas", "numpy"], // optional — additional packages + }); + + // ... useAtlasChat below +} +``` + +### Hook API reference + +| Return field | Type | Description | +|---|---|---| +| `runtime` | `PythonRuntime \| undefined` | The initialized runtime, or undefined if not ready | +| `loading` | `boolean` | True while Pyodide is loading / initializing | +| `error` | `string \| null` | Error message if initialization failed | +| `progress` | `{ stage: string; percent: number }` | Current init progress for UI display | +| `isReady` | `boolean` | Convenience: `!loading && !error && runtime !== undefined` | + +### Loading state UI + +Place the loading indicator **above the chat input**, not in the message list. +Keep it compact — a pill/badge showing stage text and percent. Show an error badge separately. +First load is ~30-60s (downloads ~30MB); subsequent loads are <2s from browser cache. + +```tsx +{/* Loading — shown above the input while Pyodide initializes */} +{pythonLoading && ( +
+ {/* Optional: from @tabler/icons-react */} + {pythonProgress.stage || "Initializing Python..."} + {pythonProgress.percent > 0 && pythonProgress.percent < 100 && ( + ({pythonProgress.percent}%) + )} +
+)} + +{/* Error — shown if init fails (after loading finishes) */} +{pythonError && !pythonLoading && ( +
+ Python runtime failed to load +
+)} +``` + +--- + +## Step 4 — Wire into useAtlasChat + +Pass the runtime to `useAtlasChat`. That's all — no tool configs needed: + +```tsx +const { messages, send, isStreaming, progress, error, reset, abort } = useAtlasChat({ + client: isLoading ? null : sdk, + agentExternalId: "my-agent", + tools: [renderTimeSeries], // regular client tools (declared to agent), if any + pythonRuntime, // from usePyodideRuntime — enables Python tool execution +}); +``` + +**Note**: Python tools are NOT declared to the agent via `tools`. The agent already knows +about them from its CDF config. The library fetches the code automatically when needed. + +--- + +## Step 5 — Disable input while Python loads + +The user shouldn't send messages before the runtime is ready. Disable the **entire input area** +(not just the send button) so the state is unambiguous: + +```tsx + +``` + +If you have a home page with suggestion chips, disable those too: + +```tsx + +``` + +--- + +## Done + +The app can now execute Python tools client-side via Pyodide. When the agent calls a Python +tool, the library automatically fetches its code from the agent config, runs it in the +browser, and returns the result to the agent. diff --git a/.agents/skills/skill-creator/SKILL.md b/.agents/skills/skill-creator/SKILL.md new file mode 100644 index 0000000..9419fe8 --- /dev/null +++ b/.agents/skills/skill-creator/SKILL.md @@ -0,0 +1,482 @@ +--- +name: skill-creator +description: Create new skills, modify and improve existing skills, and measure skill performance. Use when users want to create a skill from scratch for Claude Code or Cursor, update or optimize an existing skill, run evals to test a skill, benchmark skill performance with variance analysis, or optimize a skill's description for better triggering accuracy. +--- + +# Skill Creator + +A skill for creating new skills and iteratively improving them. 
+ +At a high level, the process of creating a skill goes like this: + +- Decide what you want the skill to do and roughly how it should do it +- Write a draft of the skill +- Create a few test prompts and run the agent with access to the skill on them +- Help the user evaluate the results both qualitatively and quantitatively + - While the runs happen in the background, draft some quantitative evals if there aren't any (if there are some, you can either use as is or modify if you feel something needs to change about them). Then explain them to the user (or if they already existed, explain the ones that already exist) + - Use the `eval-viewer/generate_review.py` script to show the user the results for them to look at, and also let them look at the quantitative metrics +- Rewrite the skill based on feedback from the user's evaluation of the results (and also if there are any glaring flaws that become apparent from the quantitative benchmarks) +- Repeat until you're satisfied +- Expand the test set and try again at larger scale + +Your job when using this skill is to figure out where the user is in this process and then jump in and help them progress through these stages. So for instance, maybe they're like "I want to make a skill for X". You can help narrow down what they mean, write a draft, write the test cases, figure out how they want to evaluate, run all the prompts, and repeat. + +On the other hand, maybe they already have a draft of the skill. In this case you can go straight to the eval/iterate part of the loop. + +Of course, you should always be flexible and if the user is like "I don't need to run a bunch of evaluations, just vibe with me", you can do that instead. + +Then after the skill is done (but again, the order is flexible), you can also run the skill description improver, which we have a whole separate script for, to optimize the triggering of the skill. + +Cool? Cool. 
+ +## Communicating with the user + +The skill creator is liable to be used by people across a wide range of familiarity with coding jargon. If you haven't heard (and how could you, it's only very recently that it started), there's a trend now where the power of Claude is inspiring plumbers to open up their terminals, parents and grandparents to google "how to install npm". On the other hand, the bulk of users are probably fairly computer-literate. + +So please pay attention to context cues to understand how to phrase your communication! In the default case, just to give you some idea: + +- "evaluation" and "benchmark" are borderline, but OK +- for "JSON" and "assertion" you want to see serious cues from the user that they know what those things are before using them without explaining them + +It's OK to briefly explain terms if you're in doubt, and feel free to clarify terms with a short definition if you're unsure if the user will get it. + +--- + +## Creating a skill + +### Capture Intent + +Start by understanding the user's intent. The current conversation might already contain a workflow the user wants to capture (e.g., they say "turn this into a skill"). If so, extract answers from the conversation history first — the tools used, the sequence of steps, corrections the user made, input/output formats observed. The user may need to fill the gaps, and should confirm before proceeding to the next step. + +1. What should this skill enable the AI agent to do? +2. When should this skill trigger? (what user phrases/contexts) +3. What's the expected output format? +4. Should we set up test cases to verify the skill works? Skills with objectively verifiable outputs (file transforms, data extraction, code generation, fixed workflow steps) benefit from test cases. Skills with subjective outputs (writing style, art) often don't need them. Suggest the appropriate default based on the skill type, but let the user decide. +5. What tool is this skill for? 
(Claude Code, Cursor, or both?) + +### Target Platform + +The SKILL.md format is identical for both Claude Code and Cursor — same YAML frontmatter, same directory structure. The key differences: + +- **Claude Code** discovers skills from `.claude/skills/` +- **Cursor** discovers skills from `.cursor/skills/`, `.agents/skills/`, `~/.cursor/skills/`, and `.claude/skills/` (legacy) +- **Cross-platform**: Place skills in `.agents/skills/` to be discovered by both tools +- **Cursor-only field**: `disable-model-invocation: true` in frontmatter makes the skill invokable only via `/skill-name` (no auto-triggering) + +When creating a skill for both platforms, avoid referencing tool-specific features in the skill body (e.g., Claude's Skill tool vs Cursor's `/skill-name` invocation). + +### Interview and Research + +Proactively ask questions about edge cases, input/output formats, example files, success criteria, and dependencies. Wait to write test prompts until you've got this part ironed out. + +Check available MCPs - if useful for research (searching docs, finding similar skills, looking up best practices), research in parallel via subagents if available, otherwise inline. Come prepared with context to reduce burden on the user. + +### Write the SKILL.md + +Based on the user interview, fill in these components: + +- **name**: Skill identifier +- **description**: When to trigger, what it does. This is the primary triggering mechanism - include both what the skill does AND specific contexts for when to use it. All "when to use" info goes here, not in the body. Note: currently AI agents (both Claude and Cursor) have a tendency to "undertrigger" skills -- to not use them when they'd be useful. To combat this, please make the skill descriptions a little bit "pushy". So for instance, instead of "How to build a simple fast dashboard to display internal Anthropic data.", you might write "How to build a simple fast dashboard to display internal Anthropic data. 
Make sure to use this skill whenever the user mentions dashboards, data visualization, internal metrics, or wants to display any kind of company data, even if they don't explicitly ask for a 'dashboard.'" +- **compatibility**: Required tools, dependencies (optional, rarely needed) +- **the rest of the skill :)** + +### Skill Writing Guide + +#### Anatomy of a Skill + +``` +skill-name/ +├── SKILL.md (required) +│ ├── YAML frontmatter (name, description required) +│ └── Markdown instructions +└── Bundled Resources (optional) + ├── scripts/ - Executable code for deterministic/repetitive tasks + ├── references/ - Docs loaded into context as needed + └── assets/ - Files used in output (templates, icons, fonts) +``` + +#### Progressive Disclosure + +Skills use a three-level loading system: +1. **Metadata** (name + description) - Always in context (~100 words) +2. **SKILL.md body** - In context whenever skill triggers (<500 lines ideal) +3. **Bundled resources** - As needed (unlimited, scripts can execute without loading) + +These word counts are approximate and you can feel free to go longer if needed. + +**Key patterns:** +- Keep SKILL.md under 500 lines; if you're approaching this limit, add an additional layer of hierarchy along with clear pointers about where the model using the skill should go next to follow up. +- Reference files clearly from SKILL.md with guidance on when to read them +- For large reference files (>300 lines), include a table of contents + +**Domain organization**: When a skill supports multiple domains/frameworks, organize by variant: +``` +cloud-deploy/ +├── SKILL.md (workflow + selection) +└── references/ + ├── aws.md + ├── gcp.md + └── azure.md +``` +Claude reads only the relevant reference file. + +#### Principle of Lack of Surprise + +This goes without saying, but skills must not contain malware, exploit code, or any content that could compromise system security. A skill's contents should not surprise the user in their intent if described. 
Don't go along with requests to create misleading skills or skills designed to facilitate unauthorized access, data exfiltration, or other malicious activities. Things like a "roleplay as an XYZ" are OK though. + +#### Writing Patterns + +Prefer using the imperative form in instructions. + +**Defining output formats** - You can do it like this: +```markdown +## Report structure +ALWAYS use this exact template: +# [Title] +## Executive summary +## Key findings +## Recommendations +``` + +**Examples pattern** - It's useful to include examples. You can format them like this (but if "Input" and "Output" are in the examples you might want to deviate a little): +```markdown +## Commit message format +**Example 1:** +Input: Added user authentication with JWT tokens +Output: feat(auth): implement JWT-based authentication +``` + +### Writing Style + +Try to explain to the model why things are important in lieu of heavy-handed musty MUSTs. Use theory of mind and try to make the skill general and not super-narrow to specific examples. Start by writing a draft and then look at it with fresh eyes and improve it. + +### Test Cases + +After writing the skill draft, come up with 2-3 realistic test prompts — the kind of thing a real user would actually say. Share them with the user: [you don't have to use this exact language] "Here are a few test cases I'd like to try. Do these look right, or do you want to add more?" Then run them. + +Save test cases to `evals/evals.json`. Don't write assertions yet — just the prompts. You'll draft assertions in the next step while the runs are in progress. + +```json +{ + "skill_name": "example-skill", + "evals": [ + { + "id": 1, + "prompt": "User's task prompt", + "expected_output": "Description of expected result", + "files": [] + } + ] +} +``` + +See `references/schemas.md` for the full schema (including the `assertions` field, which you'll add later). 
+ +## Running and evaluating test cases + +This section is one continuous sequence — don't stop partway through. Do NOT use `/skill-test` or any other testing skill. + +Put results in `<skill-name>-workspace/` as a sibling to the skill directory. Within the workspace, organize results by iteration (`iteration-1/`, `iteration-2/`, etc.) and within that, each test case gets a directory (`eval-0/`, `eval-1/`, etc.). Don't create all of this upfront — just create directories as you go. + +### Step 1: Spawn all runs (with-skill AND baseline) in the same turn + +For each test case, spawn two runs — one with the skill, one without. + +**In Claude Code** (with subagents): spawn all runs in the same turn so everything finishes around the same time. Don't spawn with-skill runs first and baselines later. + +**In Cursor** (no subagents): run test cases sequentially — read the skill, follow its instructions for each test prompt. This is less rigorous but the human review step compensates. + +**With-skill run:** + +``` +Execute this task: +- Skill path: <path-to-skill> +- Task: <eval prompt> +- Input files: <eval files if any, or "none"> +- Save outputs to: <workspace>/iteration-<N>/eval-<ID>/with_skill/outputs/ +- Outputs to save: <what the user cares about> +``` + +**Baseline run** (same prompt, but the baseline depends on context): +- **Creating a new skill**: no skill at all. Same prompt, no skill path, save to `without_skill/outputs/`. +- **Improving an existing skill**: the old version. Before editing, snapshot the skill (`cp -r <skill-path> <workspace>/skill-snapshot/`), then point the baseline subagent at the snapshot. Save to `old_skill/outputs/`. + +Write an `eval_metadata.json` for each test case (assertions can be empty for now). Give each eval a descriptive name based on what it's testing — not just "eval-0". Use this name for the directory too. If this iteration uses new or modified eval prompts, create these files for each new eval directory — don't assume they carry over from previous iterations. 
+ +```json +{ + "eval_id": 0, + "eval_name": "descriptive-name-here", + "prompt": "The user's task prompt", + "assertions": [] +} +``` + +### Step 2: While runs are in progress, draft assertions + +Don't just wait for the runs to finish — you can use this time productively. Draft quantitative assertions for each test case and explain them to the user. If assertions already exist in `evals/evals.json`, review them and explain what they check. + +Good assertions are objectively verifiable and have descriptive names — they should read clearly in the benchmark viewer so someone glancing at the results immediately understands what each one checks. Subjective skills (writing style, design quality) are better evaluated qualitatively — don't force assertions onto things that need human judgment. + +Update the `eval_metadata.json` files and `evals/evals.json` with the assertions once drafted. Also explain to the user what they'll see in the viewer — both the qualitative outputs and the quantitative benchmark. + +### Step 3: As runs complete, capture timing data + +When each subagent task completes, you receive a notification containing `total_tokens` and `duration_ms`. Save this data immediately to `timing.json` in the run directory: + +```json +{ + "total_tokens": 84852, + "duration_ms": 23332, + "total_duration_seconds": 23.3 +} +``` + +This is the only opportunity to capture this data — it comes through the task notification and isn't persisted elsewhere. Process each notification as it arrives rather than trying to batch them. + +### Step 4: Grade — use agents/grader.md, not a custom script + +Programmatic checks (file existence, line counts, string matching) are useful as a *supplement*, but they only catch structural compliance. The grader agent catches things scripts can't: content quality, claim verification, and weak assertions that create false confidence. 
A skill can produce every file in the right place and still have terrible content — the grader is what catches that. Do NOT generate the viewer or benchmark until `grading.json` exists for every run. + +Once all runs are done: + +1. **Grade each run** — spawn a grader subagent (or grade inline) that reads `agents/grader.md` and evaluates each assertion against the outputs. Save results to `grading.json` in each run directory. The grading.json expectations array must use the fields `text`, `passed`, and `evidence` (not `name`/`met`/`details` or other variants) — the viewer depends on these exact field names. You can run programmatic checks as a *supplement* to the grader (scripts are faster and reusable for things like file existence or line counts), but they do not replace the grader agent — always run the grader first for qualitative assessment. + +2. **Aggregate into benchmark** — run the aggregation script from the skill-creator directory: + ```bash + python -m scripts.aggregate_benchmark /iteration-N --skill-name + ``` + This produces `benchmark.json` and `benchmark.md` with pass_rate, time, and tokens for each configuration, with mean ± stddev and the delta. If generating benchmark.json manually, see `references/schemas.md` for the exact schema the viewer expects. +Put each with_skill version before its baseline counterpart. + +3. **Do an analyst pass** — read the benchmark data and surface patterns the aggregate stats might hide. See `agents/analyzer.md` (the "Analyzing Benchmark Results" section) for what to look for — things like assertions that always pass regardless of skill (non-discriminating), high-variance evals (possibly flaky), and time/token tradeoffs. + +4. **Launch the viewer** with both qualitative outputs and quantitative data: + ```bash + nohup python /eval-viewer/generate_review.py \ + /iteration-N \ + --skill-name "my-skill" \ + --benchmark /iteration-N/benchmark.json \ + > /dev/null 2>&1 & + VIEWER_PID=$! 
+ ``` + For iteration 2+, also pass `--previous-workspace /iteration-`. + + **Headless environments:** If `webbrowser.open()` is not available or the environment has no display, use `--static ` to write a standalone HTML file instead of starting a server. Feedback will be downloaded as a `feedback.json` file when the user clicks "Submit All Reviews". After download, copy `feedback.json` into the workspace directory for the next iteration to pick up. + +Note: please use generate_review.py to create the viewer; there's no need to write custom HTML. + +5. **Tell the user** something like: "I've opened the results in your browser. There are two tabs — 'Outputs' lets you click through each test case and leave feedback, 'Benchmark' shows the quantitative comparison. When you're done, come back here and let me know." + +### What the user sees in the viewer + +The "Outputs" tab shows one test case at a time: +- **Prompt**: the task that was given +- **Output**: the files the skill produced, rendered inline where possible +- **Previous Output** (iteration 2+): collapsed section showing last iteration's output +- **Formal Grades** (if grading was run): collapsed section showing assertion pass/fail +- **Feedback**: a textbox that auto-saves as they type +- **Previous Feedback** (iteration 2+): their comments from last time, shown below the textbox + +The "Benchmark" tab shows the stats summary: pass rates, timing, and token usage for each configuration, with per-eval breakdowns and analyst observations. + +Navigation is via prev/next buttons or arrow keys. When done, they click "Submit All Reviews" which saves all feedback to `feedback.json`. 
+ +### Step 5: Read the feedback + +When the user tells you they're done, read `feedback.json`: + +```json +{ + "reviews": [ + {"run_id": "eval-0-with_skill", "feedback": "the chart is missing axis labels", "timestamp": "..."}, + {"run_id": "eval-1-with_skill", "feedback": "", "timestamp": "..."}, + {"run_id": "eval-2-with_skill", "feedback": "perfect, love this", "timestamp": "..."} + ], + "status": "complete" +} +``` + +Empty feedback means the user thought it was fine. Focus your improvements on the test cases where the user had specific complaints. + +Kill the viewer server when you're done with it: + +```bash +kill $VIEWER_PID 2>/dev/null +``` + +--- + +## Improving the skill + +This is the heart of the loop. You've run the test cases, the user has reviewed the results, and now you need to make the skill better based on their feedback. + +### How to think about improvements + +1. **Generalize from the feedback.** The big picture thing that's happening here is that we're trying to create skills that can be used a million times (maybe literally, maybe even more who knows) across many different prompts. Here you and the user are iterating on only a few examples over and over again because it helps move faster. The user knows these examples in and out and it's quick for them to assess new outputs. But if the skill you and the user are codeveloping works only for those examples, it's useless. Rather than put in fiddly overfitty changes, or oppressively constrictive MUSTs, if there's some stubborn issue, you might try branching out and using different metaphors, or recommending different patterns of working. It's relatively cheap to try and maybe you'll land on something great. + +2. **Keep the prompt lean.** Remove things that aren't pulling their weight. 
Make sure to read the transcripts, not just the final outputs — if it looks like the skill is making the model waste a bunch of time doing things that are unproductive, you can try getting rid of the parts of the skill that are making it do that and seeing what happens. + +3. **Explain the why.** Try hard to explain the **why** behind everything you're asking the model to do. Today's LLMs are *smart*. They have good theory of mind and when given a good harness can go beyond rote instructions and really make things happen. Even if the feedback from the user is terse or frustrated, try to actually understand the task and why the user is writing what they wrote, and what they actually wrote, and then transmit this understanding into the instructions. If you find yourself writing ALWAYS or NEVER in all caps, or using super rigid structures, that's a yellow flag — if possible, reframe and explain the reasoning so that the model understands why the thing you're asking for is important. That's a more humane, powerful, and effective approach. + +4. **Look for repeated work across test cases.** Read the transcripts from the test runs and notice if the subagents all independently wrote similar helper scripts or took the same multi-step approach to something. If all 3 test cases resulted in the subagent writing a `create_docx.py` or a `build_chart.py`, that's a strong signal the skill should bundle that script. Write it once, put it in `scripts/`, and tell the skill to use it. This saves every future invocation from reinventing the wheel. + +This task is pretty important (we are trying to create billions a year in economic value here!) and your thinking time is not the blocker; take your time and really mull things over. I'd suggest writing a draft revision and then looking at it anew and making improvements. Really do your best to get into the head of the user and understand what they want and need. + +### The iteration loop + +After improving the skill: + +1. 
Apply your improvements to the skill +2. Rerun all test cases into a new `iteration-/` directory, including baseline runs. If you're creating a new skill, the baseline is always `without_skill` (no skill) — that stays the same across iterations. If you're improving an existing skill, use your judgment on what makes sense as the baseline: the original version the user came in with, or the previous iteration. +3. Launch the reviewer with `--previous-workspace` pointing at the previous iteration +4. Wait for the user to review and tell you they're done +5. Read the new feedback, improve again, repeat + +Keep going until: +- The user says they're happy +- The feedback is all empty (everything looks good) +- You're not making meaningful progress + +--- + +## Advanced: Blind comparison + +For situations where you want a more rigorous comparison between two versions of a skill (e.g., the user asks "is the new version actually better?"), there's a blind comparison system. Read `agents/comparator.md` and `agents/analyzer.md` for the details. The basic idea is: give two outputs to an independent agent without telling it which is which, and let it judge quality. Then analyze why the winner won. + +This is optional, requires subagents, and most users won't need it. The human review loop is usually sufficient. + +--- + +## Description Optimization + +The description field in SKILL.md frontmatter is the primary mechanism that determines whether Claude invokes a skill. After creating or improving a skill, offer to optimize the description for better triggering accuracy. + +### Step 1: Generate trigger eval queries + +Create 20 eval queries — a mix of should-trigger and should-not-trigger. Save as JSON: + +```json +[ + {"query": "the user prompt", "should_trigger": true}, + {"query": "another prompt", "should_trigger": false} +] +``` + +The queries must be realistic and something a Claude Code or Cursor user would actually type. 
Not abstract requests, but requests that are concrete and specific and have a good amount of detail. For instance, file paths, personal context about the user's job or situation, column names and values, company names, URLs. A little bit of backstory. Some might be in lowercase or contain abbreviations or typos or casual speech. Use a mix of different lengths, and focus on edge cases rather than making them clear-cut (the user will get a chance to sign off on them). + +Bad: `"Format this data"`, `"Extract text from PDF"`, `"Create a chart"` + +Good: `"ok so my boss just sent me this xlsx file (its in my downloads, called something like 'Q4 sales final FINAL v2.xlsx') and she wants me to add a column that shows the profit margin as a percentage. The revenue is in column C and costs are in column D i think"` + +For the **should-trigger** queries (8-10), think about coverage. You want different phrasings of the same intent — some formal, some casual. Include cases where the user doesn't explicitly name the skill or file type but clearly needs it. Throw in some uncommon use cases and cases where this skill competes with another but should win. + +For the **should-not-trigger** queries (8-10), the most valuable ones are the near-misses — queries that share keywords or concepts with the skill but actually need something different. Think adjacent domains, ambiguous phrasing where a naive keyword match would trigger but shouldn't, and cases where the query touches on something the skill does but in a context where another tool is more appropriate. + +The key thing to avoid: don't make should-not-trigger queries obviously irrelevant. "Write a fibonacci function" as a negative test for a PDF skill is too easy — it doesn't test anything. The negative cases should be genuinely tricky. + +### Step 2: Review with user + +Present the eval set to the user for review using the HTML template: + +1. Read the template from `assets/eval_review.html` +2. 
Replace the placeholders: + - `__EVAL_DATA_PLACEHOLDER__` → the JSON array of eval items (no quotes around it — it's a JS variable assignment) + - `__SKILL_NAME_PLACEHOLDER__` → the skill's name + - `__SKILL_DESCRIPTION_PLACEHOLDER__` → the skill's current description +3. Write to a temp file (e.g., `/tmp/eval_review_<skill-name>.html`) and open it: `open /tmp/eval_review_<skill-name>.html` +4. The user can edit queries, toggle should-trigger, add/remove entries, then click "Export Eval Set" +5. The file downloads to `~/Downloads/eval_set.json` — check the Downloads folder for the most recent version in case there are multiple (e.g., `eval_set (1).json`) + +This step matters — bad eval queries lead to bad descriptions. + +### Step 3: Run the optimization loop + +**API Key Prerequisite:** Ensure credentials are loaded before running: check whether the required env vars are already set, source `.env` if it exists, or ask the user to create one if it is missing. + +Tell the user: "This will take some time — I'll run the optimization loop in the background and check on it periodically." + +Save the eval set to the workspace, then run in the background: + +```bash +source .env && python -m scripts.run_loop \ + --eval-set <path-to-eval-set.json> \ + --skill-path <path-to-skill> \ + --model ${SKILL_MODEL} \ + --platform <platform> \ + --max-iterations 5 \ + --verbose +``` + +Use `$SKILL_MODEL` from `.env`. If it's not set, fall back to the model ID from your system prompt (the one powering the current session) so the triggering test matches what the user actually experiences. Set `--platform cursor` for Cursor skills (uses LLM simulation instead of `claude -p` CLI). + +While it runs, periodically tail the output to give the user updates on which iteration it's on and what the scores look like. + +This handles the full optimization loop automatically. 
It splits the eval set into 60% train and 40% held-out test, evaluates the current description (running each query 3 times to get a reliable trigger rate), then calls Claude with extended thinking to propose improvements based on what failed. It re-evaluates each new description on both train and test, iterating up to 5 times. When it's done, it opens an HTML report in the browser showing the results per iteration and returns JSON with `best_description` — selected by test score rather than train score to avoid overfitting. + +### How skill triggering works + +Understanding the triggering mechanism helps design better eval queries. Skills appear in the agent's `available_skills` list with their name + description, and the agent decides whether to consult a skill based on that description. The important thing to know is that AI agents only consult skills for tasks they can't easily handle on their own — simple, one-step queries like "read this PDF" may not trigger a skill even if the description matches perfectly, because the agent can handle them directly with basic tools. Complex, multi-step, or specialized queries reliably trigger skills when the description matches. + +This means your eval queries should be substantive enough that Claude would actually benefit from consulting a skill. Simple queries like "read file X" are poor test cases — they won't trigger skills regardless of description quality. + +### Step 4: Apply the result + +Take `best_description` from the JSON output and update the skill's SKILL.md frontmatter. Show the user before/after and report the scores. + +--- + +### Package and Present (only if `present_files` tool is available) + +Check whether you have access to the `present_files` tool. If you don't, skip this step. If you do, package the skill and present the .skill file to the user: + +```bash +python -m scripts.package_skill +``` + +After packaging, direct the user to the resulting `.skill` file path so they can install it. 
+ +--- + +## Cursor-Specific Instructions + +When running inside Cursor: + +- The core workflow is the same: draft, test, review, improve, repeat +- Cursor does not have subagents. Run test cases sequentially — read the skill's SKILL.md, then follow its instructions to accomplish the test prompt yourself, one at a time. +- Use `--platform cursor` for all Python scripts (`run_eval`, `run_loop`, `improve_description`, `package_skill`). +- Description optimization uses LLM simulation rather than CLI testing. The simulation asks a model "would you invoke this skill given this query?" — it's directionally accurate for comparing descriptions but not a perfect proxy for Cursor's actual runtime behavior. +- Cursor supports the `disable-model-invocation: true` frontmatter field — set this for skills that should only be invokable via `/skill-name` and never auto-triggered by the agent. +- Blind comparison requires subagents — skip it in Cursor. +- Packaging works identically. Place the resulting skill folder in `.cursor/skills/` (or `.agents/skills/` for cross-platform). +- When generating the eval viewer, use `--static ` if `webbrowser.open()` is not available in your Cursor environment. + +--- + +## Reference files + +The agents/ directory contains instructions for specialized subagents. Read them when you need to spawn the relevant subagent. + +- `agents/grader.md` — How to evaluate assertions against outputs +- `agents/comparator.md` — How to do blind A/B comparison between two outputs +- `agents/analyzer.md` — How to analyze why one version beat another + +The references/ directory has additional documentation: +- `references/schemas.md` — JSON structures for evals.json, grading.json, etc. 
+ +--- + +Repeating one more time the core loop here for emphasis: + +- Figure out what the skill is about +- Draft or edit the skill +- Run the agent with access to the skill on test prompts +- With the user, evaluate the outputs: + - Create benchmark.json and run `eval-viewer/generate_review.py` to help the user review them + - Run quantitative evals +- Repeat until you and the user are satisfied +- Package the final skill and return it to the user. + +Please add steps to your TodoList, if you have such a thing, to make sure you don't forget. + +Good luck! diff --git a/.agents/skills/skill-creator/agents/analyzer.md b/.agents/skills/skill-creator/agents/analyzer.md new file mode 100644 index 0000000..14e41d6 --- /dev/null +++ b/.agents/skills/skill-creator/agents/analyzer.md @@ -0,0 +1,274 @@ +# Post-hoc Analyzer Agent + +Analyze blind comparison results to understand WHY the winner won and generate improvement suggestions. + +## Role + +After the blind comparator determines a winner, the Post-hoc Analyzer "unblinds" the results by examining the skills and transcripts. The goal is to extract actionable insights: what made the winner better, and how can the loser be improved? + +## Inputs + +You receive these parameters in your prompt: + +- **winner**: "A" or "B" (from blind comparison) +- **winner_skill_path**: Path to the skill that produced the winning output +- **winner_transcript_path**: Path to the execution transcript for the winner +- **loser_skill_path**: Path to the skill that produced the losing output +- **loser_transcript_path**: Path to the execution transcript for the loser +- **comparison_result_path**: Path to the blind comparator's output JSON +- **output_path**: Where to save the analysis results + +## Process + +### Step 1: Read Comparison Result + +1. Read the blind comparator's output at comparison_result_path +2. Note the winning side (A or B), the reasoning, and any scores +3. 
Understand what the comparator valued in the winning output + +### Step 2: Read Both Skills + +1. Read the winner skill's SKILL.md and key referenced files +2. Read the loser skill's SKILL.md and key referenced files +3. Identify structural differences: + - Instructions clarity and specificity + - Script/tool usage patterns + - Example coverage + - Edge case handling + +### Step 3: Read Both Transcripts + +1. Read the winner's transcript +2. Read the loser's transcript +3. Compare execution patterns: + - How closely did each follow their skill's instructions? + - What tools were used differently? + - Where did the loser diverge from optimal behavior? + - Did either encounter errors or make recovery attempts? + +### Step 4: Analyze Instruction Following + +For each transcript, evaluate: +- Did the agent follow the skill's explicit instructions? +- Did the agent use the skill's provided tools/scripts? +- Were there missed opportunities to leverage skill content? +- Did the agent add unnecessary steps not in the skill? + +Score instruction following 1-10 and note specific issues. + +### Step 5: Identify Winner Strengths + +Determine what made the winner better: +- Clearer instructions that led to better behavior? +- Better scripts/tools that produced better output? +- More comprehensive examples that guided edge cases? +- Better error handling guidance? + +Be specific. Quote from skills/transcripts where relevant. + +### Step 6: Identify Loser Weaknesses + +Determine what held the loser back: +- Ambiguous instructions that led to suboptimal choices? +- Missing tools/scripts that forced workarounds? +- Gaps in edge case coverage? +- Poor error handling that caused failures? + +### Step 7: Generate Improvement Suggestions + +Based on the analysis, produce actionable suggestions for improving the loser skill: +- Specific instruction changes to make +- Tools/scripts to add or modify +- Examples to include +- Edge cases to address + +Prioritize by impact. 
Focus on changes that would have changed the outcome. + +### Step 8: Write Analysis Results + +Save structured analysis to `{output_path}`. + +## Output Format + +Write a JSON file with this structure: + +```json +{ + "comparison_summary": { + "winner": "A", + "winner_skill": "path/to/winner/skill", + "loser_skill": "path/to/loser/skill", + "comparator_reasoning": "Brief summary of why comparator chose winner" + }, + "winner_strengths": [ + "Clear step-by-step instructions for handling multi-page documents", + "Included validation script that caught formatting errors", + "Explicit guidance on fallback behavior when OCR fails" + ], + "loser_weaknesses": [ + "Vague instruction 'process the document appropriately' led to inconsistent behavior", + "No script for validation, agent had to improvise and made errors", + "No guidance on OCR failure, agent gave up instead of trying alternatives" + ], + "instruction_following": { + "winner": { + "score": 9, + "issues": [ + "Minor: skipped optional logging step" + ] + }, + "loser": { + "score": 6, + "issues": [ + "Did not use the skill's formatting template", + "Invented own approach instead of following step 3", + "Missed the 'always validate output' instruction" + ] + } + }, + "improvement_suggestions": [ + { + "priority": "high", + "category": "instructions", + "suggestion": "Replace 'process the document appropriately' with explicit steps: 1) Extract text, 2) Identify sections, 3) Format per template", + "expected_impact": "Would eliminate ambiguity that caused inconsistent behavior" + }, + { + "priority": "high", + "category": "tools", + "suggestion": "Add validate_output.py script similar to winner skill's validation approach", + "expected_impact": "Would catch formatting errors before final output" + }, + { + "priority": "medium", + "category": "error_handling", + "suggestion": "Add fallback instructions: 'If OCR fails, try: 1) different resolution, 2) image preprocessing, 3) manual extraction'", + "expected_impact": 
"Would prevent early failure on difficult documents" + } + ], + "transcript_insights": { + "winner_execution_pattern": "Read skill -> Followed 5-step process -> Used validation script -> Fixed 2 issues -> Produced output", + "loser_execution_pattern": "Read skill -> Unclear on approach -> Tried 3 different methods -> No validation -> Output had errors" + } +} +``` + +## Guidelines + +- **Be specific**: Quote from skills and transcripts, don't just say "instructions were unclear" +- **Be actionable**: Suggestions should be concrete changes, not vague advice +- **Focus on skill improvements**: The goal is to improve the losing skill, not critique the agent +- **Prioritize by impact**: Which changes would most likely have changed the outcome? +- **Consider causation**: Did the skill weakness actually cause the worse output, or is it incidental? +- **Stay objective**: Analyze what happened, don't editorialize +- **Think about generalization**: Would this improvement help on other evals too? + +## Categories for Suggestions + +Use these categories to organize improvement suggestions: + +| Category | Description | +|----------|-------------| +| `instructions` | Changes to the skill's prose instructions | +| `tools` | Scripts, templates, or utilities to add/modify | +| `examples` | Example inputs/outputs to include | +| `error_handling` | Guidance for handling failures | +| `structure` | Reorganization of skill content | +| `references` | External docs or resources to add | + +## Priority Levels + +- **high**: Would likely change the outcome of this comparison +- **medium**: Would improve quality but may not change win/loss +- **low**: Nice to have, marginal improvement + +--- + +# Analyzing Benchmark Results + +When analyzing benchmark results, the analyzer's purpose is to **surface patterns and anomalies** across multiple runs, not suggest skill improvements. 
+ +## Role + +Review all benchmark run results and generate freeform notes that help the user understand skill performance. Focus on patterns that wouldn't be visible from aggregate metrics alone. + +## Inputs + +You receive these parameters in your prompt: + +- **benchmark_data_path**: Path to the in-progress benchmark.json with all run results +- **skill_path**: Path to the skill being benchmarked +- **output_path**: Where to save the notes (as JSON array of strings) + +## Process + +### Step 1: Read Benchmark Data + +1. Read the benchmark.json containing all run results +2. Note the configurations tested (with_skill, without_skill) +3. Understand the run_summary aggregates already calculated + +### Step 2: Analyze Per-Assertion Patterns + +For each expectation across all runs: +- Does it **always pass** in both configurations? (may not differentiate skill value) +- Does it **always fail** in both configurations? (may be broken or beyond capability) +- Does it **always pass with skill but fail without**? (skill clearly adds value here) +- Does it **always fail with skill but pass without**? (skill may be hurting) +- Is it **highly variable**? (flaky expectation or non-deterministic behavior) + +### Step 3: Analyze Cross-Eval Patterns + +Look for patterns across evals: +- Are certain eval types consistently harder/easier? +- Do some evals show high variance while others are stable? +- Are there surprising results that contradict expectations? + +### Step 4: Analyze Metrics Patterns + +Look at time_seconds, tokens, tool_calls: +- Does the skill significantly increase execution time? +- Is there high variance in resource usage? +- Are there outlier runs that skew the aggregates? + +### Step 5: Generate Notes + +Write freeform observations as a list of strings. 
Each note should: +- State a specific observation +- Be grounded in the data (not speculation) +- Help the user understand something the aggregate metrics don't show + +Examples: +- "Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value" +- "Eval 3 shows high variance (50% ± 40%) - run 2 had an unusual failure that may be flaky" +- "Without-skill runs consistently fail on table extraction expectations (0% pass rate)" +- "Skill adds 13s average execution time but improves pass rate by 50%" +- "Token usage is 80% higher with skill, primarily due to script output parsing" +- "All 3 without-skill runs for eval 1 produced empty output" + +### Step 6: Write Notes + +Save notes to `{output_path}` as a JSON array of strings: + +```json +[ + "Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value", + "Eval 3 shows high variance (50% ± 40%) - run 2 had an unusual failure", + "Without-skill runs consistently fail on table extraction expectations", + "Skill adds 13s average execution time but improves pass rate by 50%" +] +``` + +## Guidelines + +**DO:** +- Report what you observe in the data +- Be specific about which evals, expectations, or runs you're referring to +- Note patterns that aggregate metrics would hide +- Provide context that helps interpret the numbers + +**DO NOT:** +- Suggest improvements to the skill (that's for the improvement step, not benchmarking) +- Make subjective quality judgments ("the output was good/bad") +- Speculate about causes without evidence +- Repeat information already in the run_summary aggregates diff --git a/.agents/skills/skill-creator/agents/comparator.md b/.agents/skills/skill-creator/agents/comparator.md new file mode 100644 index 0000000..80e00eb --- /dev/null +++ b/.agents/skills/skill-creator/agents/comparator.md @@ -0,0 +1,202 @@ +# Blind Comparator Agent + +Compare two outputs WITHOUT knowing which skill produced them. 
+ +## Role + +The Blind Comparator judges which output better accomplishes the eval task. You receive two outputs labeled A and B, but you do NOT know which skill produced which. This prevents bias toward a particular skill or approach. + +Your judgment is based purely on output quality and task completion. + +## Inputs + +You receive these parameters in your prompt: + +- **output_a_path**: Path to the first output file or directory +- **output_b_path**: Path to the second output file or directory +- **eval_prompt**: The original task/prompt that was executed +- **expectations**: List of expectations to check (optional - may be empty) + +## Process + +### Step 1: Read Both Outputs + +1. Examine output A (file or directory) +2. Examine output B (file or directory) +3. Note the type, structure, and content of each +4. If outputs are directories, examine all relevant files inside + +### Step 2: Understand the Task + +1. Read the eval_prompt carefully +2. Identify what the task requires: + - What should be produced? + - What qualities matter (accuracy, completeness, format)? + - What would distinguish a good output from a poor one? 
+ +### Step 3: Generate Evaluation Rubric + +Based on the task, generate a rubric with two dimensions: + +**Content Rubric** (what the output contains): +| Criterion | 1 (Poor) | 3 (Acceptable) | 5 (Excellent) | +|-----------|----------|----------------|---------------| +| Correctness | Major errors | Minor errors | Fully correct | +| Completeness | Missing key elements | Mostly complete | All elements present | +| Accuracy | Significant inaccuracies | Minor inaccuracies | Accurate throughout | + +**Structure Rubric** (how the output is organized): +| Criterion | 1 (Poor) | 3 (Acceptable) | 5 (Excellent) | +|-----------|----------|----------------|---------------| +| Organization | Disorganized | Reasonably organized | Clear, logical structure | +| Formatting | Inconsistent/broken | Mostly consistent | Professional, polished | +| Usability | Difficult to use | Usable with effort | Easy to use | + +Adapt criteria to the specific task. For example: +- PDF form → "Field alignment", "Text readability", "Data placement" +- Document → "Section structure", "Heading hierarchy", "Paragraph flow" +- Data output → "Schema correctness", "Data types", "Completeness" + +### Step 4: Evaluate Each Output Against the Rubric + +For each output (A and B): + +1. **Score each criterion** on the rubric (1-5 scale) +2. **Calculate dimension totals**: Content score, Structure score +3. **Calculate overall score**: Average of dimension scores, scaled to 1-10 + +### Step 5: Check Assertions (if provided) + +If expectations are provided: + +1. Check each expectation against output A +2. Check each expectation against output B +3. Count pass rates for each output +4. Use expectation scores as secondary evidence (not the primary decision factor) + +### Step 6: Determine the Winner + +Compare A and B based on (in priority order): + +1. **Primary**: Overall rubric score (content + structure) +2. **Secondary**: Assertion pass rates (if applicable) +3. 
**Tiebreaker**: If truly equal, declare a TIE + +Be decisive - ties should be rare. One output is usually better, even if marginally. + +### Step 7: Write Comparison Results + +Save results to a JSON file at the path specified (or `comparison.json` if not specified). + +## Output Format + +Write a JSON file with this structure: + +```json +{ + "winner": "A", + "reasoning": "Output A provides a complete solution with proper formatting and all required fields. Output B is missing the date field and has formatting inconsistencies.", + "rubric": { + "A": { + "content": { + "correctness": 5, + "completeness": 5, + "accuracy": 4 + }, + "structure": { + "organization": 4, + "formatting": 5, + "usability": 4 + }, + "content_score": 4.7, + "structure_score": 4.3, + "overall_score": 9.0 + }, + "B": { + "content": { + "correctness": 3, + "completeness": 2, + "accuracy": 3 + }, + "structure": { + "organization": 3, + "formatting": 2, + "usability": 3 + }, + "content_score": 2.7, + "structure_score": 2.7, + "overall_score": 5.4 + } + }, + "output_quality": { + "A": { + "score": 9, + "strengths": ["Complete solution", "Well-formatted", "All fields present"], + "weaknesses": ["Minor style inconsistency in header"] + }, + "B": { + "score": 5, + "strengths": ["Readable output", "Correct basic structure"], + "weaknesses": ["Missing date field", "Formatting inconsistencies", "Partial data extraction"] + } + }, + "expectation_results": { + "A": { + "passed": 4, + "total": 5, + "pass_rate": 0.80, + "details": [ + {"text": "Output includes name", "passed": true}, + {"text": "Output includes date", "passed": true}, + {"text": "Format is PDF", "passed": true}, + {"text": "Contains signature", "passed": false}, + {"text": "Readable text", "passed": true} + ] + }, + "B": { + "passed": 3, + "total": 5, + "pass_rate": 0.60, + "details": [ + {"text": "Output includes name", "passed": true}, + {"text": "Output includes date", "passed": false}, + {"text": "Format is PDF", "passed": true}, + 
{"text": "Contains signature", "passed": false}, + {"text": "Readable text", "passed": true} + ] + } + } +} +``` + +If no expectations were provided, omit the `expectation_results` field entirely. + +## Field Descriptions + +- **winner**: "A", "B", or "TIE" +- **reasoning**: Clear explanation of why the winner was chosen (or why it's a tie) +- **rubric**: Structured rubric evaluation for each output + - **content**: Scores for content criteria (correctness, completeness, accuracy) + - **structure**: Scores for structure criteria (organization, formatting, usability) + - **content_score**: Average of content criteria (1-5) + - **structure_score**: Average of structure criteria (1-5) + - **overall_score**: Combined score scaled to 1-10 +- **output_quality**: Summary quality assessment + - **score**: 1-10 rating (should match rubric overall_score) + - **strengths**: List of positive aspects + - **weaknesses**: List of issues or shortcomings +- **expectation_results**: (Only if expectations provided) + - **passed**: Number of expectations that passed + - **total**: Total number of expectations + - **pass_rate**: Fraction passed (0.0 to 1.0) + - **details**: Individual expectation results + +## Guidelines + +- **Stay blind**: DO NOT try to infer which skill produced which output. Judge purely on output quality. +- **Be specific**: Cite specific examples when explaining strengths and weaknesses. +- **Be decisive**: Choose a winner unless outputs are genuinely equivalent. +- **Output quality first**: Assertion scores are secondary to overall task completion. +- **Be objective**: Don't favor outputs based on style preferences; focus on correctness and completeness. +- **Explain your reasoning**: The reasoning field should make it clear why you chose the winner. +- **Handle edge cases**: If both outputs fail, pick the one that fails less badly. If both are excellent, pick the one that's marginally better. 
diff --git a/.agents/skills/skill-creator/agents/grader.md b/.agents/skills/skill-creator/agents/grader.md new file mode 100644 index 0000000..558ab05 --- /dev/null +++ b/.agents/skills/skill-creator/agents/grader.md @@ -0,0 +1,223 @@ +# Grader Agent + +Evaluate expectations against an execution transcript and outputs. + +## Role + +The Grader reviews a transcript and output files, then determines whether each expectation passes or fails. Provide clear evidence for each judgment. + +You have two jobs: grade the outputs, and critique the evals themselves. A passing grade on a weak assertion is worse than useless — it creates false confidence. When you notice an assertion that's trivially satisfied, or an important outcome that no assertion checks, say so. + +## Inputs + +You receive these parameters in your prompt: + +- **expectations**: List of expectations to evaluate (strings) +- **transcript_path**: Path to the execution transcript (markdown file) +- **outputs_dir**: Directory containing output files from execution + +## Process + +### Step 1: Read the Transcript + +1. Read the transcript file completely +2. Note the eval prompt, execution steps, and final result +3. Identify any issues or errors documented + +### Step 2: Examine Output Files + +1. List files in outputs_dir +2. Read/examine each file relevant to the expectations. If outputs aren't plain text, use the inspection tools provided in your prompt — don't rely solely on what the transcript says the executor produced. +3. Note contents, structure, and quality + +### Step 3: Evaluate Each Assertion + +For each expectation: + +1. **Search for evidence** in the transcript and outputs +2. **Determine verdict**: + - **PASS**: Clear evidence the expectation is true AND the evidence reflects genuine task completion, not just surface-level compliance + - **FAIL**: No evidence, or evidence contradicts the expectation, or the evidence is superficial (e.g., correct filename but empty/wrong content) +3. 
**Cite the evidence**: Quote the specific text or describe what you found + +### Step 4: Extract and Verify Claims + +Beyond the predefined expectations, extract implicit claims from the outputs and verify them: + +1. **Extract claims** from the transcript and outputs: + - Factual statements ("The form has 12 fields") + - Process claims ("Used pypdf to fill the form") + - Quality claims ("All fields were filled correctly") + +2. **Verify each claim**: + - **Factual claims**: Can be checked against the outputs or external sources + - **Process claims**: Can be verified from the transcript + - **Quality claims**: Evaluate whether the claim is justified + +3. **Flag unverifiable claims**: Note claims that cannot be verified with available information + +This catches issues that predefined expectations might miss. + +### Step 5: Read User Notes + +If `{outputs_dir}/user_notes.md` exists: +1. Read it and note any uncertainties or issues flagged by the executor +2. Include relevant concerns in the grading output +3. These may reveal problems even when expectations pass + +### Step 6: Critique the Evals + +After grading, consider whether the evals themselves could be improved. Only surface suggestions when there's a clear gap. + +Good suggestions test meaningful outcomes — assertions that are hard to satisfy without actually doing the work correctly. Think about what makes an assertion *discriminating*: it passes when the skill genuinely succeeds and fails when it doesn't. + +Suggestions worth raising: +- An assertion that passed but would also pass for a clearly wrong output (e.g., checking filename existence but not file content) +- An important outcome you observed — good or bad — that no assertion covers at all +- An assertion that can't actually be verified from the available outputs + +Keep the bar high. The goal is to flag things the eval author would say "good catch" about, not to nitpick every assertion. 
+ +### Step 7: Write Grading Results + +Save results to `{outputs_dir}/../grading.json` (sibling to outputs_dir). + +## Grading Criteria + +**PASS when**: +- The transcript or outputs clearly demonstrate the expectation is true +- Specific evidence can be cited +- The evidence reflects genuine substance, not just surface compliance (e.g., a file exists AND contains correct content, not just the right filename) + +**FAIL when**: +- No evidence found for the expectation +- Evidence contradicts the expectation +- The expectation cannot be verified from available information +- The evidence is superficial — the assertion is technically satisfied but the underlying task outcome is wrong or incomplete +- The output appears to meet the assertion by coincidence rather than by actually doing the work + +**When uncertain**: The burden of proof to pass is on the expectation. + +### Step 8: Read Executor Metrics and Timing + +1. If `{outputs_dir}/metrics.json` exists, read it and include in grading output +2. If `{outputs_dir}/../timing.json` exists, read it and include timing data + +## Output Format + +Write a JSON file with this structure: + +```json +{ + "expectations": [ + { + "text": "The output includes the name 'John Smith'", + "passed": true, + "evidence": "Found in transcript Step 3: 'Extracted names: John Smith, Sarah Johnson'" + }, + { + "text": "The spreadsheet has a SUM formula in cell B10", + "passed": false, + "evidence": "No spreadsheet was created. The output was a text file." 
+ }, + { + "text": "The assistant used the skill's OCR script", + "passed": true, + "evidence": "Transcript Step 2 shows: 'Tool: Bash - python ocr_script.py image.png'" + } + ], + "summary": { + "passed": 2, + "failed": 1, + "total": 3, + "pass_rate": 0.67 + }, + "execution_metrics": { + "tool_calls": { + "Read": 5, + "Write": 2, + "Bash": 8 + }, + "total_tool_calls": 15, + "total_steps": 6, + "errors_encountered": 0, + "output_chars": 12450, + "transcript_chars": 3200 + }, + "timing": { + "executor_duration_seconds": 165.0, + "grader_duration_seconds": 26.0, + "total_duration_seconds": 191.0 + }, + "claims": [ + { + "claim": "The form has 12 fillable fields", + "type": "factual", + "verified": true, + "evidence": "Counted 12 fields in field_info.json" + }, + { + "claim": "All required fields were populated", + "type": "quality", + "verified": false, + "evidence": "Reference section was left blank despite data being available" + } + ], + "user_notes_summary": { + "uncertainties": ["Used 2023 data, may be stale"], + "needs_review": [], + "workarounds": ["Fell back to text overlay for non-fillable fields"] + }, + "eval_feedback": { + "suggestions": [ + { + "assertion": "The output includes the name 'John Smith'", + "reason": "A hallucinated document that mentions the name would also pass — consider checking it appears as the primary contact with matching phone and email from the input" + }, + { + "reason": "No assertion checks whether the extracted phone numbers match the input — I observed incorrect numbers in the output that went uncaught" + } + ], + "overall": "Assertions check presence but not correctness. Consider adding content verification." 
+ } +} +``` + +## Field Descriptions + +- **expectations**: Array of graded expectations + - **text**: The original expectation text + - **passed**: Boolean - true if expectation passes + - **evidence**: Specific quote or description supporting the verdict +- **summary**: Aggregate statistics + - **passed**: Count of passed expectations + - **failed**: Count of failed expectations + - **total**: Total expectations evaluated + - **pass_rate**: Fraction passed (0.0 to 1.0) +- **execution_metrics**: Copied from executor's metrics.json (if available) + - **output_chars**: Total character count of output files (proxy for tokens) + - **transcript_chars**: Character count of transcript +- **timing**: Wall clock timing from timing.json (if available) + - **executor_duration_seconds**: Time spent in executor subagent + - **total_duration_seconds**: Total elapsed time for the run +- **claims**: Extracted and verified claims from the output + - **claim**: The statement being verified + - **type**: "factual", "process", or "quality" + - **verified**: Boolean - whether the claim holds + - **evidence**: Supporting or contradicting evidence +- **user_notes_summary**: Issues flagged by the executor + - **uncertainties**: Things the executor wasn't sure about + - **needs_review**: Items requiring human attention + - **workarounds**: Places where the skill didn't work as expected +- **eval_feedback**: Improvement suggestions for the evals (only when warranted) + - **suggestions**: List of concrete suggestions, each with a `reason` and optionally an `assertion` it relates to + - **overall**: Brief assessment — can be "No suggestions, evals look solid" if nothing to flag + +## Guidelines + +- **Be objective**: Base verdicts on evidence, not assumptions +- **Be specific**: Quote the exact text that supports your verdict +- **Be thorough**: Check both transcript and output files +- **Be consistent**: Apply the same standard to each expectation +- **Explain failures**: Make it clear why 
evidence was insufficient +- **No partial credit**: Each expectation is pass or fail, not partial diff --git a/.agents/skills/skill-creator/assets/eval_review.html b/.agents/skills/skill-creator/assets/eval_review.html new file mode 100644 index 0000000..938ff32 --- /dev/null +++ b/.agents/skills/skill-creator/assets/eval_review.html @@ -0,0 +1,146 @@ + + + + + + Eval Set Review - __SKILL_NAME_PLACEHOLDER__ + + + + + + +

Eval Set Review: __SKILL_NAME_PLACEHOLDER__

+

Current description: __SKILL_DESCRIPTION_PLACEHOLDER__

+ +
+ + +
+ + + + + + + + + + +
QueryShould TriggerActions
+ +

+ + + + diff --git a/.agents/skills/skill-creator/eval-viewer/generate_review.py b/.agents/skills/skill-creator/eval-viewer/generate_review.py new file mode 100644 index 0000000..7fa5978 --- /dev/null +++ b/.agents/skills/skill-creator/eval-viewer/generate_review.py @@ -0,0 +1,471 @@ +#!/usr/bin/env python3 +"""Generate and serve a review page for eval results. + +Reads the workspace directory, discovers runs (directories with outputs/), +embeds all output data into a self-contained HTML page, and serves it via +a tiny HTTP server. Feedback auto-saves to feedback.json in the workspace. + +Usage: + python generate_review.py [--port PORT] [--skill-name NAME] + python generate_review.py --previous-feedback /path/to/old/feedback.json + +No dependencies beyond the Python stdlib are required. +""" + +import argparse +import base64 +import json +import mimetypes +import os +import re +import signal +import subprocess +import sys +import time +import webbrowser +from functools import partial +from http.server import HTTPServer, BaseHTTPRequestHandler +from pathlib import Path + +# Files to exclude from output listings +METADATA_FILES = {"transcript.md", "user_notes.md", "metrics.json"} + +# Extensions we render as inline text +TEXT_EXTENSIONS = { + ".txt", ".md", ".json", ".csv", ".py", ".js", ".ts", ".tsx", ".jsx", + ".yaml", ".yml", ".xml", ".html", ".css", ".sh", ".rb", ".go", ".rs", + ".java", ".c", ".cpp", ".h", ".hpp", ".sql", ".r", ".toml", +} + +# Extensions we render as inline images +IMAGE_EXTENSIONS = {".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp"} + +# MIME type overrides for common types +MIME_OVERRIDES = { + ".svg": "image/svg+xml", + ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", +} + + +def get_mime_type(path: Path) -> str: + ext = path.suffix.lower() + 
if ext in MIME_OVERRIDES: + return MIME_OVERRIDES[ext] + mime, _ = mimetypes.guess_type(str(path)) + return mime or "application/octet-stream" + + +def find_runs(workspace: Path) -> list[dict]: + """Recursively find directories that contain an outputs/ subdirectory.""" + runs: list[dict] = [] + _find_runs_recursive(workspace, workspace, runs) + runs.sort(key=lambda r: (r.get("eval_id", float("inf")), r["id"])) + return runs + + +def _find_runs_recursive(root: Path, current: Path, runs: list[dict]) -> None: + if not current.is_dir(): + return + + outputs_dir = current / "outputs" + if outputs_dir.is_dir(): + run = build_run(root, current) + if run: + runs.append(run) + return + + skip = {"node_modules", ".git", "__pycache__", "skill", "inputs"} + for child in sorted(current.iterdir()): + if child.is_dir() and child.name not in skip: + _find_runs_recursive(root, child, runs) + + +def build_run(root: Path, run_dir: Path) -> dict | None: + """Build a run dict with prompt, outputs, and grading data.""" + prompt = "" + eval_id = None + + # Try eval_metadata.json + for candidate in [run_dir / "eval_metadata.json", run_dir.parent / "eval_metadata.json"]: + if candidate.exists(): + try: + metadata = json.loads(candidate.read_text()) + prompt = metadata.get("prompt", "") + eval_id = metadata.get("eval_id") + except (json.JSONDecodeError, OSError): + pass + if prompt: + break + + # Fall back to transcript.md + if not prompt: + for candidate in [run_dir / "transcript.md", run_dir / "outputs" / "transcript.md"]: + if candidate.exists(): + try: + text = candidate.read_text() + match = re.search(r"## Eval Prompt\n\n([\s\S]*?)(?=\n##|$)", text) + if match: + prompt = match.group(1).strip() + except OSError: + pass + if prompt: + break + + if not prompt: + prompt = "(No prompt found)" + + run_id = str(run_dir.relative_to(root)).replace("/", "-").replace("\\", "-") + + # Collect output files + outputs_dir = run_dir / "outputs" + output_files: list[dict] = [] + if 
outputs_dir.is_dir(): + for f in sorted(outputs_dir.iterdir()): + if f.is_file() and f.name not in METADATA_FILES: + output_files.append(embed_file(f)) + + # Load grading if present + grading = None + for candidate in [run_dir / "grading.json", run_dir.parent / "grading.json"]: + if candidate.exists(): + try: + grading = json.loads(candidate.read_text()) + except (json.JSONDecodeError, OSError): + pass + if grading: + break + + return { + "id": run_id, + "prompt": prompt, + "eval_id": eval_id, + "outputs": output_files, + "grading": grading, + } + + +def embed_file(path: Path) -> dict: + """Read a file and return an embedded representation.""" + ext = path.suffix.lower() + mime = get_mime_type(path) + + if ext in TEXT_EXTENSIONS: + try: + content = path.read_text(errors="replace") + except OSError: + content = "(Error reading file)" + return { + "name": path.name, + "type": "text", + "content": content, + } + elif ext in IMAGE_EXTENSIONS: + try: + raw = path.read_bytes() + b64 = base64.b64encode(raw).decode("ascii") + except OSError: + return {"name": path.name, "type": "error", "content": "(Error reading file)"} + return { + "name": path.name, + "type": "image", + "mime": mime, + "data_uri": f"data:{mime};base64,{b64}", + } + elif ext == ".pdf": + try: + raw = path.read_bytes() + b64 = base64.b64encode(raw).decode("ascii") + except OSError: + return {"name": path.name, "type": "error", "content": "(Error reading file)"} + return { + "name": path.name, + "type": "pdf", + "data_uri": f"data:{mime};base64,{b64}", + } + elif ext == ".xlsx": + try: + raw = path.read_bytes() + b64 = base64.b64encode(raw).decode("ascii") + except OSError: + return {"name": path.name, "type": "error", "content": "(Error reading file)"} + return { + "name": path.name, + "type": "xlsx", + "data_b64": b64, + } + else: + # Binary / unknown — base64 download link + try: + raw = path.read_bytes() + b64 = base64.b64encode(raw).decode("ascii") + except OSError: + return {"name": path.name, 
"type": "error", "content": "(Error reading file)"} + return { + "name": path.name, + "type": "binary", + "mime": mime, + "data_uri": f"data:{mime};base64,{b64}", + } + + +def load_previous_iteration(workspace: Path) -> dict[str, dict]: + """Load previous iteration's feedback and outputs. + + Returns a map of run_id -> {"feedback": str, "outputs": list[dict]}. + """ + result: dict[str, dict] = {} + + # Load feedback + feedback_map: dict[str, str] = {} + feedback_path = workspace / "feedback.json" + if feedback_path.exists(): + try: + data = json.loads(feedback_path.read_text()) + feedback_map = { + r["run_id"]: r["feedback"] + for r in data.get("reviews", []) + if r.get("feedback", "").strip() + } + except (json.JSONDecodeError, OSError, KeyError): + pass + + # Load runs (to get outputs) + prev_runs = find_runs(workspace) + for run in prev_runs: + result[run["id"]] = { + "feedback": feedback_map.get(run["id"], ""), + "outputs": run.get("outputs", []), + } + + # Also add feedback for run_ids that had feedback but no matching run + for run_id, fb in feedback_map.items(): + if run_id not in result: + result[run_id] = {"feedback": fb, "outputs": []} + + return result + + +def generate_html( + runs: list[dict], + skill_name: str, + previous: dict[str, dict] | None = None, + benchmark: dict | None = None, +) -> str: + """Generate the complete standalone HTML page with embedded data.""" + template_path = Path(__file__).parent / "viewer.html" + template = template_path.read_text() + + # Build previous_feedback and previous_outputs maps for the template + previous_feedback: dict[str, str] = {} + previous_outputs: dict[str, list[dict]] = {} + if previous: + for run_id, data in previous.items(): + if data.get("feedback"): + previous_feedback[run_id] = data["feedback"] + if data.get("outputs"): + previous_outputs[run_id] = data["outputs"] + + embedded = { + "skill_name": skill_name, + "runs": runs, + "previous_feedback": previous_feedback, + "previous_outputs": 
previous_outputs, + } + if benchmark: + embedded["benchmark"] = benchmark + + data_json = json.dumps(embedded) + + return template.replace("/*__EMBEDDED_DATA__*/", f"const EMBEDDED_DATA = {data_json};") + + +# --------------------------------------------------------------------------- +# HTTP server (stdlib only, zero dependencies) +# --------------------------------------------------------------------------- + +def _kill_port(port: int) -> None: + """Kill any process listening on the given port.""" + try: + result = subprocess.run( + ["lsof", "-ti", f":{port}"], + capture_output=True, text=True, timeout=5, + ) + for pid_str in result.stdout.strip().split("\n"): + if pid_str.strip(): + try: + os.kill(int(pid_str.strip()), signal.SIGTERM) + except (ProcessLookupError, ValueError): + pass + if result.stdout.strip(): + time.sleep(0.5) + except subprocess.TimeoutExpired: + pass + except FileNotFoundError: + print("Note: lsof not found, cannot check if port is in use", file=sys.stderr) + +class ReviewHandler(BaseHTTPRequestHandler): + """Serves the review HTML and handles feedback saves. + + Regenerates the HTML on each page load so that refreshing the browser + picks up new eval outputs without restarting the server. 
+ """ + + def __init__( + self, + workspace: Path, + skill_name: str, + feedback_path: Path, + previous: dict[str, dict], + benchmark_path: Path | None, + *args, + **kwargs, + ): + self.workspace = workspace + self.skill_name = skill_name + self.feedback_path = feedback_path + self.previous = previous + self.benchmark_path = benchmark_path + super().__init__(*args, **kwargs) + + def do_GET(self) -> None: + if self.path == "/" or self.path == "/index.html": + # Regenerate HTML on each request (re-scans workspace for new outputs) + runs = find_runs(self.workspace) + benchmark = None + if self.benchmark_path and self.benchmark_path.exists(): + try: + benchmark = json.loads(self.benchmark_path.read_text()) + except (json.JSONDecodeError, OSError): + pass + html = generate_html(runs, self.skill_name, self.previous, benchmark) + content = html.encode("utf-8") + self.send_response(200) + self.send_header("Content-Type", "text/html; charset=utf-8") + self.send_header("Content-Length", str(len(content))) + self.end_headers() + self.wfile.write(content) + elif self.path == "/api/feedback": + data = b"{}" + if self.feedback_path.exists(): + data = self.feedback_path.read_bytes() + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.send_header("Content-Length", str(len(data))) + self.end_headers() + self.wfile.write(data) + else: + self.send_error(404) + + def do_POST(self) -> None: + if self.path == "/api/feedback": + length = int(self.headers.get("Content-Length", 0)) + body = self.rfile.read(length) + try: + data = json.loads(body) + if not isinstance(data, dict) or "reviews" not in data: + raise ValueError("Expected JSON object with 'reviews' key") + self.feedback_path.write_text(json.dumps(data, indent=2) + "\n") + resp = b'{"ok":true}' + self.send_response(200) + except (json.JSONDecodeError, OSError, ValueError) as e: + resp = json.dumps({"error": str(e)}).encode() + self.send_response(500) + self.send_header("Content-Type", 
"application/json") + self.send_header("Content-Length", str(len(resp))) + self.end_headers() + self.wfile.write(resp) + else: + self.send_error(404) + + def log_message(self, format: str, *args: object) -> None: + # Suppress request logging to keep terminal clean + pass + + +def main() -> None: + parser = argparse.ArgumentParser(description="Generate and serve eval review") + parser.add_argument("workspace", type=Path, help="Path to workspace directory") + parser.add_argument("--port", "-p", type=int, default=3117, help="Server port (default: 3117)") + parser.add_argument("--skill-name", "-n", type=str, default=None, help="Skill name for header") + parser.add_argument( + "--previous-workspace", type=Path, default=None, + help="Path to previous iteration's workspace (shows old outputs and feedback as context)", + ) + parser.add_argument( + "--benchmark", type=Path, default=None, + help="Path to benchmark.json to show in the Benchmark tab", + ) + parser.add_argument( + "--static", "-s", type=Path, default=None, + help="Write standalone HTML to this path instead of starting a server", + ) + args = parser.parse_args() + + workspace = args.workspace.resolve() + if not workspace.is_dir(): + print(f"Error: {workspace} is not a directory", file=sys.stderr) + sys.exit(1) + + runs = find_runs(workspace) + if not runs: + print(f"No runs found in {workspace}", file=sys.stderr) + sys.exit(1) + + skill_name = args.skill_name or workspace.name.replace("-workspace", "") + feedback_path = workspace / "feedback.json" + + previous: dict[str, dict] = {} + if args.previous_workspace: + previous = load_previous_iteration(args.previous_workspace.resolve()) + + benchmark_path = args.benchmark.resolve() if args.benchmark else None + benchmark = None + if benchmark_path and benchmark_path.exists(): + try: + benchmark = json.loads(benchmark_path.read_text()) + except (json.JSONDecodeError, OSError): + pass + + if args.static: + html = generate_html(runs, skill_name, previous, benchmark) + 
args.static.parent.mkdir(parents=True, exist_ok=True) + args.static.write_text(html) + print(f"\n Static viewer written to: {args.static}\n") + sys.exit(0) + + # Kill any existing process on the target port + port = args.port + _kill_port(port) + handler = partial(ReviewHandler, workspace, skill_name, feedback_path, previous, benchmark_path) + try: + server = HTTPServer(("127.0.0.1", port), handler) + except OSError: + # Port still in use after kill attempt — find a free one + server = HTTPServer(("127.0.0.1", 0), handler) + port = server.server_address[1] + + url = f"http://localhost:{port}" + print(f"\n Eval Viewer") + print(f" ─────────────────────────────────") + print(f" URL: {url}") + print(f" Workspace: {workspace}") + print(f" Feedback: {feedback_path}") + if previous: + print(f" Previous: {args.previous_workspace} ({len(previous)} runs)") + if benchmark_path: + print(f" Benchmark: {benchmark_path}") + print(f"\n Press Ctrl+C to stop.\n") + + webbrowser.open(url) + + try: + server.serve_forever() + except KeyboardInterrupt: + print("\nStopped.") + server.server_close() + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/skill-creator/eval-viewer/viewer.html b/.agents/skills/skill-creator/eval-viewer/viewer.html new file mode 100644 index 0000000..6d8e963 --- /dev/null +++ b/.agents/skills/skill-creator/eval-viewer/viewer.html @@ -0,0 +1,1325 @@ + + + + + + Eval Review + + + + + + + +
+
+
+

Eval Review:

+
Review each output and leave feedback below. Navigate with arrow keys or buttons. When done, copy feedback and paste into Claude Code.
+
+
+
+ + + + + +
+
+ +
+
Prompt
+
+
+
+
+ + +
+
Output
+
+
No output files found
+
+
+ + + + + + + + +
+
Your Feedback
+
+ + + +
+
+
+ + +
+ + +
+
+
No benchmark data available. Run a benchmark to see quantitative results here.
+
+
+
+ + +
+
+

Review Complete

+

Your feedback has been saved. Go back to your Claude Code session and tell Claude you're done reviewing.

+
+ +
+
+
+ + +
+ + + + diff --git a/.agents/skills/skill-creator/references/schemas.md b/.agents/skills/skill-creator/references/schemas.md new file mode 100644 index 0000000..188eed5 --- /dev/null +++ b/.agents/skills/skill-creator/references/schemas.md @@ -0,0 +1,446 @@ +# JSON Schemas + +This document defines the JSON schemas used by skill-creator. + +--- + +## SKILL.md Frontmatter + +Valid frontmatter fields for SKILL.md: + +| Field | Required | Platforms | Description | +|-------|----------|-----------|-------------| +| `name` | Yes | All | Skill identifier (kebab-case, max 64 chars) | +| `description` | Yes | All | When to trigger, what it does (max 1024 chars) | +| `license` | No | All | License name or reference | +| `allowed-tools` | No | All | Tool access restrictions | +| `metadata` | No | All | Custom key-value pairs | +| `compatibility` | No | All | Environment/system requirements (max 500 chars) | +| `disable-model-invocation` | No | Cursor | When `true`, skill is only invokable via `/skill-name` (not auto-triggered) | + +--- + +## evals.json + +Defines the evals for a skill. Located at `evals/evals.json` within the skill directory. + +```json +{ + "skill_name": "example-skill", + "evals": [ + { + "id": 1, + "prompt": "User's example prompt", + "expected_output": "Description of expected result", + "files": ["evals/files/sample1.pdf"], + "expectations": [ + "The output includes X", + "The skill used script Y" + ] + } + ] +} +``` + +**Fields:** +- `skill_name`: Name matching the skill's frontmatter +- `evals[].id`: Unique integer identifier +- `evals[].prompt`: The task to execute +- `evals[].expected_output`: Human-readable description of success +- `evals[].files`: Optional list of input file paths (relative to skill root) +- `evals[].expectations`: List of verifiable statements + +--- + +## history.json + +Tracks version progression in Improve mode. Located at workspace root. 
+ +```json +{ + "started_at": "2026-01-15T10:30:00Z", + "skill_name": "pdf", + "current_best": "v2", + "iterations": [ + { + "version": "v0", + "parent": null, + "expectation_pass_rate": 0.65, + "grading_result": "baseline", + "is_current_best": false + }, + { + "version": "v1", + "parent": "v0", + "expectation_pass_rate": 0.75, + "grading_result": "won", + "is_current_best": false + }, + { + "version": "v2", + "parent": "v1", + "expectation_pass_rate": 0.85, + "grading_result": "won", + "is_current_best": true + } + ] +} +``` + +**Fields:** +- `started_at`: ISO timestamp of when improvement started +- `skill_name`: Name of the skill being improved +- `current_best`: Version identifier of the best performer +- `iterations[].version`: Version identifier (v0, v1, ...) +- `iterations[].parent`: Parent version this was derived from +- `iterations[].expectation_pass_rate`: Pass rate from grading +- `iterations[].grading_result`: "baseline", "won", "lost", or "tie" +- `iterations[].is_current_best`: Whether this is the current best version + +--- + +## grading.json + +Output from the grader agent. Located at `<run-dir>/grading.json`. + +```json +{ + "expectations": [ + { + "text": "The output includes the name 'John Smith'", + "passed": true, + "evidence": "Found in transcript Step 3: 'Extracted names: John Smith, Sarah Johnson'" + }, + { + "text": "The spreadsheet has a SUM formula in cell B10", + "passed": false, + "evidence": "No spreadsheet was created. The output was a text file."
+ } + ], + "summary": { + "passed": 2, + "failed": 1, + "total": 3, + "pass_rate": 0.67 + }, + "execution_metrics": { + "tool_calls": { + "Read": 5, + "Write": 2, + "Bash": 8 + }, + "total_tool_calls": 15, + "total_steps": 6, + "errors_encountered": 0, + "output_chars": 12450, + "transcript_chars": 3200 + }, + "timing": { + "executor_duration_seconds": 165.0, + "grader_duration_seconds": 26.0, + "total_duration_seconds": 191.0 + }, + "claims": [ + { + "claim": "The form has 12 fillable fields", + "type": "factual", + "verified": true, + "evidence": "Counted 12 fields in field_info.json" + } + ], + "user_notes_summary": { + "uncertainties": ["Used 2023 data, may be stale"], + "needs_review": [], + "workarounds": ["Fell back to text overlay for non-fillable fields"] + }, + "eval_feedback": { + "suggestions": [ + { + "assertion": "The output includes the name 'John Smith'", + "reason": "A hallucinated document that mentions the name would also pass" + } + ], + "overall": "Assertions check presence but not correctness." + } +} +``` + +**Fields:** +- `expectations[]`: Graded expectations with evidence +- `summary`: Aggregate pass/fail counts +- `execution_metrics`: Tool usage and output size (from executor's metrics.json) +- `timing`: Wall clock timing (from timing.json) +- `claims`: Extracted and verified claims from the output +- `user_notes_summary`: Issues flagged by the executor +- `eval_feedback`: (optional) Improvement suggestions for the evals, only present when the grader identifies issues worth raising + +--- + +## metrics.json + +Output from the executor agent. Located at `<run-dir>/outputs/metrics.json`.
+ +```json +{ + "tool_calls": { + "Read": 5, + "Write": 2, + "Bash": 8, + "Edit": 1, + "Glob": 2, + "Grep": 0 + }, + "total_tool_calls": 18, + "total_steps": 6, + "files_created": ["filled_form.pdf", "field_values.json"], + "errors_encountered": 0, + "output_chars": 12450, + "transcript_chars": 3200 +} +``` + +**Fields:** +- `tool_calls`: Count per tool type +- `total_tool_calls`: Sum of all tool calls +- `total_steps`: Number of major execution steps +- `files_created`: List of output files created +- `errors_encountered`: Number of errors during execution +- `output_chars`: Total character count of output files +- `transcript_chars`: Character count of transcript + +--- + +## timing.json + +Wall clock timing for a run. Located at `<run-dir>/timing.json`. + +**How to capture:** When a subagent task completes, the task notification includes `total_tokens` and `duration_ms`. Save these immediately — they are not persisted anywhere else and cannot be recovered after the fact. + +```json +{ + "total_tokens": 84852, + "duration_ms": 23332, + "total_duration_seconds": 23.3, + "executor_start": "2026-01-15T10:30:00Z", + "executor_end": "2026-01-15T10:32:45Z", + "executor_duration_seconds": 165.0, + "grader_start": "2026-01-15T10:32:46Z", + "grader_end": "2026-01-15T10:33:12Z", + "grader_duration_seconds": 26.0 +} +``` + +--- + +## benchmark.json + +Output from Benchmark mode. Located at `benchmarks/<timestamp>/benchmark.json`.
+ +```json +{ + "metadata": { + "skill_name": "pdf", + "skill_path": "/path/to/pdf", + "executor_model": "claude-sonnet-4-20250514", + "analyzer_model": "most-capable-model", + "timestamp": "2026-01-15T10:30:00Z", + "evals_run": [1, 2, 3], + "runs_per_configuration": 3 + }, + + "runs": [ + { + "eval_id": 1, + "eval_name": "Ocean", + "configuration": "with_skill", + "run_number": 1, + "result": { + "pass_rate": 0.85, + "passed": 6, + "failed": 1, + "total": 7, + "time_seconds": 42.5, + "tokens": 3800, + "tool_calls": 18, + "errors": 0 + }, + "expectations": [ + {"text": "...", "passed": true, "evidence": "..."} + ], + "notes": [ + "Used 2023 data, may be stale", + "Fell back to text overlay for non-fillable fields" + ] + } + ], + + "run_summary": { + "with_skill": { + "pass_rate": {"mean": 0.85, "stddev": 0.05, "min": 0.80, "max": 0.90}, + "time_seconds": {"mean": 45.0, "stddev": 12.0, "min": 32.0, "max": 58.0}, + "tokens": {"mean": 3800, "stddev": 400, "min": 3200, "max": 4100} + }, + "without_skill": { + "pass_rate": {"mean": 0.35, "stddev": 0.08, "min": 0.28, "max": 0.45}, + "time_seconds": {"mean": 32.0, "stddev": 8.0, "min": 24.0, "max": 42.0}, + "tokens": {"mean": 2100, "stddev": 300, "min": 1800, "max": 2500} + }, + "delta": { + "pass_rate": "+0.50", + "time_seconds": "+13.0", + "tokens": "+1700" + } + }, + + "notes": [ + "Assertion 'Output is a PDF file' passes 100% in both configurations - may not differentiate skill value", + "Eval 3 shows high variance (50% ± 40%) - may be flaky or model-dependent", + "Without-skill runs consistently fail on table extraction expectations", + "Skill adds 13s average execution time but improves pass rate by 50%" + ] +} +``` + +**Fields:** +- `metadata`: Information about the benchmark run + - `skill_name`: Name of the skill + - `timestamp`: When the benchmark was run + - `evals_run`: List of eval names or IDs + - `runs_per_configuration`: Number of runs per config (e.g. 
3) +- `runs[]`: Individual run results + - `eval_id`: Numeric eval identifier + - `eval_name`: Human-readable eval name (used as section header in the viewer) + - `configuration`: Must be `"with_skill"` or `"without_skill"` (the viewer uses this exact string for grouping and color coding) + - `run_number`: Integer run number (1, 2, 3...) + - `result`: Nested object with `pass_rate`, `passed`, `failed`, `total`, `time_seconds`, `tokens`, `tool_calls`, `errors` +- `run_summary`: Statistical aggregates per configuration + - `with_skill` / `without_skill`: Each contains `pass_rate`, `time_seconds`, `tokens` objects with `mean`, `stddev`, `min`, and `max` fields + - `delta`: Difference strings like `"+0.50"`, `"+13.0"`, `"+1700"` +- `notes`: Freeform observations from the analyzer + +**Important:** The viewer reads these field names exactly. Using `config` instead of `configuration`, or putting `pass_rate` at the top level of a run instead of nested under `result`, will cause the viewer to show empty/zero values. Always reference this schema when generating benchmark.json manually. + +--- + +## comparison.json + +Output from blind comparator. Located at `<grading-dir>/comparison-N.json`. + +```json +{ + "winner": "A", + "reasoning": "Output A provides a complete solution with proper formatting and all required fields.
Output B is missing the date field and has formatting inconsistencies.", + "rubric": { + "A": { + "content": { + "correctness": 5, + "completeness": 5, + "accuracy": 4 + }, + "structure": { + "organization": 4, + "formatting": 5, + "usability": 4 + }, + "content_score": 4.7, + "structure_score": 4.3, + "overall_score": 9.0 + }, + "B": { + "content": { + "correctness": 3, + "completeness": 2, + "accuracy": 3 + }, + "structure": { + "organization": 3, + "formatting": 2, + "usability": 3 + }, + "content_score": 2.7, + "structure_score": 2.7, + "overall_score": 5.4 + } + }, + "output_quality": { + "A": { + "score": 9, + "strengths": ["Complete solution", "Well-formatted", "All fields present"], + "weaknesses": ["Minor style inconsistency in header"] + }, + "B": { + "score": 5, + "strengths": ["Readable output", "Correct basic structure"], + "weaknesses": ["Missing date field", "Formatting inconsistencies", "Partial data extraction"] + } + }, + "expectation_results": { + "A": { + "passed": 4, + "total": 5, + "pass_rate": 0.80, + "details": [ + {"text": "Output includes name", "passed": true} + ] + }, + "B": { + "passed": 3, + "total": 5, + "pass_rate": 0.60, + "details": [ + {"text": "Output includes name", "passed": true} + ] + } + } +} +``` + +--- + +## analysis.json + +Output from post-hoc analyzer. Located at `<grading-dir>/analysis.json`.
+ +```json +{ + "comparison_summary": { + "winner": "A", + "winner_skill": "path/to/winner/skill", + "loser_skill": "path/to/loser/skill", + "comparator_reasoning": "Brief summary of why comparator chose winner" + }, + "winner_strengths": [ + "Clear step-by-step instructions for handling multi-page documents", + "Included validation script that caught formatting errors" + ], + "loser_weaknesses": [ + "Vague instruction 'process the document appropriately' led to inconsistent behavior", + "No script for validation, agent had to improvise" + ], + "instruction_following": { + "winner": { + "score": 9, + "issues": ["Minor: skipped optional logging step"] + }, + "loser": { + "score": 6, + "issues": [ + "Did not use the skill's formatting template", + "Invented own approach instead of following step 3" + ] + } + }, + "improvement_suggestions": [ + { + "priority": "high", + "category": "instructions", + "suggestion": "Replace 'process the document appropriately' with explicit steps", + "expected_impact": "Would eliminate ambiguity that caused inconsistent behavior" + } + ], + "transcript_insights": { + "winner_execution_pattern": "Read skill -> Followed 5-step process -> Used validation script", + "loser_execution_pattern": "Read skill -> Unclear on approach -> Tried 3 different methods" + } +} +``` diff --git a/.agents/skills/skill-creator/scripts/__init__.py b/.agents/skills/skill-creator/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.agents/skills/skill-creator/scripts/aggregate_benchmark.py b/.agents/skills/skill-creator/scripts/aggregate_benchmark.py new file mode 100755 index 0000000..3e66e8c --- /dev/null +++ b/.agents/skills/skill-creator/scripts/aggregate_benchmark.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python3 +""" +Aggregate individual run results into benchmark summary statistics. 
+ +Reads grading.json files from run directories and produces: +- run_summary with mean, stddev, min, max for each metric +- delta between with_skill and without_skill configurations + +Usage: + python aggregate_benchmark.py + +Example: + python aggregate_benchmark.py benchmarks/2026-01-15T10-30-00/ + +The script supports two directory layouts: + + Workspace layout (from skill-creator iterations): + / + └── eval-N/ + ├── with_skill/ + │ ├── run-1/grading.json + │ └── run-2/grading.json + └── without_skill/ + ├── run-1/grading.json + └── run-2/grading.json + + Legacy layout (with runs/ subdirectory): + / + └── runs/ + └── eval-N/ + ├── with_skill/ + │ └── run-1/grading.json + └── without_skill/ + └── run-1/grading.json +""" + +import argparse +import json +import math +import sys +from datetime import datetime, timezone +from pathlib import Path + + +def calculate_stats(values: list[float]) -> dict: + """Calculate mean, stddev, min, max for a list of values.""" + if not values: + return {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0} + + n = len(values) + mean = sum(values) / n + + if n > 1: + variance = sum((x - mean) ** 2 for x in values) / (n - 1) + stddev = math.sqrt(variance) + else: + stddev = 0.0 + + return { + "mean": round(mean, 4), + "stddev": round(stddev, 4), + "min": round(min(values), 4), + "max": round(max(values), 4) + } + + +def load_run_results(benchmark_dir: Path) -> dict: + """ + Load all run results from a benchmark directory. + + Returns dict keyed by config name (e.g. "with_skill"/"without_skill", + or "new_skill"/"old_skill"), each containing a list of run results. 
+ """ + # Support both layouts: eval dirs directly under benchmark_dir, or under runs/ + runs_dir = benchmark_dir / "runs" + if runs_dir.exists(): + search_dir = runs_dir + elif list(benchmark_dir.glob("eval-*")): + search_dir = benchmark_dir + else: + print(f"No eval directories found in {benchmark_dir} or {benchmark_dir / 'runs'}") + return {} + + results: dict[str, list] = {} + + for eval_idx, eval_dir in enumerate(sorted(search_dir.glob("eval-*"))): + metadata_path = eval_dir / "eval_metadata.json" + if metadata_path.exists(): + try: + with open(metadata_path) as mf: + eval_id = json.load(mf).get("eval_id", eval_idx) + except (json.JSONDecodeError, OSError): + eval_id = eval_idx + else: + try: + eval_id = int(eval_dir.name.split("-")[1]) + except ValueError: + eval_id = eval_idx + + # Discover config directories dynamically rather than hardcoding names + for config_dir in sorted(eval_dir.iterdir()): + if not config_dir.is_dir(): + continue + # Skip non-config directories (inputs, outputs, etc.) 
+ if not list(config_dir.glob("run-*")): + continue + config = config_dir.name + if config not in results: + results[config] = [] + + for run_dir in sorted(config_dir.glob("run-*")): + run_number = int(run_dir.name.split("-")[1]) + grading_file = run_dir / "grading.json" + + if not grading_file.exists(): + print(f"Warning: grading.json not found in {run_dir}") + continue + + try: + with open(grading_file) as f: + grading = json.load(f) + except json.JSONDecodeError as e: + print(f"Warning: Invalid JSON in {grading_file}: {e}") + continue + + # Extract metrics + result = { + "eval_id": eval_id, + "run_number": run_number, + "pass_rate": grading.get("summary", {}).get("pass_rate", 0.0), + "passed": grading.get("summary", {}).get("passed", 0), + "failed": grading.get("summary", {}).get("failed", 0), + "total": grading.get("summary", {}).get("total", 0), + } + + # Extract timing — check grading.json first, then sibling timing.json + timing = grading.get("timing", {}) + result["time_seconds"] = timing.get("total_duration_seconds", 0.0) + timing_file = run_dir / "timing.json" + if result["time_seconds"] == 0.0 and timing_file.exists(): + try: + with open(timing_file) as tf: + timing_data = json.load(tf) + result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0) + result["tokens"] = timing_data.get("total_tokens", 0) + except json.JSONDecodeError: + pass + + # Extract metrics if available + metrics = grading.get("execution_metrics", {}) + result["tool_calls"] = metrics.get("total_tool_calls", 0) + if not result.get("tokens"): + result["tokens"] = metrics.get("output_chars", 0) + result["errors"] = metrics.get("errors_encountered", 0) + + # Extract expectations — viewer requires fields: text, passed, evidence + raw_expectations = grading.get("expectations", []) + for exp in raw_expectations: + if "text" not in exp or "passed" not in exp: + print(f"Warning: expectation in {grading_file} missing required fields (text, passed, evidence): {exp}") + 
result["expectations"] = raw_expectations + + # Extract notes from user_notes_summary + notes_summary = grading.get("user_notes_summary", {}) + notes = [] + notes.extend(notes_summary.get("uncertainties", [])) + notes.extend(notes_summary.get("needs_review", [])) + notes.extend(notes_summary.get("workarounds", [])) + result["notes"] = notes + + results[config].append(result) + + return results + + +def aggregate_results(results: dict) -> dict: + """ + Aggregate run results into summary statistics. + + Returns run_summary with stats for each configuration and delta. + """ + run_summary = {} + configs = list(results.keys()) + + for config in configs: + runs = results.get(config, []) + + if not runs: + run_summary[config] = { + "pass_rate": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0}, + "time_seconds": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0}, + "tokens": {"mean": 0, "stddev": 0, "min": 0, "max": 0} + } + continue + + pass_rates = [r["pass_rate"] for r in runs] + times = [r["time_seconds"] for r in runs] + tokens = [r.get("tokens", 0) for r in runs] + + run_summary[config] = { + "pass_rate": calculate_stats(pass_rates), + "time_seconds": calculate_stats(times), + "tokens": calculate_stats(tokens) + } + + # Calculate delta between the first two configs (if two exist) + if len(configs) >= 2: + primary = run_summary.get(configs[0], {}) + baseline = run_summary.get(configs[1], {}) + else: + primary = run_summary.get(configs[0], {}) if configs else {} + baseline = {} + + delta_pass_rate = primary.get("pass_rate", {}).get("mean", 0) - baseline.get("pass_rate", {}).get("mean", 0) + delta_time = primary.get("time_seconds", {}).get("mean", 0) - baseline.get("time_seconds", {}).get("mean", 0) + delta_tokens = primary.get("tokens", {}).get("mean", 0) - baseline.get("tokens", {}).get("mean", 0) + + run_summary["delta"] = { + "pass_rate": f"{delta_pass_rate:+.2f}", + "time_seconds": f"{delta_time:+.1f}", + "tokens": f"{delta_tokens:+.0f}" + } + + return 
run_summary + + +def generate_benchmark(benchmark_dir: Path, skill_name: str = "", skill_path: str = "") -> dict: + """ + Generate complete benchmark.json from run results. + """ + results = load_run_results(benchmark_dir) + run_summary = aggregate_results(results) + + # Build runs array for benchmark.json + runs = [] + for config in results: + for result in results[config]: + runs.append({ + "eval_id": result["eval_id"], + "configuration": config, + "run_number": result["run_number"], + "result": { + "pass_rate": result["pass_rate"], + "passed": result["passed"], + "failed": result["failed"], + "total": result["total"], + "time_seconds": result["time_seconds"], + "tokens": result.get("tokens", 0), + "tool_calls": result.get("tool_calls", 0), + "errors": result.get("errors", 0) + }, + "expectations": result["expectations"], + "notes": result["notes"] + }) + + # Determine eval IDs from results + eval_ids = sorted(set( + r["eval_id"] + for config in results.values() + for r in config + )) + + benchmark = { + "metadata": { + "skill_name": skill_name or "", + "skill_path": skill_path or "", + "executor_model": "", + "analyzer_model": "", + "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "evals_run": eval_ids, + "runs_per_configuration": 3 + }, + "runs": runs, + "run_summary": run_summary, + "notes": [] # To be filled by analyzer + } + + return benchmark + + +def generate_markdown(benchmark: dict) -> str: + """Generate human-readable benchmark.md from benchmark data.""" + metadata = benchmark["metadata"] + run_summary = benchmark["run_summary"] + + # Determine config names (excluding "delta") + configs = [k for k in run_summary if k != "delta"] + config_a = configs[0] if len(configs) >= 1 else "config_a" + config_b = configs[1] if len(configs) >= 2 else "config_b" + label_a = config_a.replace("_", " ").title() + label_b = config_b.replace("_", " ").title() + + lines = [ + f"# Skill Benchmark: {metadata['skill_name']}", + "", + f"**Model**: 
{metadata['executor_model']}", + f"**Date**: {metadata['timestamp']}", + f"**Evals**: {', '.join(map(str, metadata['evals_run']))} ({metadata['runs_per_configuration']} runs each per configuration)", + "", + "## Summary", + "", + f"| Metric | {label_a} | {label_b} | Delta |", + "|--------|------------|---------------|-------|", + ] + + a_summary = run_summary.get(config_a, {}) + b_summary = run_summary.get(config_b, {}) + delta = run_summary.get("delta", {}) + + # Format pass rate + a_pr = a_summary.get("pass_rate", {}) + b_pr = b_summary.get("pass_rate", {}) + lines.append(f"| Pass Rate | {a_pr.get('mean', 0)*100:.0f}% ± {a_pr.get('stddev', 0)*100:.0f}% | {b_pr.get('mean', 0)*100:.0f}% ± {b_pr.get('stddev', 0)*100:.0f}% | {delta.get('pass_rate', '—')} |") + + # Format time + a_time = a_summary.get("time_seconds", {}) + b_time = b_summary.get("time_seconds", {}) + lines.append(f"| Time | {a_time.get('mean', 0):.1f}s ± {a_time.get('stddev', 0):.1f}s | {b_time.get('mean', 0):.1f}s ± {b_time.get('stddev', 0):.1f}s | {delta.get('time_seconds', '—')}s |") + + # Format tokens + a_tokens = a_summary.get("tokens", {}) + b_tokens = b_summary.get("tokens", {}) + lines.append(f"| Tokens | {a_tokens.get('mean', 0):.0f} ± {a_tokens.get('stddev', 0):.0f} | {b_tokens.get('mean', 0):.0f} ± {b_tokens.get('stddev', 0):.0f} | {delta.get('tokens', '—')} |") + + # Notes section + if benchmark.get("notes"): + lines.extend([ + "", + "## Notes", + "" + ]) + for note in benchmark["notes"]: + lines.append(f"- {note}") + + return "\n".join(lines) + + +def main(): + parser = argparse.ArgumentParser( + description="Aggregate benchmark run results into summary statistics" + ) + parser.add_argument( + "benchmark_dir", + type=Path, + help="Path to the benchmark directory" + ) + parser.add_argument( + "--skill-name", + default="", + help="Name of the skill being benchmarked" + ) + parser.add_argument( + "--skill-path", + default="", + help="Path to the skill being benchmarked" + ) + 
parser.add_argument( + "--output", "-o", + type=Path, + help="Output path for benchmark.json (default: /benchmark.json)" + ) + + args = parser.parse_args() + + if not args.benchmark_dir.exists(): + print(f"Directory not found: {args.benchmark_dir}") + sys.exit(1) + + # Generate benchmark + benchmark = generate_benchmark(args.benchmark_dir, args.skill_name, args.skill_path) + + # Determine output paths + output_json = args.output or (args.benchmark_dir / "benchmark.json") + output_md = output_json.with_suffix(".md") + + # Write benchmark.json + with open(output_json, "w") as f: + json.dump(benchmark, f, indent=2) + print(f"Generated: {output_json}") + + # Write benchmark.md + markdown = generate_markdown(benchmark) + with open(output_md, "w") as f: + f.write(markdown) + print(f"Generated: {output_md}") + + # Print summary + run_summary = benchmark["run_summary"] + configs = [k for k in run_summary if k != "delta"] + delta = run_summary.get("delta", {}) + + print(f"\nSummary:") + for config in configs: + pr = run_summary[config]["pass_rate"]["mean"] + label = config.replace("_", " ").title() + print(f" {label}: {pr*100:.1f}% pass rate") + print(f" Delta: {delta.get('pass_rate', '—')}") + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/skill-creator/scripts/generate_report.py b/.agents/skills/skill-creator/scripts/generate_report.py new file mode 100755 index 0000000..959e30a --- /dev/null +++ b/.agents/skills/skill-creator/scripts/generate_report.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python3 +"""Generate an HTML report from run_loop.py output. + +Takes the JSON output from run_loop.py and generates a visual HTML report +showing each description attempt with check/x for each test case. +Distinguishes between train and test queries. 
+""" + +import argparse +import html +import json +import sys +from pathlib import Path + + +def generate_html(data: dict, auto_refresh: bool = False, skill_name: str = "") -> str: + """Generate HTML report from loop output data. If auto_refresh is True, adds a meta refresh tag.""" + history = data.get("history", []) + holdout = data.get("holdout", 0) + title_prefix = html.escape(skill_name + " \u2014 ") if skill_name else "" + + # Get all unique queries from train and test sets, with should_trigger info + train_queries: list[dict] = [] + test_queries: list[dict] = [] + if history: + for r in history[0].get("train_results", history[0].get("results", [])): + train_queries.append({"query": r["query"], "should_trigger": r.get("should_trigger", True)}) + if history[0].get("test_results"): + for r in history[0].get("test_results", []): + test_queries.append({"query": r["query"], "should_trigger": r.get("should_trigger", True)}) + + refresh_tag = ' \n' if auto_refresh else "" + + html_parts = [""" + + + +""" + refresh_tag + """ """ + title_prefix + """Skill Description Optimization + + + + + + +

""" + title_prefix + """Skill Description Optimization

+
+ Optimizing your skill's description. This page updates automatically as Claude tests different versions of your skill's description. Each row is an iteration — a new description attempt. The columns show test queries: green checkmarks mean the skill triggered correctly (or correctly didn't trigger), red crosses mean it got it wrong. The "Train" score shows performance on queries used to improve the description; the "Test" score shows performance on held-out queries the optimizer hasn't seen. When it's done, Claude will apply the best-performing description to your skill. +
+"""] + + # Summary section + best_test_score = data.get('best_test_score') + best_train_score = data.get('best_train_score') + html_parts.append(f""" +
+

Original: {html.escape(data.get('original_description', 'N/A'))}

+

Best: {html.escape(data.get('best_description', 'N/A'))}

+

Best Score: {data.get('best_score', 'N/A')} {'(test)' if best_test_score else '(train)'}

+

Iterations: {data.get('iterations_run', 0)} | Train: {data.get('train_size', '?')} | Test: {data.get('test_size', '?')}

+
+""") + + # Legend + html_parts.append(""" +
+ Query columns: + Should trigger + Should NOT trigger + Train + Test +
+""") + + # Table header + html_parts.append(""" +
+ + + + + + + +""") + + # Add column headers for train queries + for qinfo in train_queries: + polarity = "positive-col" if qinfo["should_trigger"] else "negative-col" + html_parts.append(f' \n') + + # Add column headers for test queries (different color) + for qinfo in test_queries: + polarity = "positive-col" if qinfo["should_trigger"] else "negative-col" + html_parts.append(f' \n') + + html_parts.append(""" + + +""") + + # Find best iteration for highlighting + if test_queries: + best_iter = max(history, key=lambda h: h.get("test_passed") or 0).get("iteration") + else: + best_iter = max(history, key=lambda h: h.get("train_passed", h.get("passed", 0))).get("iteration") + + # Add rows for each iteration + for h in history: + iteration = h.get("iteration", "?") + train_passed = h.get("train_passed", h.get("passed", 0)) + train_total = h.get("train_total", h.get("total", 0)) + test_passed = h.get("test_passed") + test_total = h.get("test_total") + description = h.get("description", "") + train_results = h.get("train_results", h.get("results", [])) + test_results = h.get("test_results", []) + + # Create lookups for results by query + train_by_query = {r["query"]: r for r in train_results} + test_by_query = {r["query"]: r for r in test_results} if test_results else {} + + # Compute aggregate correct/total runs across all retries + def aggregate_runs(results: list[dict]) -> tuple[int, int]: + correct = 0 + total = 0 + for r in results: + runs = r.get("runs", 0) + triggers = r.get("triggers", 0) + total += runs + if r.get("should_trigger", True): + correct += triggers + else: + correct += runs - triggers + return correct, total + + train_correct, train_runs = aggregate_runs(train_results) + test_correct, test_runs = aggregate_runs(test_results) + + # Determine score classes + def score_class(correct: int, total: int) -> str: + if total > 0: + ratio = correct / total + if ratio >= 0.8: + return "score-good" + elif ratio >= 0.5: + return "score-ok" + return "score-bad" + 
+ train_class = score_class(train_correct, train_runs) + test_class = score_class(test_correct, test_runs) + + row_class = "best-row" if iteration == best_iter else "" + + html_parts.append(f""" + + + + +""") + + # Add result for each train query + for qinfo in train_queries: + r = train_by_query.get(qinfo["query"], {}) + did_pass = r.get("pass", False) + triggers = r.get("triggers", 0) + runs = r.get("runs", 0) + + icon = "✓" if did_pass else "✗" + css_class = "pass" if did_pass else "fail" + + html_parts.append(f' \n') + + # Add result for each test query (with different background) + for qinfo in test_queries: + r = test_by_query.get(qinfo["query"], {}) + did_pass = r.get("pass", False) + triggers = r.get("triggers", 0) + runs = r.get("runs", 0) + + icon = "✓" if did_pass else "✗" + css_class = "pass" if did_pass else "fail" + + html_parts.append(f' \n') + + html_parts.append(" \n") + + html_parts.append(""" +
IterTrainTestDescription{html.escape(qinfo["query"])}{html.escape(qinfo["query"])}
{iteration}{train_correct}/{train_runs}{test_correct}/{test_runs}{html.escape(description)}{icon}{triggers}/{runs}{icon}{triggers}/{runs}
+
+""") + + html_parts.append(""" + + +""") + + return "".join(html_parts) + + +def main(): + parser = argparse.ArgumentParser(description="Generate HTML report from run_loop output") + parser.add_argument("input", help="Path to JSON output from run_loop.py (or - for stdin)") + parser.add_argument("-o", "--output", default=None, help="Output HTML file (default: stdout)") + parser.add_argument("--skill-name", default="", help="Skill name to include in the report title") + args = parser.parse_args() + + if args.input == "-": + data = json.load(sys.stdin) + else: + data = json.loads(Path(args.input).read_text()) + + html_output = generate_html(data, skill_name=args.skill_name) + + if args.output: + Path(args.output).write_text(html_output) + print(f"Report written to {args.output}", file=sys.stderr) + else: + print(html_output) + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/skill-creator/scripts/improve_description.py b/.agents/skills/skill-creator/scripts/improve_description.py new file mode 100755 index 0000000..69f26d7 --- /dev/null +++ b/.agents/skills/skill-creator/scripts/improve_description.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +"""Improve a skill description based on eval results. + +Takes eval results (from run_eval.py) and generates an improved description +using an LLM with extended thinking. 
+""" + +import argparse +import json +import re +import sys +from pathlib import Path + +import anthropic + +from scripts.utils import detect_platform, get_platform_name, parse_skill_md + + +def _call_llm( + prompt: str, + model: str, + thinking_budget: int = 10000, + client: anthropic.Anthropic | None = None, +) -> tuple[str, str]: + """Call Anthropic's API with extended thinking and return (thinking_text, response_text).""" + anth_client = client or anthropic.Anthropic() + response = anth_client.messages.create( + model=model, + max_tokens=16000, + thinking={"type": "adaptive", "budget_tokens": thinking_budget}, + messages=[{"role": "user", "content": prompt}], + ) + thinking_text = "" + text = "" + for block in response.content: + if block.type == "thinking": + thinking_text = block.thinking + elif block.type == "text": + text = block.text + return thinking_text, text + + +def improve_description( + client: anthropic.Anthropic, + skill_name: str, + skill_content: str, + current_description: str, + eval_results: dict, + history: list[dict], + model: str, + test_results: dict | None = None, + log_dir: Path | None = None, + iteration: int | None = None, + platform: str = "claude", +) -> str: + """Call an LLM to improve the description based on eval results.""" + failed_triggers = [ + r for r in eval_results["results"] + if r["should_trigger"] and not r["pass"] + ] + false_triggers = [ + r for r in eval_results["results"] + if not r["should_trigger"] and not r["pass"] + ] + + # Build scores summary + train_score = f"{eval_results['summary']['passed']}/{eval_results['summary']['total']}" + if test_results: + test_score = f"{test_results['summary']['passed']}/{test_results['summary']['total']}" + scores_summary = f"Train: {train_score}, Test: {test_score}" + else: + scores_summary = f"Train: {train_score}" + + platform_name = get_platform_name(platform) + prompt = f"""You are optimizing a skill description for a {platform_name} skill called "{skill_name}". 
A "skill" is sort of like a prompt, but with progressive disclosure -- there's a title and description that the agent sees when deciding whether to use the skill, and then if it does use the skill, it reads the .md file which has lots more details and potentially links to other resources in the skill folder like helper files and scripts and additional documentation or examples. + +The description appears in the agent's "available_skills" list. When a user sends a query, the agent decides whether to invoke the skill based solely on the title and on this description. Your goal is to write a description that triggers for relevant queries, and doesn't trigger for irrelevant ones. + +Here's the current description: + +"{current_description}" + + +Current scores ({scores_summary}): + +""" + if failed_triggers: + prompt += "FAILED TO TRIGGER (should have triggered but didn't):\n" + for r in failed_triggers: + prompt += f' - "{r["query"]}" (triggered {r["triggers"]}/{r["runs"]} times)\n' + prompt += "\n" + + if false_triggers: + prompt += "FALSE TRIGGERS (triggered but shouldn't have):\n" + for r in false_triggers: + prompt += f' - "{r["query"]}" (triggered {r["triggers"]}/{r["runs"]} times)\n' + prompt += "\n" + + if history: + prompt += "PREVIOUS ATTEMPTS (do NOT repeat these — try something structurally different):\n\n" + for h in history: + train_s = f"{h.get('train_passed', h.get('passed', 0))}/{h.get('train_total', h.get('total', 0))}" + test_s = f"{h.get('test_passed', '?')}/{h.get('test_total', '?')}" if h.get('test_passed') is not None else None + score_str = f"train={train_s}" + (f", test={test_s}" if test_s else "") + prompt += f'\n' + prompt += f'Description: "{h["description"]}"\n' + if "results" in h: + prompt += "Train results:\n" + for r in h["results"]: + status = "PASS" if r["pass"] else "FAIL" + prompt += f' [{status}] "{r["query"][:80]}" (triggered {r["triggers"]}/{r["runs"]})\n' + if h.get("note"): + prompt += f'Note: {h["note"]}\n' + prompt += "\n\n" 
+ + prompt += f""" + +Skill content (for context on what the skill does): + +{skill_content} + + +Based on the failures, write a new and improved description that is more likely to trigger correctly. When I say "based on the failures", it's a bit of a tricky line to walk because we don't want to overfit to the specific cases you're seeing. So what I DON'T want you to do is produce an ever-expanding list of specific queries that this skill should or shouldn't trigger for. Instead, try to generalize from the failures to broader categories of user intent and situations where this skill would be useful or not useful. The reason for this is twofold: + +1. Avoid overfitting +2. The list might get loooong and it's injected into ALL queries and there might be a lot of skills, so we don't want to blow too much space on any given description. + +Concretely, your description should not be more than about 100-200 words, even if that comes at the cost of accuracy. + +Here are some tips that we've found to work well in writing these descriptions: +- The skill should be phrased in the imperative -- "Use this skill for" rather than "this skill does" +- The skill description should focus on the user's intent, what they are trying to achieve, vs. the implementation details of how the skill works. +- The description competes with other skills for the agent's attention — make it distinctive and immediately recognizable. +- If you're getting lots of failures after repeated attempts, change things up. Try different sentence structures or wordings. + +I'd encourage you to be creative and mix up the style in different iterations since you'll have multiple opportunities to try different approaches and we'll just grab the highest-scoring one at the end. 
+ +Please respond with only the new description text in tags, nothing else.""" + + thinking_text, text = _call_llm(prompt, model, client=client) + + # Parse out the tags + match = re.search(r"(.*?)", text, re.DOTALL) + description = match.group(1).strip().strip('"') if match else text.strip().strip('"') + + # Log the transcript + transcript: dict = { + "iteration": iteration, + "prompt": prompt, + "thinking": thinking_text, + "response": text, + "parsed_description": description, + "char_count": len(description), + "over_limit": len(description) > 1024, + } + + # If over 1024 chars, ask the model to shorten it + if len(description) > 1024: + shorten_prompt = ( + f"Your description is {len(description)} characters, which exceeds the hard " + "1024 character limit. Please rewrite it to be under 1024 characters while " + "preserving the most important trigger words and intent coverage. Respond with " + "only the new description in tags." + ) + # For the shortening step, use a simple multi-turn prompt via _call_llm + full_shorten_prompt = f"{prompt}\n\nAssistant response:\n{text}\n\nUser follow-up:\n{shorten_prompt}" + shorten_thinking, shorten_text = _call_llm(full_shorten_prompt, model, client=client) + + match = re.search(r"(.*?)", shorten_text, re.DOTALL) + shortened = match.group(1).strip().strip('"') if match else shorten_text.strip().strip('"') + + transcript["rewrite_prompt"] = shorten_prompt + transcript["rewrite_thinking"] = shorten_thinking + transcript["rewrite_response"] = shorten_text + transcript["rewrite_description"] = shortened + transcript["rewrite_char_count"] = len(shortened) + description = shortened + + transcript["final_description"] = description + + if log_dir: + log_dir.mkdir(parents=True, exist_ok=True) + log_file = log_dir / f"improve_iter_{iteration or 'unknown'}.json" + log_file.write_text(json.dumps(transcript, indent=2)) + + return description + + +def main(): + parser = argparse.ArgumentParser(description="Improve a skill description 
based on eval results") + parser.add_argument("--eval-results", required=True, help="Path to eval results JSON (from run_eval.py)") + parser.add_argument("--skill-path", required=True, help="Path to skill directory") + parser.add_argument("--history", default=None, help="Path to history JSON (previous attempts)") + parser.add_argument("--model", required=True, help="Model for improvement") + parser.add_argument("--platform", default=None, choices=["claude", "cursor"], help="Target platform (default: auto-detect)") + parser.add_argument("--verbose", action="store_true", help="Print thinking to stderr") + args = parser.parse_args() + + platform = args.platform or detect_platform() + + skill_path = Path(args.skill_path) + if not (skill_path / "SKILL.md").exists(): + print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr) + sys.exit(1) + + eval_results = json.loads(Path(args.eval_results).read_text()) + history = [] + if args.history: + history = json.loads(Path(args.history).read_text()) + + parsed = parse_skill_md(skill_path) + name, content = parsed["name"], parsed["content"] + current_description = eval_results["description"] + + if args.verbose: + print(f"Platform: {platform}", file=sys.stderr) + print(f"Current: {current_description}", file=sys.stderr) + print(f"Score: {eval_results['summary']['passed']}/{eval_results['summary']['total']}", file=sys.stderr) + + client = anthropic.Anthropic() + + new_description = improve_description( + client=client, + skill_name=name, + skill_content=content, + current_description=current_description, + eval_results=eval_results, + history=history, + model=args.model, + platform=platform, + ) + + if args.verbose: + print(f"Improved: {new_description}", file=sys.stderr) + + # Output as JSON with both the new description and updated history + output = { + "description": new_description, + "history": history + [{ + "description": current_description, + "passed": eval_results["summary"]["passed"], + "failed": 
eval_results["summary"]["failed"], + "total": eval_results["summary"]["total"], + "results": eval_results["results"], + }], + } + print(json.dumps(output, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/skill-creator/scripts/package_skill.py b/.agents/skills/skill-creator/scripts/package_skill.py new file mode 100755 index 0000000..6e715ca --- /dev/null +++ b/.agents/skills/skill-creator/scripts/package_skill.py @@ -0,0 +1,143 @@ +#!/usr/bin/env python3 +""" +Skill Packager - Creates a distributable .skill file of a skill folder + +Usage: + python utils/package_skill.py [output-directory] + +Example: + python utils/package_skill.py skills/public/my-skill + python utils/package_skill.py skills/public/my-skill ./dist +""" + +import argparse +import fnmatch +import sys +import zipfile +from pathlib import Path +from scripts.quick_validate import validate_skill +from scripts.utils import detect_platform, get_platform_name, get_skills_dir + +# Patterns to exclude when packaging skills. +EXCLUDE_DIRS = {"__pycache__", "node_modules"} +EXCLUDE_GLOBS = {"*.pyc"} +EXCLUDE_FILES = {".DS_Store"} +# Directories excluded only at the skill root (not when nested deeper). +ROOT_EXCLUDE_DIRS = {"evals"} + + +def should_exclude(rel_path: Path) -> bool: + """Check if a path should be excluded from packaging.""" + parts = rel_path.parts + if any(part in EXCLUDE_DIRS for part in parts): + return True + # rel_path is relative to skill_path.parent, so parts[0] is the skill + # folder name and parts[1] (if present) is the first subdir. + if len(parts) > 1 and parts[1] in ROOT_EXCLUDE_DIRS: + return True + name = rel_path.name + if name in EXCLUDE_FILES: + return True + return any(fnmatch.fnmatch(name, pat) for pat in EXCLUDE_GLOBS) + + +def package_skill(skill_path, output_dir=None): + """ + Package a skill folder into a .skill file. 
+ + Args: + skill_path: Path to the skill folder + output_dir: Optional output directory for the .skill file (defaults to current directory) + + Returns: + Path to the created .skill file, or None if error + """ + skill_path = Path(skill_path).resolve() + + # Validate skill folder exists + if not skill_path.exists(): + print(f"❌ Error: Skill folder not found: {skill_path}") + return None + + if not skill_path.is_dir(): + print(f"❌ Error: Path is not a directory: {skill_path}") + return None + + # Validate SKILL.md exists + skill_md = skill_path / "SKILL.md" + if not skill_md.exists(): + print(f"❌ Error: SKILL.md not found in {skill_path}") + return None + + # Run validation before packaging + print("🔍 Validating skill...") + valid, message = validate_skill(skill_path) + if not valid: + print(f"❌ Validation failed: {message}") + print(" Please fix the validation errors before packaging.") + return None + print(f"✅ {message}\n") + + # Determine output location + skill_name = skill_path.name + if output_dir: + output_path = Path(output_dir).resolve() + output_path.mkdir(parents=True, exist_ok=True) + else: + output_path = Path.cwd() + + skill_filename = output_path / f"{skill_name}.skill" + + # Create the .skill file (zip format) + try: + with zipfile.ZipFile(skill_filename, 'w', zipfile.ZIP_DEFLATED) as zipf: + # Walk through the skill directory, excluding build artifacts + for file_path in skill_path.rglob('*'): + if not file_path.is_file(): + continue + arcname = file_path.relative_to(skill_path.parent) + if should_exclude(arcname): + print(f" Skipped: {arcname}") + continue + zipf.write(file_path, arcname) + print(f" Added: {arcname}") + + print(f"\n✅ Successfully packaged skill to: {skill_filename}") + return skill_filename + + except Exception as e: + print(f"❌ Error creating .skill file: {e}") + return None + + +def main(): + parser = argparse.ArgumentParser( + description="Package a skill folder into a distributable .skill file" + ) + 
parser.add_argument("skill_path", help="Path to the skill folder") + parser.add_argument("output_dir", nargs="?", default=None, help="Output directory for the .skill file") + parser.add_argument("--platform", default=None, choices=["claude", "cursor"], + help="Target platform (default: auto-detect)") + args = parser.parse_args() + + platform = args.platform or detect_platform() + platform_name = get_platform_name(platform) + skills_dir = get_skills_dir(platform) + + print(f"Packaging skill: {args.skill_path}") + print(f"Target platform: {platform_name}") + if args.output_dir: + print(f"Output directory: {args.output_dir}") + print() + + result = package_skill(args.skill_path, args.output_dir) + + if result: + print(f"\nTo install, place the skill folder in {skills_dir}") + sys.exit(0) + else: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/skill-creator/scripts/quick_validate.py b/.agents/skills/skill-creator/scripts/quick_validate.py new file mode 100755 index 0000000..06f569b --- /dev/null +++ b/.agents/skills/skill-creator/scripts/quick_validate.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +""" +Quick validation script for skills - minimal version +""" + +import sys +import os +import re +import yaml +from pathlib import Path + +def validate_skill(skill_path): + """Basic validation of a skill""" + skill_path = Path(skill_path) + + # Check SKILL.md exists + skill_md = skill_path / 'SKILL.md' + if not skill_md.exists(): + return False, "SKILL.md not found" + + # Read and validate frontmatter + content = skill_md.read_text() + if not content.startswith('---'): + return False, "No YAML frontmatter found" + + # Extract frontmatter + match = re.match(r'^---\n(.*?)\n---', content, re.DOTALL) + if not match: + return False, "Invalid frontmatter format" + + frontmatter_text = match.group(1) + + # Parse YAML frontmatter + try: + frontmatter = yaml.safe_load(frontmatter_text) + if not isinstance(frontmatter, dict): + return False, 
"Frontmatter must be a YAML dictionary" + except yaml.YAMLError as e: + return False, f"Invalid YAML in frontmatter: {e}" + + # Define allowed properties + ALLOWED_PROPERTIES = {'name', 'description', 'license', 'allowed-tools', 'metadata', 'compatibility', 'disable-model-invocation'} + + # Check for unexpected properties (excluding nested keys under metadata) + unexpected_keys = set(frontmatter.keys()) - ALLOWED_PROPERTIES + if unexpected_keys: + return False, ( + f"Unexpected key(s) in SKILL.md frontmatter: {', '.join(sorted(unexpected_keys))}. " + f"Allowed properties are: {', '.join(sorted(ALLOWED_PROPERTIES))}" + ) + + # Check required fields + if 'name' not in frontmatter: + return False, "Missing 'name' in frontmatter" + if 'description' not in frontmatter: + return False, "Missing 'description' in frontmatter" + + # Extract name for validation + name = frontmatter.get('name', '') + if not isinstance(name, str): + return False, f"Name must be a string, got {type(name).__name__}" + name = name.strip() + if name: + # Check naming convention (kebab-case: lowercase with hyphens) + if not re.match(r'^[a-z0-9-]+$', name): + return False, f"Name '{name}' should be kebab-case (lowercase letters, digits, and hyphens only)" + if name.startswith('-') or name.endswith('-') or '--' in name: + return False, f"Name '{name}' cannot start/end with hyphen or contain consecutive hyphens" + # Check name length (max 64 characters per spec) + if len(name) > 64: + return False, f"Name is too long ({len(name)} characters). Maximum is 64 characters." 
+ + # Extract and validate description + description = frontmatter.get('description', '') + if not isinstance(description, str): + return False, f"Description must be a string, got {type(description).__name__}" + description = description.strip() + if description: + # Check for angle brackets + if '<' in description or '>' in description: + return False, "Description cannot contain angle brackets (< or >)" + # Check description length (max 1024 characters per spec) + if len(description) > 1024: + return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters." + + # Validate compatibility field if present (optional) + compatibility = frontmatter.get('compatibility', '') + if compatibility: + if not isinstance(compatibility, str): + return False, f"Compatibility must be a string, got {type(compatibility).__name__}" + if len(compatibility) > 500: + return False, f"Compatibility is too long ({len(compatibility)} characters). Maximum is 500 characters." + + return True, "Skill is valid!" + +if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: python quick_validate.py ") + sys.exit(1) + + valid, message = validate_skill(sys.argv[1]) + print(message) + sys.exit(0 if valid else 1) \ No newline at end of file diff --git a/.agents/skills/skill-creator/scripts/run_eval.py b/.agents/skills/skill-creator/scripts/run_eval.py new file mode 100755 index 0000000..9f5bf69 --- /dev/null +++ b/.agents/skills/skill-creator/scripts/run_eval.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python3 +"""Run trigger evaluation for a skill description. + +Tests whether a skill's description causes an AI agent to trigger (read the +skill) for a set of queries. Supports both Claude Code (via `claude -p` CLI) +and Cursor (via LLM simulation). Outputs results as JSON. 
+""" + +import argparse +import json +import os +import select +import subprocess +import sys +import time +from concurrent.futures import ProcessPoolExecutor, as_completed +from pathlib import Path + +from scripts.utils import detect_platform, parse_skill_md + + +def find_project_root() -> Path: + """Find the project root by walking up from cwd looking for config dirs. + + Checks for .claude/ and .cursor/ directories, mimicking how both + Claude Code and Cursor discover their project root. + """ + current = Path.cwd() + for parent in [current, *current.parents]: + if (parent / ".claude").is_dir() or (parent / ".cursor").is_dir(): + return parent + return current + + +def _run_query_cursor( + query: str, + skill_name: str, + skill_description: str, + model: str | None = None, +) -> bool: + """Test skill triggering via LLM simulation (for Cursor). + + Since Cursor has no CLI equivalent to `claude -p`, we simulate triggering + by asking a model whether it would invoke the skill for the given query. + This tests description quality rather than actual runtime behavior, but is + directionally accurate for A/B testing descriptions. + """ + import anthropic + + system_prompt = ( + "You are a coding assistant with access to skills. Available skills:\n" + f"- {skill_name}: {skill_description}\n\n" + f'Given the following user query, would you invoke the "{skill_name}" skill? ' + "Reply with ONLY \"YES\" or \"NO\"." + ) + + client = anthropic.Anthropic() + response = client.messages.create( + model=model or "claude-sonnet-4-6", + max_tokens=5, + system=system_prompt, + messages=[{"role": "user", "content": query}], + ) + text = response.content[0].text.strip().upper() if response.content else "" + + return "YES" in text + + +def _run_query_claude( + query: str, + skill_name: str, + skill_description: str, + timeout: int, + project_root: str, + model: str | None = None, +) -> bool: + """Run a single query against Claude Code CLI and return whether the skill was triggered. 
+ + Tests the real skill in .claude/skills/ by running `claude -p` and watching + for ToolSearch/Skill/Read tool calls that reference the skill name. + + Claude Code's modern flow is: ToolSearch -> Skill tool. The older flow + used Read to load command files directly. Both are detected. + """ + cmd = [ + "claude", + "-p", query, + "--output-format", "stream-json", + "--verbose", + "--include-partial-messages", + ] + if model: + cmd.extend(["--model", model]) + + # Remove CLAUDECODE env var to allow nesting claude -p inside a + # Claude Code session. The guard is for interactive terminal conflicts; + # programmatic subprocess usage is safe. + env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"} + + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + cwd=project_root, + env=env, + ) + + triggered = False + start_time = time.time() + buffer = "" + # Track state for stream event detection + pending_tool_name = None + accumulated_json = "" + # Track the multi-turn flow: ToolSearch("select:Skill") -> Skill("skill-name") + # The first ToolSearch loads the Skill tool, then the Skill tool invokes the skill. 
+ seen_skill_tool_loaded = False + first_tool_seen = False + + # Tools that are part of the skill invocation flow + skill_tools = {"Skill", "Read", "ToolSearch"} + + try: + while time.time() - start_time < timeout: + if process.poll() is not None: + remaining = process.stdout.read() + if remaining: + buffer += remaining.decode("utf-8", errors="replace") + break + + ready, _, _ = select.select([process.stdout], [], [], 1.0) + if not ready: + continue + + chunk = os.read(process.stdout.fileno(), 8192) + if not chunk: + break + buffer += chunk.decode("utf-8", errors="replace") + + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if not line: + continue + + try: + event = json.loads(line) + except json.JSONDecodeError: + continue + + # Early detection via stream events + if event.get("type") == "stream_event": + se = event.get("event", {}) + se_type = se.get("type", "") + + if se_type == "content_block_start": + cb = se.get("content_block", {}) + if cb.get("type") == "tool_use": + tool_name = cb.get("name", "") + if tool_name in skill_tools: + pending_tool_name = tool_name + accumulated_json = "" + elif not first_tool_seen and not seen_skill_tool_loaded: + # Very first tool call is unrelated to skills + return False + + first_tool_seen = True + + elif se_type == "content_block_delta" and pending_tool_name: + delta = se.get("delta", {}) + if delta.get("type") == "input_json_delta": + accumulated_json += delta.get("partial_json", "") + + elif se_type == "content_block_stop": + if pending_tool_name: + if pending_tool_name == "ToolSearch": + # ToolSearch("select:Skill") loads the Skill tool + if "Skill" in accumulated_json: + seen_skill_tool_loaded = True + elif pending_tool_name == "Skill": + # Skill("executive-assistant-setup") invokes the skill + if skill_name in accumulated_json: + return True + elif pending_tool_name == "Read": + if skill_name in accumulated_json: + return True + pending_tool_name = None + accumulated_json = "" + + 
def run_eval(
    eval_set: list[dict],
    skill_name: str,
    description: str,
    num_workers: int,
    timeout: int,
    project_root: Path,
    runs_per_query: int = 1,
    trigger_threshold: float = 0.5,
    model: str | None = None,
    platform: str = "claude",
) -> dict:
    """Evaluate every query in ``eval_set`` and summarize pass/fail per query.

    Each query is submitted ``runs_per_query`` times to a process pool of
    ``num_workers`` workers. A query that is expected to trigger passes when
    its trigger rate is at least ``trigger_threshold``; a query that is not
    expected to trigger passes when its rate is strictly below it. A run that
    raises is reported on stderr and counted as "did not trigger".

    Returns a dict with ``skill_name``, ``description``, the per-query
    ``results`` list and a ``summary`` of total/passed/failed counts.
    """
    outcomes_by_query: dict[str, list[bool]] = {}
    item_by_query: dict[str, dict] = {}

    with ProcessPoolExecutor(max_workers=num_workers) as pool:
        # One future per (query, repetition); the value remembers which eval
        # item the future belongs to.
        pending = {
            pool.submit(
                run_single_query,
                entry["query"],
                skill_name,
                description,
                timeout,
                str(project_root),
                model,
                platform,
            ): entry
            for entry in eval_set
            for _ in range(runs_per_query)
        }

        for finished in as_completed(pending):
            entry = pending[finished]
            text = entry["query"]
            item_by_query[text] = entry
            try:
                fired = finished.result()
            except Exception as e:
                print(f"Warning: query failed: {e}", file=sys.stderr)
                fired = False
            outcomes_by_query.setdefault(text, []).append(fired)

    results = []
    for text, outcomes in outcomes_by_query.items():
        fired_count = sum(outcomes)
        run_count = len(outcomes)
        rate = fired_count / run_count
        expected = item_by_query[text]["should_trigger"]
        # Positive cases need rate >= threshold, negative cases rate < threshold.
        verdict = rate >= trigger_threshold if expected else rate < trigger_threshold
        results.append({
            "query": text,
            "should_trigger": expected,
            "trigger_rate": rate,
            "triggers": fired_count,
            "runs": run_count,
            "pass": verdict,
        })

    total = len(results)
    passed = len([r for r in results if r["pass"]])

    return {
        "skill_name": skill_name,
        "description": description,
        "results": results,
        "summary": {
            "total": total,
            "passed": passed,
            "failed": total - passed,
        },
    }
threshold") + parser.add_argument("--model", default=None, help="Model to use (default: claude-sonnet-4-6)") + parser.add_argument("--platform", default=None, choices=["claude", "cursor"], help="Target platform (default: auto-detect)") + parser.add_argument("--verbose", action="store_true", help="Print progress to stderr") + args = parser.parse_args() + + platform = args.platform or detect_platform() + + eval_set = json.loads(Path(args.eval_set).read_text()) + skill_path = Path(args.skill_path) + + if not (skill_path / "SKILL.md").exists(): + print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr) + sys.exit(1) + + parsed = parse_skill_md(skill_path) + name, original_description = parsed["name"], parsed["description"] + description = args.description or original_description + project_root = find_project_root() + + if args.verbose: + print(f"Platform: {platform}", file=sys.stderr) + print(f"Evaluating: {description}", file=sys.stderr) + + output = run_eval( + eval_set=eval_set, + skill_name=name, + description=description, + num_workers=args.num_workers, + timeout=args.timeout, + project_root=project_root, + runs_per_query=args.runs_per_query, + trigger_threshold=args.trigger_threshold, + model=args.model, + platform=platform, + ) + + if args.verbose: + summary = output["summary"] + print(f"Results: {summary['passed']}/{summary['total']} passed", file=sys.stderr) + for r in output["results"]: + status = "PASS" if r["pass"] else "FAIL" + rate_str = f"{r['triggers']}/{r['runs']}" + print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:70]}", file=sys.stderr) + + print(json.dumps(output, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/.agents/skills/skill-creator/scripts/run_loop.py b/.agents/skills/skill-creator/scripts/run_loop.py new file mode 100755 index 0000000..55c7ec4 --- /dev/null +++ b/.agents/skills/skill-creator/scripts/run_loop.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +"""Run the eval + improve 
def split_eval_set(eval_set: list[dict], holdout: float, seed: int = 42) -> tuple[list[dict], list[dict]]:
    """Split eval set into train and test sets, stratified by should_trigger.

    Items are grouped by their ``should_trigger`` flag, each group is shuffled
    deterministically from ``seed``, and roughly ``holdout`` of each group
    (at least one item when the group can spare it) goes to the test set.

    At least one item of every non-empty group always stays in the train set,
    so a single-item group or ``holdout >= 1`` can no longer leave the
    optimizer with nothing to train on.

    Args:
        eval_set: Eval items; each must have a ``should_trigger`` key.
        holdout: Fraction of each group to hold out for testing.
        seed: Seed for the shuffle; the same seed yields the same split.

    Returns:
        ``(train_set, test_set)``; in each list the triggering items come first.
    """
    # A private generator yields the same shuffles as seeding the module-level
    # RNG with `seed`, but does not reset random state for the rest of the process.
    rng = random.Random(seed)

    # Separate by should_trigger
    trigger = [e for e in eval_set if e["should_trigger"]]
    no_trigger = [e for e in eval_set if not e["should_trigger"]]

    # Shuffle each group (order of the two calls matters for reproducibility)
    rng.shuffle(trigger)
    rng.shuffle(no_trigger)

    def _test_count(group_size: int) -> int:
        # Hold out at least one item, but never the whole group.
        requested = max(1, int(group_size * holdout))
        return min(requested, max(group_size - 1, 0))

    n_trigger_test = _test_count(len(trigger))
    n_no_trigger_test = _test_count(len(no_trigger))

    test_set = trigger[:n_trigger_test] + no_trigger[:n_no_trigger_test]
    train_set = trigger[n_trigger_test:] + no_trigger[n_no_trigger_test:]

    return train_set, test_set
current_description = description_override or original_description + + # Split into train/test if holdout > 0 + if holdout > 0: + train_set, test_set = split_eval_set(eval_set, holdout) + if verbose: + print(f"Split: {len(train_set)} train, {len(test_set)} test (holdout={holdout})", file=sys.stderr) + else: + train_set = eval_set + test_set = [] + + client = anthropic.Anthropic() + history = [] + exit_reason = "unknown" + + for iteration in range(1, max_iterations + 1): + if verbose: + print(f"\n{'='*60}", file=sys.stderr) + print(f"Iteration {iteration}/{max_iterations}", file=sys.stderr) + print(f"Description: {current_description}", file=sys.stderr) + print(f"{'='*60}", file=sys.stderr) + + # Evaluate train + test together in one batch for parallelism + all_queries = train_set + test_set + t0 = time.time() + all_results = run_eval( + eval_set=all_queries, + skill_name=name, + description=current_description, + num_workers=num_workers, + timeout=timeout, + project_root=project_root, + runs_per_query=runs_per_query, + trigger_threshold=trigger_threshold, + model=model, + platform=platform, + ) + eval_elapsed = time.time() - t0 + + # Split results back into train/test by matching queries + train_queries_set = {q["query"] for q in train_set} + train_result_list = [r for r in all_results["results"] if r["query"] in train_queries_set] + test_result_list = [r for r in all_results["results"] if r["query"] not in train_queries_set] + + train_passed = sum(1 for r in train_result_list if r["pass"]) + train_total = len(train_result_list) + train_summary = {"passed": train_passed, "failed": train_total - train_passed, "total": train_total} + train_results = {"results": train_result_list, "summary": train_summary} + + if test_set: + test_passed = sum(1 for r in test_result_list if r["pass"]) + test_total = len(test_result_list) + test_summary = {"passed": test_passed, "failed": test_total - test_passed, "total": test_total} + test_results = {"results": test_result_list, 
"summary": test_summary} + else: + test_results = None + test_summary = None + + history.append({ + "iteration": iteration, + "description": current_description, + "train_passed": train_summary["passed"], + "train_failed": train_summary["failed"], + "train_total": train_summary["total"], + "train_results": train_results["results"], + "test_passed": test_summary["passed"] if test_summary else None, + "test_failed": test_summary["failed"] if test_summary else None, + "test_total": test_summary["total"] if test_summary else None, + "test_results": test_results["results"] if test_results else None, + # For backward compat with report generator + "passed": train_summary["passed"], + "failed": train_summary["failed"], + "total": train_summary["total"], + "results": train_results["results"], + }) + + # Write live report if path provided + if live_report_path: + partial_output = { + "original_description": original_description, + "best_description": current_description, + "best_score": "in progress", + "iterations_run": len(history), + "holdout": holdout, + "train_size": len(train_set), + "test_size": len(test_set), + "history": history, + } + live_report_path.write_text(generate_html(partial_output, auto_refresh=True, skill_name=name)) + + if verbose: + def print_eval_stats(label, results, elapsed): + pos = [r for r in results if r["should_trigger"]] + neg = [r for r in results if not r["should_trigger"]] + tp = sum(r["triggers"] for r in pos) + pos_runs = sum(r["runs"] for r in pos) + fn = pos_runs - tp + fp = sum(r["triggers"] for r in neg) + neg_runs = sum(r["runs"] for r in neg) + tn = neg_runs - fp + total = tp + tn + fp + fn + precision = tp / (tp + fp) if (tp + fp) > 0 else 1.0 + recall = tp / (tp + fn) if (tp + fn) > 0 else 1.0 + accuracy = (tp + tn) / total if total > 0 else 0.0 + print(f"{label}: {tp+tn}/{total} correct, precision={precision:.0%} recall={recall:.0%} accuracy={accuracy:.0%} ({elapsed:.1f}s)", file=sys.stderr) + for r in results: + status = "PASS" 
if r["pass"] else "FAIL" + rate_str = f"{r['triggers']}/{r['runs']}" + print(f" [{status}] rate={rate_str} expected={r['should_trigger']}: {r['query'][:60]}", file=sys.stderr) + + print_eval_stats("Train", train_results["results"], eval_elapsed) + if test_summary: + print_eval_stats("Test ", test_results["results"], 0) + + if train_summary["failed"] == 0: + exit_reason = f"all_passed (iteration {iteration})" + if verbose: + print(f"\nAll train queries passed on iteration {iteration}!", file=sys.stderr) + break + + if iteration == max_iterations: + exit_reason = f"max_iterations ({max_iterations})" + if verbose: + print(f"\nMax iterations reached ({max_iterations}).", file=sys.stderr) + break + + # Improve the description based on train results + if verbose: + print(f"\nImproving description...", file=sys.stderr) + + t0 = time.time() + # Strip test scores from history so improvement model can't see them + blinded_history = [ + {k: v for k, v in h.items() if not k.startswith("test_")} + for h in history + ] + new_description = improve_description( + client=client, + skill_name=name, + skill_content=content, + current_description=current_description, + eval_results=train_results, + history=blinded_history, + model=model, + log_dir=log_dir, + iteration=iteration, + platform=platform, + ) + improve_elapsed = time.time() - t0 + + if verbose: + print(f"Proposed ({improve_elapsed:.1f}s): {new_description}", file=sys.stderr) + + current_description = new_description + + # Find the best iteration by TEST score (or train if no test set) + if test_set: + best = max(history, key=lambda h: h["test_passed"] or 0) + best_score = f"{best['test_passed']}/{best['test_total']}" + else: + best = max(history, key=lambda h: h["train_passed"]) + best_score = f"{best['train_passed']}/{best['train_total']}" + + if verbose: + print(f"\nExit reason: {exit_reason}", file=sys.stderr) + print(f"Best score: {best_score} (iteration {best['iteration']})", file=sys.stderr) + + return { + 
def main():
    """Command-line entry point: parse arguments, run the loop, save the outputs.

    The loop result is printed to stdout as JSON. Unless ``--report none`` is
    given, an HTML report is written and opened in the browser before the loop
    starts so progress can be watched. With ``--results-dir``, ``results.json``
    and (when a report is enabled) ``report.html`` are also written to a
    timestamped subdirectory. Exits with status 1 when the skill directory
    contains no SKILL.md.
    """
    parser = argparse.ArgumentParser(description="Run eval + improve loop")
    parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
    parser.add_argument("--skill-path", required=True, help="Path to skill directory")
    parser.add_argument("--description", default=None, help="Override starting description")
    parser.add_argument("--num-workers", type=int, default=10, help="Number of parallel workers")
    parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
    parser.add_argument("--max-iterations", type=int, default=5, help="Max improvement iterations")
    parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
    parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
    parser.add_argument("--holdout", type=float, default=0.4, help="Fraction of eval set to hold out for testing (0 to disable)")
    parser.add_argument("--model", required=True, help="Model for improvement")
    parser.add_argument("--platform", default=None, choices=["claude", "cursor"], help="Target platform (default: auto-detect)")
    parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
    parser.add_argument("--report", default="auto", help="Generate HTML report at this path (default: 'auto' for temp file, 'none' to disable)")
    parser.add_argument("--results-dir", default=None, help="Save all outputs (results.json, report.html, log.txt) to a timestamped subdirectory here")
    args = parser.parse_args()

    platform = args.platform or detect_platform()

    # JSON is UTF-8 by specification; do not depend on the platform default encoding.
    eval_set = json.loads(Path(args.eval_set).read_text(encoding="utf-8"))
    skill_path = Path(args.skill_path)

    if not (skill_path / "SKILL.md").exists():
        print(f"Error: No SKILL.md found at {skill_path}", file=sys.stderr)
        sys.exit(1)

    name = parse_skill_md(skill_path)["name"]

    # Set up live report path
    if args.report != "none":
        if args.report == "auto":
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            live_report_path = Path(tempfile.gettempdir()) / f"skill_description_report_{skill_path.name}_{timestamp}.html"
        else:
            live_report_path = Path(args.report)
            # A user-supplied location may not exist yet.
            live_report_path.parent.mkdir(parents=True, exist_ok=True)
        # Open the report immediately so the user can watch. The placeholder
        # refreshes itself so the browser picks up the first live report.
        live_report_path.write_text(
            "<html><body><h1>Starting optimization loop...</h1>"
            "<meta http-equiv='refresh' content='5'></body></html>",
            encoding="utf-8",
        )
        # webbrowser expects a URL; a bare filesystem path is not portable.
        webbrowser.open(live_report_path.resolve().as_uri())
    else:
        live_report_path = None

    # Determine output directory (create before run_loop so logs can be written)
    if args.results_dir:
        timestamp = time.strftime("%Y-%m-%d_%H%M%S")
        results_dir = Path(args.results_dir) / timestamp
        results_dir.mkdir(parents=True, exist_ok=True)
    else:
        results_dir = None

    log_dir = results_dir / "logs" if results_dir else None

    if args.verbose:
        print(f"Platform: {platform}", file=sys.stderr)

    output = run_loop(
        eval_set=eval_set,
        skill_path=skill_path,
        description_override=args.description,
        num_workers=args.num_workers,
        timeout=args.timeout,
        max_iterations=args.max_iterations,
        runs_per_query=args.runs_per_query,
        trigger_threshold=args.trigger_threshold,
        holdout=args.holdout,
        model=args.model,
        verbose=args.verbose,
        live_report_path=live_report_path,
        log_dir=log_dir,
        platform=platform,
    )

    # Save JSON output
    json_output = json.dumps(output, indent=2)
    print(json_output)
    if results_dir:
        (results_dir / "results.json").write_text(json_output, encoding="utf-8")

    # Write final HTML report (without auto-refresh); render it once and reuse it.
    if live_report_path:
        final_html = generate_html(output, auto_refresh=False, skill_name=name)
        live_report_path.write_text(final_html, encoding="utf-8")
        print(f"\nReport: {live_report_path}", file=sys.stderr)
        if results_dir:
            (results_dir / "report.html").write_text(final_html, encoding="utf-8")

    if results_dir:
        print(f"Results saved to: {results_dir}", file=sys.stderr)
def parse_skill_md(skill_path: Path) -> dict:
    """Parse a SKILL.md file.

    Only the ``name`` and ``description`` frontmatter keys are extracted, with
    a small line-based reader rather than a full YAML parser. Supported forms
    for ``description``: a single-line value (optionally quoted), a plain or
    quoted value continued on indented lines, and a block scalar introduced
    by ``>``, ``|``, ``>-``, ``|-``, ``>+`` or ``|+``. Multi-line values are
    joined with single spaces.

    Args:
        skill_path: Directory that contains ``SKILL.md``.

    Returns:
        A dict with keys: name, description, content (the full file text).

    Raises:
        ValueError: If the file does not start with a ``---`` line or the
            frontmatter is never closed by a second ``---`` line.
    """
    block_indicators = (">", "|", ">-", "|-", ">+", "|+")

    def _unquote(text: str) -> str:
        # Remove exactly one pair of matching surrounding quotes. Stripping
        # quote characters blindly would also eat an apostrophe that belongs
        # to the value, e.g. "says 'go'".
        if len(text) >= 2 and text[0] == text[-1] and text[0] in ('"', "'"):
            return text[1:-1]
        return text

    def _is_indented(text: str) -> bool:
        return text[:1] in (" ", "\t")

    # SKILL.md files are UTF-8 (utf-8-sig also tolerates a leading BOM); the
    # platform default encoding would mis-decode non-ASCII text on some systems.
    content = (skill_path / "SKILL.md").read_text(encoding="utf-8-sig")
    lines = content.split("\n")

    if lines[0].strip() != "---":
        raise ValueError("SKILL.md missing frontmatter (no opening ---)")

    end_idx = next(
        (idx for idx, text in enumerate(lines[1:], start=1) if text.strip() == "---"),
        None,
    )
    if end_idx is None:
        raise ValueError("SKILL.md missing frontmatter (no closing ---)")

    name = ""
    description = ""
    frontmatter_lines = lines[1:end_idx]
    i = 0
    while i < len(frontmatter_lines):
        line = frontmatter_lines[i]
        i += 1
        if line.startswith("name:"):
            name = _unquote(line[len("name:"):].strip())
        elif line.startswith("description:"):
            value = line[len("description:"):].strip()
            is_block = value in block_indicators
            parts: list[str] = [] if is_block else [value]

            # An empty value followed by indented lines may be a nested
            # mapping, so continuation lines are only collected for block
            # scalars and for values that already started on this line.
            if is_block or value:
                while i < len(frontmatter_lines):
                    following = frontmatter_lines[i]
                    if _is_indented(following):
                        parts.append(following.strip())
                    elif is_block and not following.strip():
                        pass  # blank line inside a block scalar
                    else:
                        break
                    i += 1

            description = " ".join(part for part in parts if part)
            if not is_block:
                description = _unquote(description)

    return {"name": name, "description": description, "content": content}
+allowed-tools: Read, Glob, Grep, Shell, Write +metadata: + argument-hint: "[directory or file to audit, or leave blank for the whole app]" +--- + +# Test Coverage Fix + +Fix test coverage for **$ARGUMENTS** (or the whole app if no argument is given). This skill enforces the **80% line coverage hard gate** required for Flows app approval by finding AND fixing coverage gaps. Work through every step in order. + +--- + +## Step 1 — Verify test framework and coverage tooling + +Check that the project has a working test framework with coverage configured: + +```bash +# Check for vitest or jest in package.json +grep -E "(vitest|jest)" package.json + +# Check for coverage configuration +cat vitest.config.ts 2>/dev/null || cat vitest.config.js 2>/dev/null || cat jest.config.ts 2>/dev/null || cat jest.config.js 2>/dev/null +``` + +Verify: +- A test framework (Vitest or Jest) is installed and configured +- The config file has a `coverage` section (e.g. `coverage: { provider: 'v8', ... }` in vitest.config.ts) +- A coverage reporter is configured (at least `text` and `lcov` or `json-summary`) + +**If coverage tooling is not configured, fix it now:** + +1. Install the coverage provider: +```bash +pnpm add -D @vitest/coverage-v8 +``` + +2. Add the coverage configuration to `vitest.config.ts`. Read the existing config file, then add the `coverage` section inside `test`: + +```typescript +// vitest.config.ts — minimum coverage configuration to add +test: { + coverage: { + provider: 'v8', + reporter: ['text', 'text-summary', 'lcov'], + include: ['src/**/*.{ts,tsx}'], + exclude: [ + 'src/**/*.test.{ts,tsx}', + 'src/**/*.spec.{ts,tsx}', + 'src/**/vite-env.d.ts', + 'src/main.tsx', + ], + }, +} +``` + +Write the updated config file. If no vitest.config.ts exists at all, create one with the full `defineConfig` wrapper. 
+ +--- + +## Step 2 — Validate coverage scope + +The 80% threshold applies to **all `.ts` and `.tsx` files** under `src/`, excluding only: +- Test files (`*.test.ts`, `*.test.tsx`, `*.spec.ts`, `*.spec.tsx`) +- Type declaration files (`vite-env.d.ts`) +- The entry point (`main.tsx`) + +Apps must **not** exclude pages, components, hooks, or other production code from coverage measurement. + +```bash +# Check what files are excluded from coverage in the config +grep -A 20 "exclude" vitest.config.ts 2>/dev/null || grep -A 20 "exclude" vitest.config.js 2>/dev/null + +# Check for coveragePathIgnorePatterns in jest config +grep -A 10 "coveragePathIgnorePatterns\|collectCoverageFrom" jest.config.ts 2>/dev/null +``` + +**If the config excludes production files, fix it now:** + +Remove any exclusion that hides production code from coverage measurement. Only test files, type declarations, and the entry point should be excluded. Rewrite the `exclude` array to contain only: + +```typescript +exclude: [ + 'src/**/*.test.{ts,tsx}', + 'src/**/*.spec.{ts,tsx}', + 'src/**/vite-env.d.ts', + 'src/main.tsx', +], +``` + +Specifically remove any exclusions for: +- `src/pages/` or `src/components/` or `src/hooks/` — **NOT allowed** +- Specific feature files — **NOT allowed** unless they are generated code +- `src/**/*.tsx` (all components) — **NOT allowed**, this hides the majority of the app + +Write the corrected config file. + +--- + +## Step 3 — Run tests and collect coverage + +```bash +# Try common coverage commands based on project setup +npx vitest run --coverage 2>/dev/null || npx jest --coverage 2>/dev/null || npm test -- --coverage 2>/dev/null +``` + +Record the coverage summary: +- **Statements:** X% +- **Branches:** X% +- **Functions:** X% +- **Lines:** X% + +**Hard gate:** Overall line coverage must be **at least 80%**. Apps below this threshold are listed as **must fix**. 
+ +**If tests fail to run, fix them now:** + +Common fixes: +- **Missing imports:** Read the failing test file, add the missing import statement, write the fixed file. +- **Broken mocks:** Read the test to understand what is being mocked. Fix the mock to match the current API of the mocked module. +- **Outdated snapshots:** Run `npx vitest run --update` to update snapshots, then review the diff to ensure correctness. +- **Missing dependencies:** Run `pnpm add -D <package>` for any test utilities not yet installed. +- **Config errors:** Read the config file, fix syntax or option errors, write the corrected file. + +Re-run tests after each fix until all tests pass. Then record the coverage summary. + +--- + +## Step 4 — Find and write missing test files + +For every non-trivial `.ts`/`.tsx` file under `src/`, check whether a corresponding test file exists: + +```bash +# List all production files and check for test counterparts +for file in $(find src -name "*.ts" -o -name "*.tsx" | grep -v ".test." | grep -v ".spec." 
| grep -v "node_modules" | grep -v "vite-env" | sort); do + base="${file%.*}" + ext="${file##*.}" + dir=$(dirname "$file") + filename=$(basename "$base") + + # Check for test file in same directory or __tests__ directory + test_exists="false" + for pattern in "${base}.test.${ext}" "${base}.spec.${ext}" "${base}.test.ts" "${base}.spec.ts" "${dir}/__tests__/${filename}.test.${ext}" "${dir}/__tests__/${filename}.spec.${ext}"; do + if [ -f "$pattern" ]; then + test_exists="true" + break + fi + done + + if [ "$test_exists" = "false" ]; then + echo "NO TEST: $file" + fi +done +``` + +Categorize each file without a test: +- **Services, hooks, utils, contexts, ViewModel hooks** — **Write the test file now** (see below) +- **Pure presentational components** with no logic — Mark as **N/A** (no test required) +- **Barrel exports** (`index.ts` that only re-exports) — Mark as **N/A** +- **Type-only files** (`.d.ts`, files with only type/interface exports) — Mark as **N/A** + +**For each file missing a test, create a comprehensive test file.** Use context injection for dependency mocking where the production code supports it. If the production code uses hard-coded imports, note this as a testability concern but still write the test using `vi.mock` with a justification comment. Follow this process for each: + +1. **Read the source file** to understand its exports, dependencies, and logic. +2. **Create a `.test.ts` or `.test.tsx` file** in the same directory as the source file. +3. **Write tests covering:** happy path, error path, empty state, and edge cases. + +Use the right testing pattern for each file type: + +**For hooks:** +- Test with `renderHook` from `@testing-library/react` +- Wrap with necessary providers (QueryClientProvider, custom context providers, etc.) 
+- Test initial state, loading state, success state, and error state +- Example structure: +```typescript +import { renderHook, waitFor } from '@testing-library/react'; +import { QueryClient, QueryClientProvider } from '@tanstack/react-query'; +// import the hook + +const createWrapper = () => { + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false } }, + }); + return ({ children }: { children: React.ReactNode }) => ( + <QueryClientProvider client={queryClient}>{children}</QueryClientProvider> + ); +}; + +describe('useMyHook', () => { + it('returns data on success', async () => { + const { result } = renderHook(() => useMyHook(), { wrapper: createWrapper() }); + await waitFor(() => expect(result.current.data).toBeDefined()); + }); + + it('handles errors', async () => { + // set up error condition + const { result } = renderHook(() => useMyHook(), { wrapper: createWrapper() }); + await waitFor(() => expect(result.current.error).toBeDefined()); + }); +}); +``` + +**For services/utils:** +- Test with direct function calls +- Mock CDF SDK responses where needed +- Test return values, side effects, and thrown errors +- Example structure: +```typescript +import { describe, it, expect, vi } from 'vitest'; +// import the service/util functions + +describe('myService', () => { + it('returns expected result for valid input', () => { + const result = myFunction(validInput); + expect(result).toEqual(expectedOutput); + }); + + it('throws on invalid input', () => { + expect(() => myFunction(invalidInput)).toThrow(); + }); +}); +``` + +**For components with logic:** +- Test with `render` from `@testing-library/react` +- Verify loading, error, and data states +- Test user interactions that trigger state changes +- Example structure: +```typescript +import { render, screen, waitFor } from '@testing-library/react'; +import userEvent from '@testing-library/user-event'; +// import the component and providers + +describe('MyComponent', () => { + it('shows loading state initially', () => { + render(<MyComponent />, { wrapper: 
createWrapper() }); + expect(screen.getByText(/loading/i)).toBeInTheDocument(); + }); + + it('renders data after loading', async () => { + render(<MyComponent />, { wrapper: createWrapper() }); + await waitFor(() => { + expect(screen.getByText('expected content')).toBeInTheDocument(); + }); + }); +}); +``` + +**Dependency mocking guidelines:** +- Use context injection (not `vi.mock`) where possible — provide test dependencies via the hook's context. +- If the production code uses hard-coded imports that prevent context injection, use `vi.mock` with a justification comment explaining why (e.g., `// vi.mock required: useDataSource uses direct import, not context injection`). +- Ensure mocks are type-safe — no `as unknown as T` casts. Define proper mock objects that satisfy the interface. + +After writing each test file, run `npx vitest run <test-file>` to verify it passes. + +--- + +## Step 5 — Fix low-coverage files + +If the coverage tool produces per-file metrics, list files below 80% line coverage: + +```bash +# Parse lcov or text output for per-file coverage +cat coverage/coverage-summary.json 2>/dev/null | node -e " + const data = JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')); + Object.entries(data).forEach(([file, metrics]) => { + if (file === 'total') return; + const pct = metrics.lines?.pct ?? 0; + if (pct < 80) console.log(pct.toFixed(1) + '% — ' + file); + }); +" 2>/dev/null +``` + +**For each file below 80% coverage, read the uncovered lines from the coverage report, then add test cases that exercise those specific code paths:** + +1. **Read the uncovered lines** from the coverage report. Check the `coverage/` directory for detailed per-file reports (lcov or html) that show which lines are uncovered. +2. **Read the source file** to understand what those uncovered lines do. +3. 
**Add test cases** that exercise those specific code paths: + +- **Uncovered error paths:** Add tests that trigger error conditions (network failures, invalid input, null values, permission errors). Force errors by providing bad input or mocking dependencies to throw. +- **Uncovered branches:** Add tests for each conditional branch. If an `if/else` has only the `true` branch tested, write a test that triggers the `false` branch. +- **Uncovered functions:** Add tests that call each exported function that lacks coverage. Verify return values and side effects. +- **Uncovered catch blocks:** Mock the upstream call to reject/throw, verify the catch block behavior. + +4. **Run coverage again** after adding tests to verify the file now meets 80%: +```bash +npx vitest run --coverage +``` + +Repeat until each file reaches at least 80% line coverage or you have covered all feasible paths. + +--- + +## Step 6 — Fix testability patterns + +Assess and fix testability issues in the codebase: + +```bash +# Check for dependency injection via context +grep -rn --include="*.ts" --include="*.tsx" "useContext\|createContext" src/hooks/ src/contexts/ + +# Check for vi.mock usage (red flag for testability) +grep -rn --include="*.ts" --include="*.tsx" "vi\.mock" src/ + +# Check for unsafe casts in tests +grep -rn --include="*.ts" --include="*.tsx" "as unknown as" src/ | grep -E "\.test\.|\.spec\." + +# Check for interface-based services +grep -rn --include="*.ts" --include="*.tsx" -E "implements\s+\w+" src/ +``` + +**For each testability issue found, refactor the production code to support better testing patterns. Then update the corresponding test to use the improved pattern.** + +| Issue | Fix | +|-------|-----| +| Hook imports dependencies directly instead of using context | Add a context type with default dependencies. Create a context with `createContext`, provide defaults that use the real implementation. In the hook, use `useContext` to get dependencies. 
Tests can then provide mock dependencies via the context provider without `vi.mock`. | +| Service has no interface | Extract a TypeScript interface describing the service's public API. Have the class/object implement the interface. Tests mock against the interface, not the concrete implementation. | +| Page component mixes data fetching with rendering | Extract data logic into a `use*ViewModel` hook. The page component calls the ViewModel hook and renders based on its return value. Test the ViewModel hook separately with `renderHook`, test the page component with a mocked ViewModel. | +| Tests use `vi.mock` for modules that could use context injection | After refactoring the production code to use context injection (above), update the test to provide mock dependencies via the context provider. Remove the `vi.mock` call. Add a comment explaining the pattern. | +| Tests use `as unknown as T` casts for mocks | Define a proper mock type or object that satisfies the required interface. Replace the cast with a properly typed mock. If the interface is large, create a helper function that returns a partial mock with only the methods used, typed correctly. | + +For each refactored file: +1. Read the source file and its test file. +2. Refactor the production code (add context, extract interface, extract ViewModel hook). +3. Update the test to use the improved pattern. +4. Run `npx vitest run path/to/file.test.tsx` (substituting the updated test file's path) to verify the test still passes. 
+ +--- + +## Step 7 — Report remaining gaps + +Re-run the full test suite with coverage to get final numbers: + +```bash +npx vitest run --coverage 2>/dev/null || npx jest --coverage 2>/dev/null +``` + +Produce a summary of what was done and what remains: + +```markdown +### Test Coverage Summary (After Fixes) + +| Metric | Before | After | Gate | +|--------|--------|-------|------| +| Lines | X% | Y% | ≥80% required | +| Branches | X% | Y% | — | +| Functions | X% | Y% | — | +| Statements | X% | Y% | — | + +### Coverage verdict: PASS / FAIL + +### What was fixed +- [ ] Coverage tooling configured/corrected +- [ ] Exclusions cleaned up (removed N production file exclusions) +- [ ] N failing tests fixed +- [ ] N new test files written (list them) +- [ ] N existing test files expanded for coverage +- [ ] N files refactored for testability + +### Remaining gaps (needs human review) +Only list issues that could not be auto-fixed: +- Complex business logic where correct test assertions require domain knowledge +- Integration tests that need real API credentials or environment setup +- Files where coverage cannot reach 80% without major architectural changes (explain why) +``` + +--- + +## Done + +Summarize: +- Overall coverage before and after fixes, vs the 80% gate (PASS or FAIL) +- Number of test files written and tests added +- Number of files refactored for testability +- Any remaining **must fix** items that need human review diff --git a/.agents/skills/use-topbar/IMPLEMENTATION.md b/.agents/skills/use-topbar/IMPLEMENTATION.md new file mode 100644 index 0000000..55c238a --- /dev/null +++ b/.agents/skills/use-topbar/IMPLEMENTATION.md @@ -0,0 +1,179 @@ +# Topbar Implementation + +Install, theme-hook wiring, and composition for `@aura/topbar`. Run the interview ([INTERVIEW.md](INTERVIEW.md)) first. + +--- + +## Step 3 — Install + +**Installation is mandatory.** If `@aura/topbar` cannot be installed, stop and surface the blocker. 
Do not build a custom component or any workaround. + +### 3a — Check if already installed + +Check `package.json` for `@aura/topbar`. If present, skip to Step 3d. + +### 3b — Determine install method + +`@aura/topbar` is a **shadcn registry component** — not on npm. The only valid install path is the shadcn CLI (`pnpm dlx shadcn@latest add`). Do not use `npm install`, `pnpm add`, or `yarn add`. + +Before running the install: + +1. **Ensure `components.json` has the `@aura` registry.** If absent or missing the entry, add: + + ```json + { + "registries": { + "@aura": "https://cognitedata.github.io/aura/r/{name}.json" + } + } + ``` + + If `components.json` does not exist at all, run `pnpm dlx shadcn@latest init` first, then add the entry. + +2. **Detect the package manager:** + - `pnpm-lock.yaml` → pnpm + - `yarn.lock` → yarn + - `package-lock.json` → npm + +### 3c — Install + +```bash +pnpm dlx shadcn@latest add @aura/topbar +``` + +> **If this fails**, stop. Tell the user exactly what failed and ask them to resolve the blocker. Do not proceed with a workaround. + +### 3d — Tailwind check + +Confirm `tailwind.config` has `darkMode: 'class'`. Add it if missing. + +--- + +## Step 4 — Dark mode hook + +Always implement theme switching (light / dark). Check for an existing theme system first: + +- Search for `useDarkMode`, `useTheme`, `useColorScheme`, or a `ThemeProvider` in `src/` +- If found, wire into it and skip creating a new hook. + +If none exists, create `src/hooks/use-theme-mode.ts` (or extend your existing hook) so the Topbar menu can **set** light or dark explicitly: + +```ts +import { useEffect, useState } from 'react'; + +export type ThemeMode = 'light' | 'dark'; + +export function useThemeMode() { + const [mode, setMode] = useState<ThemeMode>(() => { + const stored = localStorage.getItem('theme'); + if (stored === 'dark' || stored === 'light') return stored; + return window.matchMedia('(prefers-color-scheme: dark)').matches ? 
'dark' : 'light'; + }); + + useEffect(() => { + const isDark = mode === 'dark'; + document.documentElement.classList.toggle('dark', isDark); + localStorage.setItem('theme', isDark ? 'dark' : 'light'); + }, [mode]); + + return { + mode, + isDark: mode === 'dark', + setTheme: (next: ThemeMode) => setMode(next), + }; +} +``` + +Apply the initial class on page load in `main.tsx` / `index.tsx`: + +```ts +const stored = localStorage.getItem('theme'); +const prefersDark = window.matchMedia('(prefers-color-scheme: dark)').matches; +if (stored === 'dark' || (!stored && prefersDark)) { + document.documentElement.classList.add('dark'); +} +``` + +The Topbar **theme** trigger should open a **Menu** whose items call `setTheme('light')` and `setTheme('dark')` and show a **checkmark** on the active row. + +--- + +## Step 5 — Implement the Topbar + +**Always check Storybook for exact prop names before writing code.** The names below are illustrative — verify against the current `@aura/topbar` package. + +```tsx +import { Topbar } from '@aura/topbar'; +import { Breadcrumb, BreadcrumbItem } from '@aura/topbar'; // adjust to actual exports +// App + user Avatar: import from the Aura package / path Storybook documents for Topbar. 
+import { useThemeMode } from '@/hooks/use-theme-mode'; + +export function AppShell({ children }: { children: React.ReactNode }) { + const { mode, setTheme } = useThemeMode(); + + return ( + <> + + } + breadcrumbs={ + + + {/* */} + + } + + // Inline metadata — optional string immediately after breadcrumb, left-aligned only + // breadcrumbMetadata="Updated 3 hours ago" + + // Middle — optional Tabs (routes) OR Segmented control (modes); size small; omit if unused + centerSlot={ + null + // Example Tabs: + // Example Segmented: + } + + // Right strip — fixed order when each is visible: share → notifications → theme → atlas → avatar + // Theme: sun when light, moon when dark; Menu with Light mode / Dark mode + checkmark on active + // Storybook may still call this darkMode or split props differently — map menu choice to setTheme('light'|'dark'). + trailingSlot={null} + systemActions={{ + share: { visible: true }, + notifications: { visible: true }, + darkMode: { + visible: true, + mode, // 'light' | 'dark' — illustrative; use whatever resolvedTheme API Aura exposes + onSelectLight: () => setTheme('light'), + onSelectDark: () => setTheme('dark'), + }, + atlas: { visible: true }, + avatar: { visible: true, src: userPhotoSrc, alt: userName }, + }} + /> +
<main>{children}</main>
+ + ); +} +``` + +**Layout wrapper:** The parent element must allow the Topbar to be full-width and sticky: + +```tsx +
<div className="flex min-h-screen w-full flex-col"> + <AppShell> + {/* page content — primary actions for the current screen live here */} + </AppShell> +</div>
+``` + +--- + +## Additional resources + +- Full Topbar architecture rules: [RULES.md](RULES.md) +- Aura Topbar Storybook: https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-topbar--docs +- Aura Breadcrumb Storybook: https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-breadcrumb--docs +- Aura Button Storybook: https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-button--docs +- Aura colors / dark mode tokens: https://cognitedata.github.io/aura/storybook/?path=/docs/foundations-colors--docs diff --git a/.agents/skills/use-topbar/INTERVIEW.md b/.agents/skills/use-topbar/INTERVIEW.md new file mode 100644 index 0000000..93ec2c0 --- /dev/null +++ b/.agents/skills/use-topbar/INTERVIEW.md @@ -0,0 +1,178 @@ +# Topbar Configuration Interview + +**Complete this interview before writing any implementation code.** Do not skip, shorten, or defer it. If completing it mid-task feels disruptive, pause the task, run the interview, then resume. + +Ask **one question at a time** and wait for the answer. Skip only questions that the pre-flight read (Step 1 in [SKILL.md](SKILL.md)) already answered definitively. 
+ +--- + +## Topbar layout reference + +Use this diagram to orient yourself and the user throughout the interview: + +``` +┌──────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ [App Avatar] [App] > [Object▾] metadata… │ (optional) Tabs or Segmented — sm │ Share Bell Theme Atlas User │ +│ ←── Left: fjord Avatar + breadcrumb + metadata (left-aligned, not centered) ──→ │ ←── Middle ──→ │ ←── Right strip (fixed order) ──→ │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Left section — breadcrumb states:** + +``` +No object open: [App Avatar] My App +Object open: [App Avatar] My App > Root Cause Analysis ▾ +With inline metadata: [App Avatar] My App > Root Cause Analysis ▾ Updated 3 hours ago +``` + +- Metadata always continues **on the left**, immediately after the breadcrumb — it is **never** centered in the Topbar. +- App name clicking navigates to the app's home/root route — but only when an object is open. If no object is open, the app name is not a link. +- Object name is clickable only when it acts as a dropdown trigger (▾). Each breadcrumb segment is an interactive link that navigates to its route. + +**Middle section — Tabs or Segmented control (optional):** + +``` +Tabs (routes): [Overview] [Well analysis] [Settings] +Segmented (modes): [Canvas view] [Code view] +``` + +- **Tabs** — mutually exclusive **page-level** views (routes). +- **Segmented control** — **mode** or layout switching (e.g. canvas vs code) when that fits better than route tabs. +- **Always `size="sm"`** (or Aura's equivalent **small** size for these primitives). +- Omit entirely if the app has no global navigation or only one view. +- **Do not** put app-specific **primary** actions here — those belong **below** the Topbar. 
+ +**Right section — utility strip (fixed order when each control is shown):** + +``` +Share (ghost, sm) Notifications (ghost, sm) Theme (ghost, sm) Atlas (secondary, sm) User Avatar (sm) + optional optional optional optional optional* +``` + +\*Theme and user Avatar are **typically on**; turn off only when the API and product policy allow. + +**Theme control:** + +- **Light mode** → **sun** icon on the trigger; **dark mode** → **moon** icon. +- Clicking opens a **Menu** with **Light mode** and **Dark mode** rows; a **checkmark** shows the current selection; only **one** row is active at a time. + +If Storybook exposes extra right-slot entries (e.g. legacy agent), follow the **current** Aura API and [RULES.md §3.3](RULES.md). + +--- + +## Left section + +**Q1 — Application mark (Avatar)** + +> "At the far left we use a small Aura Avatar in the fjord colorway for the app mark. Does your app already have branding or an image in config, or should we use the default fjord Avatar treatment from Aura?" + +- Prefer assets from Flows/Fusion app config when present. +- Compose with Aura **`Avatar`**, **`size="small"`**, **`fjord`** (exact props from Storybook). + +**Q2 — App name** + +> "What is the name of your application? It will appear as the first breadcrumb and always be visible." + +- If already defined in app config (`displayName`, `name`), apply it and skip. + +**Q3 — App structure and breadcrumbs** + +> "Referring to the breadcrumb states above — which best describes your app: single app name only (no objects), or does the app let users open specific named items like a canvas, report, or document?" + +- **No objects** → only the app name appears in the breadcrumb. The app name is not a link (there is nowhere to navigate back to). +- **Named objects** → app name always visible; object name added as the last segment only when an object is open. If yes: "What do you call these items? (e.g. 
Canvas, Report, Dashboard)" + +**Breadcrumb interactivity rules (non-negotiable):** +- All breadcrumb segments are **interactive links** — they must navigate to their corresponding route, not be plain text. +- When an object is open, clicking the **app name** navigates back to the app's home/root (e.g. the object list or landing page). +- When no object is open, the app name segment is **not** a link (it is the current location). +- Do not render breadcrumbs as static/non-interactive text. + +**Q4 — Object actions dropdown** _(ask only if Q3 identified named objects)_ + +> "When a user has a specific [object type] open, would you like a dropdown menu on its name in the breadcrumb for object-level actions — like rename, duplicate, export, or delete?" + +- This dropdown appears **only on the object name** (the last breadcrumb segment), and **only when an object is currently open**. +- **There is no dropdown on the app name.** If users need app-level settings (e.g. manage permissions, configure defaults), place entry points in the **content area below the Topbar** (or another approved pattern), not inside the object dropdown. +- All actions in the object dropdown must apply **only to the currently open object** — do not mix in app-level or global actions. +- Examples: "Rename this canvas", "Duplicate this report", "Export this document", "Delete this item". +- If yes: "What object-specific actions should appear in the menu?" + +--- + +## Left section — inline metadata + +**Q5 — Inline metadata** _(optional)_ + +> "Would you like a short status string directly after the breadcrumb on the **left** — things like 'Updated 3 hours ago' or 'Read-only'? It stays in the left cluster with the breadcrumb, never centered in the bar." + +- If yes: "What text should appear there?" +- **String only** — no links, icons, or interactive elements. +- Omit entirely if unused — do not add a placeholder. 
+- Typical use: last-modified time, read-only state, a status label tied to the current object or page. + +--- + +## Middle section — navigation + +**Q6 — Global navigation (Tabs or Segmented)** _(optional)_ + +> "Does your app need global navigation in the center of the Topbar — either **Tabs** for mutually exclusive pages/routes, or a **Segmented control** for modes like canvas vs code?" + +- **Tabs** — primary app sections as routes (e.g. Overview → `/overview`, Settings → `/settings`). +- **Segmented control** — switching **views or modes** within the app without changing the top-level route model, when that fits better. +- **Always small** size to match the rest of the Topbar. +- **Never use a sidebar for primary navigation.** If the app needs additional internal navigation beyond this slot, it must live within the content area — not as a sidebar. +- Only include controls that are relevant globally. Page-specific sub-navigation belongs in the content area. +- If yes: "Which pattern (Tabs vs Segmented), what are the labels, and where does each choice lead?" +- **Default:** leave the center empty and keep **primary actions below the Topbar**. + +--- + +## Right section — utility strip + +**Q7 — Primary actions in the Topbar** _(reframe as guidance, not a button inventory)_ + +> "We no longer place app-specific primary CTAs in the Topbar — those should live in the content area below it. Are you comfortable leaving the Topbar without '+ Create' / 'Export' style buttons, or is there a rare, truly app-wide control you still need next to the utility icons?" + +- **Default:** no extra action buttons in the Topbar shell. +- If something is proposed, apply the test: does it apply to the **entire app on every screen**? If not, it belongs **below** the Topbar. +- When in doubt, omit it from the Topbar. 
+ +**Q8 — Right-strip controls** _(ask each sub-question separately)_ + +> "The right side is a fixed-order strip; you can turn each control on or off depending on capabilities. I'll ask about each one." + +Ask each separately (in this order for consistency with the bar): + +- **Share:** "Do users need share? Should the Share icon (ghost, small) appear?" +- **Notifications:** "Does this app surface notifications? Should the bell appear?" +- **Theme menu:** "Should users switch light/dark theme from the Topbar (sun/moon trigger + menu with checkmarked Light/Dark rows)?" +- **Atlas:** "Does this app use Atlas? Should the secondary Atlas button (leading icon + label) appear?" +- **User Avatar:** "Should the signed-in user Avatar appear on the far right?" + +Fixed order when visible (left → right): **Share → Notifications → Theme → Atlas → user Avatar**. + +Apps **must not** reorder these items; styling follows Aura. If Aura documents additional optional controls, align with Storybook. + +--- + +## Omissions + +**Q9 — Excluded routes** + +> "Are there any screens where the top bar should NOT appear? Common exceptions: login/auth screens, fullscreen flows, onboarding. Default is to show it everywhere." + +--- + +## Closing + +Before implementing, summarize the configuration in five bullets or fewer: + +- left (fjord `Avatar` + breadcrumb pattern + inline metadata if any) +- middle (Tabs, Segmented control, or none) +- primary actions (confirm they live **below** the Topbar) +- right strip (which of Share / Notifications / Theme / Atlas / Avatar are on) +- excluded routes + +Then proceed to install and implement (see [IMPLEMENTATION.md](IMPLEMENTATION.md)). 
diff --git a/.agents/skills/use-topbar/RULES.md b/.agents/skills/use-topbar/RULES.md new file mode 100644 index 0000000..de1a921 --- /dev/null +++ b/.agents/skills/use-topbar/RULES.md @@ -0,0 +1,205 @@ +# Topbar Rules — Full Reference (Aura) + +Detailed architecture and usage rules for the Topbar across Flows and Fusion applications. Read this file when you need the full rule set beyond the quick reference in `SKILL.md`. + +**Storybook:** https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-topbar--docs +**Install:** `pnpm dlx shadcn@latest add @aura/topbar` — registry component only; do not use `npm install` / `pnpm add` / `yarn add` + +--- + +## 1. Non-negotiables + +1. Every authenticated app view must render exactly **one** Topbar, composed only from Aura Topbar primitives and `@aura/topbar` documented APIs. +2. Do **not** implement a custom top bar, duplicate header, or alternate app chrome that replaces or shadows the Topbar. +3. Do **not** render **multiple** Topbars on a single page (including embedded views or nested frames). +4. Styling and behavior must follow Aura: **token-level theming only**. No ad-hoc overrides that break Aura semantics. + +--- + +## 2. Where Topbar is omitted + +The Topbar **may** be omitted only on: + +- Login / auth-only screens +- Fullscreen modal or fullscreen flows that hide global chrome by design +- Other explicit shell exceptions documented by the platform team + +If unsure whether a route qualifies, **default to including the Topbar**. + +--- + +## 3. Layout contract (three regions) + +### 3.1 Left section (required) + +Order (left → right), all in one **left-aligned cluster** (metadata is never centered in the bar): + +1. **Application mark** — Aura **`Avatar`**, **`size="small"`**, **`fjord`** colorway. Use the app image or branding supplied by app config / Aura Topbar API when available. Confirm props and slots in Storybook. +2. **Breadcrumbs** — Aura Breadcrumb component. 
All segments are **interactive links** that navigate to their corresponding route. Do not render breadcrumbs as static/non-interactive text. +3. **Inline metadata** _(optional)_ — immediately to the right of the breadcrumb, still in the left cluster (e.g. "Updated 3 hours ago", "Read-only"). String only; no links, icons, or interactive elements. Omit entirely when unused. + +Breadcrumb rules: + +- Application name is always the **first** segment. +- When an object is open, clicking the app name navigates back to the app's home/root route. +- When no object is open, the app name is **not** a link (it is the current location). +- Current object name appears in the **last** segment **only** when a specific object is open; otherwise omit. + +Object dropdown: + +- The object name may act as the **sole** trigger for an object-level dropdown **only when an object is open**. +- **No dropdown on the app name.** If app-level settings are needed, surface them in the **content area below the Topbar** (or another approved shell pattern), not in the object dropdown. +- All dropdown actions must apply **only to the currently open object** (rename, duplicate, export, delete, etc.). Do not mix in app-level or global actions. +- Use Aura components for the trigger, menu, and items only. + +### 3.2 Middle section — global navigation (optional) + +- **Optional slot** for **global** navigation only: controls that apply across the app shell, not page-local toolbars. +- **Tabs** — preferred for **mutually exclusive page-level** views (routes): one tab = one primary destination. +- **Segmented control** — alternative when the user switches **modes or layouts** within the app (e.g. canvas view vs code view) rather than top-level routes. +- **Size** — always **small** to match other Topbar controls. +- **Do not** place app-specific **primary** actions or a **primary CTA** in the Topbar. Those belong in the **content region below the Topbar** unless Aura documents a dedicated exception. 
+- **Never use a sidebar** for primary app navigation. If additional sub-navigation is needed beyond this slot, it must live within the content area. +- If the app has no global navigation or only one view, leave this section empty entirely. + +### 3.3 Right section — utility strip (component API, app visibility) + +The right area is defined by `@aura/topbar` and exposed as **one ordered strip**. Apps choose **which controls are visible** based on capabilities and product policy; **order is fixed** when a control is shown. + +Fixed order (left → right within the strip): + +1. **Share** — icon button, **`size="sm"`**, **`variant` / styling: ghost**. +2. **Notifications** — bell icon button, **`size="sm"`**, **ghost**. +3. **Theme** — icon button, **`size="sm"`**, **ghost**: + - **Light mode** → show **sun** icon; **dark mode** → show **moon** icon. + - On click: open an Aura **Menu** with two items, **Light mode** and **Dark mode**. Exactly **one** is selected at a time; a **checkmark** indicates the current theme. Choosing an item sets that theme (radio-like behavior, not a blind toggle). +4. **Atlas** — **`size="sm"`**, **secondary** button, **leading icon** + label **"Atlas"**. +5. **User** — **`Avatar`**, **`size="small"`**. + +Rules: + +- Visibility of Share, Notifications, Theme, Atlas, and user Avatar is **configurable** per app where the Aura API allows — enable only what the product needs. +- Theme and user Avatar are **typically always on** for authenticated apps; hide them only when the Aura/shell API and product policy explicitly allow. +- **Do not reorder** items; **do not** override Aura styling or behavior for these controls. +- If Storybook documents additional entries (e.g. a separate agent affordance), follow the **current** `@aura/topbar` API — this document lists the canonical Flows/Fusion strip above. + +--- + +## 4. Sizing + +All Topbar interactive elements use **small** size unless Aura Topbar documentation explicitly prescribes otherwise. 
(Avatar application mark and user Avatar use **`size="small"`** as in Aura docs.) + +--- + +## 5. Responsive behavior + +- Follow Aura Topbar default responsive behavior. +- If an app fills the center slot or right strip densely, the app must handle overflow (prioritization, fewer items, progressive disclosure). This is an **app responsibility** when content is dense. + +--- + +## 6. Accessibility & keyboard + +- Tab / focus order follows **visual** order: **left section** → **middle** (if any) → **right strip** in the order defined in §3.3. +- No extra skip link or focus management requirements beyond Aura defaults at this time. + +--- + +## 7. Loading & long labels + +- Truncation, ellipsis, tooltips, loading states: follow Aura default behavior for Topbar and related primitives. Do not invent one-off patterns. + +--- + +## 8. Configuration model + +| Concern | Who controls | Notes | +|---------|-------------|-------| +| Topbar presence & single instance | App + shell | One per page; no duplicates | +| Left: application mark | App (via Aura API) | `Avatar`, `size="small"`, `fjord` colorway | +| Left: breadcrumbs | App (via Aura API) | Interactive links; app name navigates to home only when object is open | +| Left: inline metadata | App | Optional plain string **after breadcrumb**, left cluster only; never centered; no interactive elements | +| Object dropdown | App (items), platform/Aura (presentation) | Object name only, object-scoped actions only; no app-level dropdown | +| Middle: Tabs or Segmented control | App | Optional; **small**; global routes (Tabs) or global modes (Segmented); never a sidebar; no primary CTAs here | +| Right: Share, Notifications, Theme menu, Atlas, user Avatar | App + Aura API | Fixed **order** when shown; each visibility toggled per capability/policy; theme uses sun/moon icons and menu with checkmarked selection | +| Theming | Aura tokens only | No arbitrary CSS | + +**Open items (to be finalized):** + +- System-level configuration 
matrix (tenant vs build-time vs runtime) per control. +- Telemetry/analytics for notifications, share, Atlas. +- Shell responsibility details (single mount point vs per-app composition). +- Lint rules and automated checks for Topbar compliance. +- Automated binding of app config / Fusion config to pre-fill Atlas answers. + +--- + +## 9. Configuration interview protocol + +The full interview (Q1–Q9 through the closing summary) is defined in [INTERVIEW.md](INTERVIEW.md); `SKILL.md` Step 1 covers the pre-flight read and `SKILL.md` Step 2 mandates the interview. Run it before implementing or changing any Topbar wiring. + +--- + +## 10. Composition & API guidance + +- Prefer the Aura Topbar API as documented in Storybook. Use documented props and slots for each region. +- Structured data (names, breadcrumb items, menu definitions) is easier to validate than arbitrary JSX. +- Where Aura exposes slots, only place Aura-approved components inside each slot. +- Do **not** bypass the Topbar by injecting a second header row or fake breadcrumbs outside the Topbar. +- Always fetch actual component names, props, and slot names from the current Aura package and Storybook before writing code — the pseudo-code in this document is illustrative only. + +--- + +## 11. Do / Don't + +**Do** + +- Use `@aura/topbar` and compose Topbar exactly as Aura documents. +- Keep **one** Topbar per page. +- Use **`Avatar`**, **`size="small"`**, **`fjord`** for the application mark at the far left. +- Keep inline metadata in the **left cluster**, **after** the breadcrumb — **never** centered in the bar. +- Make breadcrumb segments interactive links that navigate to their routes. +- Add the app name link to home/root only when an object is currently open. +- Put object dropdown only on the object name; scope all actions to that object only. +- Use the middle section for **Tabs** (page views) or **Segmented control** (mode views), **size small**, when global navigation is needed. +- Put **app-specific primary actions** in the **content area below the Topbar**, not in the Topbar. 
+- Use **small** size for Topbar controls; use **ghost** icon buttons for Share, Notifications, and Theme; **secondary** for Atlas. +- Implement theme as **sun** in light mode, **moon** in dark mode, with a **menu** and **checkmark** on the active **Light mode** / **Dark mode** row. +- Respect fixed **order** and Aura styling for the right strip. + +**Don't** + +- Don't build a custom top bar or duplicate global chrome. +- Don't use multiple Topbars or a second header in embedded views. +- Don't use a sidebar for navigation — ever. Use middle Tabs/Segmented control or content-area navigation instead. +- Don't put page-specific or app-primary CTAs in the Topbar — they belong below it. +- Don't add a dropdown to the app name — use patterns **below the Topbar** for app-level settings. +- Don't mix app-level and object-level actions in the object dropdown. +- Don't render breadcrumbs as static/non-interactive text. +- Don't use a filled Tabler icon stack in place of the **fjord Avatar** application mark unless Aura docs explicitly allow an alternative. +- Don't override right-strip appearance or behavior outside Aura options. +- Don't use non-token styling on Topbar or its children. +- Don't use `@cognite/dune-industrial-components/navigation` (deprecated — use `@aura/topbar`). + +--- + +## 12. Enforcement + +1. Verify `@aura/topbar` is the only top-level app chrome; reject any parallel header implementation. +2. Check left section: **`Avatar` (small, fjord)** → breadcrumbs (interactive links) → optional inline metadata (string only, left-aligned after breadcrumb). +3. Check breadcrumb behavior: app name links to home only when object is open; object dropdown (if any) is on the object name only and contains only object-scoped actions. +4. Check middle section: **Tabs** or **Segmented control** at **small** if present; no sidebar; no primary CTA in the bar. +5. 
Check app actions: primary / app-specific actions live **below** the Topbar, not in the middle or as an extra ad-hoc header row. +6. Check right strip: when present, order is Share → Notifications → Theme → Atlas → user Avatar; ghost icon buttons for Share, Notifications, Theme; theme shows **sun** in light / **moon** in dark and a **menu** with checkmarked **Light mode** / **Dark mode**; Atlas **secondary** with leading icon + "Atlas"; **`Avatar` small** for user. +7. Confirm the configuration interview (`SKILL.md §2`) was completed for new Topbar work. + +--- + +## 13. Revision history intent + +Update `RULES.md` when: + +- Aura Topbar API or package name changes. +- Platform finalizes configuration, telemetry, shell mount, or lint rules. +- New controls are added to the right strip or center slot contract changes. +- Flows/Fusion config paths or fields used for pre-flight are standardized. diff --git a/.agents/skills/use-topbar/SKILL.md b/.agents/skills/use-topbar/SKILL.md new file mode 100644 index 0000000..9d14556 --- /dev/null +++ b/.agents/skills/use-topbar/SKILL.md @@ -0,0 +1,91 @@ +--- +name: use-topbar +description: >- + Wires the Aura Topbar (@aura/topbar) into Flows/Fusion apps as the single top + navigation bar — breadcrumbs, optional center Tabs or Segmented control, right + utility strip (Share, notifications, theme menu, Atlas, user Avatar), and + light/dark theme switching. Use when adding a topbar, app header, navigation + bar, breadcrumbs, or theme switcher to a Flows or Fusion app, or when + scaffolding a new Flows/Fusion application. +allowed-tools: Read, Glob, Grep, Write, Edit, Bash +--- + +# Topbar + +`@aura/topbar` is the single, compliant top navigation bar for every authenticated Flows/Fusion app. It is a **shadcn registry component** (installed via `pnpm dlx shadcn@latest add @aura/topbar`), not an npm package. 
+ +- **Storybook:** https://cognitedata.github.io/aura/storybook/?path=/docs/primitives-topbar--docs +- **Full rules:** [RULES.md](RULES.md) · **Interview:** [INTERVIEW.md](INTERVIEW.md) · **Install + code:** [IMPLEMENTATION.md](IMPLEMENTATION.md) + +> **Non-negotiables:** Exactly one Topbar per page, composed only from `@aura/topbar` primitives, installed via the shadcn CLI only. If installation fails, surface the blocker — never build a custom header fallback. See [RULES.md §1, §11, §12](RULES.md). + +--- + +## Step 0 — Auto-prompt hook setup + +Ensure the topbar interview is triggered at the start of every future Cursor session in this project. + +1. Check whether `.cursor/hooks.json` exists in the project root. +2. If it does not exist, create it: + +```json +{ + "version": 1, + "hooks": { + "sessionStart": [ + { + "type": "prompt", + "prompt": "This is a Flows/Fusion application. Before implementing any UI or navigation work, read and follow the topbar skill and run the topbar configuration interview with the user." + } + ] + } +} +``` + +3. If `.cursor/hooks.json` already exists without a topbar `sessionStart` entry, merge the entry above — do not overwrite unrelated hooks. +4. Confirm to the user, then continue. + +--- + +## Step 1 — Pre-flight: read the app + +Before asking any questions, read: + +- `package.json` — package manager, existing UI deps, existing `@aura/topbar` +- `src/App.tsx` (or main layout file) — routing, existing dark-mode hook/context +- Flows/Fusion app config (`app.config.ts`, `fusion.config.ts`, manifest) — `displayName`, `name`, app mark / branding + +Apply any found defaults and skip the corresponding interview questions. State what was inferred. + +--- + +## Step 2 — Configuration interview (mandatory) + +Run the full Q1–Q9 interview in [INTERVIEW.md](INTERVIEW.md) before writing any implementation code. Ask one question at a time; skip only questions that Step 1 already answered definitively. 
+ +--- + +## Steps 3–5 — Install, theme hook, implement + +See [IMPLEMENTATION.md](IMPLEMENTATION.md) for: + +- Installing `@aura/topbar` via the shadcn CLI (mandatory, no workarounds) +- `useThemeMode` hook wiring for light/dark switching +- Topbar component composition example and layout wrapper + +--- + +## Step 6 — Compliance checklist + +Verify before finishing (see [RULES.md §12](RULES.md) for the full enforcement checklist): + +- [ ] Exactly **one** Topbar per page +- [ ] Left: `Avatar` application mark (**small**, **fjord**) → app name breadcrumb → object name breadcrumb (only when an object is open) +- [ ] Breadcrumb segments are interactive links — not static text +- [ ] Object dropdown (if present) only on the object name segment; actions are object-scoped only +- [ ] Inline metadata (if present) is a plain string, left-aligned after the breadcrumb — not centered +- [ ] Middle: **Tabs** or **Segmented control** at **small** if present; no sidebar; no primary CTA in the Topbar +- [ ] **Primary / app-specific actions** live in the content area **below** the Topbar +- [ ] Right strip order when used: **Share → Notifications → Theme → Atlas → user Avatar**; Share/Notifications/Theme as **ghost small**, Atlas as **secondary small** with leading icon + "Atlas" +- [ ] Theme: **sun** in light mode, **moon** in dark mode; Menu with Light/Dark rows + checkmark on active; wired to `document.documentElement` +- [ ] `tailwind.config` has `darkMode: 'class'` diff --git a/.claude/commands/speckit.analyze.md b/.claude/commands/speckit.analyze.md new file mode 100644 index 0000000..1237e10 --- /dev/null +++ b/.claude/commands/speckit.analyze.md @@ -0,0 +1,249 @@ +--- +description: Perform a non-destructive cross-artifact consistency and quality analysis across spec.md, plan.md, and tasks.md after task generation. +--- + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). 
+ +## Pre-Execution Checks + +**Check for extension hooks (before analysis)**: +- Check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.before_analyze` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. Treat hooks without an `enabled` field as enabled by default. +- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Pre-Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Pre-Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + + Wait for the result of the hook command before proceeding to the Goal. + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently + +## Goal + +Identify inconsistencies, duplications, ambiguities, and underspecified items across the three core artifacts (`spec.md`, `plan.md`, `tasks.md`) before implementation. This command MUST run only after `/speckit.tasks` has successfully produced a complete `tasks.md`. + +## Operating Constraints + +**STRICTLY READ-ONLY**: Do **not** modify any files. Output a structured analysis report. Offer an optional remediation plan (user must explicitly approve before any follow-up editing commands would be invoked manually). 
+ +**Constitution Authority**: The project constitution (`.specify/memory/constitution.md`) is **non-negotiable** within this analysis scope. Constitution conflicts are automatically CRITICAL and require adjustment of the spec, plan, or tasks—not dilution, reinterpretation, or silent ignoring of the principle. If a principle itself needs to change, that must occur in a separate, explicit constitution update outside `/speckit.analyze`. + +## Execution Steps + +### 1. Initialize Analysis Context + +Run `.specify/scripts/bash/check-prerequisites.sh --json --require-tasks --include-tasks` once from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS. Derive absolute paths: + +- SPEC = FEATURE_DIR/spec.md +- PLAN = FEATURE_DIR/plan.md +- TASKS = FEATURE_DIR/tasks.md + +Abort with an error message if any required file is missing (instruct the user to run missing prerequisite command). +For single quotes in args like "I'm Groot", use escape syntax: e.g 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +### 2. Load Artifacts (Progressive Disclosure) + +Load only the minimal necessary context from each artifact: + +**From spec.md:** + +- Overview/Context +- Functional Requirements +- Success Criteria (measurable outcomes — e.g., performance, security, availability, user success, business impact) +- User Stories +- Edge Cases (if present) + +**From plan.md:** + +- Architecture/stack choices +- Data Model references +- Phases +- Technical constraints + +**From tasks.md:** + +- Task IDs +- Descriptions +- Phase grouping +- Parallel markers [P] +- Referenced file paths + +**From constitution:** + +- Load `.specify/memory/constitution.md` for principle validation + +### 3. Build Semantic Models + +Create internal representations (do not include raw artifacts in output): + +- **Requirements inventory**: For each Functional Requirement (FR-###) and Success Criterion (SC-###), record a stable key. 
Use the explicit FR-/SC- identifier as the primary key when present, and optionally also derive an imperative-phrase slug for readability (e.g., "User can upload file" → `user-can-upload-file`). Include only Success Criteria items that require buildable work (e.g., load-testing infrastructure, security audit tooling), and exclude post-launch outcome metrics and business KPIs (e.g., "Reduce support tickets by 50%"). +- **User story/action inventory**: Discrete user actions with acceptance criteria +- **Task coverage mapping**: Map each task to one or more requirements or stories (inference by keyword / explicit reference patterns like IDs or key phrases) +- **Constitution rule set**: Extract principle names and MUST/SHOULD normative statements + +### 4. Detection Passes (Token-Efficient Analysis) + +Focus on high-signal findings. Limit to 50 findings total; aggregate remainder in overflow summary. + +#### A. Duplication Detection + +- Identify near-duplicate requirements +- Mark lower-quality phrasing for consolidation + +#### B. Ambiguity Detection + +- Flag vague adjectives (fast, scalable, secure, intuitive, robust) lacking measurable criteria +- Flag unresolved placeholders (TODO, TKTK, ???, `<placeholder>`, etc.) + +#### C. Underspecification + +- Requirements with verbs but missing object or measurable outcome +- User stories missing acceptance criteria alignment +- Tasks referencing files or components not defined in spec/plan + +#### D. Constitution Alignment + +- Any requirement or plan element conflicting with a MUST principle +- Missing mandated sections or quality gates from constitution + +#### E. Coverage Gaps + +- Requirements with zero associated tasks +- Tasks with no mapped requirement/story +- Success Criteria requiring buildable work (performance, security, availability) not reflected in tasks + +#### F.
Inconsistency + +- Terminology drift (same concept named differently across files) +- Data entities referenced in plan but absent in spec (or vice versa) +- Task ordering contradictions (e.g., integration tasks before foundational setup tasks without dependency note) +- Conflicting requirements (e.g., one requires Next.js while other specifies Vue) + +### 5. Severity Assignment + +Use this heuristic to prioritize findings: + +- **CRITICAL**: Violates constitution MUST, missing core spec artifact, or requirement with zero coverage that blocks baseline functionality +- **HIGH**: Duplicate or conflicting requirement, ambiguous security/performance attribute, untestable acceptance criterion +- **MEDIUM**: Terminology drift, missing non-functional task coverage, underspecified edge case +- **LOW**: Style/wording improvements, minor redundancy not affecting execution order + +### 6. Produce Compact Analysis Report + +Output a Markdown report (no file writes) with the following structure: + +## Specification Analysis Report + +| ID | Category | Severity | Location(s) | Summary | Recommendation | +|----|----------|----------|-------------|---------|----------------| +| A1 | Duplication | HIGH | spec.md:L120-134 | Two similar requirements ... | Merge phrasing; keep clearer version | + +(Add one row per finding; generate stable IDs prefixed by category initial.) + +**Coverage Summary Table:** + +| Requirement Key | Has Task? | Task IDs | Notes | +|-----------------|-----------|----------|-------| + +**Constitution Alignment Issues:** (if any) + +**Unmapped Tasks:** (if any) + +**Metrics:** + +- Total Requirements +- Total Tasks +- Coverage % (requirements with >=1 task) +- Ambiguity Count +- Duplication Count +- Critical Issues Count + +### 7. 
Provide Next Actions + +At end of report, output a concise Next Actions block: + +- If CRITICAL issues exist: Recommend resolving before `/speckit.implement` +- If only LOW/MEDIUM: User may proceed, but provide improvement suggestions +- Provide explicit command suggestions: e.g., "Run /speckit.specify with refinement", "Run /speckit.plan to adjust architecture", "Manually edit tasks.md to add coverage for 'performance-metrics'" + +### 8. Offer Remediation + +Ask the user: "Would you like me to suggest concrete remediation edits for the top N issues?" (Do NOT apply them automatically.) + +### 9. Check for extension hooks + +After reporting, check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.after_analyze` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. Treat hooks without an `enabled` field as enabled by default. 
+- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently + +## Operating Principles + +### Context Efficiency + +- **Minimal high-signal tokens**: Focus on actionable findings, not exhaustive documentation +- **Progressive disclosure**: Load artifacts incrementally; don't dump all content into analysis +- **Token-efficient output**: Limit findings table to 50 rows; summarize overflow +- **Deterministic results**: Rerunning without changes should produce consistent IDs and counts + +### Analysis Guidelines + +- **NEVER modify files** (this is read-only analysis) +- **NEVER hallucinate missing sections** (if absent, report them accurately) +- **Prioritize constitution violations** (these are always CRITICAL) +- **Use examples over exhaustive rules** (cite specific instances, not generic patterns) +- **Report zero issues gracefully** (emit success report with coverage statistics) + +## Context + +$ARGUMENTS diff --git a/.claude/commands/speckit.checklist.md b/.claude/commands/speckit.checklist.md new file mode 100644 index 0000000..93ec785 --- /dev/null +++ b/.claude/commands/speckit.checklist.md @@ -0,0 +1,361 @@ +--- +description: Generate a custom checklist for the 
current feature based on user requirements. +--- + +## Checklist Purpose: "Unit Tests for English" + +**CRITICAL CONCEPT**: Checklists are **UNIT TESTS FOR REQUIREMENTS WRITING** - they validate the quality, clarity, and completeness of requirements in a given domain. + +**NOT for verification/testing**: + +- ❌ NOT "Verify the button clicks correctly" +- ❌ NOT "Test error handling works" +- ❌ NOT "Confirm the API returns 200" +- ❌ NOT checking if code/implementation matches the spec + +**FOR requirements quality validation**: + +- ✅ "Are visual hierarchy requirements defined for all card types?" (completeness) +- ✅ "Is 'prominent display' quantified with specific sizing/positioning?" (clarity) +- ✅ "Are hover state requirements consistent across all interactive elements?" (consistency) +- ✅ "Are accessibility requirements defined for keyboard navigation?" (coverage) +- ✅ "Does the spec define what happens when logo image fails to load?" (edge cases) + +**Metaphor**: If your spec is code written in English, the checklist is its unit test suite. You're testing whether the requirements are well-written, complete, unambiguous, and ready for implementation - NOT whether the implementation works. + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Pre-Execution Checks + +**Check for extension hooks (before checklist generation)**: +- Check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.before_checklist` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. Treat hooks without an `enabled` field as enabled by default. 
+- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Pre-Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Pre-Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + + Wait for the result of the hook command before proceeding to the Execution Steps. + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently + +## Execution Steps + +1. **Setup**: Run `.specify/scripts/bash/check-prerequisites.sh --json` from repo root and parse JSON for FEATURE_DIR and AVAILABLE_DOCS list. + - All file paths must be absolute. + - For single quotes in args like "I'm Groot", use escape syntax: e.g 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +2. **Clarify intent (dynamic)**: Derive up to THREE initial contextual clarifying questions (no pre-baked catalog). They MUST: + - Be generated from the user's phrasing + extracted signals from spec/plan/tasks + - Only ask about information that materially changes checklist content + - Be skipped individually if already unambiguous in `$ARGUMENTS` + - Prefer precision over breadth + + Generation algorithm: + 1. Extract signals: feature domain keywords (e.g., auth, latency, UX, API), risk indicators ("critical", "must", "compliance"), stakeholder hints ("QA", "review", "security team"), and explicit deliverables ("a11y", "rollback", "contracts"). + 2. 
Cluster signals into candidate focus areas (max 4) ranked by relevance. + 3. Identify probable audience & timing (author, reviewer, QA, release) if not explicit. + 4. Detect missing dimensions: scope breadth, depth/rigor, risk emphasis, exclusion boundaries, measurable acceptance criteria. + 5. Formulate questions chosen from these archetypes: + - Scope refinement (e.g., "Should this include integration touchpoints with X and Y or stay limited to local module correctness?") + - Risk prioritization (e.g., "Which of these potential risk areas should receive mandatory gating checks?") + - Depth calibration (e.g., "Is this a lightweight pre-commit sanity list or a formal release gate?") + - Audience framing (e.g., "Will this be used by the author only or peers during PR review?") + - Boundary exclusion (e.g., "Should we explicitly exclude performance tuning items this round?") + - Scenario class gap (e.g., "No recovery flows detected—are rollback / partial failure paths in scope?") + + Question formatting rules: + - If presenting options, generate a compact table with columns: Option | Candidate | Why It Matters + - Limit to A–E options maximum; omit table if a free-form answer is clearer + - Never ask the user to restate what they already said + - Avoid speculative categories (no hallucination). If uncertain, ask explicitly: "Confirm whether X belongs in scope." + + Defaults when interaction impossible: + - Depth: Standard + - Audience: Reviewer (PR) if code-related; Author otherwise + - Focus: Top 2 relevance clusters + + Output the questions (label Q1/Q2/Q3). After answers: if ≥2 scenario classes (Alternate / Exception / Recovery / Non-Functional domain) remain unclear, you MAY ask up to TWO more targeted follow‑ups (Q4/Q5) with a one-line justification each (e.g., "Unresolved recovery path risk"). Do not exceed five total questions. Skip escalation if user explicitly declines more. + +3. 
**Understand user request**: Combine `$ARGUMENTS` + clarifying answers: + - Derive checklist theme (e.g., security, review, deploy, ux) + - Consolidate explicit must-have items mentioned by user + - Map focus selections to category scaffolding + - Infer any missing context from spec/plan/tasks (do NOT hallucinate) + +4. **Load feature context**: Read from FEATURE_DIR: + - spec.md: Feature requirements and scope + - plan.md (if exists): Technical details, dependencies + - tasks.md (if exists): Implementation tasks + + **Context Loading Strategy**: + - Load only necessary portions relevant to active focus areas (avoid full-file dumping) + - Prefer summarizing long sections into concise scenario/requirement bullets + - Use progressive disclosure: add follow-on retrieval only if gaps detected + - If source docs are large, generate interim summary items instead of embedding raw text + +5. **Generate checklist** - Create "Unit Tests for Requirements": + - Create `FEATURE_DIR/checklists/` directory if it doesn't exist + - Generate unique checklist filename: + - Use short, descriptive name based on domain (e.g., `ux.md`, `api.md`, `security.md`) + - Format: `[domain].md` + - File handling behavior: + - If file does NOT exist: Create new file and number items starting from CHK001 + - If file exists: Append new items to existing file, continuing from the last CHK ID (e.g., if last item is CHK015, start new items at CHK016) + - Never delete or replace existing checklist content - always preserve and append + + **CORE PRINCIPLE - Test the Requirements, Not the Implementation**: + Every checklist item MUST evaluate the REQUIREMENTS THEMSELVES for: + - **Completeness**: Are all necessary requirements present? + - **Clarity**: Are requirements unambiguous and specific? + - **Consistency**: Do requirements align with each other? + - **Measurability**: Can requirements be objectively verified? + - **Coverage**: Are all scenarios/edge cases addressed? 
+ + **Category Structure** - Group items by requirement quality dimensions: + - **Requirement Completeness** (Are all necessary requirements documented?) + - **Requirement Clarity** (Are requirements specific and unambiguous?) + - **Requirement Consistency** (Do requirements align without conflicts?) + - **Acceptance Criteria Quality** (Are success criteria measurable?) + - **Scenario Coverage** (Are all flows/cases addressed?) + - **Edge Case Coverage** (Are boundary conditions defined?) + - **Non-Functional Requirements** (Performance, Security, Accessibility, etc. - are they specified?) + - **Dependencies & Assumptions** (Are they documented and validated?) + - **Ambiguities & Conflicts** (What needs clarification?) + + **HOW TO WRITE CHECKLIST ITEMS - "Unit Tests for English"**: + + ❌ **WRONG** (Testing implementation): + - "Verify landing page displays 3 episode cards" + - "Test hover states work on desktop" + - "Confirm logo click navigates home" + + ✅ **CORRECT** (Testing requirements quality): + - "Are the exact number and layout of featured episodes specified?" [Completeness] + - "Is 'prominent display' quantified with specific sizing/positioning?" [Clarity] + - "Are hover state requirements consistent across all interactive elements?" [Consistency] + - "Are keyboard navigation requirements defined for all interactive UI?" [Coverage] + - "Is the fallback behavior specified when logo image fails to load?" [Edge Cases] + - "Are loading states defined for asynchronous episode data?" [Completeness] + - "Does the spec define visual hierarchy for competing UI elements?" [Clarity] + + **ITEM STRUCTURE**: + Each item should follow this pattern: + - Question format asking about requirement quality + - Focus on what's WRITTEN (or not written) in the spec/plan + - Include quality dimension in brackets [Completeness/Clarity/Consistency/etc.] 
+ - Reference spec section `[Spec §X.Y]` when checking existing requirements + - Use `[Gap]` marker when checking for missing requirements + + **EXAMPLES BY QUALITY DIMENSION**: + + Completeness: + - "Are error handling requirements defined for all API failure modes? [Gap]" + - "Are accessibility requirements specified for all interactive elements? [Completeness]" + - "Are mobile breakpoint requirements defined for responsive layouts? [Gap]" + + Clarity: + - "Is 'fast loading' quantified with specific timing thresholds? [Clarity, Spec §NFR-2]" + - "Are 'related episodes' selection criteria explicitly defined? [Clarity, Spec §FR-5]" + - "Is 'prominent' defined with measurable visual properties? [Ambiguity, Spec §FR-4]" + + Consistency: + - "Do navigation requirements align across all pages? [Consistency, Spec §FR-10]" + - "Are card component requirements consistent between landing and detail pages? [Consistency]" + + Coverage: + - "Are requirements defined for zero-state scenarios (no episodes)? [Coverage, Edge Case]" + - "Are concurrent user interaction scenarios addressed? [Coverage, Gap]" + - "Are requirements specified for partial data loading failures? [Coverage, Exception Flow]" + + Measurability: + - "Are visual hierarchy requirements measurable/testable? [Acceptance Criteria, Spec §FR-1]" + - "Can 'balanced visual weight' be objectively verified? [Measurability, Spec §FR-2]" + + **Scenario Classification & Coverage** (Requirements Quality Focus): + - Check if requirements exist for: Primary, Alternate, Exception/Error, Recovery, Non-Functional scenarios + - For each scenario class, ask: "Are [scenario type] requirements complete, clear, and consistent?" + - If scenario class missing: "Are [scenario type] requirements intentionally excluded or missing? [Gap]" + - Include resilience/rollback when state mutation occurs: "Are rollback requirements defined for migration failures? 
[Gap]" + + **Traceability Requirements**: + - MINIMUM: ≥80% of items MUST include at least one traceability reference + - Each item should reference: spec section `[Spec §X.Y]`, or use markers: `[Gap]`, `[Ambiguity]`, `[Conflict]`, `[Assumption]` + - If no ID system exists: "Is a requirement & acceptance criteria ID scheme established? [Traceability]" + + **Surface & Resolve Issues** (Requirements Quality Problems): + Ask questions about the requirements themselves: + - Ambiguities: "Is the term 'fast' quantified with specific metrics? [Ambiguity, Spec §NFR-1]" + - Conflicts: "Do navigation requirements conflict between §FR-10 and §FR-10a? [Conflict]" + - Assumptions: "Is the assumption of 'always available podcast API' validated? [Assumption]" + - Dependencies: "Are external podcast API requirements documented? [Dependency, Gap]" + - Missing definitions: "Is 'visual hierarchy' defined with measurable criteria? [Gap]" + + **Content Consolidation**: + - Soft cap: If raw candidate items > 40, prioritize by risk/impact + - Merge near-duplicates checking the same requirement aspect + - If >5 low-impact edge cases, create one item: "Are edge cases X, Y, Z addressed in requirements? [Coverage]" + + **🚫 ABSOLUTELY PROHIBITED** - These make it an implementation test, not a requirements test: + - ❌ Any item starting with "Verify", "Test", "Confirm", "Check" + implementation behavior + - ❌ References to code execution, user actions, system behavior + - ❌ "Displays correctly", "works properly", "functions as expected" + - ❌ "Click", "navigate", "render", "load", "execute" + - ❌ Test cases, test plans, QA procedures + - ❌ Implementation details (frameworks, APIs, algorithms) + + **✅ REQUIRED PATTERNS** - These test requirements quality: + - ✅ "Are [requirement type] defined/specified/documented for [scenario]?" + - ✅ "Is [vague term] quantified/clarified with specific criteria?" + - ✅ "Are requirements consistent between [section A] and [section B]?" 
+ - ✅ "Can [requirement] be objectively measured/verified?" + - ✅ "Are [edge cases/scenarios] addressed in requirements?" + - ✅ "Does the spec define [missing aspect]?" + +6. **Structure Reference**: Generate the checklist following the canonical template in `.specify/templates/checklist-template.md` for title, meta section, category headings, and ID formatting. If template is unavailable, use: H1 title, purpose/created meta lines, `##` category sections containing `- [ ] CHK### ` lines with globally incrementing IDs starting at CHK001. + +7. **Report**: Output full path to checklist file, item count, and summarize whether the run created a new file or appended to an existing one. Summarize: + - Focus areas selected + - Depth level + - Actor/timing + - Any explicit user-specified must-have items incorporated + +**Important**: Each `/speckit.checklist` command invocation uses a short, descriptive checklist filename and either creates a new file or appends to an existing one. This allows: + +- Multiple checklists of different types (e.g., `ux.md`, `test.md`, `security.md`) +- Simple, memorable filenames that indicate checklist purpose +- Easy identification and navigation in the `checklists/` folder + +To avoid clutter, use descriptive types and clean up obsolete checklists when done. + +## Example Checklist Types & Sample Items + +**UX Requirements Quality:** `ux.md` + +Sample items (testing the requirements, NOT the implementation): + +- "Are visual hierarchy requirements defined with measurable criteria? [Clarity, Spec §FR-1]" +- "Is the number and positioning of UI elements explicitly specified? [Completeness, Spec §FR-1]" +- "Are interaction state requirements (hover, focus, active) consistently defined? [Consistency]" +- "Are accessibility requirements specified for all interactive elements? [Coverage, Gap]" +- "Is fallback behavior defined when images fail to load? [Edge Case, Gap]" +- "Can 'prominent display' be objectively measured? 
[Measurability, Spec §FR-4]" + +**API Requirements Quality:** `api.md` + +Sample items: + +- "Are error response formats specified for all failure scenarios? [Completeness]" +- "Are rate limiting requirements quantified with specific thresholds? [Clarity]" +- "Are authentication requirements consistent across all endpoints? [Consistency]" +- "Are retry/timeout requirements defined for external dependencies? [Coverage, Gap]" +- "Is versioning strategy documented in requirements? [Gap]" + +**Performance Requirements Quality:** `performance.md` + +Sample items: + +- "Are performance requirements quantified with specific metrics? [Clarity]" +- "Are performance targets defined for all critical user journeys? [Coverage]" +- "Are performance requirements under different load conditions specified? [Completeness]" +- "Can performance requirements be objectively measured? [Measurability]" +- "Are degradation requirements defined for high-load scenarios? [Edge Case, Gap]" + +**Security Requirements Quality:** `security.md` + +Sample items: + +- "Are authentication requirements specified for all protected resources? [Coverage]" +- "Are data protection requirements defined for sensitive information? [Completeness]" +- "Is the threat model documented and requirements aligned to it? [Traceability]" +- "Are security requirements consistent with compliance obligations? [Consistency]" +- "Are security failure/breach response requirements defined? 
[Gap, Exception Flow]" + +## Anti-Examples: What NOT To Do + +**❌ WRONG - These test implementation, not requirements:** + +```markdown +- [ ] CHK001 - Verify landing page displays 3 episode cards [Spec §FR-001] +- [ ] CHK002 - Test hover states work correctly on desktop [Spec §FR-003] +- [ ] CHK003 - Confirm logo click navigates to home page [Spec §FR-010] +- [ ] CHK004 - Check that related episodes section shows 3-5 items [Spec §FR-005] +``` + +**✅ CORRECT - These test requirements quality:** + +```markdown +- [ ] CHK001 - Are the number and layout of featured episodes explicitly specified? [Completeness, Spec §FR-001] +- [ ] CHK002 - Are hover state requirements consistently defined for all interactive elements? [Consistency, Spec §FR-003] +- [ ] CHK003 - Are navigation requirements clear for all clickable brand elements? [Clarity, Spec §FR-010] +- [ ] CHK004 - Are the selection criteria for related episodes documented? [Gap, Spec §FR-005] +- [ ] CHK005 - Are loading state requirements defined for asynchronous episode data? [Gap] +- [ ] CHK006 - Can "visual hierarchy" requirements be objectively measured? [Measurability, Spec §FR-001] +``` + +**Key Differences:** + +- Wrong: Tests if the system works correctly +- Correct: Tests if the requirements are written correctly +- Wrong: Verification of behavior +- Correct: Validation of requirement quality +- Wrong: "Does it do X?" +- Correct: "Is X clearly specified?" + +## Post-Execution Checks + +**Check for extension hooks (after checklist generation)**: +Check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.after_checklist` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. Treat hooks without an `enabled` field as enabled by default.
+- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently diff --git a/.claude/commands/speckit.clarify.md b/.claude/commands/speckit.clarify.md new file mode 100644 index 0000000..9de13e1 --- /dev/null +++ b/.claude/commands/speckit.clarify.md @@ -0,0 +1,247 @@ +--- +description: Identify underspecified areas in the current feature spec by asking up to 5 highly targeted clarification questions and encoding answers back into the spec. +handoffs: + - label: Build Technical Plan + agent: speckit.plan + prompt: Create a plan for the spec. I am building with... +--- + +## User Input + +```text +$ARGUMENTS +``` + +You **MUST** consider the user input before proceeding (if not empty). + +## Pre-Execution Checks + +**Check for extension hooks (before clarification)**: +- Check if `.specify/extensions.yml` exists in the project root. +- If it exists, read it and look for entries under the `hooks.before_clarify` key +- If the YAML cannot be parsed or is invalid, skip hook checking silently and continue normally +- Filter out hooks where `enabled` is explicitly `false`. Treat hooks without an `enabled` field as enabled by default. 
+- For each remaining hook, do **not** attempt to interpret or evaluate hook `condition` expressions: + - If the hook has no `condition` field, or it is null/empty, treat the hook as executable + - If the hook defines a non-empty `condition`, skip the hook and leave condition evaluation to the HookExecutor implementation +- For each executable hook, output the following based on its `optional` flag: + - **Optional hook** (`optional: true`): + ``` + ## Extension Hooks + + **Optional Pre-Hook**: {extension} + Command: `/{command}` + Description: {description} + + Prompt: {prompt} + To execute: `/{command}` + ``` + - **Mandatory hook** (`optional: false`): + ``` + ## Extension Hooks + + **Automatic Pre-Hook**: {extension} + Executing: `/{command}` + EXECUTE_COMMAND: {command} + + Wait for the result of the hook command before proceeding to the Outline. + ``` +- If no hooks are registered or `.specify/extensions.yml` does not exist, skip silently + +## Outline + +Goal: Detect and reduce ambiguity or missing decision points in the active feature specification and record the clarifications directly in the spec file. + +Note: This clarification workflow is expected to run (and be completed) BEFORE invoking `/speckit.plan`. If the user explicitly states they are skipping clarification (e.g., exploratory spike), you may proceed, but must warn that downstream rework risk increases. + +Execution steps: + +1. Run `.specify/scripts/bash/check-prerequisites.sh --json --paths-only` from repo root **once** (combined `--json --paths-only` mode / `-Json -PathsOnly`). Parse minimal JSON payload fields: + - `FEATURE_DIR` + - `FEATURE_SPEC` + - (Optionally capture `IMPL_PLAN`, `TASKS` for future chained flows.) + - If JSON parsing fails, abort and instruct user to re-run `/speckit.specify` or verify feature branch environment. + - For single quotes in args like "I'm Groot", use escape syntax: e.g., 'I'\''m Groot' (or double-quote if possible: "I'm Groot"). + +2. 
Load the current spec file. Perform a structured ambiguity & coverage scan using this taxonomy. For each category, mark status: Clear / Partial / Missing. Produce an internal coverage map used for prioritization (do not output raw map unless no questions will be asked). + + Functional Scope & Behavior: + - Core user goals & success criteria + - Explicit out-of-scope declarations + - User roles / personas differentiation + + Domain & Data Model: + - Entities, attributes, relationships + - Identity & uniqueness rules + - Lifecycle/state transitions + - Data volume / scale assumptions + + Interaction & UX Flow: + - Critical user journeys / sequences + - Error/empty/loading states + - Accessibility or localization notes + + Non-Functional Quality Attributes: + - Performance (latency, throughput targets) + - Scalability (horizontal/vertical, limits) + - Reliability & availability (uptime, recovery expectations) + - Observability (logging, metrics, tracing signals) + - Security & privacy (authN/Z, data protection, threat assumptions) + - Compliance / regulatory constraints (if any) + + Integration & External Dependencies: + - External services/APIs and failure modes + - Data import/export formats + - Protocol/versioning assumptions + + Edge Cases & Failure Handling: + - Negative scenarios + - Rate limiting / throttling + - Conflict resolution (e.g., concurrent edits) + + Constraints & Tradeoffs: + - Technical constraints (language, storage, hosting) + - Explicit tradeoffs or rejected alternatives + + Terminology & Consistency: + - Canonical glossary terms + - Avoided synonyms / deprecated terms + + Completion Signals: + - Acceptance criteria testability + - Measurable Definition of Done style indicators + + Misc / Placeholders: + - TODO markers / unresolved decisions + - Ambiguous adjectives ("robust", "intuitive") lacking quantification + + For each category with Partial or Missing status, add a candidate question opportunity unless: + - Clarification would not 
materially change implementation or validation strategy + - Information is better deferred to planning phase (note internally) + +3. Generate (internally) a prioritized queue of candidate clarification questions (maximum 5). Do NOT output them all at once. Apply these constraints: + - Maximum of 5 total questions across the whole session. + - Each question must be answerable with EITHER: + - A short multiple‑choice selection (2–5 distinct, mutually exclusive options), OR + - A one-word / short‑phrase answer (explicitly constrain: "Answer in <=5 words"). + - Only include questions whose answers materially impact architecture, data modeling, task decomposition, test design, UX behavior, operational readiness, or compliance validation. + - Ensure category coverage balance: attempt to cover the highest impact unresolved categories first; avoid asking two low-impact questions when a single high-impact area (e.g., security posture) is unresolved. + - Exclude questions already answered, trivial stylistic preferences, or plan-level execution details (unless blocking correctness). + - Favor clarifications that reduce downstream rework risk or prevent misaligned acceptance tests. + - If more than 5 categories remain unresolved, select the top 5 by (Impact * Uncertainty) heuristic. + +4. Sequential questioning loop (interactive): + - Present EXACTLY ONE question at a time. + - For multiple‑choice questions: + - **Analyze all options** and determine the **most suitable option** based on: + - Best practices for the project type + - Common patterns in similar implementations + - Risk reduction (security, performance, maintainability) + - Alignment with any explicit project goals or constraints visible in the spec + - Present your **recommended option prominently** at the top with clear reasoning (1-2 sentences explaining why this is the best choice). 
+ - Format as: `**Recommended:** Option [X] - <reasoning>` + - Then render all options as a Markdown table: + + | Option | Description | + |--------|-------------| + | A |