pulseengine · avrabe · Apr 2, 2026 · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
diff --git a/.claude/settings.local.json b/.claude/settings.local.json
@@ -18,7 +18,24 @@
       "Bash(cargo clippy:*)",
       "Bash(git branch:*)",
       "Bash(gh run:*)",
-      "Bash(cargo fmt:*)"
+      "Bash(cargo fmt:*)",
+      "Bash(git revert:*)",
+      "Bash(git stash:*)",
+      "Bash(gh repo:*)",
+      "Bash(gh api:*)",
+      "Bash(grep common-mistake:*)",
+      "Bash(cargo run:*)",
+      "Bash(git -C /Users/r/git/pulseengine/rivet/.claude/worktrees/agent-a67ac7e0 diff HEAD -- rivet-core/src/validate.rs)",
+      "Read(//Users/r/.claude/projects/-Users-r-git-pulseengine-rivet/e8d52dc9-2f0a-4563-8eea-6628709bf407/subagents/**)",
+      "Bash(git worktree:*)",
+      "Bash(git -C /Users/r/git/pulseengine/rivet/.claude/worktrees/agent-a1e2061b diff HEAD -- rivet-cli/src/main.rs)",
+      "Bash(grep -c 'test$')",
+      "Bash(cargo clean:*)",
+      "Bash(git -C /Users/r/git/pulseengine/rivet/.claude/worktrees/agent-ae21d534 diff HEAD -- rivet-core/src/validate.rs)",
+      "Bash(grep -rn 'Diagnostic {$\\\\|Diagnostic{' /Users/r/git/pulseengine/rivet/rivet-core/src/validate.rs)",
+      "Bash(sed:*)",
+      "Bash(python3 -c \"import sys,json; d=json.load\\(sys.stdin\\); types=[t['name'] for t in d['artifact_types']]; print\\(f'{len\\(types\\)} types'\\); [print\\(f'  {t}'\\) for t in types if t.startswith\\('ai-'\\) or t.startswith\\('risk'\\) or t.startswith\\('data'\\)]\")",
+      "Bash(git -C /Users/r/git/pulseengine/rivet/.claude/worktrees/agent-a5e68f53 diff HEAD -- rivet-cli/src/main.rs)"
     ]
   }
 }
diff --git a/examples/eu-ai-act/artifacts/compliance.yaml b/examples/eu-ai-act/artifacts/compliance.yaml
@@ -0,0 +1,181 @@
+# Compliance artifacts — Art. 10, 12-15, Annex IV §3-4, §7-9
+artifacts:
+  - id: DGR-001
+    type: data-governance-record
+    title: Training and validation data governance
+    status: approved
+    fields:
+      data-sources: >
+        5 years of operational telemetry from 200 substations (Nordic Energy Grid).
+        2 years of maintenance logs from 3 partner utilities (anonymized).
+        Public weather data from ECMWF ERA5 reanalysis dataset.
+      collection-method: >
+        SCADA system automated export (15-minute intervals).
+        Manual maintenance log entry via field service app.
+        Weather data via API batch download.
+      labeling-method: >
+        Failure events labeled by maintenance engineers using root-cause
+        analysis reports. Inter-rater agreement: Cohen's κ = 0.87.
+      preparation-steps: >
+        Outlier removal (>5σ), gap filling (linear interpolation for <1h gaps),
+        feature engineering (rolling statistics, FFT components),
+        train/val/test split: 70/15/15 by installation (not by time).
+      bias-assessment: >
+        Geographic bias: 80% Nordic data, 20% Central European.
+        Equipment age bias: median 8 years, range 1-25.
+        Mitigation: stratified sampling + synthetic augmentation for
+        underrepresented regions and equipment ages.
+      data-size: >
+        Training: 2.1M samples, Validation: 450K, Test: 450K.
+    links:
+      - type: governs
+        target: DS-001
+
+  - id: MON-001
+    type: monitoring-measure
+    title: Production monitoring and drift detection
+    status: approved
+    fields:
+      mechanism-type: drift-detection
+      logging-scope: >
+        All predictions logged: input features, confidence score,
+        prediction result, and actual outcome (when known).
+        PSI (Population Stability Index) computed daily per feature group.
+      alert-conditions: >
+        PSI > 0.2 on any feature group → data team review.
+        Accuracy below 90% over 30-day rolling window → model retraining.
+        Confidence calibration error > 5% → recalibration triggered.
+      human-intervention-capability: >
+        Operators can disable AI recommendations per-substation
+        via dashboard toggle. Emergency global disable via API.
+      retention-period: 5 years (per GDPR Art. 5(1)(e) + EU AI Act Art. 12 and 19)
+    links:
+      - type: monitors
+        target: AI-SYS-001
+
+  - id: PE-001
+    type: performance-evaluation
+    title: Failure prediction accuracy
+    status: approved
+    fields:
+      metric-name: Critical failure recall
+      metric-value: "98.7%"
+      methodology: >
+        Measured on held-out test set (450K samples from installations
+        not seen during training). Critical failures defined as ASAI
+        severity ≥ 3 (grid instability or customer outage).
+      evaluation-scope: accuracy
+      population-subgroups: >
+        Nordic installations: 99.1% recall.
+        Central European: 97.2% recall.
+        Equipment age >15y: 96.8% recall.
+      bias-results: >
+        1.9 percentage-point recall gap between Nordic and Central European installations.
+        Accepted as within tolerance; active data collection program
+        to close gap over next 2 release cycles.
+    links:
+      - type: evaluates
+        target: DS-001
+
+  - id: HO-001
+    type: human-oversight-measure
+    title: Operator review and override dashboard
+    status: approved
+    fields:
+      oversight-type: intervention
+      capability-description: >
+        Web dashboard showing all pending recommendations with
+        explainability view (SHAP feature importance per prediction).
+        Operators can approve, defer, escalate, or override each
+        recommendation. Audit log of all human decisions.
+      training-required: >
+        8-hour initial training on system capabilities and limitations.
+        Annual refresher (2 hours). Domain expertise in substation
+        maintenance required (minimum 3 years field experience).
+    links:
+      - type: overseen-by
+        target: AI-SYS-001
+
+  - id: TRANS-001
+    type: transparency-record
+    title: Deployer and user information
+    status: approved
+    fields:
+      information-scope: >
+        Model card (IEEE 1597.1 format). Training data summary.
+        Performance metrics by subgroup. Known limitations.
+        Integration guide for SCADA systems.
+      user-facing-docs: >
+        Operator manual with decision-making guidance.
+        FAQ on interpreting confidence scores.
+      limitations-disclosed: >
+        Not validated for tropical climate installations.
+        Degraded performance for equipment older than 20 years.
+        Requires minimum 6 months of historical data per installation.
+    links:
+      - type: transparency-for
+        target: AI-SYS-001
+
+  - id: STD-001
+    type: standards-reference
+    title: ISO/IEC 42001 AI Management System
+    status: approved
+    fields:
+      standard-id: ISO/IEC 42001:2023
+      standard-title: AI Management System
+      coverage-scope: >
+        Full scope — AI governance, risk management, performance
+        monitoring, continual improvement.
+    links:
+      - type: applied-to
+        target: AI-SYS-001
+
+  - id: STD-002
+    type: standards-reference
+    title: ISO/IEC 23894 AI Risk Management
+    status: approved
+    fields:
+      standard-id: ISO/IEC 23894:2023
+      standard-title: Guidance on AI Risk Management
+      coverage-scope: >
+        Risk identification and mitigation methodology.
+      partial-application-rationale: >
+        Applied to risk assessment process; incident response
+        procedures follow existing ISO 27001 framework instead.
+    links:
+      - type: applied-to
+        target: AI-SYS-001
+
+  - id: CONF-001
+    type: conformity-declaration
+    title: EU Declaration of Conformity
+    status: draft
+    fields:
+      declaration-date: "2026-07-15"
+      conformity-scope: >
+        Conformity with EU AI Act requirements for high-risk AI systems
+        as classified under Annex III, point 2 — management and
+        operation of critical infrastructure (energy).
+    links:
+      - type: declares
+        target: AI-SYS-001
+
+  - id: PMP-001
+    type: post-market-plan
+    title: Post-market monitoring plan
+    status: approved
+    fields:
+      monitoring-scope: >
+        Continuous monitoring of prediction accuracy, data drift,
+        incident rates, and operator feedback across all deployments.
+      drift-detection: >
+        Daily PSI computation. Monthly concept drift analysis.
+        Quarterly full model performance re-evaluation.
+      incident-reporting: >
+        Serious incidents reported to national market surveillance
+        authority within 15 days (Art. 73). Internal incident database
+        with automated severity classification.
+      review-frequency: Quarterly review of monitoring data + annual comprehensive review
+    links:
+      - type: monitors-post-market
+        target: AI-SYS-001
diff --git a/examples/eu-ai-act/artifacts/risk-management.yaml b/examples/eu-ai-act/artifacts/risk-management.yaml
@@ -0,0 +1,100 @@
+# Risk Management — Art. 9 + Annex IV §5
+artifacts:
+  - id: RMP-001
+    type: risk-management-process
+    title: Continuous AI risk management
+    status: approved
+    fields:
+      scope: >
+        All risks arising from the AI system's predictions influencing
+        maintenance decisions for critical infrastructure components.
+      methodology: >
+        FMEA-based risk identification with STPA extensions for
+        control-theoretic hazards. ISO/IEC 23894 framework.
+        Quarterly review cycle. Incident-triggered ad-hoc reviews.
+      review-frequency: Quarterly + incident-driven
+    links:
+      - type: manages-risk-for
+        target: AI-SYS-001
+
+  - id: RA-001
+    type: risk-assessment
+    title: False negative — missed critical failure
+    status: approved
+    fields:
+      risk-description: >
+        System fails to predict an imminent component failure,
+        leading to unplanned outage of critical infrastructure.
+      likelihood: unlikely
+      severity: catastrophic
+      risk-level: critical
+      affected-rights: >
+        Public safety. Right to essential services (energy supply).
+    links:
+      - type: leads-to
+        target: AI-SYS-001
+
+  - id: RA-002
+    type: risk-assessment
+    title: False positive — unnecessary maintenance dispatch
+    status: approved
+    fields:
+      risk-description: >
+        System predicts failure that does not materialize, causing
+        unnecessary maintenance cost and resource allocation.
+      likelihood: possible
+      severity: minor
+      risk-level: medium
+    links:
+      - type: leads-to
+        target: AI-SYS-001
+
+  - id: RM-001
+    type: risk-mitigation
+    title: Dual-threshold alert system
+    status: approved
+    fields:
+      measure-description: >
+        Two-tier alert: "watch" (60-85% confidence) triggers monitoring
+        escalation, "act" (>85%) triggers maintenance dispatch.
+        All critical component alerts require human engineer sign-off.
+      residual-risk: >
+        Human reviewer may override alert under time pressure.
+        Mitigated by mandatory 4-hour cooling-off period.
+      effectiveness-evidence: >
+        6-month pilot reduced missed critical failures by 94%
+        while keeping false alarm rate below 12%.
+    links:
+      - type: mitigates
+        target: RA-001
+
+  - id: RM-002
+    type: risk-mitigation
+    title: Confidence calibration with rejection option
+    status: approved
+    fields:
+      measure-description: >
+        Model output calibrated using Platt scaling on held-out data.
+        Predictions with calibrated confidence below 50% are rejected
+        and flagged for manual assessment.
+      residual-risk: >
+        Calibration may drift as data distribution changes.
+        Mitigated by weekly recalibration check and the MON-001 calibration-error alert (> 5%).
+    links:
+      - type: mitigates
+        target: RA-002
+
+  - id: MR-001
+    type: misuse-risk
+    title: Override fatigue from excessive alerts
+    status: draft
+    fields:
+      misuse-scenario: >
+        Operators routinely dismiss alerts due to high false positive
+        rate, eventually ignoring a genuine critical failure prediction.
+      likelihood: possible
+      harm-potential: >
+        Same as RA-001 — missed critical failure leading to outage.
+    links:
+      - type: identified-by
+        target: RMP-001
diff --git a/examples/eu-ai-act/artifacts/system.yaml b/examples/eu-ai-act/artifacts/system.yaml
@@ -0,0 +1,56 @@
+# AI System Description and Design — Annex IV §1-2
+artifacts:
+  - id: AI-SYS-001
+    type: ai-system-description
+    title: Predictive Maintenance AI for Critical Infrastructure
+    status: approved
+    description: >
+      High-risk AI system that analyzes infrastructure sensor telemetry
+      to predict component failures and recommend maintenance schedules.
+      Deployed in energy grid substations across the EU.
+    fields:
+      intended-purpose: >
+        Real-time analysis of infrastructure sensor data (vibration,
+        temperature, power quality) to predict equipment failures
+        24-72 hours in advance and recommend preventive maintenance.
+      provider: InfraAI GmbH, Berlin
+      ai-version: "2.1.0"
+      hardware-deps: >
+        Edge inference on NVIDIA Jetson Orin NX (8GB).
+        Cloud training on A100 GPU cluster.
+      software-deps: >
+        PyTorch 2.1, XGBoost 2.0, scikit-learn 1.3,
+        ONNX Runtime 1.16 for edge deployment.
+      deployment-forms: >
+        Cloud SaaS dashboard for fleet management.
+        Edge container for local inference at substations.
+      risk-class: high-risk
+
+  - id: DS-001
+    type: design-specification
+    title: Hybrid prediction model design
+    status: approved
+    description: >
+      Two-stage model: XGBoost for failure probability estimation,
+      LSTM for remaining useful life prediction.
+    fields:
+      algorithms: >
+        Stage 1: XGBoost gradient-boosted ensemble (500 trees, max depth 8)
+        for binary failure classification.
+        Stage 2: LSTM network (2 layers, 128 hidden units) for remaining
+        useful life regression on classified-at-risk components.
+      design-choices: >
+        Hybrid approach chosen over end-to-end deep learning for:
+        (a) interpretability of Stage 1 decisions (Art. 13),
+        (b) reliability of Stage 2 predictions under distribution shift,
+        (c) ability to deploy Stage 1 on edge with Stage 2 in cloud.
+      optimization-objectives: >
+        Minimize false negative rate (missed failures) at the cost of
+        higher false positive rate (unnecessary inspections).
+        Target: <1% missed critical failures, <15% false alarm rate.
+      training-methodology: >
+        Stage 1 trained on 5 years of labeled failure events.
+        Stage 2 trained on degradation curves from 200+ components.
+    links:
+      - type: satisfies
+        target: AI-SYS-001