enable perf_evaluation notifier

LiliDeng · LiliDeng · commit c7497b8c4b74 · 2025-12-05T08:19:30.000+08:00
diff --git a/docs/run_test/runbook.rst b/docs/run_test/runbook.rst
@@ -959,6 +959,95 @@ Example of log_agent notifier:
 The AI analysis results are stored in the test result message's ``analysis["AI"]``
 field and can be consumed by other notifiers like HTML or custom reporting systems.
 
+perfevaluation
+^^^^^^^^^^^^^^
+
+Evaluates performance test results against predefined criteria and optionally fails tests when targets are not met.
+
+**Basic Usage:**
+
+.. code:: yaml
+
+   notifier:
+     - type: perfevaluation
+       criteria_file: "perf_criteria.yml"
+       output_file: "results.json"
+       fail_test_on_performance_failure: true
+
+**Parameters:**
+
+criteria_file
+'''''''''''''
+type: str, optional, default: "*_criteria.yml"
+
+Path or glob pattern to YAML files containing performance criteria.
+
+criteria
+''''''''
+type: dict, optional, default: None
+
+Direct criteria definition in runbook. Takes priority over criteria_file.
+
+output_file
+'''''''''''
+type: str, optional, default: None
+
+Output path for detailed evaluation results in JSON format.
+
+fail_test_on_performance_failure
+''''''''''''''''''''''''''''''''
+type: bool, optional, default: False
+
+Mark tests as failed when performance criteria are not met.
+
+**YAML Criteria Format:**
+
+Hierarchical format with groups and conditions:
+
+.. code:: yaml
+
+   # Global settings
+   statistics_times: 3
+   error_threshold: 0.1
+   statistics_type: average
+
+   groups:
+     - name: "Storage Performance"
+       conditions:
+         - name: "test_case"
+           type: "metadata"
+           value: "*fio*"
+         - name: "vm_size"
+           type: "information"
+           value: "Standard_D*"
+       
+       metrics:
+         - name: "IOPS_Read"
+           min_value: 1000
+           target_value: 5000
+           error_threshold: 0.10
+
+**Metric Properties:**
+
+- ``min_value``: Minimum acceptable value
+- ``max_value``: Maximum acceptable value
+- ``target_value``: Expected target value
+- ``error_threshold``: Acceptable deviation from target (as decimal, e.g., 0.10 = 10%)
+
+**Pattern Matching:**
+
+Condition values use ``fnmatch`` wildcards (``*``: any characters, ``?``: one character):
+
+- ``Standard_D*``: All D-series VMs
+- ``*fio*``: Test cases containing "fio"
+- ``Standard_L??s_v2``: L-series v2 sizes with a two-character core count (e.g. ``Standard_L64s_v2``)
+
+**Conditions:**
+
+- ``test_case`` (``type: metadata``): Match test case name
+- ``vm_size`` (``type: information``): Match VM size
+- All conditions must match (AND logic)
+
 environment
 ~~~~~~~~~~~
 
diff --git a/lisa/mixin_modules.py b/lisa/mixin_modules.py
@@ -15,6 +15,7 @@
 import lisa.notifiers.file  # noqa: F401
 import lisa.notifiers.junit  # noqa: F401
 import lisa.notifiers.perfdump  # noqa: F401
+import lisa.notifiers.perfevaluation.perfevaluation  # noqa: F401
 import lisa.notifiers.text_result  # noqa: F401
 import lisa.runners.lisa_runner  # noqa: F401
 import lisa.sut_orchestrator.ready  # noqa: F401
diff --git a/lisa/notifiers/perfevaluation/__init__.py b/lisa/notifiers/perfevaluation/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+"""
+Performance Evaluation Notifier Package
+
+This package provides performance evaluation capabilities for LISA tests,
+including criteria validation and test result modification based on
+performance metrics.
+"""
+
+__all__ = ["PerfEvaluation", "PerfEvaluationSchema", "MetricCriteria"]
+
+from .perfevaluation import MetricCriteria, PerfEvaluation, PerfEvaluationSchema
diff --git a/lisa/notifiers/perfevaluation/perf_nvme_criteria.yml b/lisa/notifiers/perfevaluation/perf_nvme_criteria.yml
@@ -0,0 +1,180 @@
+# Performance evaluation criteria for NVMe storage and TCP NTTTCP SR-IOV network tests
+# Global configuration
+statistics_times: 3  # Default: run each test 3 times to calculate statistics
+error_threshold: 0.1  # Default: 10% tolerance
+statistics_type: average  # Default statistics method
+
+# Test suite groups
+groups:
+  - name: "NVMe Performance - L64s_v2 Specific"
+    description: "Performance criteria for Standard_L64s_v2 VM"
+    error_threshold: 0.20
+    statistics_type: average
+    statistics_times: 1
+    
+    conditions:
+      - name: "test_case"
+        type: "metadata"
+        value: "perf_nvme"
+      - name: "vm_size"
+        type: "information"
+        value: "Standard_L64s_v2"
+    
+    metrics:
+      # 32 cores, 8 disks performance criteria for L64s_v2
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_read_iops"
+        min_value: 800000.0
+        target_value: 1033000.0
+        error_threshold: 0.25
+        
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_write_iops"
+        min_value: 750000.0
+        target_value: 950000.0
+        error_threshold: 0.25
+        
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randread_iops"
+        min_value: 500000.0
+        target_value: 1200000.0
+        error_threshold: 0.30
+        
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randwrite_iops"
+        min_value: 150000.0
+        target_value: 650000.0
+        error_threshold: 0.30
+        
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_read_latency"
+        max_value: 50.0
+        target_value: 10
+        error_threshold: 0.30
+        
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_write_latency"
+        max_value: 60.0
+        target_value: 10
+        error_threshold: 0.30
+        
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randread_latency"
+        max_value: 80.0
+        target_value: 25.0
+        error_threshold: 0.30
+        
+      - name: "qdepth_32_iodepth_1_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randwrite_latency"
+        max_value: 100.0
+        target_value: 80.0
+        error_threshold: 0.30
+        
+      # Higher queue depth tests
+      - name: "qdepth_64_iodepth_2_numjob_32_setup_raw_bs_4k_cores_32_disks_8_read_iops"
+        min_value: 1200000.0
+        target_value: 1500000.0
+        error_threshold: 0.25
+        
+      - name: "qdepth_64_iodepth_2_numjob_32_setup_raw_bs_4k_cores_32_disks_8_write_iops"
+        min_value: 1000000.0
+        target_value: 1300000.0
+        error_threshold: 0.25
+        
+      - name: "qdepth_64_iodepth_2_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randread_iops"
+        min_value: 800000.0
+        target_value: 1100000.0
+        error_threshold: 0.30
+        
+      - name: "qdepth_64_iodepth_2_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randwrite_iops"
+        min_value: 700000.0
+        target_value: 1000000.0
+        error_threshold: 0.30
+        
+      # Highest queue depth tests
+      - name: "qdepth_128_iodepth_4_numjob_32_setup_raw_bs_4k_cores_32_disks_8_read_iops"
+        min_value: 1800000.0
+        target_value: 2200000.0
+        error_threshold: 0.25
+        
+      - name: "qdepth_128_iodepth_4_numjob_32_setup_raw_bs_4k_cores_32_disks_8_write_iops"
+        min_value: 1600000.0
+        target_value: 2000000.0
+        error_threshold: 0.25
+        
+      - name: "qdepth_128_iodepth_4_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randread_iops"
+        min_value: 1200000.0
+        target_value: 1600000.0
+        error_threshold: 0.30
+        
+      - name: "qdepth_128_iodepth_4_numjob_32_setup_raw_bs_4k_cores_32_disks_8_randwrite_iops"
+        min_value: 1000000.0
+        target_value: 1400000.0
+        error_threshold: 0.30
+
+  - name: "TCP NTTTCP SRIOV Performance - D2ads_v5 Specific"
+    description: "Performance criteria for Standard_D2ads_v5 VM - TCP NTTTCP SRIOV"
+    error_threshold: 0.20
+    statistics_type: average
+    statistics_times: 1
+    
+    conditions:
+      - name: "test_case"
+        type: "metadata"
+        value: "perf_tcp_ntttcp_sriov"
+      - name: "vm_size"
+        type: "information"
+        value: "Standard_D2ads_v5"
+    
+    metrics:
+      - name: "buffer_size_conn_1"
+        min_value: 0
+        target_value: 65536
+        error_threshold: 0.30
+        
+      - name: "client_mtu_conn_1"
+        min_value: 1400
+        target_value: 1500
+        error_threshold: 0.05
+        
+      - name: "connections_created_time_conn_1"
+        max_value: 5.0
+        target_value: 1.0
+        error_threshold: 0.50
+        
+      - name: "latency_us_conn_1"
+        max_value: 500.0
+        target_value: 100.0
+        error_threshold: 0.50
+        
+      - name: "pkts_interrupts_conn_1"
+        min_value: 0
+        target_value: 100000
+        error_threshold: 0.50
+        
+      - name: "receiver_cycles_per_byte_conn_1"
+        max_value: 100.0
+        target_value: 20.0
+        error_threshold: 0.50
+        
+      - name: "retrans_segments_conn_1"
+        max_value: 1000
+        target_value: 0
+        error_threshold: 2.0
+        
+      - name: "rx_packets_conn_1"
+        min_value: 100000
+        target_value: 1000000
+        error_threshold: 0.50
+        
+      - name: "sender_cycles_per_byte_conn_1"
+        max_value: 100.0
+        target_value: 20.0
+        error_threshold: 0.50
+        
+      - name: "server_mtu_conn_1"
+        min_value: 1400
+        target_value: 1500
+        error_threshold: 0.05
+        
+      - name: "throughput_in_gbps_conn_1"
+        min_value: 0.5
+        target_value: 2.0
+        error_threshold: 0.30
+        
+      - name: "tx_packets_conn_1"
+        min_value: 100000
+        target_value: 1000000
+        error_threshold: 0.50
diff --git a/lisa/notifiers/perfevaluation/perf_tcp_ntttcp_sriov_criteria.json b/lisa/notifiers/perfevaluation/perf_tcp_ntttcp_sriov_criteria.json
diff --git a/lisa/notifiers/perfevaluation/perfevaluation.py b/lisa/notifiers/perfevaluation/perfevaluation.py