1 change: 1 addition & 0 deletions codeflash/code_utils/config_consts.py
@@ -8,6 +8,7 @@
MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms
N_TESTS_TO_GENERATE = 2
TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget
CONSISTENT_LOOP_COUNT = 3
COVERAGE_THRESHOLD = 60.0
MIN_TESTCASE_PASSED_THRESHOLD = 6
REPEAT_OPTIMIZATION_PROBABILITY = 0.1
1 change: 0 additions & 1 deletion codeflash/code_utils/env_utils.py
@@ -19,7 +19,6 @@
def check_formatter_installed(formatter_cmds: list[str], exit_on_failure: bool = True) -> bool: # noqa
if not formatter_cmds or formatter_cmds[0] == "disabled":
return True

first_cmd = formatter_cmds[0]
cmd_tokens = shlex.split(first_cmd) if isinstance(first_cmd, str) else [first_cmd]

1 change: 0 additions & 1 deletion codeflash/optimization/function_optimizer.py
@@ -1806,7 +1806,6 @@ def establish_original_code_baseline(
benchmarking_results, self.function_to_optimize.function_name
)
logger.debug(f"Original async function throughput: {async_throughput} calls/second")
console.rule()

if self.args.benchmark:
replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks(
86 changes: 79 additions & 7 deletions codeflash/verification/pytest_plugin.py
@@ -2,23 +2,27 @@

import contextlib
import inspect

# System Imports
import logging
import os
import platform
import re
import statistics
import sys
import time as _time_module
import warnings
from collections import deque

# System Imports
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable
from typing import TYPE_CHECKING, Any, Callable, Optional
from unittest import TestCase

# PyTest Imports
import pytest
from pluggy import HookspecMarker

from codeflash.code_utils.config_consts import CONSISTENT_LOOP_COUNT

if TYPE_CHECKING:
from _pytest.config import Config, Parser
from _pytest.main import Session
Expand Down Expand Up @@ -77,6 +81,7 @@ class UnexpectedError(Exception):
# Store references to original functions before any patching
_ORIGINAL_TIME_TIME = _time_module.time
_ORIGINAL_PERF_COUNTER = _time_module.perf_counter
_ORIGINAL_PERF_COUNTER_NS = _time_module.perf_counter_ns
_ORIGINAL_TIME_SLEEP = _time_module.sleep


Expand Down Expand Up @@ -260,6 +265,29 @@ def pytest_configure(config: Config) -> None:
_apply_deterministic_patches()


def get_runtime_from_stdout(stdout: str) -> Optional[int]:
marker_start = "!######"
marker_end = "######!"

if not stdout:
return None

end = stdout.rfind(marker_end)
if end == -1:
return None

start = stdout.rfind(marker_start, 0, end)
if start == -1:
return None

payload = stdout[start + len(marker_start) : end]
last_colon = payload.rfind(":")
if last_colon == -1:
return None

return int(payload[last_colon + 1 :])


class PytestLoops:
name: str = "pytest-loops"

@@ -268,9 +296,30 @@ def __init__(self, config: Config) -> None:
level = logging.DEBUG if config.option.verbose > 1 else logging.INFO
logging.basicConfig(level=level)
self.logger = logging.getLogger(self.name)
self.current_loop_durations_in_nano: list[int] = []

def dynamic_tolerance(self, avg: float) -> float:
if avg < 0.0001: # < 100 µs
return 0.7
if avg < 0.0005: # < 500 µs
return 0.5
if avg < 0.001: # < 1 ms
return 0.4
if avg < 0.01: # < 10 ms
return 0.2
if avg < 0.1: # < 100 ms
return 0.1
return 0.03 # > 0.1 s
Contributor: @mohammedahmed18 how did you set these numbers?

Contributor Author: @aseembits93 These are just experimental numbers. The whole idea is that very small runtimes should get a high tolerance value, because the measurement noise there is relatively large.
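As a rough standalone illustration of that reasoning (not part of the PR; the jitter figure below is an assumed, made-up value), a fixed amount of timer noise is a large fraction of a sub-millisecond measurement but almost nothing at 100 ms:

# Hypothetical sketch: fixed measurement jitter dominates short runtimes,
# which is why the tolerance table above loosens as runtimes shrink.
JITTER_S = 30e-6  # assumed ~30 microseconds of per-measurement noise

for runtime_s in (100e-6, 500e-6, 1e-3, 10e-3, 100e-3):
    relative_noise = JITTER_S / runtime_s
    print(f"runtime {runtime_s * 1e3:.3f} ms -> relative noise {relative_noise:.2%}")
# prints roughly 30% for 0.1 ms down to 0.03% for 100 ms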


@pytest.hookimpl
def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
if report.when == "call" and (duration_ns := get_runtime_from_stdout(report.capstdout)):
self.current_loop_durations_in_nano.append(duration_ns)

@hookspec(firstresult=True)
def pytest_runtestloop(self, session: Session) -> bool:
durations = deque(maxlen=CONSISTENT_LOOP_COUNT)

"""Reimplement the test loop but loop for the user defined amount of time."""
if session.testsfailed and not session.config.option.continue_on_collection_errors:
msg = "{} error{} during collection".format(session.testsfailed, "s" if session.testsfailed != 1 else "")
@@ -283,10 +332,10 @@ def pytest_runtestloop(self, session: Session) -> bool:
total_time: float = self._get_total_time(session)

count: int = 0

while total_time >= SHORTEST_AMOUNT_OF_TIME: # need to run at least one for normal tests
runtimes = []
while total_time >= SHORTEST_AMOUNT_OF_TIME:
count += 1
total_time = self._get_total_time(session)
self.current_loop_durations_in_nano.clear()

for index, item in enumerate(session.items):
item: pytest.Item = item # noqa: PLW0127, PLW2901
@@ -304,8 +353,31 @@ def pytest_runtestloop(self, session: Session) -> bool:
raise session.Failed(session.shouldfail)
if session.shouldstop:
raise session.Interrupted(session.shouldstop)

runtimes.extend(list(self.current_loop_durations_in_nano))

total_duration_in_nano = sum(self.current_loop_durations_in_nano)
if total_duration_in_nano > 0:
durations.append(total_duration_in_nano)
else:
durations.clear()

# Consistency check
if len(durations) == CONSISTENT_LOOP_COUNT and count >= session.config.option.codeflash_min_loops:
avg = statistics.median(durations)
if avg == 0:
consistent = all(d == 0 for d in durations)
else:
consistent = all(abs(d - avg) / avg <= self.dynamic_tolerance(avg) for d in durations)
if consistent:
Path(f"/home/mohammed/Documents/test-results/break-{int(_ORIGINAL_TIME_TIME())}.txt").write_text(
f"loops: {count}, runtime: {runtimes}"
)
break

if self._timed_out(session, start_time, count):
break # exit loop
break

_ORIGINAL_TIME_SLEEP(self._get_delay_time(session))
return True

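Taken together, the pytest_plugin.py changes let the benchmarking loop stop early once per-loop runtimes stabilize, instead of always running until the time budget expires. A minimal standalone sketch of that stopping rule, assuming measurements in seconds and a hypothetical measure() callable (CONSISTENT_LOOP_COUNT and the tolerance values are taken from the diff above; this is not the PR's code):

import statistics
from collections import deque

CONSISTENT_LOOP_COUNT = 3  # mirrors config_consts.py above


def dynamic_tolerance(avg: float) -> float:
    # Same idea as the PR: shorter runtimes tolerate more relative noise.
    for threshold, tol in ((0.0001, 0.7), (0.0005, 0.5), (0.001, 0.4), (0.01, 0.2), (0.1, 0.1)):
        if avg < threshold:
            return tol
    return 0.03


def loop_until_consistent(measure, max_loops: int, min_loops: int = 1) -> int:
    """Call measure() (total runtime of one loop, in seconds) until the last
    CONSISTENT_LOOP_COUNT totals all sit within the dynamic tolerance of their median."""
    durations = deque(maxlen=CONSISTENT_LOOP_COUNT)
    count = 0
    for count in range(1, max_loops + 1):
        durations.append(measure())
        if count >= min_loops and len(durations) == CONSISTENT_LOOP_COUNT:
            median = statistics.median(durations)
            if median > 0 and all(abs(d - median) / median <= dynamic_tolerance(median) for d in durations):
                break
    return count

In the plugin itself the early break sits alongside the existing _timed_out() check, so the TOTAL_LOOPING_TIME budget still caps the loop when the runtimes never settle.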