1 change: 1 addition & 0 deletions codeflash/code_utils/config_consts.py
@@ -8,6 +8,7 @@
MAX_CUMULATIVE_TEST_RUNTIME_NANOSECONDS = 100e6 # 100ms
N_TESTS_TO_GENERATE = 2
TOTAL_LOOPING_TIME = 10.0 # 10 second candidate benchmarking budget
CONSISTENT_LOOP_COUNT = 3
COVERAGE_THRESHOLD = 60.0
MIN_TESTCASE_PASSED_THRESHOLD = 6
REPEAT_OPTIMIZATION_PROBABILITY = 0.1
1 change: 0 additions & 1 deletion codeflash/code_utils/env_utils.py
@@ -19,7 +19,6 @@
def check_formatter_installed(formatter_cmds: list[str], exit_on_failure: bool = True) -> bool: # noqa
if not formatter_cmds or formatter_cmds[0] == "disabled":
return True

first_cmd = formatter_cmds[0]
cmd_tokens = shlex.split(first_cmd) if isinstance(first_cmd, str) else [first_cmd]

1 change: 0 additions & 1 deletion codeflash/optimization/function_optimizer.py
@@ -1806,7 +1806,6 @@ def establish_original_code_baseline(
benchmarking_results, self.function_to_optimize.function_name
)
logger.debug(f"Original async function throughput: {async_throughput} calls/second")
console.rule()

if self.args.benchmark:
replay_benchmarking_test_results = benchmarking_results.group_by_benchmarks(
86 changes: 79 additions & 7 deletions codeflash/verification/pytest_plugin.py
@@ -2,23 +2,27 @@

import contextlib
import inspect

# System Imports
import logging
import os
import platform
import re
import statistics
import sys
import time as _time_module
import warnings
from collections import deque

# System Imports
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable
from typing import TYPE_CHECKING, Any, Callable, Optional
from unittest import TestCase

# PyTest Imports
import pytest
from pluggy import HookspecMarker

from codeflash.code_utils.config_consts import CONSISTENT_LOOP_COUNT

if TYPE_CHECKING:
from _pytest.config import Config, Parser
from _pytest.main import Session
Expand Down Expand Up @@ -77,6 +81,7 @@ class UnexpectedError(Exception):
# Store references to original functions before any patching
_ORIGINAL_TIME_TIME = _time_module.time
_ORIGINAL_PERF_COUNTER = _time_module.perf_counter
_ORIGINAL_PERF_COUNTER_NS = _time_module.perf_counter_ns
_ORIGINAL_TIME_SLEEP = _time_module.sleep


Expand Down Expand Up @@ -260,6 +265,29 @@ def pytest_configure(config: Config) -> None:
_apply_deterministic_patches()


def get_runtime_from_stdout(stdout: str) -> Optional[int]:
marker_start = "!######"
marker_end = "######!"

if not stdout:
return None

end = stdout.rfind(marker_end)
if end == -1:
return None

start = stdout.rfind(marker_start, 0, end)
if start == -1:
return None

payload = stdout[start + len(marker_start) : end]
last_colon = payload.rfind(":")
if last_colon == -1:
return None

return int(payload[last_colon + 1 :])


class PytestLoops:
name: str = "pytest-loops"

@@ -268,9 +296,30 @@ def __init__(self, config: Config) -> None:
level = logging.DEBUG if config.option.verbose > 1 else logging.INFO
logging.basicConfig(level=level)
self.logger = logging.getLogger(self.name)
self.current_loop_durations_in_nano: list[int] = []

def dynamic_tolerance(self, avg: float) -> float:
if avg < 0.0001: # < 100 µs
return 0.7
if avg < 0.0005: # < 500 µs
return 0.5
if avg < 0.001: # < 1 ms
return 0.4
if avg < 0.01: # < 10 ms
return 0.2
if avg < 0.1: # < 100 ms
return 0.1
return 0.03 # > 0.1 s
Contributor: @mohammedahmed18 how did you set these numbers?

Contributor Author: @aseembits93 These are just experimental numbers. The whole idea is that very small runtimes should get a high tolerance value, because the measurement noise there is relatively large.
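As a rough standalone illustration of that reasoning (not part of the PR; the jitter figure below is an assumed, made-up value), a fixed amount of timer noise is a large fraction of a sub-millisecond measurement but almost nothing at 100 ms:

# Hypothetical sketch: fixed measurement jitter dominates short runtimes,
# which is why the tolerance table above loosens as runtimes shrink.
JITTER_S = 30e-6  # assumed ~30 microseconds of per-measurement noise

for runtime_s in (100e-6, 500e-6, 1e-3, 10e-3, 100e-3):
    relative_noise = JITTER_S / runtime_s
    print(f"runtime {runtime_s * 1e3:.3f} ms -> relative noise {relative_noise:.2%}")
# prints roughly 30% for 0.1 ms down to 0.03% for 100 ms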


@pytest.hookimpl
def pytest_runtest_logreport(self, report: pytest.TestReport) -> None:
if report.when == "call" and (duration_ns := get_runtime_from_stdout(report.capstdout)):
self.current_loop_durations_in_nano.append(duration_ns)

@hookspec(firstresult=True)
def pytest_runtestloop(self, session: Session) -> bool:
durations = deque(maxlen=CONSISTENT_LOOP_COUNT)

"""Reimplement the test loop but loop for the user defined amount of time."""
if session.testsfailed and not session.config.option.continue_on_collection_errors:
msg = "{} error{} during collection".format(session.testsfailed, "s" if session.testsfailed != 1 else "")
@@ -283,10 +332,10 @@ def pytest_runtestloop(self, session: Session) -> bool:
total_time: float = self._get_total_time(session)

count: int = 0

while total_time >= SHORTEST_AMOUNT_OF_TIME: # need to run at least one for normal tests
runtimes = []
while total_time >= SHORTEST_AMOUNT_OF_TIME:
count += 1
total_time = self._get_total_time(session)
self.current_loop_durations_in_nano.clear()

for index, item in enumerate(session.items):
item: pytest.Item = item # noqa: PLW0127, PLW2901
@@ -304,8 +353,31 @@ def pytest_runtestloop(self, session: Session) -> bool:
raise session.Failed(session.shouldfail)
if session.shouldstop:
raise session.Interrupted(session.shouldstop)

runtimes.extend(list(self.current_loop_durations_in_nano))

total_duration_in_nano = sum(self.current_loop_durations_in_nano)
if total_duration_in_nano > 0:
durations.append(total_duration_in_nano)
else:
durations.clear()

# Consistency check
if len(durations) == CONSISTENT_LOOP_COUNT and count >= session.config.option.codeflash_min_loops:
avg = statistics.median(durations)
if avg == 0:
consistent = all(d == 0 for d in durations)
else:
consistent = all(abs(d - avg) / avg <= self.dynamic_tolerance(avg) for d in durations)
if consistent:
Path(f"/home/mohammed/Documents/test-results/break-{int(_ORIGINAL_TIME_TIME())}.txt").write_text(
f"loops: {count}, runtime: {runtimes}"
)
break

if self._timed_out(session, start_time, count):
break # exit loop
break

_ORIGINAL_TIME_SLEEP(self._get_delay_time(session))
return True

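Taken together, the pytest_plugin.py changes let the benchmarking loop stop early once per-loop runtimes stabilize, instead of always running until the time budget expires. A minimal standalone sketch of that stopping rule, assuming measurements in seconds and a hypothetical measure() callable (CONSISTENT_LOOP_COUNT and the tolerance values are taken from the diff above; this is not the PR's code):

import statistics
from collections import deque

CONSISTENT_LOOP_COUNT = 3  # mirrors config_consts.py above


def dynamic_tolerance(avg: float) -> float:
    # Same idea as the PR: shorter runtimes tolerate more relative noise.
    for threshold, tol in ((0.0001, 0.7), (0.0005, 0.5), (0.001, 0.4), (0.01, 0.2), (0.1, 0.1)):
        if avg < threshold:
            return tol
    return 0.03


def loop_until_consistent(measure, max_loops: int, min_loops: int = 1) -> int:
    """Call measure() (total runtime of one loop, in seconds) until the last
    CONSISTENT_LOOP_COUNT totals all sit within the dynamic tolerance of their median."""
    durations = deque(maxlen=CONSISTENT_LOOP_COUNT)
    count = 0
    for count in range(1, max_loops + 1):
        durations.append(measure())
        if count >= min_loops and len(durations) == CONSISTENT_LOOP_COUNT:
            median = statistics.median(durations)
            if median > 0 and all(abs(d - median) / median <= dynamic_tolerance(median) for d in durations):
                break
    return count

In the plugin itself the early break sits alongside the existing _timed_out() check, so the TOTAL_LOOPING_TIME budget still caps the loop when the runtimes never settle.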