diff --git a/src/babel_validation/sources/github/github_issues_test_cases.py b/src/babel_validation/sources/github/github_issues_test_cases.py index f7c7b82..c8c94d4 100644 --- a/src/babel_validation/sources/github/github_issues_test_cases.py +++ b/src/babel_validation/sources/github/github_issues_test_cases.py @@ -15,7 +15,7 @@ class GitHubIssueTest: - def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[list[str]] = None): + def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[list[str]] = None, repo_id: str = ""): self.github_issue = github_issue self.assertion = assertion if param_sets is None: @@ -23,12 +23,13 @@ def __init__(self, github_issue: Issue.Issue, assertion: str, param_sets: list[l self.param_sets = param_sets if not isinstance(self.param_sets, list): raise ValueError(f"param_sets must be a list when creating a GitHubIssueTest({self.github_issue}, {self.assertion}, {self.param_sets})") + self.repo_id = repo_id self.logger = logging.getLogger(str(self)) self.logger.info(f"Creating GitHubIssueTest for {github_issue.html_url} {assertion}({param_sets})") def __str__(self): - return f"{self.github_issue.repository.organization.name}/{self.github_issue.repository.name}#{self.github_issue.number}: {self.assertion}({len(self.param_sets)} param sets: {json.dumps(self.param_sets)})" + return f"{self.repo_id}#{self.github_issue.number}: {self.assertion}({len(self.param_sets)} param sets: {json.dumps(self.param_sets)})" def test_with_nodenorm(self, nodenorm: CachedNodeNorm) -> Iterator[TestResult]: handler = ASSERTION_HANDLERS.get(self.assertion.lower()) @@ -83,7 +84,7 @@ def __init__(self, github_token: str, github_repositories=None): self.babeltest_pattern = re.compile(r'{{BabelTest\|.*?}}') self.babeltest_yaml_pattern = re.compile(r'```yaml\s+babel_tests:\s+.*?\s+```', re.DOTALL) - def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIssueTest]: + def get_test_issues_from_issue(self, github_issue: Issue.Issue, repo_id: str = "") -> list[GitHubIssueTest]: """ Extract test rows from a single GitHub issue. @@ -102,15 +103,11 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs src/babel_validation/assertions/README.md or inspect ASSERTION_HANDLERS.keys(). :param github_issue: A single GitHub issue to extract test cases from. + :param repo_id: The repository identifier string (e.g. "NCATSTranslator/Babel"). :return: A list of GitHubIssueTest objects found in the issue body. """ - github_issue_id = f"{github_issue.number}" - # Ideally, we would use: - # f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}" - # But that is very slow. - # TODO: Wrap Issue.Issue so that we can store orgName and repoName locally so we don't need to call out - # to figure it out. + github_issue_id = f"{repo_id}#{github_issue.number}" if repo_id else f"{github_issue.number}" self.logger.debug(f"Looking for tests in issue {github_issue_id}: {github_issue.title} ({str(github_issue.state)}, {github_issue.html_url})") # Is there an issue body at all? @@ -135,7 +132,7 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs if len(params) < 2: raise ValueError(f"Too few parameters found in BabelTest in issue {github_issue_id}: {match}") else: - testrows.append(GitHubIssueTest(github_issue, params[0], [params[1:]])) + testrows.append(GitHubIssueTest(github_issue, params[0], [params[1:]], repo_id=repo_id)) babeltest_yaml_matches = re.findall(self.babeltest_yaml_pattern, github_issue.body) if babeltest_yaml_matches: @@ -158,10 +155,64 @@ def get_test_issues_from_issue(self, github_issue: Issue.Issue) -> list[GitHubIs param_sets.append(param_set) else: raise RuntimeError(f"Unknown parameter set type {param_set} in issue {github_issue_id}") - testrows.append(GitHubIssueTest(github_issue, assertion, param_sets)) + testrows.append(GitHubIssueTest(github_issue, assertion, param_sets, repo_id=repo_id)) return testrows + def get_specific_test_issues(self, issue_specs: list[str]) -> list: + """ + Fetch only the issues identified by issue_specs (values from --issue flag). + + Accepted formats (same as --issue CLI option): + - "637" bare number → try all configured repos + - "Babel#637" repo#number → match by repo name + - "NCATSTranslator/Babel#637" org/repo#number → exact match + + Uses the single-issue API endpoint (fast: 1–4 calls instead of paginating + through all issues). + """ + import pytest as _pytest + from github import GithubException + + result = [] + seen = set() # avoid duplicates if a spec matches the same issue twice + + for spec in issue_specs: + if '/' in spec and '#' in spec: + # "NCATSTranslator/Babel#637" + repo_id, issue_num_str = spec.rsplit('#', 1) + repos_to_check = [(repo_id, int(issue_num_str))] + elif '#' in spec: + # "Babel#637" + repo_name, issue_num_str = spec.split('#', 1) + repos_to_check = [ + (repo_id, int(issue_num_str)) + for repo_id in self.github_repositories + if repo_id.split('/')[-1] == repo_name + ] + else: + # bare number "637" + repos_to_check = [(repo_id, int(spec)) for repo_id in self.github_repositories] + + for repo_id, issue_num in repos_to_check: + key = (repo_id, issue_num) + if key in seen: + continue + seen.add(key) + try: + repo = self.github.get_repo(repo_id, lazy=True) + issue = repo.get_issue(issue_num) + tests = self.get_test_issues_from_issue(issue, repo_id=repo_id) + for test_issue in tests: + result.append(_pytest.param(test_issue, id=str(test_issue))) + except GithubException as e: + if e.status == 404: + pass # issue doesn't exist in this repo, that's fine + else: + raise + + return result + def get_all_issues(self, github_repositories = None) -> Iterator[Issue.Issue]: """ Get a list of test rows from one or more repositories. @@ -183,3 +234,38 @@ def get_all_issues(self, github_repositories = None) -> Iterator[Issue.Issue]: yield issue self.logger.info(f"Found {issue_count} issues in GitHub repository {repo_id}") + + def get_all_test_issues(self, github_repositories=None) -> list: + """ + Get all BabelTest assertions across one or more repositories as a flat list of pytest ParameterSets. + + Each GitHubIssueTest (one assertion from one issue) becomes its own ParameterSet, so pytest + reports a separate pass/fail for every assertion rather than collapsing an entire issue into + a single test. + + Issues that contain no BabelTest assertions are silently skipped. + + :param github_repositories: A list of GitHub repositories to search for test cases. If none is + provided, we default to the list specified when creating this GitHubIssuesTestCases class. + :return: A list of pytest.param objects, one per GitHubIssueTest. + """ + import pytest as _pytest + + if github_repositories is None: + github_repositories = self.github_repositories + + result = [] + for repo_id in github_repositories: + self.logger.info(f"Looking up issues in GitHub repository {repo_id}") + repo = self.github.get_repo(repo_id, lazy=True) + + issue_count = 0 + for issue in tqdm(repo.get_issues(state='all', sort='updated'), desc=repo_id): + issue_count += 1 + tests = self.get_test_issues_from_issue(issue, repo_id=repo_id) + for test_issue in tests: + result.append(_pytest.param(test_issue, id=str(test_issue))) + + self.logger.info(f"Found {issue_count} issues in GitHub repository {repo_id}") + + return result diff --git a/tests/github_issues/conftest.py b/tests/github_issues/conftest.py new file mode 100644 index 0000000..6464d1f --- /dev/null +++ b/tests/github_issues/conftest.py @@ -0,0 +1,24 @@ +import os + +import dotenv + +from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssuesTestCases + +dotenv.load_dotenv() +_github_issues_test_cases = GitHubIssuesTestCases(os.getenv('GITHUB_TOKEN'), [ + 'NCATSTranslator/Babel', + 'NCATSTranslator/NodeNormalization', + 'NCATSTranslator/NameResolution', + 'TranslatorSRI/babel-validation', +]) + + +def pytest_generate_tests(metafunc): + if "github_issue_test" not in metafunc.fixturenames: + return + selected_issues = metafunc.config.getoption('issue', default=[]) + if selected_issues: + params = _github_issues_test_cases.get_specific_test_issues(selected_issues) + else: + params = _github_issues_test_cases.get_all_test_issues() + metafunc.parametrize("github_issue_test", params) diff --git a/tests/github_issues/test_github_issues.py b/tests/github_issues/test_github_issues.py index 94252f2..81d820f 100644 --- a/tests/github_issues/test_github_issues.py +++ b/tests/github_issues/test_github_issues.py @@ -1,74 +1,34 @@ import itertools -import os -import dotenv import pytest -from github import Issue -from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssuesTestCases +from src.babel_validation.sources.github.github_issues_test_cases import GitHubIssueTest from src.babel_validation.services.nameres import CachedNameRes from src.babel_validation.services.nodenorm import CachedNodeNorm from src.babel_validation.core.testrow import TestResult, TestStatus -# Helper functions -def get_github_issue_id(github_issue: Issue.Issue): - return f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}" -# Initialize the test. -dotenv.load_dotenv() -github_token = os.getenv('GITHUB_TOKEN') -github_issues_test_cases = GitHubIssuesTestCases(github_token, [ - 'NCATSTranslator/Babel', # https://github.com/NCATSTranslator/Babel - 'NCATSTranslator/NodeNormalization', # https://github.com/NCATSTranslator/NodeNormalization - 'NCATSTranslator/NameResolution', # https://github.com/NCATSTranslator/NameResolution - 'TranslatorSRI/babel-validation', # https://github.com/TranslatorSRI/babel-validation -]) - -@pytest.mark.parametrize("github_issue", github_issues_test_cases.get_all_issues()) -def test_github_issue(target_info, github_issue, selected_github_issues): - # If github_issues is provided, we can skip all others. - if selected_github_issues: - # Check all three possible ways in which this issue might be specified. - github_issue_matched = False - for selected_github_issue in selected_github_issues: - if '/' in selected_github_issue: - github_issue_matched = (f"{github_issue.repository.organization.name}/{github_issue.repository.name}#{github_issue.number}" == selected_github_issue) - elif '#' in selected_github_issue: - github_issue_matched = (f"{github_issue.repository.name}#{github_issue.number}" == selected_github_issue) - else: - github_issue_matched = int(selected_github_issue) == github_issue.number - if github_issue_matched: - break - - if github_issue_matched: - # This issue is one of those that should be tested. - pass - else: - pytest.skip(f"GitHub Issue {str(github_issue)} not included in list of GitHub issues to be tested: {selected_github_issues}.") - return - - # Test this issue with NodeNorm. +def test_github_issue(target_info, github_issue_test: GitHubIssueTest): nodenorm = CachedNodeNorm.from_url(target_info['NodeNormURL']) nameres = CachedNameRes.from_url(target_info['NameResURL']) - tests = github_issues_test_cases.get_test_issues_from_issue(github_issue) - if not tests: - pytest.skip(f"No tests found in issue {github_issue}") - return - - for test_issue in tests: - results_nodenorm = test_issue.test_with_nodenorm(nodenorm) - results_nameres = test_issue.test_with_nameres(nodenorm, nameres) - - for result in itertools.chain(results_nodenorm, results_nameres): - match result: - case TestResult(status=TestStatus.Passed, message=message): - assert True, f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}" - - case TestResult(status=TestStatus.Failed, message=message): - assert False, f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}" - - case TestResult(status=TestStatus.Skipped, message=message): - pytest.skip(f"{get_github_issue_id(github_issue)} ({github_issue.state}): {message}") - case _: - assert False, f"Unknown result from {get_github_issue_id(github_issue)}: {result}" + results = itertools.chain( + github_issue_test.test_with_nodenorm(nodenorm), + github_issue_test.test_with_nameres(nodenorm, nameres), + ) + + issue_label = ( + f"{github_issue_test.repo_id}#{github_issue_test.github_issue.number}" + f" ({github_issue_test.github_issue.state})" + ) + + for result in results: + match result: + case TestResult(status=TestStatus.Passed, message=message): + assert True, f"{issue_label}: {message}" + case TestResult(status=TestStatus.Failed, message=message): + assert False, f"{issue_label}: {message}" + case TestResult(status=TestStatus.Skipped, message=message): + pytest.skip(f"{issue_label}: {message}") + case _: + assert False, f"Unknown result from {issue_label}: {result}"