From 2021bf172b99945e129e1962920dce9b341f7a1f Mon Sep 17 00:00:00 2001
From: Aarni Koskela
Date: Tue, 7 Feb 2023 16:42:25 +0200
Subject: [PATCH] Improve extract performance via ignoring directories early during os.walk

Co-authored-by: Steven Kao
---
 babel/messages/extract.py               | 45 ++++++++++++++++++++++---
 tests/messages/frontend/test_extract.py |  5 +--
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/babel/messages/extract.py b/babel/messages/extract.py
index 07e13e34f..a8f15469d 100644
--- a/babel/messages/extract.py
+++ b/babel/messages/extract.py
@@ -23,6 +23,7 @@
 import os
 import sys
 import tokenize
+import warnings
 from collections.abc import (
     Callable,
     Collection,
@@ -114,7 +115,35 @@ def _strip(line: str):
     comments[:] = [_strip(c) for c in comments]
 
 
-def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:
+def _make_default_directory_filter(
+    method_map: Iterable[tuple[str, str]],
+    root_dir: str | os.PathLike[str],
+):
+    method_map = tuple(method_map)
+
+    def directory_filter(dirpath: str | os.PathLike[str]) -> bool:
+        subdir = os.path.basename(dirpath)
+        # Legacy default behavior: ignore dot and underscore directories
+        if subdir.startswith('.') or subdir.startswith('_'):
+            return False
+
+        dir_rel = os.path.relpath(dirpath, root_dir).replace(os.sep, '/')
+
+        for pattern, method in method_map:
+            if method == "ignore" and pathmatch(pattern, dir_rel):
+                return False
+
+        return True
+
+    return directory_filter
+
+
+def default_directory_filter(dirpath: str | os.PathLike[str]) -> bool:  # pragma: no cover
+    warnings.warn(
+        "`default_directory_filter` is deprecated and will be removed in a future version of Babel.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
     subdir = os.path.basename(dirpath)
     # Legacy default behavior: ignore dot and underscore directories
     return not (subdir.startswith('.') or subdir.startswith('_'))
@@ -201,13 +230,19 @@ def extract_from_dir(
     """
     if dirname is None:
         dirname = os.getcwd()
+
     if options_map is None:
         options_map = {}
+
+    dirname = os.path.abspath(dirname)
+
     if directory_filter is None:
-        directory_filter = default_directory_filter
+        directory_filter = _make_default_directory_filter(
+            method_map=method_map,
+            root_dir=dirname,
+        )
 
-    absname = os.path.abspath(dirname)
-    for root, dirnames, filenames in os.walk(absname):
+    for root, dirnames, filenames in os.walk(dirname):
         dirnames[:] = [
             subdir for subdir in dirnames if directory_filter(os.path.join(root, subdir))
         ]
@@ -224,7 +259,7 @@ def extract_from_dir(
                 keywords,
                 comment_tags,
                 strip_comment_tags,
-                dirpath=absname,
+                dirpath=dirname,
             )
 
 
diff --git a/tests/messages/frontend/test_extract.py b/tests/messages/frontend/test_extract.py
index 7980eddad..1c4532f5f 100644
--- a/tests/messages/frontend/test_extract.py
+++ b/tests/messages/frontend/test_extract.py
@@ -202,10 +202,11 @@ def test_extraction_with_mapping_file(extract_cmd, pot_file):
 
 
 @freeze_time("1994-11-11")
-def test_extraction_with_mapping_dict(extract_cmd, pot_file):
+@pytest.mark.parametrize("ignore_pattern", ['**/ignored/**.*', 'ignored'])
+def test_extraction_with_mapping_dict(extract_cmd, pot_file, ignore_pattern):
     extract_cmd.distribution.message_extractors = {
         'project': [
-            ('**/ignored/**.*', 'ignore', None),
+            (ignore_pattern, 'ignore', None),
             ('**.py', 'python', None),
         ],
     }