File search
Find files using git, with filesystem walk fallback.
Migrated from calcipy.file_search for git-based file discovery.
Functions⚓︎
find_project_files ⚓︎
find_project_files(path_project, ignore_patterns)
Find project files in git version control or via filesystem walk.
Note: Uses git ls-files and verifies that each file exists. Falls back to recursive filesystem walk when git is unavailable.
| PARAMETER | DESCRIPTION |
|---|---|
| `path_project` | Path to the project directory. **TYPE:** `Path` |
| `ignore_patterns` | Glob ignore patterns. **TYPE:** `list[str]` |
| RETURNS | DESCRIPTION |
|---|---|
| `list[Path]` | List of Path objects for all tracked, non-ignored files |
Example
```python
>>> from pathlib import Path
>>> files = find_project_files(
...     Path('.'),
...     ignore_patterns=['*.pyc', '__pycache__/*', '.git/*']
... )
```
Source code in corallium/file_search.py
def find_project_files(path_project: Path, ignore_patterns: list[str]) -> list[Path]:
    """Collect project files from git, or from a filesystem walk as a fallback.

    Note: Uses git ls-files and verifies that each file exists.
        Falls back to recursive filesystem walk when git is unavailable. When
        the fallback is used and no ignore patterns were supplied, the
        module's default ignore patterns are applied instead.

    Args:
        path_project: Path to the project directory
        ignore_patterns: Glob ignore patterns

    Returns:
        List of Path objects (each joined onto ``path_project``) for all
        discovered, non-ignored files that exist on disk

    Example:
        >>> from pathlib import Path
        >>> files = find_project_files(
        ...     Path('.'),
        ...     ignore_patterns=['*.pyc', '__pycache__/*', '.git/*']
        ... )

    """
    rel_filepaths, used_git = _get_all_files(cwd=path_project)

    if used_git or ignore_patterns:
        # Either git already scoped the listing, or the caller chose patterns.
        effective_patterns = ignore_patterns
    else:
        # A raw filesystem walk with nothing ignored would sweep up caches and
        # similar noise, so substitute the defaults and tell the user.
        effective_patterns = _get_default_ignore_patterns()
        LOGGER.info(
            'Using default ignore patterns for filesystem walk. Specify --ignore-patterns to customize.',
            pattern_count=len(effective_patterns),
        )

    existing_paths = []
    for rel_file in _filter_files(rel_filepaths=rel_filepaths, ignore_patterns=effective_patterns):
        candidate = path_project / rel_file
        if not candidate.is_file():  # pragma: no cover
            # Listed but missing on disk (or not a regular file): report and skip.
            LOGGER.warning('Could not find the specified file', path_file=candidate)
            continue
        existing_paths.append(candidate)
    return existing_paths
find_project_files_by_suffix ⚓︎
find_project_files_by_suffix(path_project, *, ignore_patterns=None)
Find project files in git version control grouped by file extension.
Note: Uses git ls-files and verifies that each file exists. Falls back to recursive filesystem walk when git is unavailable.
| PARAMETER | DESCRIPTION |
|---|---|
| `path_project` | Path to the project directory. **TYPE:** `Path` |
| `ignore_patterns` | Glob ignore patterns (optional). **TYPE:** `list[str] \| None` **DEFAULT:** `None` |
| RETURNS | DESCRIPTION |
|---|---|
| `dict[str, list[Path]]` | Dictionary where keys are file extensions (without leading dot) and values are lists of Path objects with that extension |
Example
```python
>>> from pathlib import Path
>>> files_by_ext = find_project_files_by_suffix(
...     Path('.'),
...     ignore_patterns=['*.pyc', '__pycache__/*']
... )
>>> py_files = files_by_ext.get('py', [])
>>> md_files = files_by_ext.get('md', [])
```
Source code in corallium/file_search.py
def find_project_files_by_suffix(
    path_project: Path,
    *,
    ignore_patterns: list[str] | None = None,
) -> dict[str, list[Path]]:
    """Group the project's files by their file extension.

    Delegates discovery to ``find_project_files`` and buckets each returned
    path under its suffix with the leading dot(s) stripped. Files without a
    suffix are grouped under the empty-string key.

    Note: Uses git ls-files and verifies that each file exists.
        Falls back to recursive filesystem walk when git is unavailable.

    Args:
        path_project: Path to the project directory
        ignore_patterns: Glob ignore patterns (optional); ``None`` is treated
            as an empty list

    Returns:
        Dictionary where keys are file extensions (without leading dot) and
        values are lists of Path objects with that extension

    Example:
        >>> from pathlib import Path
        >>> files_by_ext = find_project_files_by_suffix(
        ...     Path('.'),
        ...     ignore_patterns=['*.pyc', '__pycache__/*']
        ... )
        >>> py_files = files_by_ext.get('py', [])
        >>> md_files = files_by_ext.get('md', [])

    """
    patterns = ignore_patterns or []
    grouped: dict[str, list[Path]] = {}
    for path_file in find_project_files(path_project, patterns):
        extension = path_file.suffix.lstrip('.')
        grouped.setdefault(extension, []).append(path_file)
    return grouped