Skip to content

File search

Find files using git, with filesystem walk fallback.

Migrated from calcipy.file_search for git-based file discovery.

Functions⚓︎

find_project_files ⚓︎

find_project_files(path_project, ignore_patterns)

Find project files in git version control or via filesystem walk.

Note: Uses git ls-files and verifies that each file exists. Falls back to recursive filesystem walk when git is unavailable.

PARAMETER DESCRIPTION
path_project

Path to the project directory

TYPE: Path

ignore_patterns

Glob ignore patterns

TYPE: list[str]

RETURNS DESCRIPTION
list[Path]

List of Path objects for all tracked, non-ignored files

Example

>>> from pathlib import Path
>>> files = find_project_files(
...     Path('.'),
...     ignore_patterns=['*.pyc', '__pycache__/*', '.git/*']
... )

Source code in corallium/file_search.py
def find_project_files(path_project: Path, ignore_patterns: list[str]) -> list[Path]:
    """Collect the project's files, preferring git and falling back to a filesystem walk.

    The candidate list comes from `_get_all_files`, which also reports whether
    git was used. When git was not used and the caller supplied no ignore
    patterns, the patterns from `_get_default_ignore_patterns` are applied
    instead. Every surviving candidate is checked on disk; entries that are
    not regular files are logged and left out of the result.

    Args:
        path_project: Path to the project directory
        ignore_patterns: Glob ignore patterns

    Returns:
        List of Path objects (each joined onto `path_project`) for the files
        that passed the ignore filter and exist on disk

    Example:
        >>> from pathlib import Path
        >>> files = find_project_files(
        ...     Path('.'),
        ...     ignore_patterns=['*.pyc', '__pycache__/*', '.git/*']
        ... )

    """
    candidates, from_git = _get_all_files(cwd=path_project)

    if from_git or ignore_patterns:
        # Caller-supplied patterns (possibly empty when git was used) are applied as-is
        patterns = ignore_patterns
    else:
        # No git and nothing requested by the caller: use the defaults
        patterns = _get_default_ignore_patterns()
        LOGGER.info(
            'Using default ignore patterns for filesystem walk. Specify --ignore-patterns to customize.',
            pattern_count=len(patterns),
        )

    existing: list[Path] = []
    for relative in _filter_files(rel_filepaths=candidates, ignore_patterns=patterns):
        resolved = path_project / relative
        if not resolved.is_file():  # pragma: no cover
            LOGGER.warning('Could not find the specified file', path_file=resolved)
            continue
        existing.append(resolved)
    return existing

find_project_files_by_suffix ⚓︎

find_project_files_by_suffix(path_project, *, ignore_patterns=None)

Find project files in git version control grouped by file extension.

Note: Uses git ls-files and verifies that each file exists. Falls back to recursive filesystem walk when git is unavailable.

PARAMETER DESCRIPTION
path_project

Path to the project directory

TYPE: Path

ignore_patterns

Glob ignore patterns (optional)

TYPE: list[str] | None DEFAULT: None

RETURNS DESCRIPTION
dict[str, list[Path]]

Dictionary where keys are file extensions (without leading dot) and values are lists of Path objects with that extension

Example

>>> from pathlib import Path
>>> files_by_ext = find_project_files_by_suffix(
...     Path('.'),
...     ignore_patterns=['*.pyc', '__pycache__/*']
... )
>>> py_files = files_by_ext.get('py', [])
>>> md_files = files_by_ext.get('md', [])

Source code in corallium/file_search.py
def find_project_files_by_suffix(
    path_project: Path,
    *,
    ignore_patterns: list[str] | None = None,
) -> dict[str, list[Path]]:
    """Group the project's files by their file extension.

    Delegates discovery to `find_project_files` (passing an empty pattern
    list when `ignore_patterns` is None or empty) and buckets each returned
    path under its final suffix with leading dots removed.

    Args:
        path_project: Path to the project directory
        ignore_patterns: Glob ignore patterns (optional)

    Returns:
        Dictionary mapping each file extension (without leading dot) to the
        list of Path objects carrying that extension; paths with no suffix
        are stored under the empty-string key

    Example:
        >>> from pathlib import Path
        >>> files_by_ext = find_project_files_by_suffix(
        ...     Path('.'),
        ...     ignore_patterns=['*.pyc', '__pycache__/*']
        ... )
        >>> py_files = files_by_ext.get('py', [])
        >>> md_files = files_by_ext.get('md', [])

    """
    patterns = ignore_patterns or []
    grouped: dict[str, list[Path]] = {}
    for found in find_project_files(path_project, patterns):
        extension = found.suffix.lstrip('.')
        # setdefault creates the bucket on first sight of an extension
        grouped.setdefault(extension, []).append(found)
    return grouped