File helpers
File Helpers.
Attributes⚓︎
ALLOWED_CHARS
module-attribute
⚓︎
# Characters accepted in a filename by default: ASCII letters, digits, '-', '_', and '.'
ALLOWED_CHARS = ''.join((ascii_lowercase, ascii_uppercase, digits, '-_.'))
Default string of acceptable characters in a filename.
COPIER_ANSWERS
module-attribute
⚓︎
# Relative file name of the Copier answers file.
COPIER_ANSWERS = Path('.copier-answers.yml')
Copier Answer file name.
LOCK
module-attribute
⚓︎
# DEPRECATED: prefer get_lock(). This constant assumes poetry.lock and does not cover uv.lock.
LOCK = Path('poetry.lock')
[DEPRECATED] Use get_lock() instead. This constant assumes poetry.lock and doesn’t support uv.lock.
RESERVED_NAMES
module-attribute
⚓︎
# Windows reserved filenames: the console/printer/aux/null devices plus COM1-COM9 and LPT1-LPT9.
RESERVED_NAMES = frozenset(
    {
        'CON',
        'PRN',
        'AUX',
        'NUL',
        *(f'COM{port}' for port in range(1, 10)),
        *(f'LPT{port}' for port in range(1, 10)),
    },
)
Windows reserved filenames.
Functions⚓︎
delete_dir ⚓︎
delete_dir(dir_path)
Delete the specified directory from a doit task.
| PARAMETER | DESCRIPTION |
|---|---|
dir_path
|
Path to directory to delete
TYPE:
|
Source code in corallium/file_helpers.py
def delete_dir(dir_path: Path) -> None:
    """Remove a directory tree; intended to be called from a doit task.

    Nothing happens when `dir_path` is not an existing directory.

    Args:
        dir_path: Path to directory to delete

    """
    if not dir_path.is_dir():
        return
    LOGGER.text('Deleting', dir_path=dir_path)
    shutil.rmtree(dir_path)
delete_old_files ⚓︎
delete_old_files(dir_path, *, ttl_seconds)
Delete old files within the specified directory.
Skips symlinks to avoid deleting files outside the target directory.
| PARAMETER | DESCRIPTION |
|---|---|
dir_path
|
Path to the directory that is searched recursively for old files
TYPE:
|
ttl_seconds
|
if last modified within this number of seconds, will not be deleted
TYPE:
|
Source code in corallium/file_helpers.py
def delete_old_files(dir_path: Path, *, ttl_seconds: int) -> None:
    """Remove stale regular files found anywhere beneath the specified directory.

    Symlinks are skipped so that nothing outside the target directory is deleted.

    Args:
        dir_path: Path to the directory that is searched recursively
        ttl_seconds: if last modified within this number of seconds, will not be deleted

    """
    for candidate in dir_path.rglob('*'):
        # Links are never acted on; only real files are eligible for removal
        if candidate.is_symlink() or not candidate.is_file():
            continue
        age_seconds = time.time() - candidate.stat().st_mtime
        if age_seconds > ttl_seconds:
            candidate.unlink()
ensure_dir ⚓︎
ensure_dir(dir_path)
Make sure that the specified dir_path exists and create any missing folders from a doit task.
| PARAMETER | DESCRIPTION |
|---|---|
dir_path
|
Path to directory that needs to exist
TYPE:
|
Source code in corallium/file_helpers.py
def ensure_dir(dir_path: Path) -> None:
    """Create `dir_path` and any missing parent folders; intended to be called from a doit task.

    An already existing directory is left untouched.

    Args:
        dir_path: Path to directory that needs to exist

    """
    LOGGER.text('Creating', dir_path=dir_path)
    dir_path.mkdir(exist_ok=True, parents=True)
find_in_parents ⚓︎
find_in_parents(*, name, cwd=None)
Return path to specific file by recursively searching in cwd and parents.
| RAISES | DESCRIPTION |
|---|---|
FileNotFoundError
|
if not found |
Source code in corallium/file_helpers.py
def find_in_parents(*, name: str, cwd: Path | None = None) -> Path:
"""Return path to specific file by recursively searching in cwd and parents.
Raises:
FileNotFoundError: if not found
"""
msg = f'Could not locate {name} in {cwd} or in any parent directory'
start_path = (cwd or Path()).resolve() / name
try:
while not start_path.is_file():
start_path = start_path.parents[1] / name
except IndexError:
raise FileNotFoundError(msg) from None
return start_path
get_lock
cached
⚓︎
get_lock()
Return path to dependency manager’s lock file.
Supports both uv.lock and poetry.lock files.
| RAISES | DESCRIPTION |
|---|---|
FileNotFoundError
|
if a lock file can’t be located |
Source code in corallium/file_helpers.py
@lru_cache(maxsize=1)
def get_lock() -> Path:
    """Locate the dependency manager's lock file, preferring uv.lock over poetry.lock.

    The search starts in the current working directory and continues through its parents.
    The result is cached after the first successful lookup.

    Returns:
        Path: path to the first lock file found

    Raises:
        FileNotFoundError: if a lock file can't be located

    """
    for lock_name in ('uv.lock', 'poetry.lock'):
        try:
            return find_in_parents(name=lock_name)
        except FileNotFoundError:
            continue
    raise FileNotFoundError('Could not locate a known lock file type (uv.lock or poetry.lock)')
get_relative ⚓︎
get_relative(full_path, other_path)
Try to return the relative path between the two paths. None if no match.
| PARAMETER | DESCRIPTION |
|---|---|
full_path
|
the full path to use
TYPE:
|
other_path
|
the path that the full_path may be relative to
TYPE:
|
| RETURNS | DESCRIPTION |
|---|---|
Path | None
|
relative path |
Source code in corallium/file_helpers.py
def get_relative(full_path: Path, other_path: Path) -> Path | None:
"""Try to return the relative path between the two paths. None if no match.
Args:
full_path: the full path to use
other_path: the path that the full_path may be relative to
Returns:
relative path
"""
with suppress(ValueError):
return full_path.relative_to(other_path)
return None
get_tool_versions ⚓︎
get_tool_versions(cwd=None)
Return versions from mise.lock, mise.toml, or .tool-versions file.
Priority order: mise.lock, mise.toml, .tool-versions (legacy asdf format).
Source code in corallium/file_helpers.py
def get_tool_versions(cwd: Path | None = None) -> dict[str, list[str]]:
    """Return tool versions read from the first available version file.

    Priority order: mise.lock, mise.toml, .tool-versions (legacy asdf format).
    Each file is searched for in `cwd` and its parent directories.

    Args:
        cwd: directory where the search starts, passed through to `find_in_parents`

    Returns:
        versions as produced by the parser for the file that was found

    Raises:
        FileNotFoundError: if none of the files can be located

    """
    try:
        return _parse_mise_lock(find_in_parents(name='mise.lock', cwd=cwd))
    except FileNotFoundError:
        pass  # Fall through to the next candidate
    try:
        return _parse_mise_toml(find_in_parents(name='mise.toml', cwd=cwd))
    except FileNotFoundError:
        pass  # Fall through to the legacy format
    # Last resort: a missing file here is reported to the caller
    return _parse_tool_versions(find_in_parents(name='.tool-versions', cwd=cwd))
if_found_unlink ⚓︎
if_found_unlink(path_file)
Remove file if it exists. Function is intended to be used as a doit action.
| PARAMETER | DESCRIPTION |
|---|---|
path_file
|
Path to file to remove
TYPE:
|
Source code in corallium/file_helpers.py
def if_found_unlink(path_file: Path) -> None:
    """Delete the file when present. Function is intended to be used as a doit action.

    Nothing happens when `path_file` is not an existing file.

    Args:
        path_file: Path to file to remove

    """
    if not path_file.is_file():
        return
    LOGGER.text('Deleting', path_file=path_file)
    path_file.unlink()
open_in_browser ⚓︎
open_in_browser(path_file)
Open the path in the default web browser.
| PARAMETER | DESCRIPTION |
|---|---|
path_file
|
Path to file
TYPE:
|
Source code in corallium/file_helpers.py
def open_in_browser(path_file: Path) -> None:  # pragma: no cover
    """Show the file in the default web browser.

    Args:
        path_file: Path to file

    """
    # The browser is given an absolute file URI built from the resolved path
    file_uri = path_file.resolve().as_uri()
    webbrowser.open(file_uri)
read_lines ⚓︎
read_lines(path_file, encoding='utf-8', errors=None)
Read a file and split on newlines for later parsing.
| PARAMETER | DESCRIPTION |
|---|---|
path_file
|
path to the file
TYPE:
|
encoding
|
defaults to ‘utf-8’
TYPE:
|
errors
|
defaults to None. Use ‘ignore’ if needed. Full documentation: https://docs.python.org/3.12/library/functions.html#open
TYPE:
|
| RETURNS | DESCRIPTION |
|---|---|
list[str]
|
List[str]: lines of text as list |
Source code in corallium/file_helpers.py
def read_lines(path_file: Path, encoding: str | None = 'utf-8', errors: str | None = None) -> list[str]:
"""Read a file and split on newlines for later parsing.
Args:
path_file: path to the file
encoding: defaults to 'utf-8'
errors: defaults to None. Use 'ignore' if needed. Full documentation: https://docs.python.org/3.12/library/functions.html#open
Returns:
List[str]: lines of text as list
"""
return path_file.read_text(encoding=encoding, errors=errors).splitlines() if path_file.is_file() else []
read_package_name
cached
⚓︎
read_package_name(cwd=None)
Return the package name.
Source code in corallium/file_helpers.py
@lru_cache(maxsize=25)
def read_package_name(cwd: Path | None = None) -> str:
    """Return the package name declared in 'pyproject.toml'.

    The `[project]` table is checked first and `[tool.poetry]` is the fallback.

    Args:
        cwd: directory where the search for 'pyproject.toml' starts

    Returns:
        the package name

    Raises:
        KeyError: if neither table provides a name

    """
    pyproject = read_pyproject(cwd=cwd)
    try:
        return str(pyproject['project']['name'])  # For uv
    except KeyError:
        pass  # No `[project]` name; try the poetry table instead
    return str(pyproject['tool']['poetry']['name'])
read_pyproject
cached
⚓︎
read_pyproject(cwd=None)
Return the ‘pyproject.toml’ file contents.
Cached with maxsize=25 to support multi-project workflows.
| RAISES | DESCRIPTION |
|---|---|
FileNotFoundError
|
if not found or cannot be read |
Source code in corallium/file_helpers.py
@lru_cache(maxsize=25)
def read_pyproject(cwd: Path | None = None) -> Any:
    """Return the 'pyproject.toml' file contents.

    Cached with maxsize=25 to support multi-project workflows. The parsed mapping is
    shared between callers through the cache, so treat it as read-only.

    Args:
        cwd: directory where the search for 'pyproject.toml' starts

    Returns:
        the parsed TOML document

    Raises:
        FileNotFoundError: if not found or cannot be read
        ValueError: if the file does not contain valid TOML

    """
    toml_path = find_in_parents(name='pyproject.toml', cwd=cwd)
    try:
        pyproject_txt = toml_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        # Any read or decode failure is reported under the single documented exception type
        msg = f'Could not read pyproject.toml at: {toml_path}'
        raise FileNotFoundError(msg) from exc
    try:
        return tomllib.loads(pyproject_txt)
    except tomllib.TOMLDecodeError as exc:
        msg = f'Invalid TOML in pyproject.toml at: {toml_path}'
        raise ValueError(msg) from exc
read_yaml_file ⚓︎
read_yaml_file(path_yaml)
Attempt to read the specified yaml file. Returns an empty dictionary if not found or a parser error occurs.
Note: suppresses all tags in the YAML file
| PARAMETER | DESCRIPTION |
|---|---|
path_yaml
|
path to the yaml file
TYPE:
|
| RETURNS | DESCRIPTION |
|---|---|
Any
|
dictionary representation of the source file |
| RAISES | DESCRIPTION |
|---|---|
RuntimeError
|
when yaml dependency is missing |
Source code in corallium/file_helpers.py
def read_yaml_file(path_yaml: Path) -> Any:
    """Attempt to read the specified yaml file. Returns an empty dictionary if not found or a parser error occurs.

    > Note: suppresses all tags in the YAML file

    Args:
        path_yaml: path to the yaml file

    Returns:
        dictionary representation of the source file

    Raises:
        RuntimeError: when yaml dependency is missing

    """
    try:
        import yaml  # noqa: PLC0415 # lazy-load the optional dependency
    except ImportError as exc:
        raise RuntimeError("The 'calcipy[docs]' extras are missing") from exc

    # Based on: https://github.com/yaml/pyyaml/issues/86#issuecomment-380252434
    # The tag-suppressing constructors are registered on a private subclass so that the
    # process-wide `yaml.SafeLoader` (used by every `yaml.safe_load` caller) is not modified
    class _TagSuppressingLoader(yaml.SafeLoader):
        """Safe loader that constructs None for any tag without an exact constructor."""

    for tag_prefix in ('', '!', '!!'):
        _TagSuppressingLoader.add_multi_constructor(tag_prefix, lambda _loader, _suffix, _node: None)

    try:
        # Still a safe load: the loader only extends SafeLoader with None-returning constructors
        return yaml.load(path_yaml.read_text(encoding='utf-8'), Loader=_TagSuppressingLoader)  # noqa: S506
    except (FileNotFoundError, KeyError) as exc:  # pragma: no cover
        LOGGER.warning('Unexpected read error', path_yaml=path_yaml, error=str(exc))
        return {}
    except yaml.YAMLError:
        # YAMLError is the base of scanner, parser, and constructor errors, so malformed
        # documents return an empty dictionary as documented instead of raising
        LOGGER.exception('Warning: burying poorly handled yaml error')
        return {}
sanitize_filename ⚓︎
sanitize_filename(filename, repl_char='_', allowed_chars=ALLOWED_CHARS)
Replace all characters not in allowed_chars with repl_char.
Handles empty strings, path separators, and Windows reserved names.
| PARAMETER | DESCRIPTION |
|---|---|
filename
|
string filename (stem and suffix only, not a full path)
TYPE:
|
repl_char
|
replacement character. Default is `_`
TYPE:
|
allowed_chars
|
all allowed characters. Default is `ALLOWED_CHARS`
TYPE:
|
| RETURNS | DESCRIPTION |
|---|---|
str
|
sanitized filename
TYPE:
|
| RAISES | DESCRIPTION |
|---|---|
ValueError
|
if filename is empty or becomes empty after sanitization |
Source code in corallium/file_helpers.py
def sanitize_filename(filename: str, repl_char: str = '_', allowed_chars: str = ALLOWED_CHARS) -> str:
    """Replace all characters not in `allowed_chars` with `repl_char`.

    Handles empty strings, path separators, and Windows reserved names. A result whose
    stem (text before the first '.') is a reserved name in any letter case, such as
    'CON' or 'nul.txt', is prefixed with `repl_char`. An empty `repl_char` therefore
    cannot disambiguate a reserved name.

    Args:
        filename: string filename (stem and suffix only, not a full path)
        repl_char: replacement character. Default is `_`
        allowed_chars: all allowed characters. Default is `ALLOWED_CHARS`

    Returns:
        str: sanitized filename

    Raises:
        ValueError: if filename is empty or becomes empty after sanitization

    """
    if not filename:
        raise ValueError('Filename cannot be empty')

    # Remove path separators first (prevents directory traversal)
    filename = filename.replace('/', repl_char).replace('\\', repl_char)

    # Replace disallowed characters; the set gives constant-time membership per character
    allowed = frozenset(allowed_chars)
    sanitized = ''.join((char if char in allowed else repl_char) for char in filename)

    # Only reachable when `repl_char` is empty and every character was disallowed
    if not sanitized:
        msg = f'Filename becomes empty after sanitization: {filename!r}'
        raise ValueError(msg)

    # Windows reserves device names regardless of letter case or extension
    if sanitized.partition('.')[0].upper() in RESERVED_NAMES:
        sanitized = f'{repl_char}{sanitized}'
    return sanitized
tail_lines ⚓︎
tail_lines(path_file, *, count)
Tail a file for up to the last count (or full file) lines.
Optimized to read in chunks instead of byte-by-byte for better performance.
| PARAMETER | DESCRIPTION |
|---|---|
path_file
|
path to the file
TYPE:
|
count
|
maximum number of lines to return
TYPE:
|
| RETURNS | DESCRIPTION |
|---|---|
list[str]
|
List[str]: lines of text as list |
Source code in corallium/file_helpers.py
def tail_lines(path_file: Path, *, count: int) -> list[str]:
    """Tail a file for up to the last count (or full file) lines.

    Reads fixed-size chunks backwards from the end of the file until enough newlines
    have been seen, so large files are not read in full.

    Args:
        path_file: path to the file
        count: maximum number of lines to return. Zero or a negative value returns an empty list

    Returns:
        lines of text as list, with carriage returns and newlines removed. When the file
        ends with a newline, the final element is the empty string that follows it

    """
    if count <= 0:
        # `all_lines[-0:]` would be the whole list, so handle "no lines" explicitly
        return []

    max_chunk_size = 8192  # 8KB chunks for efficient disk I/O
    chunks: list[bytes] = []  # Collected back-to-front, reversed before decoding
    newlines_found = 0
    with path_file.open('rb') as f_h:
        remaining_bytes = f_h.seek(0, os.SEEK_END)
        while remaining_bytes > 0 and newlines_found < count:
            chunk_size = min(max_chunk_size, remaining_bytes)
            remaining_bytes -= chunk_size
            f_h.seek(remaining_bytes, os.SEEK_SET)
            chunk = f_h.read(chunk_size)
            chunks.append(chunk)
            # Count only the new chunk rather than rescanning everything read so far
            newlines_found += chunk.count(b'\n')

    if not chunks:  # Empty file
        return []

    # A multi-byte character split at the start of the buffer can only affect the
    # partial first line, which the final slice discards
    decoded = b''.join(reversed(chunks)).decode('utf-8', errors='replace')
    all_lines = [line.rstrip('\r') for line in decoded.split('\n')]
    # Note: split on '\n' creates an extra empty string if text ends with '\n'
    return all_lines[-count:]
trim_trailing_whitespace ⚓︎
trim_trailing_whitespace(pth)
Trim trailing whitespace from the specified file.
Preserves the original line ending style (LF or CRLF).
Source code in corallium/file_helpers.py
def trim_trailing_whitespace(pth: Path) -> None:
    """Trim trailing spaces from every line of the specified file, in place.

    Preserves the original line ending style (LF or CRLF). Only space characters are
    removed; tabs and other whitespace are left untouched.

    Args:
        pth: path to the UTF-8 encoded file to rewrite

    """
    # Read and write raw bytes: text mode applies universal-newline translation, which
    # hides '\r\n' on read and rewrites line endings on write
    text = pth.read_bytes().decode('utf-8')
    # Detect line ending style
    line_break = '\r\n' if '\r\n' in text else '\n'
    # Strip trailing spaces from each line
    stripped = [line.rstrip(' ') for line in text.split(line_break)]
    pth.write_bytes(line_break.join(stripped).encode('utf-8'))