Source code for seqme.core.io
import pickle
from pathlib import Path
from typing import Any
[docs]
def read_fasta(
path: str | Path,
*,
return_headers: bool = False,
) -> list[str] | tuple[list[str], list[str]]:
"""Retrieve sequences from a FASTA file.
Args:
path: Path to FASTA file.
return_headers: Whether to return sequence headers alongside the sequences.
Returns:
The list of sequences.
If ``return_headers`` is ``True``, returns a tuple ``(headers, sequences)``,
where ``headers`` is a list of header strings (without the leading
``>``) and ``sequences`` is the corresponding list of sequence strings.
"""
path = Path(path)
if not path.is_file():
raise FileNotFoundError(f"File not found: {path}")
headers: list[str] = []
sequences: list[str] = []
current_seq: list[str] = []
with path.open() as f:
for line in f:
line = line.strip()
if not line:
continue # skip empty lines
if line.startswith(">"):
if return_headers:
headers.append(line[1:])
if current_seq:
sequence = "".join(current_seq)
if sequence:
sequences.append(sequence)
current_seq = []
else:
current_seq.append(line)
# Add the last sequence if present
if current_seq:
sequence = "".join(current_seq)
if sequence:
sequences.append(sequence)
return (headers, sequences) if return_headers else sequences
[docs]
def to_fasta(sequences: list[str], path: str | Path, *, headers: list[str] | None = None):
"""Write sequences to a FASTA file.
Args:
sequences: List of text sequences.
path: Output filepath, e.g., ``"/path/seqs.fasta"``.
headers: Optional sequence names.
"""
if headers is not None and len(headers) != len(sequences):
raise ValueError("headers length must match sequences length")
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("w") as f:
for i, seq in enumerate(sequences):
header = headers[i] if headers else f">seq_{i + 1}"
if not header.startswith(">"):
header = ">" + header
f.write(f"{header}\n")
f.write(f"{seq}\n")
[docs]
def read_pickle(path: str | Path) -> Any:
"""Load and return an object from a pickle file.
Args:
path: Path to pickle file.
Returns:
The deserialized Python object.
"""
path = Path(path)
if not path.is_file():
raise FileNotFoundError(f"File not found: {path}")
with path.open("rb") as f:
return pickle.load(f)
[docs]
def to_pickle(content: Any, path: str | Path):
"""Serialize an object and write it to a pickle file.
Args:
content: Pickable object.
path: Output filepath, e.g., ``"/path/cache.pkl"``.
"""
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("wb") as f:
pickle.dump(content, f)