from __future__ import annotations
from collections.abc import Iterable
from itertools import islice
from sys import maxsize
def batched[T](
iterable: Iterable[T],
n: int,
*,
strict: bool = False,
) -> Iterable[tuple[T, ...]]:
"""Split an iterable into batches of size n."""
if not 1 <= n <= maxsize:
msg = "Batch size n must be at least one and at most sys.maxsize."
raise ValueError(msg)
iterator = iter(iterable)
while batch := tuple(islice(iterator, n)):
if strict and len(batch) != n:
msg = "Incomplete batch for strict batching."
raise ValueError(msg)
yield batchWe’re always looping through something in Python. Whether it’s processing lines in a file or streaming data from an API, iterables are everywhere.
Python’s standard-library itertools module is great, but every now and then you hit a problem that it doesn’t quite cover out of the box. Over time, I’ve put together a small collection of helper functions to handle those “I wish itertools had this” moments. Here are a few of my favorites, complete with some type-safe implementations you can drop into your own projects.
batched
This one’s a classic: you have a long list and you want to slice it up into chunks. Maybe you’re feeding data into a neural network or just trying not to hit a rate limit on an API.
Python 3.12 actually added this to itertools, but if you’re stuck on an older version or just want a standalone version, here’s how you can do it:
Example
unbox
Ever have a list that should only have one thing in it? Maybe it’s a filtered database result or a unique ID. Instead of doing results[0] and hoping for the best, unbox makes sure your assumption is actually right.
It returns that one item and raises a ValueError if the iterable turns out to contain zero items or more than one. It’s a great way to catch bugs early.
Example
unzip
If you’ve ever used Python’s zip function, you know it’s a lifesaver for pairing up data. But what about the other way around? If you have a list of pairs and you want two separate lists, unzip is the tool for the job.
Example
flatten_iterable
Nested lists are a headache. flatten_iterable lets you turn a messy, deeply nested structure into a single, clean list. It’s smart enough to leave strings and bytes alone, so you don’t end up with a list of individual characters when you didn’t mean to.
from collections.abc import Generator
from typing import cast
type NestedIterable[T] = Iterable[NestedIterable[T] | T]
def flatten_iterable[T](iterable: NestedIterable[T]) -> list[T]:
"""Flatten an iterable of iterables."""
def _helper(lst: NestedIterable[T]) -> Generator[T]:
for item in lst:
if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
yield from _helper(cast(NestedIterable[T], item))
else:
yield cast(T, item)
return list(_helper(iterable))Example
flatten_string_key_dict
Nested dictionaries are great for structure, but they can be a pain to access. This function flattens them into a single level, using dots to join the keys (like parent.child.grandchild). It’s perfect for turning complex config files into something much easier to work with.
from collections.abc import Mapping
type NestedDict[T] = Mapping[str, NestedDict[T] | T]
def flatten_string_key_dict[T](dictionary: NestedDict[T]) -> dict[str, T]:
"""Flatten a nested string-keyed dictionary."""
def _generate_items(
d: NestedDict[T] | T, prefix: str = ""
) -> Generator[tuple[str, T]]:
if isinstance(d, dict):
for key, value in d.items():
yield from _generate_items(value, prefix + key + ".")
else:
yield prefix[:-1], cast(T, d)
return dict(_generate_items(dictionary))Example
group_by_non_consecutive
If you’ve ever used itertools.groupby, you know the catch: it only groups consecutive items, so your data must be sorted by the grouping key first, or the same key will show up in several separate groups. This helper function takes care of that for you by sorting the data before grouping it. It’s a bit more memory-heavy because it has to hold the whole list in memory to sort it, but for most everyday tasks, it’s a huge convenience.
import itertools
from collections.abc import Callable, Iterator
from typing import Any, Protocol, Self
class _SupportsLessThan(Protocol):
def __lt__(self, other: Self) -> bool: ...
def group_by_non_consecutive[T, SLT: _SupportsLessThan](
iterable: Iterable[T],
*,
key: Callable[[T], SLT] | None = None,
reverse: bool = False,
) -> Iterator[tuple[SLT, Iterator[T]]]:
"""Sort and group an iterable by a key function.
Unlike itertools.groupby, this function does not require the iterable
to be pre-sorted. It first sorts the entire iterable and then applies
the grouping.
"""
if key is None:
def default_key(x: T) -> SLT:
return cast(SLT, x)
key = default_key
sorted_iterable = sorted(iterable, key=key, reverse=reverse) # type: ignore[arg-type]
return itertools.groupby(sorted_iterable, key=key) # type: ignore[arg-type]Example
Show the code
# Demonstrates group_by_non_consecutive with and without a key function.

# With key function
data = ["apple", "banana", "ant", "bear", "apricot"]
print(f"group_by_non_consecutive({data}, key=lambda x: x[0])")
for key, group in group_by_non_consecutive(data, key=lambda x: x[0]):
    print(f" {key}: {list(group)}")

# Without key function
data_nums = [1, 2, 1, 3, 2, 1]
print(f"group_by_non_consecutive({data_nums})")
for key, group in group_by_non_consecutive(data_nums):
    print(f" {key}: {list(group)}")

# Output:
# group_by_non_consecutive(['apple', 'banana', 'ant', 'bear', 'apricot'], key=lambda x: x[0])
#  a: ['apple', 'ant', 'apricot']
#  b: ['banana', 'bear']
# group_by_non_consecutive([1, 2, 1, 3, 2, 1])
#  1: [1, 1, 1]
#  2: [2, 2]
#  3: [3]
These are some of the helpers I find myself reaching for time and again. Hopefully, they’ll save you some time and keep your code a bit cleaner. You can grab the full script with all these functions and examples below.
Download the whole code here.