Skip to content

vllm.v1.kv_offload.cpu.policies.abstract

BlockStatus

Bases: Structure

Offloading status for a single block of KV data. Holds the following information:

- `ref_cnt` - the current number of transfers using this block as a source. A value of -1 indicates the block is not yet ready to be read.
- `block_id` - index of the physical CPU buffer slot.

Source code in vllm/v1/kv_offload/cpu/policies/abstract.py
class BlockStatus(ctypes.Structure):
    """
    Per-block offloading record for KV data.

    Fields:

    ref_cnt - how many transfers are currently using this block as a source.
        The sentinel value -1 marks a block that is not yet ready to be read.
    block_id - index of the physical CPU buffer slot.
    """

    _fields_ = [
        ("ref_cnt", ctypes.c_int32),
        ("block_id", ctypes.c_int64),
    ]

    def __init__(self, block_id: int):
        # A fresh block starts out "not ready": ref_cnt gets the -1 sentinel.
        # Structure's keyword constructor assigns the fields in the order given.
        super().__init__(ref_cnt=-1, block_id=block_id)

    @property
    def is_ready(self) -> bool:
        """
        Tell whether the block can be read, i.e. ref_cnt is non-negative.
        """
        return not self.ref_cnt < 0

is_ready property

is_ready: bool

Returns whether the block is ready to be read.

CachePolicy

Bases: ABC

Encapsulates both block organization (data structures) and replacement decisions (which block to evict). LRU and ARC differ in both dimensions — ARC's ghost lists and target_t1_size live at the intersection of storage and eviction, so they cannot be separated cleanly.

Source code in vllm/v1/kv_offload/cpu/policies/abstract.py
class CachePolicy(ABC):
    """
    Abstract base class for cache policies.

    Encapsulates both block organization (data structures) and replacement
    decisions (which block to evict). LRU and ARC differ in both dimensions —
    ARC's ghost lists and target_t1_size live at the intersection of storage
    and eviction, so they cannot be separated cleanly.

    Every method below is abstract; a concrete policy must implement all of
    them before it can be instantiated.
    """

    # Concrete policies are constructed with the cache capacity.
    # NOTE(review): presumably a number of blocks — confirm against the
    # concrete implementations.
    @abstractmethod
    def __init__(self, cache_capacity: int) -> None: ...

    @abstractmethod
    def get(self, block_hash: BlockHash) -> BlockStatus | None:
        """
        Find block in data structures.

        Args:
            block_hash: hash of the block to look up.

        Returns:
            The block's BlockStatus, or None if not present.
        """

    @abstractmethod
    def insert(self, block_hash: BlockHash, block: BlockStatus) -> None:
        """
        Add a newly allocated block.

        For ARC: also removes from ghost lists.

        Args:
            block_hash: hash under which the block is stored.
            block: status record of the newly allocated block.
        """

    @abstractmethod
    def remove(self, block_hash: BlockHash) -> None:
        """
        Remove a block (used to clean up after a failed store).

        Args:
            block_hash: hash of the block to remove.
        """

    @abstractmethod
    def touch(self, block_hashes: Iterable[BlockHash]) -> None:
        """
        Mark blocks as recently used.

        Args:
            block_hashes: hashes of the blocks to mark.
        """

    @abstractmethod
    def evict(
        self, n: int, protected: set[BlockHash]
    ) -> list[tuple[BlockHash, BlockStatus]] | None:
        """
        Evict exactly n blocks, skipping any in protected.

        The operation is atomic: if None is returned, no state changes are
        made.

        For ARC: ghost list cleanup (trimming to cache_capacity) is performed
        at the end of a successful eviction.

        Args:
            n: number of blocks to evict.
            protected: block hashes that must not be evicted.

        Returns:
            A list of (block_hash, block) for the evicted blocks,
            or None if n evictions cannot be satisfied.
        """

evict abstractmethod

evict(
    n: int, protected: set[BlockHash]
) -> list[tuple[BlockHash, BlockStatus]] | None

Evict exactly n blocks, skipping any in protected.

Returns a list of (block_hash, block) for the evicted blocks, or None if n evictions cannot be satisfied. The operation is atomic: if None is returned, no state changes are made.

For ARC: ghost list cleanup (trimming to cache_capacity) is performed at the end of a successful eviction.

Source code in vllm/v1/kv_offload/cpu/policies/abstract.py
@abstractmethod
def evict(
    self, n: int, protected: set[BlockHash]
) -> list[tuple[BlockHash, BlockStatus]] | None:
    """
    Evict exactly n blocks, skipping any in protected.

    The operation is atomic: if None is returned, no state changes are made.

    For ARC: ghost list cleanup (trimming to cache_capacity) is performed
    at the end of a successful eviction.

    Args:
        n: number of blocks to evict.
        protected: block hashes that must not be evicted.

    Returns:
        A list of (block_hash, block) for the evicted blocks,
        or None if n evictions cannot be satisfied.
    """

get abstractmethod

get(block_hash: BlockHash) -> BlockStatus | None

Find block in data structures. Returns None if not present.

Source code in vllm/v1/kv_offload/cpu/policies/abstract.py
@abstractmethod
def get(self, block_hash: BlockHash) -> BlockStatus | None:
    """
    Find block in data structures.

    Args:
        block_hash: hash of the block to look up.

    Returns:
        The block's BlockStatus, or None if not present.
    """

insert abstractmethod

insert(block_hash: BlockHash, block: BlockStatus) -> None

Add a newly allocated block. For ARC: also removes from ghost lists.

Source code in vllm/v1/kv_offload/cpu/policies/abstract.py
@abstractmethod
def insert(self, block_hash: BlockHash, block: BlockStatus) -> None:
    """
    Add a newly allocated block.

    For ARC: also removes from ghost lists.

    Args:
        block_hash: hash under which the block is stored.
        block: status record of the newly allocated block.
    """

remove abstractmethod

remove(block_hash: BlockHash) -> None

Remove a block (used to clean up after a failed store).

Source code in vllm/v1/kv_offload/cpu/policies/abstract.py
@abstractmethod
def remove(self, block_hash: BlockHash) -> None:
    """
    Remove a block (used to clean up after a failed store).

    Args:
        block_hash: hash of the block to remove.
    """

touch abstractmethod

touch(block_hashes: Iterable[BlockHash]) -> None

Mark blocks as recently used.

Source code in vllm/v1/kv_offload/cpu/policies/abstract.py
@abstractmethod
def touch(self, block_hashes: Iterable[BlockHash]) -> None:
    """
    Mark blocks as recently used.

    Args:
        block_hashes: hashes of the blocks to mark.
    """