实现缓冲协议¶

注意

此页面使用两种不同的语法变体

Cython 特定的 cdef 语法，旨在使类型声明简洁且易于从 C/C++ 的角度阅读。
纯 Python 语法，允许在纯 Python 代码中使用静态 Cython 类型声明，遵循 PEP-484 类型提示和 PEP 526 变量注释。

要在 Python 语法中使用 C 数据类型，您需要在要编译的 Python 模块中导入特殊的 cython 模块，例如
```
import cython
```
如果您使用纯 Python 语法，我们强烈建议您使用最新的 Cython 3 版本，因为与 0.29.x 版本相比，这里已经进行了重大改进。

Cython 对象可以通过实现“缓冲协议”来向 Python 代码公开内存缓冲区。本章介绍如何实现该协议以及如何使用 NumPy 从扩展类型管理的内存中获取数据。

矩阵类¶

以下 Cython/C++ 代码实现了一个浮点数矩阵，其中列数在构造时固定，但可以动态添加行。

# distutils: language = c++

from cython.cimports.libcpp.vector import vector

@cython.cclass
class Matrix:
    """Matrix of C floats whose column count is fixed at construction.

    All elements are kept in one C++ ``vector``; every call to
    ``add_row`` extends that vector by ``ncols`` entries.
    """
    # Number of columns; set once in __cinit__.
    ncols: cython.uint
    # Flat element storage shared by all rows.
    v: vector[cython.float]

    def __cinit__(self, ncols: cython.uint):
        # Only the column count is recorded here.
        self.ncols = ncols

    def add_row(self):
        """Append one row; its entries start out as zero."""
        grown: cython.size_t = self.v.size() + self.ncols
        self.v.resize(grown)

# distutils: language = c++

from libcpp.vector cimport vector


cdef class Matrix:
    """Matrix of C floats whose column count is fixed at construction.

    All elements are kept in one C++ ``vector``; every call to
    ``add_row`` extends that vector by ``ncols`` entries.
    """
    cdef:
        # Number of columns; set once in __cinit__.
        unsigned ncols
        # Flat element storage shared by all rows.
        vector[float] v

    def __cinit__(self, unsigned ncols):
        # Only the column count is recorded here.
        self.ncols = ncols

    def add_row(self):
        """Append one row; its entries start out as zero."""
        cdef size_t grown = self.v.size() + self.ncols
        self.v.resize(grown)

这个类还没有提供任何能对矩阵内容进行有用操作的方法。我们可以为此实现自定义的 __getitem__、__setitem__ 等方法，但这里我们改用缓冲协议把矩阵的数据公开给 Python，这样就可以借助 NumPy 来完成有用的工作。

实现缓冲协议需要添加两个方法，__getbuffer__ 和 __releasebuffer__，Cython 对它们进行了特殊处理。

# distutils: language = c++
from cython.cimports.cpython import Py_buffer
from cython.cimports.libcpp.vector import vector

@cython.cclass
class Matrix:
    """Growable matrix of C floats that exports its data via the buffer protocol.

    The number of columns is fixed at construction; rows are appended with
    ``add_row``.  All elements live in one C++ ``vector``, ``ncols`` entries
    per row, and ``__getbuffer__`` exposes that storage as a writable 2-D
    buffer of format ``'f'``.
    """
    # Number of columns; set once in __cinit__.
    ncols: cython.Py_ssize_t
    # Backing arrays for Py_buffer.shape / Py_buffer.strides.  The buffer
    # struct only stores pointers, so the arrays are kept on the object.
    shape: cython.Py_ssize_t[2]
    strides: cython.Py_ssize_t[2]
    # Flat element storage shared by all rows.
    v: vector[cython.float]

    def __cinit__(self, ncols: cython.Py_ssize_t):
        self.ncols = ncols

    def add_row(self):
        """Adds a row, initially zero-filled."""
        self.v.resize(self.v.size() + self.ncols)

    def __getbuffer__(self, buffer: cython.pointer(Py_buffer), flags: cython.int):
        # Fill in ``buffer`` so that it describes the vector as a
        # (rows, ncols) array of floats.  ``flags`` is not inspected.
        #
        # sizeof() is resolved at compile time, so this does not actually
        # read element 0 and is safe for an empty vector.
        itemsize: cython.Py_ssize_t = cython.sizeof(self.v[0])

        self.shape[0] = self.v.size() // self.ncols
        self.shape[1] = self.ncols

        # Stride 1 is the distance, in bytes, between two items in a row;
        # this is the distance between two adjacent items in the vector,
        # i.e. the item size.  It is taken from itemsize rather than from
        # &v[1] - &v[0], because indexing v[0] / v[1] is undefined behaviour
        # in C++ when the vector holds fewer than one / two elements.
        # Stride 0 is the distance between the first elements of adjacent rows.
        self.strides[1] = itemsize
        self.strides[0] = self.ncols * self.strides[1]

        # data() is valid on an empty vector, unlike &v[0].
        buffer.buf = cython.cast(cython.p_char, self.v.data())
        buffer.format = 'f'                     # float
        buffer.internal = cython.NULL           # see References
        buffer.itemsize = itemsize
        buffer.len = self.v.size() * itemsize   # product(shape) * itemsize
        buffer.ndim = 2
        buffer.obj = self
        buffer.readonly = 0
        buffer.shape = self.shape
        buffer.strides = self.strides
        buffer.suboffsets = cython.NULL         # for pointer arrays only

    def __releasebuffer__(self, buffer: cython.pointer(Py_buffer)):
        # Nothing to undo: __getbuffer__ only points the view at members of
        # this object.
        pass

# distutils: language = c++
from cpython cimport Py_buffer
from libcpp.vector cimport vector


cdef class Matrix:
    """Growable matrix of C floats that exports its data via the buffer protocol.

    The number of columns is fixed at construction; rows are appended with
    ``add_row``.  All elements live in one C++ ``vector``, ``ncols`` entries
    per row, and ``__getbuffer__`` exposes that storage as a writable 2-D
    buffer of format ``'f'``.
    """
    # Number of columns; set once in __cinit__.
    cdef Py_ssize_t ncols
    # Backing arrays for Py_buffer.shape / Py_buffer.strides.  The buffer
    # struct only stores pointers, so the arrays are kept on the object.
    cdef Py_ssize_t[2] shape
    cdef Py_ssize_t[2] strides
    # Flat element storage shared by all rows.
    cdef vector[float] v

    def __cinit__(self, Py_ssize_t ncols):
        self.ncols = ncols

    def add_row(self):
        """Adds a row, initially zero-filled."""
        self.v.resize(self.v.size() + self.ncols)

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        # Fill in ``buffer`` so that it describes the vector as a
        # (rows, ncols) array of floats.  ``flags`` is not inspected.
        #
        # sizeof() is resolved at compile time, so this does not actually
        # read element 0 and is safe for an empty vector.
        cdef Py_ssize_t itemsize = sizeof(self.v[0])

        self.shape[0] = self.v.size() // self.ncols
        self.shape[1] = self.ncols

        # Stride 1 is the distance, in bytes, between two items in a row;
        # this is the distance between two adjacent items in the vector,
        # i.e. the item size.  It is taken from itemsize rather than from
        # &v[1] - &v[0], because indexing v[0] / v[1] is undefined behaviour
        # in C++ when the vector holds fewer than one / two elements.
        # Stride 0 is the distance between the first elements of adjacent rows.
        self.strides[1] = itemsize
        self.strides[0] = self.ncols * self.strides[1]

        # data() is valid on an empty vector, unlike &v[0].
        buffer.buf = <char *>self.v.data()
        buffer.format = 'f'                     # float
        buffer.internal = NULL                  # see References
        buffer.itemsize = itemsize
        buffer.len = self.v.size() * itemsize   # product(shape) * itemsize
        buffer.ndim = 2
        buffer.obj = self
        buffer.readonly = 0
        buffer.shape = self.shape
        buffer.strides = self.strides
        buffer.suboffsets = NULL                # for pointer arrays only

    def __releasebuffer__(self, Py_buffer *buffer):
        # Nothing to undo: __getbuffer__ only points the view at members of
        # this object.
        pass

方法 Matrix.__getbuffer__ 会填充一个名为 Py_buffer 的描述符结构，该结构由 Python C-API 定义。它包含指向内存中实际缓冲区的指针，以及有关数组形状和步长（从一个元素或一行到下一个元素或下一行的距离）的元数据。它的 shape 和 strides 成员是指针，必须指向类型和大小为 Py_ssize_t[ndim] 的数组。只要还有缓冲区视图在查看数据，这些数组就必须保持有效，因此我们把它们作为成员存储在 Matrix 对象上。

代码尚未完成，但我们已经可以编译它并测试基本功能。

>>> from matrix import Matrix
>>> import numpy as np
>>> m = Matrix(10)
>>> np.asarray(m)
array([], shape=(0, 10), dtype=float32)
>>> m.add_row()
>>> a = np.asarray(m)
>>> a[:] = 1
>>> m.add_row()
>>> a = np.asarray(m)
>>> a
array([[ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]], dtype=float32)

现在我们可以将 Matrix 视为 NumPy ndarray，并使用标准 NumPy 操作修改其内容。

内存安全和引用计数¶

到目前为止实现的 Matrix 类是不安全的。 add_row 操作可能会移动底层缓冲区，这会使任何 NumPy（或其他）视图上的数据失效。如果您尝试在 add_row 调用后访问值，您将获得过时的值或段错误。

这就是 __releasebuffer__ 的用武之地。我们可以为每个矩阵添加一个视图计数，只要还存在视图，就锁定矩阵、禁止对其进行修改。

# distutils: language = c++

from cython.cimports.cpython import Py_buffer
from cython.cimports.libcpp.vector import vector

@cython.cclass
class Matrix:
    """Matrix variant that refuses to grow while buffer views are outstanding.

    This is an abridged listing: members and code marked ``# ...`` are
    unchanged from the previous version of the class.
    """

    # Number of buffer views currently handed out: incremented in
    # __getbuffer__, decremented in __releasebuffer__.
    view_count: cython.int

    ncols: cython.Py_ssize_t
    v: vector[cython.float]
    # ...

    def __cinit__(self, ncols: cython.Py_ssize_t):
        self.ncols = ncols
        # No views exist on a freshly created matrix.
        self.view_count = 0

    def add_row(self):
        """Append one row to the matrix.

        Raises:
            ValueError: if at least one buffer view is still outstanding.
        """
        # Resizing may move the underlying storage, which would invalidate
        # any view still pointing at it, so refuse while views exist.
        if self.view_count > 0:
            raise ValueError("can't add row while being viewed")
        self.v.resize(self.v.size() + self.ncols)

    def __getbuffer__(self, buffer: cython.pointer(Py_buffer), flags: cython.int):
        # ... as before

        # Record the new view only after the buffer has been filled in.
        self.view_count += 1

    def __releasebuffer__(self, buffer: cython.pointer(Py_buffer)):
        # A view was released; add_row is allowed again once this reaches 0.
        self.view_count -= 1

# distutils: language = c++

from cpython cimport Py_buffer
from libcpp.vector cimport vector


# Matrix variant that refuses to grow while buffer views are outstanding.
# This is an abridged listing: members and code marked "# ..." are unchanged
# from the previous version of the class.
cdef class Matrix:

    # Number of buffer views currently handed out: incremented in
    # __getbuffer__, decremented in __releasebuffer__.
    cdef int view_count

    cdef Py_ssize_t ncols
    cdef vector[float] v
    # ...

    def __cinit__(self, Py_ssize_t ncols):
        self.ncols = ncols
        # No views exist on a freshly created matrix.
        self.view_count = 0

    def add_row(self):
        # Resizing may move the underlying storage, which would invalidate
        # any view still pointing at it, so raise ValueError while views exist.
        if self.view_count > 0:
            raise ValueError("can't add row while being viewed")
        self.v.resize(self.v.size() + self.ncols)

    def __getbuffer__(self, Py_buffer *buffer, int flags):
        # ... as before

        # Record the new view only after the buffer has been filled in.
        self.view_count += 1

    def __releasebuffer__(self, Py_buffer *buffer):
        # A view was released; add_row is allowed again once this reaches 0.
        self.view_count -= 1

标志¶

我们在代码中跳过了某些输入验证。传递给 __getbuffer__ 的 flags 参数来自 np.asarray（以及其他使用方），它是若干布尔标志按位或（OR）的结果，用来描述所请求的数组类型。严格来说，如果 flags 包含 PyBUF_ND、PyBUF_SIMPLE 或 PyBUF_F_CONTIGUOUS，__getbuffer__ 必须抛出 BufferError。这些宏可以从 cpython.buffer 中 cimport。

（向量中的矩阵结构实际上符合 PyBUF_ND，但这将禁止 __getbuffer__ 填充步长。单行矩阵是 F 连续的，但更大的矩阵不是。）

参考资料¶

此处使用的缓冲区接口由 PEP 3118《修订缓冲区协议》规定。

Jake Vanderplas 的博客上有一篇使用此 API 的 C 语言教程：《Python 缓冲区协议简介》。

Python 3 和 Python 2 都有相应的参考文档。Python 2 的文档还描述了已不再使用的旧缓冲区协议；从 Python 2.6 开始已实现 PEP 3118 协议，旧协议只与遗留代码有关。

实现缓冲协议¶

矩阵类¶

内存安全和引用计数¶

标志¶

参考资料¶

目录

上一主题

下一主题

此页面