API Add PyProxyBufferMethods (pyodide#1215)

jmsmdy · Mar 26, 2021 · fd88a18 · fd88a18
1 parent b22b4f0
commit fd88a18
Show file tree

Hide file tree

Showing 8 changed files with 673 additions and 20 deletions.
diff --git a/Makefile b/Makefile
@@ -114,7 +114,7 @@ clean-all: clean
 	make -C cpython clean
 	rm -fr cpython/build
 
-%.o: %.c $(CPYTHONLIB) $(wildcard src/**/*.h)
+%.o: %.c $(CPYTHONLIB) $(wildcard src/**/*.h src/**/*.js)
 	$(CC) -o $@ -c $< $(MAIN_MODULE_CFLAGS) -Isrc/core/
 
 

diff --git a/docs/project/changelog.md b/docs/project/changelog.md
@@ -53,6 +53,9 @@ substitutions:
   [#1175](https://github.com/iodide-project/pyodide/pull/1175)
 - {{ API }} The `pyodide.pyimport` function is deprecated in favor of using
   `pyodide.globals.get('key')`. [#1367](https://github.com/iodide-project/pyodide/pull/1367)
+- {{ API }} Added `PyProxy.getBuffer` API to allow direct access to Python
+  buffers as Javascript TypedArrays.
+  [#1215](https://github.com/iodide-project/pyodide/pull/1215)
 
 ### Fixed
 - {{ Fix }} getattr and dir on JsProxy now report consistent results and include all

diff --git a/docs/sphinx_pyodide/sphinx_pyodide/jsdoc.py b/docs/sphinx_pyodide/sphinx_pyodide/jsdoc.py
@@ -4,16 +4,21 @@
 from docutils.utils import new_document
 
 from collections import OrderedDict
+import re
 
 from sphinx import addnodes
 from sphinx.util import rst
 from sphinx.util.docutils import switch_source_input
 from sphinx.ext.autosummary import autosummary_table, extract_summary
 
 from sphinx_js.jsdoc import Analyzer as JsAnalyzer
-from sphinx_js.ir import Function
+from sphinx_js.ir import Class, Function
 from sphinx_js.parsers import path_and_formal_params, PathVisitor
-from sphinx_js.renderers import AutoFunctionRenderer, AutoAttributeRenderer
+from sphinx_js.renderers import (
+    AutoFunctionRenderer,
+    AutoAttributeRenderer,
+    AutoClassRenderer,
+)
 
 
 class PyodideAnalyzer:
@@ -47,7 +52,12 @@ def get_object_from_json(self, json):
         path components which JsAnalyzer.get_object requires.
         """
         path = self.longname_to_path(json["longname"])
-        kind = "function" if json["kind"] == "function" else "attribute"
+        if json["kind"] == "function":
+            kind = "function"
+        elif json["kind"] == "class":
+            kind = "class"
+        else:
+            kind = "attribute"
         obj = self.inner.get_object(path, kind)
         obj.kind = kind
         return obj
@@ -58,12 +68,16 @@ def create_js_doclets(self):
         """
 
         def get_val():
-            return OrderedDict([["attribute", []], ["function", []]])
+            return OrderedDict([["attribute", []], ["function", []], ["class", []]])
 
         self.js_docs = {key: get_val() for key in ["globals", "pyodide", "PyProxy"]}
         items = {"PyProxy": []}
         for (key, group) in self._doclets_by_class.items():
             key = [x for x in key if "/" not in x]
+            if key[-1] == "PyBuffer":
+                # PyBuffer stuff is documented as a class. Would be nice to have
+                # a less ad hoc way to deal with this...
+                continue
             if key[-1] == "globalThis":
                 items["globals"] = group
             if key[0] == "pyodide." and key[-1] == "Module":
@@ -76,7 +90,13 @@ def get_val():
                 if json.get("access", None) == "private":
                     continue
                 obj = self.get_object_from_json(json)
+                if isinstance(obj, Class):
+                    # sphinx-jsdoc messes up array types. Fix them.
+                    for x in obj.members:
+                        if hasattr(x, "type"):
+                            x.type = re.sub("Array\.<([a-zA-Z_0-9]*)>", r"\1[]", x.type)
                 if obj.name[0] == '"' and obj.name[-1] == '"':
+                    # sphinx-jsdoc messes up Symbol attributes. Fix them.
                     obj.name = "[" + obj.name[1:-1] + "]"
                 self.js_docs[key][obj.kind].append(obj)
 
@@ -97,11 +117,13 @@ def get_rst(self, obj):
             JsDoc also has an AutoClassRenderer which may be useful in the future."""
             if isinstance(obj, Function):
                 renderer = AutoFunctionRenderer
+            elif isinstance(obj, Class):
+                renderer = AutoClassRenderer
             else:
                 renderer = AutoAttributeRenderer
-            return renderer(self, app, arguments=["dummy"]).rst(
-                [obj.name], obj, use_short_name=False
-            )
+            return renderer(
+                self, app, arguments=["dummy"], options={"members": ["*"]}
+            ).rst([obj.name], obj, use_short_name=False)
 
         def get_rst_for_group(self, objects):
             return [self.get_rst(obj) for obj in objects]
@@ -144,6 +166,9 @@ def run(self):
             for group_name, group_objects in value.items():
                 if not group_objects:
                     continue
+                if group_name == "class":
+                    # Plural of class is "classes" not "classs"
+                    group_name += "e"
                 result.append(self.format_heading(group_name.title() + "s:"))
                 table_items = self.get_summary_table(module, group_objects)
                 table_markup = self.format_table(table_items)

diff --git a/docs/usage/type-conversions.md b/docs/usage/type-conversions.md
@@ -369,15 +369,30 @@ numpy_array = np.asarray(array)
 
 ### Converting Python Buffer objects to Javascript
 
-Python `bytes` and `buffer` objects are translated to Javascript as
-`TypedArray`s without any memory copy at all. This conversion is thus very
-efficient, but be aware that any changes to the buffer will be reflected in both
-places.
-
-Numpy arrays are currently converted to Javascript as nested (regular) Arrays. A
-more efficient method will probably emerge as we decide on an ndarray
-implementation for Javascript.
-
+A PyProxy of any Python object supporting the
+[Python Buffer protocol](https://docs.python.org/3/c-api/buffer.html) will have
+a method called {any}`getBuffer`. This can be used to retrieve a reference to a
+Javascript typed array that points to the data backing the Python object,
+combined with other metadata about the buffer format. The metadata is suitable
+for use with a Javascript ndarray library if one is present. For instance, if
+you load the Javascript [ndarray](https://github.com/scijs/ndarray)
+package, you can do:
+```js
+let proxy = pyodide.globals.get("some_numpy_ndarray");
+let buffer = proxy.getBuffer();
+proxy.destroy();
+try {
+    if(buffer.readonly){
+        // We can't stop you from changing a readonly buffer, but it can cause undefined behavior.
+        throw new Error("Uh-oh, we were planning to change the buffer");
+    }
+    let array = new ndarray(buffer.data, buffer.shape, buffer.strides, buffer.offset);
+    // manipulate array here
+    // changes will be reflected in the Python ndarray!
+} finally {
+    buffer.release(); // Release the memory when we're done
+}
+```
 
 ## Importing Python objects into Javascript
 

diff --git a/packages/numpy/test_numpy.py b/packages/numpy/test_numpy.py
@@ -1,3 +1,6 @@
+import pytest
+
+
 def test_numpy(selenium):
     selenium.load_package("numpy")
     selenium.run("import numpy")
@@ -191,3 +194,107 @@ def test_runwebworker_numpy(selenium_standalone):
         """
     )
     assert output == "[0. 0. 0. 0. 0.]"
+
+
+def test_get_buffer(selenium):
+    """Check that ``getBuffer`` metadata addresses the same elements as numpy.
+
+    Builds an 8x3 array plus views with the rows, the columns, and both axes
+    reversed, then reads every element in Javascript as
+    ``data[offset + strides[0] * i + strides[1] * j]`` and compares its string
+    form with the ``repr`` of the matching element obtained from Python. Each
+    buffer is released after it has been checked.
+    """
+    selenium.run_js(
+        """
+        await pyodide.runPythonAsync(`
+            import numpy as np
+            x = np.arange(24)
+            z1 = x.reshape([8,3])
+            z2 = z1[-1::-1]
+            z3 = z1[::,-1::-1]
+            z4 = z1[-1::-1,-1::-1]
+        `);
+        for(let x of ["z1", "z2", "z3", "z4"]){
+            let z = pyodide.pyimport(x).getBuffer("u32");
+            for(let idx1 = 0; idx1 < 8; idx1++) {
+                for(let idx2 = 0; idx2 < 3; idx2++){
+                    let v1 = z.data[z.offset + z.strides[0] * idx1 + z.strides[1] * idx2];
+                    let v2 = pyodide.runPython(`repr(${x}[${idx1}, ${idx2}])`);
+                    console.log(`${v1}, ${typeof(v1)}, ${v2}, ${typeof(v2)}, ${v1===v2}`);
+                    if(v1.toString() !== v2){
+                        throw new Error(`Discrepancy ${x}[${idx1}, ${idx2}]: ${v1} != ${v2}`);
+                    }
+                }
+            }
+            z.release();
+        }
+        """
+    )
+
+
+@pytest.mark.parametrize(
+    "arg",
+    [
+        "np.arange(6).reshape((2, -1))",
+        "np.arange(12).reshape((3, -1))[::2, ::2]",
+        "np.arange(12).reshape((3, -1))[::-1, ::-1]",
+        "np.arange(12).reshape((3, -1))[::, ::-1]",
+        "np.arange(12).reshape((3, -1))[::-1, ::]",
+        "np.arange(12).reshape((3, -1))[::-2, ::-2]",
+        "np.arange(6).reshape((2, -1)).astype(np.int8, order='C')",
+        "np.arange(6).reshape((2, -1)).astype(np.int8, order='F')",
+        "np.arange(6).reshape((2, -1, 1))",
+        "np.ones((1, 1))[0:0]",  # shape[0] == 0
+        "np.ones(1)",  # one-element 1-d array (ndim == 1, not 0)
+    ]
+    + [
+        f"np.arange(3).astype(np.{type_})"
+        for type_ in ["int8", "uint8", "int16", "int32", "float32", "float64"]
+    ],
+)
+def test_get_buffer_roundtrip(selenium, arg):
+    """Compare the metadata and contents of ``getBuffer()`` against numpy.
+
+    ``arg`` is a numpy expression evaluated in Python as ``x``. Its buffer is
+    stored on ``window`` from Javascript and then checked from Python:
+    ``ndim``, ``shape``, ``format`` and ``strides`` (numpy's byte strides
+    divided by the item size) must match, every element reached through
+    ``offset`` and ``strides`` must equal the numpy element, and the smallest
+    and largest offsets reached must be ``0`` and ``data.length - 1``. When
+    ``len(x) == 0`` only an empty ``data`` is required. The buffer is released
+    at the end of the Python check.
+    """
+    # Step 1 (Javascript): evaluate the expression and stash the buffer on
+    # window, copying data.length onto it as `length` for the Python side.
+    selenium.run_js(
+        f"""
+        await pyodide.runPythonAsync(`
+            import numpy as np
+            x = {arg}
+        `);
+        window.x_js_buf = pyodide.pyimport("x").getBuffer();
+        x_js_buf.length = x_js_buf.data.length;
+        """
+    )
+
+    # Step 2 (Python, run through Javascript): import the stashed buffer from
+    # `js` and compare it with the numpy array `x` defined in step 1.
+    selenium.run_js(
+        """
+        pyodide.runPython(`
+            import itertools
+            from unittest import TestCase
+            from js import x_js_buf
+            assert_equal = TestCase().assertEqual
+
+            assert_equal(x_js_buf.ndim, x.ndim)
+            assert_equal(x_js_buf.shape.to_py(), list(x.shape))
+            assert_equal(x_js_buf.strides.to_py(), [s/x.itemsize for s in x.data.strides])
+            assert_equal(x_js_buf.format, x.data.format)
+            if len(x) == 0:
+                assert x_js_buf.length == 0
+            else:
+                minoffset = 1000
+                maxoffset = 0
+                for tup in itertools.product(*[range(n) for n in x.shape]):
+                    offset = x_js_buf.offset + sum(x*y for (x,y) in zip(tup, x_js_buf.strides))
+                    minoffset = min(offset, minoffset)
+                    maxoffset = max(offset, maxoffset)
+                    assert_equal(x[tup], x_js_buf.data[offset])
+                assert_equal(minoffset, 0)
+                assert_equal(maxoffset + 1, x_js_buf.length)
+            x_js_buf.release()
+        `);
+        """
+    )
+
+
+def test_get_buffer_error_messages(selenium):
+    """Expect ``getBuffer()`` on a float16 numpy array to raise an error.
+
+    The raised exception message must match "Javascript has no Float16Array".
+    """
+    with pytest.raises(Exception, match="Javascript has no Float16Array"):
+        selenium.run_js(
+            """
+            await pyodide.runPythonAsync(`
+                import numpy as np
+                x = np.ones(2, dtype=np.float16)
+            `);
+            pyodide.pyimport("x").getBuffer();
+            """
+        )