Skip to content

Commit

Permalink
Fix memory access out of bounds in numpy + chrome 89 (#1474)
Browse files Browse the repository at this point in the history
  • Loading branch information
Hood Chatham authored Apr 17, 2021
1 parent cae11ab commit 1485732
Show file tree
Hide file tree
Showing 6 changed files with 348 additions and 4 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ all: check \
build/pyodide.asm.js: \
src/core/docstring.o \
src/core/error_handling.o \
src/core/numpy_patch.o \
src/core/hiwire.o \
src/core/js2python.o \
src/core/jsproxy.o \
Expand Down
3 changes: 2 additions & 1 deletion packages/numpy/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ source:
sha256: 16507ba6617f62ae3c6ab1725ae6f550331025d4d9a369b83f6d5a470446c342

patches:
- patches/fix-i32-load-memory-access-oob.patch
- patches/segfault-PyArray_Broadcast.patch
- patches/segfault-PyArray_PyIntAsIntp.patch
- patches/add-emscripten-cpu.patch
- patches/disable-maybe-uninitialized.patch
- patches/dont-include-execinfo.patch
Expand Down
71 changes: 71 additions & 0 deletions packages/numpy/patches/segfault-PyArray_Broadcast.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
From 6f91dceca26bb06bada90e0e0bbe770538928d5f Mon Sep 17 00:00:00 2001
From: Hood <hood@mit.edu>
Date: Fri, 16 Apr 2021 08:41:03 -0700
Subject: [PATCH 2/2] Fix segfault in PyArray_Broadcast in chrome 89 #1474

Resolves #1473.

This is a similar situation as with #1384 (see the other patch) however in this
case when I made modifications to `PyArray_Broadcast` to remove the offending
load, the crash moved around to other locations corresponding to different spots
in the source. After a significant amount of messing around with this, I just
reimplemented the first part of the function in Javascript. See
src/core/numpy_patch.c where we define PyArray_Broadcast_part1.

---
numpy/core/src/multiarray/iterators.c | 32 +++++++++++++++++++++------
1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/numpy/core/src/multiarray/iterators.c b/numpy/core/src/multiarray/iterators.c
index 9da811f69..b59d32da1 100644
--- a/numpy/core/src/multiarray/iterators.c
+++ b/numpy/core/src/multiarray/iterators.c
@@ -1153,13 +1153,13 @@ NPY_NO_EXPORT PyTypeObject PyArrayIter_Type = {

/** END of Array Iterator **/

-/* Adjust dimensionality and strides for index object iterators
- --- i.e. broadcast
-*/
-/*NUMPY_API*/
-NPY_NO_EXPORT int
-PyArray_Broadcast(PyArrayMultiIterObject *mit)
-{
+int
+PyArray_Broadcast_part1(void *mit);
+
+#ifndef __EMSCRIPTEN__
+int
+PyArray_Broadcast_part1(void *mit_void){
+ PyArrayMultiIterObject *mit = (PyArrayMultiIterObject *)mit_void;
int i, nd, k, j;
npy_intp tmp;
PyArrayIterObject *it;
@@ -1195,6 +1195,24 @@ PyArray_Broadcast(PyArrayMultiIterObject *mit)
}
}
}
+ return 0;
+}
+#endif
+
+/* Adjust dimensionality and strides for index object iterators
+ --- i.e. broadcast
+*/
+/*NUMPY_API*/
+NPY_NO_EXPORT int
+PyArray_Broadcast(PyArrayMultiIterObject *mit)
+{
+ int i, nd, k, j;
+ npy_intp tmp;
+ PyArrayIterObject *it;
+
+ if(PyArray_Broadcast_part1((void*)mit) == -1){
+ return -1;
+ }

/*
* Reset the iterator dimensions and strides of each iterator
--
2.17.1

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
From 6cf164b6a1a7cc151aa52ab5bb9e20a95a6a5e67 Mon Sep 17 00:00:00 2001
From c5a48b544d465673d900459d7e9226379f79e84a Mon Sep 17 00:00:00 2001
From: Hood <hood@mit.edu>
Date: Fri, 2 Apr 2021 14:24:24 -0700
Subject: [PATCH] Fix "load" memory access out of bounds on Chrome v89 #1384
Subject: [PATCH 1/2] Fix segfault in PyArray_PyIntAsIntp in Chrome v89 #1384

Resolves https://github.com/pyodide/pyodide/issues/1384.

Expand Down Expand Up @@ -111,5 +111,5 @@ index 52cb58726..5467f4d78 100644
}
return nd;
--

2.17.1

5 changes: 5 additions & 0 deletions src/core/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ run_python_simple_inner(char* code)
return result ? 0 : -1;
}

// from numpy_patch.c (no need for a header just for this)
int
numpy_patch_init();

int
main(int argc, char** argv)
{
Expand Down Expand Up @@ -121,6 +125,7 @@ main(int argc, char** argv)
TRY_INIT_WITH_CORE_MODULE(error_handling);
TRY_INIT(hiwire);
TRY_INIT(docstring);
TRY_INIT(numpy_patch);
TRY_INIT(js2python);
TRY_INIT_WITH_CORE_MODULE(python2js);
TRY_INIT(python2js_buffer);
Expand Down
266 changes: 266 additions & 0 deletions src/core/numpy_patch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
#define PY_SSIZE_T_CLEAN
#include "Python.h"

#include "error_handling.h"

// Stuff copied from ndarraytypes.h to check offsets
// First we need a couple of things.
typedef int npy_intp;
typedef void PyArray_Descr; // makes it opaque
typedef bool npy_bool;

#if true /* begin copied from numpy */

#define NPY_MAXDIMS 32
#define NPY_MAXARGS 32

/*
* The main array object structure.
*
* It has been recommended to use the inline functions defined below
* (PyArray_DATA and friends) to access fields here for a number of
* releases. Direct access to the members themselves is deprecated.
* To ensure that your code does not use deprecated access,
* #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
* (or NPY_1_8_API_VERSION or higher as required).
*/
/* This struct will be moved to a private header in a future release */
typedef struct tagPyArrayObject_fields
{
PyObject_HEAD
/* Pointer to the raw data buffer */
char* data;
/* The number of dimensions, also called 'ndim' */
int nd;
/* The size in each dimension, also called 'shape' */
npy_intp* dimensions;
/*
* Number of bytes to jump to get to the
* next element in each dimension
*/
npy_intp* strides;
/*
* This object is decref'd upon
* deletion of array. Except in the
* case of WRITEBACKIFCOPY which has
* special handling.
*
* For views it points to the original
* array, collapsed so no chains of
* views occur.
*
* For creation from buffer object it
* points to an object that should be
* decref'd on deletion
*
* For WRITEBACKIFCOPY flag this is an
* array to-be-updated upon calling
* PyArray_ResolveWritebackIfCopy
*/
PyObject* base;
/* Pointer to type structure */
PyArray_Descr* descr;
/* Flags describing array -- see below */
int flags;
/* For weak references */
PyObject* weakreflist;
} PyArrayObject_fields;

/*
* Can't put this in npy_deprecated_api.h like the others.
* PyArrayObject field access is deprecated as of NumPy 1.7.
*/
typedef PyArrayObject_fields PyArrayObject;

/* FWD declaration */
typedef struct PyArrayIterObject_tag PyArrayIterObject;

/*
* type of the function which translates a set of coordinates to a
* pointer to the data
*/
typedef char* (*npy_iter_get_dataptr_t)(PyArrayIterObject* iter, npy_intp*);

struct PyArrayIterObject_tag
{
PyObject_HEAD int nd_m1; /* number of dimensions - 1 */
npy_intp index, size;
npy_intp coordinates[NPY_MAXDIMS]; /* N-dimensional loop */
npy_intp dims_m1[NPY_MAXDIMS]; /* ao->dimensions - 1 */
npy_intp strides[NPY_MAXDIMS]; /* ao->strides or fake */
npy_intp backstrides[NPY_MAXDIMS]; /* how far to jump back */
npy_intp factors[NPY_MAXDIMS]; /* shape factors */
PyArrayObject* ao;
char* dataptr; /* pointer to current item*/
npy_bool contiguous;

npy_intp bounds[NPY_MAXDIMS][2];
npy_intp limits[NPY_MAXDIMS][2];
npy_intp limits_sizes[NPY_MAXDIMS];
npy_iter_get_dataptr_t translate;
};

typedef struct
{
PyObject_HEAD int numiter; /* number of iters */
npy_intp size; /* broadcasted size */
npy_intp index; /* current index */
int nd; /* number of dims */
npy_intp dimensions[NPY_MAXDIMS]; /* dimensions */
PyArrayIterObject* iters[NPY_MAXARGS]; /* iterators */
} PyArrayMultiIterObject;

#endif /* end copied from numpy */

// To minimize change of confusion, make sure variables in here don't match any
// variables used in PyArray_Broadcast_part1
#define offset_Array_nd 12
#define offset_Array_dimensions 16

#define offset_Iter_ao 660

#define offset_MultIter_numiter 8
#define offset_MultIter_nd 20
#define offset_MultIter_dimensions 24
#define offset_MultIter_iters 152

/**
* Check that our offsets match the numpy declarations.
*/
// clang-format off
int numpy_patch_init(){
assert(offset_Array_nd == offsetof(PyArrayObject, nd));
assert(offset_Array_dimensions == offsetof(PyArrayObject, dimensions));

assert(offset_Iter_ao == offsetof(PyArrayIterObject, ao));

assert(offset_MultIter_numiter == offsetof(PyArrayMultiIterObject, numiter));
assert(offset_MultIter_nd == offsetof(PyArrayMultiIterObject, nd));
assert(offset_MultIter_dimensions == offsetof(PyArrayMultiIterObject, dimensions));
assert(offset_MultIter_iters == offsetof(PyArrayMultiIterObject, iters));
return 0;
}
// clang-format on

// clang-format off
#define LOAD(ptr) HEAP32[(ptr)/4]
#define LOAD_ARRAY(ptr, idx) LOAD(ptr + 4*idx)

#define Array_nd(ptr) LOAD(ptr + offset_Array_nd)
#define Array_dimensions(ptr) LOAD(ptr + offset_Array_dimensions)

#define Iter_array(ptr) LOAD(ptr + offset_Iter_ao)

#define MultiIter_numiter(ptr) LOAD(ptr + offset_MultIter_numiter)
#define MultiIter_nd(ptr) LOAD(ptr + offset_MultIter_nd)
#define MultiIter_iter(ptr, index) LOAD_ARRAY(ptr + offset_MultIter_iters, index)
#define MultiIter_dimension(ptr, index) LOAD_ARRAY(ptr + offset_MultIter_dimensions, index)
// clang-format on

/**
* It's annoying to set errors from Javascript.
*/
void
set_shape_mismatch_err()
{
PyErr_SetString(
PyExc_ValueError,
"shape mismatch: objects cannot be broadcast to a single shape");
}

/**
* This is basically a 1-1 port of the first segment of PyArray_Broadcast.
* I rearranged the code a small amount to save effort.
* Pretty much all that happened is that we destroyed type information, replaced
* declarations with "let", and had to make special macros to do most memory
* access which is much more annoying than in Javascript.
*
* See below for the C equivalent.
*/
EM_JS_NUM(int, PyArray_Broadcast_part1, (void* mit), {
let numiter = MultiIter_numiter(mit);
/* Discover the broadcast number of dimensions */
let nd = 0;
for (let i = 0; i < numiter; i++) {
let cur_nd = Array_nd(Iter_array(MultiIter_iter(mit, i)));
nd = (cur_nd > nd) ? cur_nd : nd;
}
MultiIter_nd(mit) = nd;

/* Discover the broadcast shape in each dimension */
let start_offset = (mit + offset_MultIter_dimensions) / 4;
HEAP32.subarray(start_offset, start_offset + nd).fill(1);

for (let j = 0; j < numiter; j++) {
let it = MultiIter_iter(mit, j);
for (i = 0; i < nd; i++) {
/* This prepends 1 to shapes not already equal to nd */
let cur_array = Iter_array(it);
let cur_nd = Array_nd(cur_array);
let k = i + cur_nd - nd;
if (k >= 0) {
let tmp = LOAD_ARRAY(Array_dimensions(cur_array), k);
if (tmp == 1) {
continue;
}
let mit_dim_i = MultiIter_dimension(mit, i);
if (mit_dim_i == 1) {
MultiIter_dimension(mit, i) = tmp;
} else if (mit_dim_i != tmp) {
_set_shape_mismatch_err();
return -1;
}
}
}
}
})

// Here is the C code I based the above function on.
// This is lightly reorganized from the original definition.
/*
NPY_NO_EXPORT int
PyArray_Broadcast(PyArrayMultiIterObject *mit)
{
int i, nd, k, j;
npy_intp tmp, tmp2;
PyArrayIterObject *it;
PyArrayIterObject **it_ptr;
/ * Discover the broadcast number of dimensions * /
for (i = 0, nd = 0; i < mit->numiter; i++) {
nd = PyArray_MAX(nd, PyArray_NDIM(mit->iters[i]->ao));
}
mit->nd = nd;
/ * Discover the broadcast shape in each dimension * /
for (i = 0; i < nd; i++) {
mit->dimensions[i] = 1;
}
for (j = 0; j < mit->numiter; j++) {
it = mit->iters[j];
for (i = 0; i < nd; i++) {
/ * This prepends 1 to shapes not already equal to nd * /
k = i + PyArray_NDIM(it->ao) - nd;
if (k >= 0) {
tmp = PyArray_DIMS(it->ao)[k];
if (tmp == 1) {
continue;
}
if (mit->dimensions[i] == 1) {
mit->dimensions[i] = tmp;
}
else if (mit->dimensions[i] != tmp) {
PyErr_SetString(PyExc_ValueError,
"shape mismatch: objects" \
" cannot be broadcast" \
" to a single shape");
return -1;
}
}
}
}
// Rest of function skipped
}
*/

0 comments on commit 1485732

Please sign in to comment.