Fix column ordering in Python, null handling for computed columns #907

Merged: 4 commits, Feb 8, 2020
117 changes: 89 additions & 28 deletions cpp/perspective/src/cpp/computed_function.cpp

Large diffs are not rendered by default.

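The computed_function.cpp diff is too large to render here, but the "null handling for computed columns" half of this PR can be read as one rule, judging from the title and the accompanying computed.js tests: a computed cell is null whenever any of its inputs is null, rather than raising or producing garbage. A minimal Python sketch of that rule, assuming those semantics (the helper compute_column is hypothetical, not Perspective's API):

    def compute_column(func, *input_columns):
        # Hypothetical sketch: emit None for a row if any input cell is None,
        # otherwise apply the computation to the row's values.
        return [
            None if any(v is None for v in row) else func(*row)
            for row in zip(*input_columns)
        ]

    # Addition with nulls propagated instead of raising TypeError:
    assert compute_column(lambda x, y: x + y, [1, None, 3], [4, 5, None]) == [5, None, None]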
@@ -23,7 +23,7 @@ var Borders = cellRenderersRegistry.BaseClass.extend("Borders", {
        var color;

        gc.save();
-       gc.translate(-0.5, 0.5); // paint "sharp" lines on pixels instead of "blury" lines between pixels
+       gc.translate(-0.5, 0.5); // paint "sharp" lines on pixels instead of "blurry" lines between pixels
        gc.cache.lineWidth = 1;

        color = config.borderTop;
988 changes: 905 additions & 83 deletions packages/perspective/test/js/computed.js

Large diffs are not rendered by default.

25 changes: 20 additions & 5 deletions python/perspective/perspective/src/numpy.cpp
@@ -600,17 +600,32 @@ namespace numpy {
     */
    std::vector<std::string>
    NumpyLoader::make_names() {
-       auto names = py::list(m_accessor.attr("data")().attr("keys")());
-       return names.cast<std::vector<std::string>>();
+       auto data = m_accessor.attr("data")();
+       auto py_names = m_accessor.attr("names")().cast<std::vector<std::string>>();
+
+       // Match names to the dataset - only keep names that are present in the
+       // dataset. The `m_names` variable is used internally to access the numpy
+       // arrays containing each column. On first-time load, `m_names` contains
+       // every name in the dataset. On update, `m_names` is recalculated to
+       // include only the columns present in the update dataset.
+       std::vector<std::string> names;
+       for (const auto& name : py_names) {
+           if (data.contains(py::str(name))) {
+               names.push_back(name);
+           }
+       }
+
+       return names;
    }
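In Python terms, the new make_names is equivalent to the sketch below: keep the accessor's declared column order, but retain only the names present in the (possibly partial) dataset. This is an illustrative re-expression, not the bound C++ function:

    def make_names(accessor_names, data):
        # `accessor_names` is the full, ordered list of column names;
        # `data` maps column name -> numpy array for this load or update.
        return [name for name in accessor_names if name in data]

    # First-time load: every column is present. Partial update: only a subset.
    assert make_names(["a", "b", "c"], {"a": [1], "c": [3]}) == ["a", "c"]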

    std::vector<t_dtype>
    NumpyLoader::make_types() {
        std::vector<t_dtype> rval;

-       py::list arrays = m_accessor.attr("data")().attr("values")();
-       for (const auto& a : arrays) {
-           py::array array = py::array::ensure(a);
+       auto data = m_accessor.attr("data")();
+       for (const auto& name : m_names) {
+           // Access each array by name to guarantee ordered access.
+           py::array array = py::array::ensure(data[py::str(name)]);

            if (!array) {
                PSP_COMPLAIN_AND_ABORT("Perspective does not support the mixing of ndarrays and lists.");
14 changes: 13 additions & 1 deletion python/perspective/perspective/src/table.cpp
@@ -113,8 +113,14 @@ std::shared_ptr<Table> make_table_py(t_val table, t_data_accessor accessor, t_va
         * not created from a DataFrame, the "index" column would not exist.
         */
        if (is_numpy) {
+           // `numpy_loader`'s `m_names` and `m_types` contain only the
+           // column names and data types present in the update dataset,
+           // not the names/types of the entire `Table`.
            numpy_loader.init();
        }

+       // `column_names` and `data_types` contain every column in the
+       // dataset, as well as `__INDEX__` if it exists.
        column_names = accessor.attr("names")().cast<std::vector<std::string>>();
        data_types = accessor.attr("types")().cast<std::vector<t_dtype>>();
    } else if (is_numpy) {
@@ -123,9 +123,15 @@ std::shared_ptr<Table> make_table_py(t_val table, t_data_accessor accessor, t_va
         * Perspective. Using `get_data_types` allows us to know the type of an array with `dtype=object`.
         */
        numpy_loader.init();

+       // This will contain every column in the dataset, as the first-time
+       // data load path does not mutate the `names` property of `accessor`.
        column_names = numpy_loader.names();

-       // composite array and inferred `data_types` for the Table
+       // Infer a data type for each column, then use a composite of the numpy
+       // dtype, the inferred `t_dtype`, and the stringified numpy dtype to get
+       // the final, canonical data type mapping.
        std::vector<t_dtype> inferred_types = get_data_types(accessor.attr("data")(), 1, column_names, accessor.attr("date_validator")().cast<t_val>());
        data_types = numpy_loader.reconcile_dtypes(inferred_types);
    } else {
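A note on why the first-time numpy path needs two typing steps: numpy reports dtype=object for mixed or string columns, so a concrete type must be inferred from the values and then reconciled with the raw numpy dtype. The sketch below illustrates only the inference half; infer_type is an assumed stand-in, not the real get_data_types:

    import numpy as np

    def infer_type(array):
        # Assumed stand-in for type inference on a single column.
        if array.dtype != np.dtype("object"):
            return array.dtype.name              # concrete dtype: trust numpy
        for value in array:
            if value is not None:
                return type(value).__name__      # first non-null value decides
        return "str"                             # all-null object column: assumed fallback

    assert infer_type(np.array([1, 2, 3], dtype="int64")) == "int64"
    assert infer_type(np.array(["a", None], dtype=object)) == "str"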
18 changes: 10 additions & 8 deletions python/perspective/perspective/src/view.cpp
@@ -120,16 +120,18 @@ make_view_config(const t_schema& schema, t_val date_parser, t_val config) {

    // to preserve order, do not cast to std::map - use keys and python 3.7's guarantee that dicts respect insertion order
    auto p_aggregates = py::dict(config.attr("get_aggregates")());
-   auto aggregate_keys = py::list(p_aggregates.attr("keys")());
    tsl::ordered_map<std::string, std::vector<std::string>> aggregates;

-   for (auto& key : aggregate_keys) {
-       const std::string key_str = key.cast<std::string>();
-       if (py::isinstance<py::str>(p_aggregates[key])) {
-           std::vector<std::string> agg{p_aggregates[key].cast<std::string>()};
-           aggregates[key_str] = agg;
-       } else {
-           aggregates[key_str] = p_aggregates[key].cast<std::vector<std::string>>();
+   for (auto& column : columns) {
+       py::str py_column_name = py::str(column);
+       if (p_aggregates.contains(py_column_name)) {
+           if (py::isinstance<py::str>(p_aggregates[py_column_name])) {
+               std::vector<std::string> agg{
+                   p_aggregates[py_column_name].cast<std::string>()};
+               aggregates[column] = agg;
+           } else {
+               aggregates[column] = p_aggregates[py_column_name].cast<std::vector<std::string>>();
+           }
        }
    };

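The ordering fix reads naturally as a Python sketch: iterate the view's columns, which carry the user-specified order, instead of the aggregates dict's own keys, and normalize scalar aggregates to one-element lists. make_aggregates below is illustrative, not the pybind11 binding (the aggregate values are example data):

    def make_aggregates(columns, p_aggregates):
        aggregates = {}                  # dicts preserve insertion order (3.7+)
        for column in columns:           # column order drives output order
            if column in p_aggregates:
                agg = p_aggregates[column]
                aggregates[column] = [agg] if isinstance(agg, str) else list(agg)
        return aggregates

    result = make_aggregates(["b", "a"], {"a": "sum", "b": ["weighted mean", "x"]})
    assert list(result) == ["b", "a"]    # output follows column order, not dict order
    assert result["a"] == ["sum"]        # scalar aggregate normalized to a list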
23 changes: 11 additions & 12 deletions python/perspective/perspective/table/_accessor.py
@@ -43,41 +43,44 @@ def _type_to_format(data_or_schema):
        - 0: records (:obj:`list` of :obj:`dict`)
        - 1: columns (:obj:`dict` of :obj:`str` to :obj:`list`)
        - 2: schema (dict[str]/dict[type])
+       :obj:`list`: column names
        (:obj:`list`/:obj:`dict`): processed data
    '''
    if isinstance(data_or_schema, list):
        # records
-       return False, 0, data_or_schema
+       names = list(data_or_schema[0].keys()) if len(data_or_schema) > 0 else []
+       return False, 0, names, data_or_schema
    elif isinstance(data_or_schema, dict):
        # schema or columns
        for v in data_or_schema.values():
            if isinstance(v, type) or isinstance(v, str):
                # schema maps name -> type
-               return False, 2, data_or_schema
+               return False, 2, list(data_or_schema.keys()), data_or_schema
            elif isinstance(v, list):
                # a dict of iterables = type 1
-               return False, 1, data_or_schema
+               return False, 1, list(data_or_schema.keys()), data_or_schema
            else:
                # see if iterable
                try:
                    iter(v)
                except TypeError:
                    raise NotImplementedError("Cannot load dataset of non-iterable type: Data passed in through a dict must be of type `list` or `numpy.ndarray`.")
                else:
-                   return isinstance(v, numpy.ndarray), 1, data_or_schema
+                   return isinstance(v, numpy.ndarray), 1, list(data_or_schema.keys()), data_or_schema
    elif isinstance(data_or_schema, numpy.ndarray):
        # structured or record array
        if not isinstance(data_or_schema.dtype.names, tuple):
            raise NotImplementedError("Data should be dict of numpy.ndarray or a structured array.")
-       return True, 1, _flatten_structure(data_or_schema)
+       flattened = _flatten_structure(data_or_schema)
+       return True, 1, list(flattened.keys()), flattened
    else:
        if not (isinstance(data_or_schema, pandas.DataFrame) or isinstance(data_or_schema, pandas.Series)):
            # pandas is not installed, or data is not a dataframe or series
            raise NotImplementedError("Data must be dataframe, dict, list, numpy.recarray, or a numpy structured array.")
        else:
            # flatten column/index multiindex
            df, _ = deconstruct_pandas(data_or_schema)
-           return True, 1, {c: df[c].values for c in df.columns}
+           return True, 1, df.columns.tolist(), {c: df[c].values for c in df.columns}


class _PerspectiveAccessor(object):
@@ -88,18 +91,13 @@ class _PerspectiveAccessor(object):
    INTEGER_TYPES = six.integer_types + (numpy.integer,)

    def __init__(self, data_or_schema):
-       self._is_numpy, self._format, self._data_or_schema = _type_to_format(data_or_schema)
+       self._is_numpy, self._format, self._names, self._data_or_schema = _type_to_format(data_or_schema)
        self._date_validator = _PerspectiveDateValidator()
        self._row_count = \
            len(self._data_or_schema) if self._format == 0 else \
            len(max(self._data_or_schema.values(), key=len)) if self._format == 1 else \
            0

-       if isinstance(self._data_or_schema, list):
-           self._names = list(self._data_or_schema[0].keys()) if len(self._data_or_schema) > 0 else []
-       elif isinstance(self._data_or_schema, dict):
-           self._names = list(self._data_or_schema.keys())
-
        self._types = []

        # Verify that column names are strings, and that numpy arrays are of
@@ -115,6 +113,7 @@ def __init__(self, data_or_schema):
                raise PerspectiveError("Mixed datasets of numpy.ndarray and lists are not supported.")

            dtype = array.dtype
+
            if name == "index" and isinstance(data_or_schema.index, pandas.DatetimeIndex):
                # use the index of the original, unflattened dataframe
                dtype = _parse_datetime_index(data_or_schema.index)
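With these changes, _type_to_format returns a 4-tuple of (is_numpy, format, names, data), so column names are computed once, at load time, in the order the dataset declares them. A quick illustrative check of the dict-of-lists path:

    is_numpy, fmt, names, data = _type_to_format({"a": [1, 2], "b": [3, 4]})
    assert (is_numpy, fmt, names) == (False, 1, ["a", "b"])   # dict of lists is format 1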
50 changes: 50 additions & 0 deletions python/perspective/perspective/tests/table/test_table_numpy.py
@@ -28,6 +28,26 @@ def test_table_int(self):
            "b": [4, 5, 6]
        }

+    def test_table_int_lots_of_columns(self):
+        data = {
+            "a": np.array([1, 2, 3]),
+            "b": np.array([4, 5, 6]),
+            "c": np.array([4, 5, 6]),
+            "d": np.array([4, 5, 6]),
+            "e": np.array([4, 5, 6]),
+            "f": np.array([4, 5, 6]),
+        }
+        tbl = Table(data)
+        assert tbl.size() == 3
+        assert tbl.view().to_dict() == {
+            "a": [1, 2, 3],
+            "b": [4, 5, 6],
+            "c": [4, 5, 6],
+            "d": [4, 5, 6],
+            "e": [4, 5, 6],
+            "f": [4, 5, 6]
+        }
+
    def test_table_int_with_None(self):
        data = {"a": np.array([1, 2, 3, None, None]), "b": np.array([4, 5, 6, None, None])}
        tbl = Table(data)
@@ -738,6 +758,36 @@ def test_table_numpy_from_schema_str(self):
        table.update(df)
        assert table.view().to_dict()["a"] == data

+    # partial update
+
+    def test_table_numpy_partial_update(self):
+        data = ["a", None, "b", None, "c"]
+        df = {"a": np.array([1, 2, 3, 4, 5]), "b": np.array(data), "c": np.array(data)}
+        table = Table(df, index="a")
+        table.update({
+            "a": np.array([2, 4, 5]),
+            "b": np.array(["x", "y", "z"])
+        })
+        assert table.view().to_dict() == {
+            "a": [1, 2, 3, 4, 5],
+            "b": ["a", "x", "b", "y", "z"],
+            "c": ["a", None, "b", None, "c"]
+        }
+
+    def test_table_numpy_partial_update_implicit(self):
+        data = ["a", None, "b", None, "c"]
+        df = {"a": np.array([1, 2, 3, 4, 5]), "b": np.array(data), "c": np.array(data)}
+        table = Table(df)
+        table.update({
+            "__INDEX__": np.array([1, 3, 4]),
+            "b": np.array(["x", "y", "z"])
+        })
+        assert table.view().to_dict() == {
+            "a": [1, 2, 3, 4, 5],
+            "b": ["a", "x", "b", "y", "z"],
+            "c": ["a", None, "b", None, "c"]
+        }
+
    # structured array

    def test_table_structured_array(self):
14 changes: 14 additions & 0 deletions python/perspective/perspective/tests/table/test_table_pandas.py
@@ -35,6 +35,20 @@ def test_table_dataframe(self):
            {"a": 3, "b": 4, "index": 1}
        ]

+    def test_table_dataframe_column_order(self):
+        d = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
+        data = pd.DataFrame(d, columns=["b", "c", "a", "d"])
+        tbl = Table(data)
+        assert tbl.size() == 2
+        assert tbl.columns() == ["index", "b", "c", "a", "d"]
+
+    def test_table_dataframe_selective_column_order(self):
+        d = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
+        data = pd.DataFrame(d, columns=["b", "c", "a"])
+        tbl = Table(data)
+        assert tbl.size() == 2
+        assert tbl.columns() == ["index", "b", "c", "a"]
+
    def test_table_dataframe_does_not_mutate(self):
        # make sure we don't mutate the dataframe that a user passes in
        data = pd.DataFrame({