Skip to content

Commit

Permalink
Merge pull request #821 from finos/py2
Browse files Browse the repository at this point in the history
Default to int64 in Python3, add `long` and `unicode` to schema and type inference
  • Loading branch information
texodus authored Nov 22, 2019
2 parents f1bfb55 + 572bc35 commit 01e24b9
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 29 deletions.
8 changes: 7 additions & 1 deletion python/perspective/perspective/src/accessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,16 @@ infer_type(t_val x, t_val date_validator) {
} else if (py::isinstance<py::bool_>(x) || type_string == "bool") {
// booleans are both instances of bool_ and int_ - check for bool first
t = t_dtype::DTYPE_BOOL;
} else if (type_string == "long") {
t = t_dtype::DTYPE_INT64;
} else if (py::isinstance<py::float_>(x)) {
t = t_dtype::DTYPE_FLOAT64;
} else if (py::isinstance<py::int_>(x)) {
t = t_dtype::DTYPE_INT32;
if (PY_MAJOR_VERSION < 3) {
t = t_dtype::DTYPE_INT32;
} else {
t = t_dtype::DTYPE_INT64;
}
} else if (py::isinstance<py::str>(x) || type_string == "str") {
t_dtype parsed_type = date_validator.attr("format")(x).cast<t_dtype>();
if (parsed_type == t_dtype::DTYPE_DATE || parsed_type == t_dtype::DTYPE_TIME) {
Expand Down
8 changes: 4 additions & 4 deletions python/perspective/perspective/src/fill.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,13 +236,13 @@ _fill_col_numeric(t_data_accessor accessor, t_data_table& tbl,
// inference checked the entire column/we could reset parsing.
double fval = item.cast<double>();
if (!is_update && (fval > 2147483647 || fval < -2147483648)) {
WARN("Promoting %s to float from int32", name);
WARN("Promoting column `%s` to float from int32", name);
tbl.promote_column(name, DTYPE_FLOAT64, i, true);
col = tbl.get_column(name);
type = DTYPE_FLOAT64;
col->set_nth(i, fval);
} else if (!is_update && isnan(fval)) {
WARN("Promoting column %s to string from int32", name);
WARN("Promoting column `%s` to string from int32", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
_fill_col_string(
Expand All @@ -255,7 +255,7 @@ _fill_col_numeric(t_data_accessor accessor, t_data_table& tbl,
case DTYPE_INT64: {
double fval = item.cast<double>();
if (!is_update && isnan(fval)) {
WARN("Promoting %s to string from int64", name);
WARN("Promoting column `%s` to string from int64", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
_fill_col_string(
Expand All @@ -272,7 +272,7 @@ _fill_col_numeric(t_data_accessor accessor, t_data_table& tbl,
bool is_float = py::isinstance<py::float_>(item);
bool is_numpy_nan = is_float && npy_isnan(item.cast<double>());
if (!is_update && (!is_float || is_numpy_nan)) {
WARN("Promoting column %s to string from float64", name);
WARN("Promoting column `%s` to string from float64", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
_fill_col_string(
Expand Down
8 changes: 4 additions & 4 deletions python/perspective/perspective/src/numpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,13 +239,13 @@ namespace numpy {

double fval = item.cast<double>();
if (fval > 2147483647 || fval < -2147483648) {
binding::WARN("Promoting %s to float from int32", name);
binding::WARN("Promoting column `%s` to float from int32", name);
tbl.promote_column(name, DTYPE_FLOAT64, i, true);
col = tbl.get_column(name);
type = DTYPE_FLOAT64;
col->set_nth(i, fval);
} else if (isnan(fval)) {
binding::WARN("Promoting column %s to string from int32", name);
binding::WARN("Promoting column `%s` to string from int32", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
fill_object_iter<std::string>(
Expand Down Expand Up @@ -276,7 +276,7 @@ namespace numpy {

double fval = item.cast<double>();
if (isnan(fval)) {
binding::WARN("Promoting %s to string from int64", name);
binding::WARN("Promoting column `%s` to string from int64", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
fill_object_iter<std::string>(
Expand Down Expand Up @@ -308,7 +308,7 @@ namespace numpy {
bool is_float = py::isinstance<py::float_>(item);
bool is_numpy_nan = is_float && npy_isnan(item.cast<double>());
if (!is_float || is_numpy_nan) {
binding::WARN("Promoting column %s to string from float64", name);
binding::WARN("Promoting column `%s` to string from float64", name);
tbl.promote_column(name, DTYPE_STR, i, false);
col = tbl.get_column(name);
fill_object_iter<std::string>(
Expand Down
9 changes: 3 additions & 6 deletions python/perspective/perspective/src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ t_dtype type_string_to_t_dtype(std::string value, std::string name){
auto type = t_dtype::DTYPE_STR;

// TODO consider refactor
if (value == "int" || value == "integer") {
// Python int
if (value == "int" || value == "integer" || value == "int64" || value == "long") {
// Python int, long, and Numpy int64
type = t_dtype::DTYPE_INT64;
} else if (value == "int8") {
// Numpy int8
Expand All @@ -32,9 +32,6 @@ t_dtype type_string_to_t_dtype(std::string value, std::string name){
} else if (value == "int32") {
// Numpy int32
type = t_dtype::DTYPE_INT32;
} else if (value == "int64") {
// Numpy int64
type = t_dtype::DTYPE_INT64;
} else if (value == "float") {
// Python float
type = t_dtype::DTYPE_FLOAT64;
Expand All @@ -53,7 +50,7 @@ t_dtype type_string_to_t_dtype(std::string value, std::string name){
// TODO
// Numpy float128
type = t_dtype::DTYPE_FLOAT64;
} else if (value == "str" || value == "string") {
} else if (value == "str" || value == "string" || value == "unicode") {
// Python unicode str
type = t_dtype::DTYPE_STR;
} else if (value == "bool" || value == "boolean") {
Expand Down
22 changes: 14 additions & 8 deletions python/perspective/perspective/table/_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,17 @@ def _type_to_format(data_or_schema):
if isinstance(v, type) or isinstance(v, str):
# schema maps name -> type
return False, 2, data_or_schema
elif isinstance(v, list) or iter(v):
# if columns entries are iterable, type 1
return isinstance(v, numpy.ndarray), 1, data_or_schema
elif isinstance(v, list):
# a dict of iterables = type 1
return False, 1, data_or_schema
else:
# Can't process
raise NotImplementedError("Dict values must be list or type!")
# Can't process
raise NotImplementedError("Dict values must be list or type!")
# See if iterable
try:
iter(v)
except TypeError:
raise NotImplementedError("Cannot load dataset of non-iterable type: Data passed in through a dict must be of type `list` or `numpy.ndarray`.")
else:
return isinstance(v, numpy.ndarray), 1, data_or_schema
elif isinstance(data_or_schema, numpy.ndarray):
# structured or record array
if not isinstance(data_or_schema.dtype.names, tuple):
Expand All @@ -81,6 +84,8 @@ def _type_to_format(data_or_schema):
class _PerspectiveAccessor(object):
'''A uniform accessor that wraps data/schemas of varying formats with a common `marshal` function.'''

INTEGER_TYPES = six.integer_types + (numpy.integer,)

def __init__(self, data_or_schema):
self._is_numpy, self._format, self._data_or_schema = _type_to_format(data_or_schema)
self._date_validator = _PerspectiveDateValidator()
Expand Down Expand Up @@ -187,7 +192,7 @@ def marshal(self, cidx, ridx, dtype):
# should be able to update int columns with either ints or floats
val = int(val)
elif dtype == t_dtype.DTYPE_FLOAT32 or dtype == t_dtype.DTYPE_FLOAT64:
if not isinstance(val, bool) and isinstance(val, (int, numpy.integer)):
if not isinstance(val, bool) and isinstance(val, _PerspectiveAccessor.INTEGER_TYPES):
# should be able to update float columns with either ints or floats
val = float(val)
elif dtype == t_dtype.DTYPE_DATE:
Expand All @@ -213,6 +218,7 @@ def marshal(self, cidx, ridx, dtype):
val = unicode(val) # noqa: F821
else:
val = str(val)

return val

def _get_numpy_column(self, name):
Expand Down
89 changes: 88 additions & 1 deletion python/perspective/perspective/tests/table/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@ def test_empty_table(self):
tbl = Table([])
assert tbl.size() == 0

def test_table_not_iterable(self):
data = {
"a": 1
}
with raises(NotImplementedError):
Table(data)

def test_table_int(self):
data = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
tbl = Table(data)
Expand All @@ -46,12 +53,68 @@ def test_table_int_column_names(self):
def test_table_int_overflow(self):
if six.PY2:
maxint = sys.maxint + 1
# overflows into float
data = {"a": [i for i in range(100)] + [maxint, maxint, maxint]}
tbl = Table(data)
# a single promotion later (int -> float)
assert tbl.schema() == {
"a": str
"a": float
}

def test_table_long(self):
if six.PY2:
# don't overflow in this test
data = [long(100), long(200), long(300)] # noqa: F821
tbl = Table({
"a": data
})
assert tbl.schema() == {
"a": int
}
assert tbl.view().to_dict()["a"] == [int(d) for d in data]

def test_table_long_overflow(self):
if six.PY2:
maxint = sys.maxint
# maxint + 1 and maxint + 2 overflow the int range, so the column is promoted to float
data = [maxint, maxint + 1, maxint + 2]
tbl = Table({
"a": data
})
assert tbl.schema() == {
"a": float
}
assert tbl.view().to_dict()["a"] == [float(d) for d in data]

def test_table_int_to_long(self):
if six.PY2:
# don't overflow in this test
data = [int(100), int(200), int(300)]
tbl = Table({
"a": long # noqa: F821
})
assert tbl.schema() == {
"a": int
}
tbl.update({
"a": data
})
assert tbl.view().to_dict()["a"] == data

def test_table_float_to_long(self):
if six.PY2:
# don't overflow in this test
data = [1.5, 2.5, 3.5] # noqa: F821
tbl = Table({
"a": long # noqa: F821
})
assert tbl.schema() == {
"a": int
}
tbl.update({
"a": data
})
assert tbl.view().to_dict()["a"] == [1, 2, 3]

def test_table_nones(self):
none_data = [{"a": 1, "b": None}, {"a": None, "b": 2}]
Expand Down Expand Up @@ -299,6 +362,30 @@ def test_table_symmetric_string_schema(self):

assert tbl2.schema(True) == schema

def test_table_long_schema(self):
if six.PY2:
schema = {
"a": long, # noqa: F821
"b": int
}
tbl = Table(schema)
assert tbl.schema() == {
"a": int,
"b": int
}

def test_table_unicode_schema(self):
if six.PY2:
schema = {
"a": unicode, # noqa: F821
"b": int
}
tbl = Table(schema)
assert tbl.schema() == {
"a": str,
"b": int
}

# is_valid_filter

def test_table_is_valid_filter_str(self):
Expand Down
18 changes: 13 additions & 5 deletions python/perspective/perspective/tests/table/test_table_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# This file is part of the Perspective library, distributed under the terms of
# the Apache License 2.0. The full license can be found in the LICENSE file.
#

import six
from perspective.table import Table
from datetime import date, datetime

Expand All @@ -23,10 +23,18 @@ def test_table_infer_float(self):
assert tbl.schema() == {"a": float}

def test_table_promote_float(self):
data = {"a": [1, 2, 3, 4, 2147483648]}
tbl = Table(data)
assert tbl.schema() == {"a": float}
assert tbl.view().to_dict() == {"a": [1.0, 2.0, 3.0, 4.0, 2147483648.0]}
if six.PY2:
data = {"a": [1.5, 2.5, 3.5, 4.5, "abc"]}
tbl = Table(data)
assert tbl.schema() == {"a": str}
assert tbl.view().to_dict() == {"a": ["1.5", "2.5", "3.5", "4.5", "abc"]}

def test_table_promote_float_py2(self):
if six.PY2:
data = {"a": [1, 2, 3, 4, 2147483648]}
tbl = Table(data)
assert tbl.schema() == {"a": float}
assert tbl.view().to_dict() == {"a": [1.0, 2.0, 3.0, 4.0, 2147483648.0]}

def test_table_infer_bool(self):
data = {"a": [None, None, None, None, True, True, True]}
Expand Down
45 changes: 45 additions & 0 deletions python/perspective/perspective/tests/table/test_table_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ def test_table_int64(self):
"b": [4, 5, 6]
}

def test_table_long_numpy(self):
if six.PY2:
data = {"a": np.array([1, 2, 3], dtype=long)} # noqa: F821
tbl = Table(data)
assert tbl.schema() == {
"a": int
}
assert tbl.size() == 3
assert tbl.view().to_dict() == {
"a": [1, 2, 3]
}

def test_table_float(self):
data = {"a": np.array([1.1, 2.2]), "b": np.array([3.3, 4.4])}
tbl = Table(data)
Expand Down Expand Up @@ -572,6 +584,39 @@ def test_table_numpy_from_schema_int(self):
table.update(df)
assert table.view().to_dict()["a"] == [1, None, 2, None, 3, 4]

def test_table_numpy_from_schema_long(self):
if six.PY2:
df = {
"a": np.array([1, None, 2, None, 3, 4])
}
table = Table({
"a": long # noqa: F821
})
table.update(df)
assert table.view().to_dict()["a"] == [1, None, 2, None, 3, 4]

def test_table_numpy_from_schema_int_to_long(self):
if six.PY2:
df = {
"a": np.array([1, 2, 3, 4], dtype="int64")
}
table = Table({
"a": long # noqa: F821
})
table.update(df)
assert table.view().to_dict()["a"] == [1, 2, 3, 4]

def test_table_numpy_from_schema_float_to_long(self):
if six.PY2:
df = {
"a": np.array([1, None, 2, None, 3, 4], dtype="float64")
}
table = Table({
"a": long # noqa: F821
})
table.update(df)
assert table.view().to_dict()["a"] == [1, None, 2, None, 3, 4]

def test_table_numpy_from_schema_bool(self):
data = [True, False, True, False]
df = {
Expand Down
12 changes: 12 additions & 0 deletions python/perspective/perspective/tests/table/test_table_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,18 @@ def test_table_pandas_from_schema_int(self):
table.update(df)
assert table.view().to_dict()["a"] == data

def test_table_pandas_from_schema_long(self):
if six.PY2:
data = [None, 1, None, 2, None, 3, 4]
df = pd.DataFrame({
"a": data
})
table = Table({
"a": long # noqa: F821
})
table.update(df)
assert table.view().to_dict()["a"] == data

def test_table_pandas_from_schema_bool(self):
data = [True, False, True, False]
df = pd.DataFrame({
Expand Down

0 comments on commit 01e24b9

Please sign in to comment.