
PERF: improve MultiIndex get_loc performance #16346

Merged
use infer_dtype_from_scalar
jorisvandenbossche committed May 16, 2017
commit 3bd0404f2582403bc660facd5d1932b306b624e4
4 changes: 2 additions & 2 deletions pandas/core/dtypes/cast.py
@@ -333,7 +333,7 @@ def maybe_promote(dtype, fill_value=np.nan):
    return dtype, fill_value


def infer_dtype_from_scalar(val, pandas_dtype=False):
def infer_dtype_from_scalar(val, pandas_dtype=False, use_datetimetz=True):
Contributor:

You don't need this extra parameter; instead you can pass pandas_dtype=True. What is the issue?

Member Author:

pandas_dtype=True does not return a np.datetime64 but our DatetimeTZDtype, and then Periods also get converted, which I don't need either.
So the combination I need, i.e. tz-aware timestamps as their numpy dtype instead of as a pandas extension type or as object, but Periods kept as objects, is not possible with the current options. This is a bit of a strange combination, but it is a consequence of how those values are returned from the Index values (datetime64 without tz, but Periods as objects), which is how they are hashed.

But indeed, if we add this, ignore_tz is probably a better name
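The behaviour the author describes can be seen with public pandas API: taking .values of a tz-aware DatetimeIndex drops the timezone and yields plain datetime64[ns] (converted to UTC), which is the representation the hashing code ultimately sees. A minimal illustration:

```python
import numpy as np
import pandas as pd

# .values on a tz-aware DatetimeIndex returns a plain datetime64[ns]
# ndarray (in UTC); the timezone information is gone.
idx = pd.DatetimeIndex(["2012-01-01"], tz="Europe/Brussels")
print(idx.values.dtype)  # datetime64[ns]
```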

    """
    interpret the dtype from a scalar

@@ -368,7 +368,7 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):

    elif isinstance(val, (np.datetime64, datetime)):
        val = tslib.Timestamp(val)
        if val is tslib.NaT or val.tz is None:
        if val is tslib.NaT or val.tz is None or not use_datetimetz:
            dtype = np.dtype('M8[ns]')
        else:
            if pandas_dtype:
18 changes: 3 additions & 15 deletions pandas/core/util/hashing.py
@@ -5,7 +5,6 @@

import numpy as np
from pandas._libs import hashing
from pandas.compat import string_and_binary_types, text_type
from pandas.core.dtypes.generic import (
ABCMultiIndex,
ABCIndexClass,
@@ -14,6 +13,7 @@
from pandas.core.dtypes.common import (
is_categorical_dtype, is_list_like)
from pandas.core.dtypes.missing import isnull
from pandas.core.dtypes.cast import infer_dtype_from_scalar


# 16 byte long hashing key
@@ -317,20 +317,8 @@ def _hash_scalar(val, encoding='utf8', hash_key=None):
        # this is to be consistent with the _hash_categorical implementation
        return np.array([np.iinfo(np.uint64).max], dtype='u8')

Contributor:

If you need to handle datetime w/tz directly (IOW, we basically ignore it), then I would:

if getattr(val, 'tzinfo', None) is not None:
    val = val.tz_localize(None)

I suppose an option to ignore tz is fine for infer_dtype_from_scalar, but if you add it I would rename, document and test.

Member Author:

Yes, I can certainly do that check here as well.

It is maybe better to keep the custom logic here, as the keyword added to infer_dtype_from_scalar would not be used anywhere else.

Contributor:

Yeah, I think it is better locally.

    if isinstance(val, string_and_binary_types + (text_type,)):
        vals = np.array([val], dtype=object)
    else:
        vals = np.array([val])

    if vals.dtype == np.object_:
        from pandas import Timestamp, Timedelta, Period, Interval
        if isinstance(val, (Timestamp, Timedelta)):
            vals = np.array([val.value])
        elif isinstance(val, (Period, Interval)):
            pass
        else:
            from pandas import Index
            vals = Index(vals).values
    dtype, val = infer_dtype_from_scalar(val, use_datetimetz=False)
    vals = np.array([val], dtype=dtype)
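The idea behind the simplified path can be sketched with public API: instead of special-casing each scalar type, represent the scalar as a 1-element array of an inferred numpy dtype and hash that array. The real patch uses the internal infer_dtype_from_scalar; scalar_to_array below is a hypothetical stand-in covering a few types only.

```python
import numpy as np
import pandas as pd

def scalar_to_array(val):
    # hypothetical helper approximating dtype inference for a scalar
    if isinstance(val, pd.Timestamp):
        # a tz-aware Timestamp converts to its naive UTC datetime64 value
        return np.array([val.to_datetime64()], dtype="M8[ns]")
    if isinstance(val, pd.Timedelta):
        return np.array([val.to_timedelta64()], dtype="m8[ns]")
    if isinstance(val, (str, bytes)):
        return np.array([val], dtype=object)
    return np.array([val])

# a tz-aware timestamp hashes the same as its naive UTC equivalent
aware = pd.Timestamp("2012-01-01", tz="UTC")
naive = pd.Timestamp("2012-01-01")
h1 = pd.util.hash_array(scalar_to_array(aware))
h2 = pd.util.hash_array(scalar_to_array(naive))
print(h1[0] == h2[0])  # True: same UTC instant, same hash
```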

    return hash_array(vals, hash_key=hash_key, encoding=encoding,
                      categorize=False)
4 changes: 2 additions & 2 deletions pandas/tests/util/test_hashing.py
@@ -81,13 +81,13 @@ def test_hash_tuples(self):

    def test_hash_tuple(self):
        # test equivalence between hash_tuples and hash_tuple
        for tup in [(1, 'one'), (1, np.nan)]:
        for tup in [(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A')]:
            result = hash_tuple(tup)
            expected = hash_tuples([tup])[0]
            assert result == expected

    def test_hash_scalar(self):
        for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"),
        for val in [1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"),
                    pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
                    pd.Period('2012-01-01', freq='D'), pd.Timedelta('1 days'),
                    pd.Interval(0, 1), np.nan, pd.NaT, None]: