From d3496c9f4f27d3009b71be87f6108b4fed7314bd Mon Sep 17 00:00:00 2001 From: Wilco Dijkstra Date: Fri, 25 Mar 2016 16:44:26 -0300 Subject: [PATCH] Improve generic strcspn performance Improve strcspn performance using a much faster algorithm. It is kept simple so it works well on most targets. It is generally at least 10 times faster than the existing implementation on bench-strcspn on a few AArch64 implementations, and for some tests 100 times as fast (repeatedly calling strchr on a small string is extremely slow...). In fact the string/bits/string2.h inlines make no longer sense, as GCC already uses strlen if reject is an empty string, strchrnul is 5 times as fast as __strcspn_c1, while __strcspn_c2 and __strcspn_c3 are slower than the strcspn main loop for large strings (though reject length 2-4 could be special cased in the future to gain even more performance). Tested on x86_64, i686, and aarch64. * string/Version (libc): Add GLIBC_2.24. * string/strcspn.c (strcspn): Rewrite function. * string/bits/string2.h (strcspn): Use __builtin_strcspn. (__strcspn_c1): Remove inline function. (__strcspn_c2): Likewise. (__strcspn_c3): Likewise. * string/string-inline.c [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add compatibility symbol. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2): Likewise. [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3): Likewise. * sysdeps/i386/string-inlines.c: Include generic string-inlines.c. --- ChangeLog | 18 +++++++++ string/Versions | 2 + string/bits/string2.h | 73 ++--------------------------------- string/strcspn.c | 46 ++++++++++++++++++---- string/string-inlines.c | 40 +++++++++++++++++++ sysdeps/i386/string-inlines.c | 19 +-------- 6 files changed, 102 insertions(+), 96 deletions(-) diff --git a/ChangeLog b/ChangeLog index 084c3d888f2..0d522ad4c40 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2016-04-01 Wilco Dijkstra + Adhemerval Zanella + + * string/Version (libc): Add GLIBC_2.24. + * string/strcspn.c (strcspn): Rewrite function. + * string/bits/string2.h (strcspn): Use __builtin_strcspn. + (__strcspn_c1): Remove inline function. + (__strcspn_c2): Likewise. + (__strcspn_c3): Likewise. + * string/string-inline.c + [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c1): Add + compatibility symbol. + [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c2): + Likewise. + [SHLIB_COMPAT(libc, GLIBC_2_1_1, GLIBC_2_24)] (__strcspn_c3): + Likewise. + * sysdeps/i386/string-inlines.c: Include generic string-inlines.c. + 2016-04-01 Stefan Liebler * sysdeps/s390/s390-32/dl-trampoline.h (_dl_runtime_resolve): diff --git a/string/Versions b/string/Versions index 59bf35a3bd2..475c1fdb643 100644 --- a/string/Versions +++ b/string/Versions @@ -80,4 +80,6 @@ libc { GLIBC_2.6 { strerror_l; } + GLIBC_2.24 { + } } diff --git a/string/bits/string2.h b/string/bits/string2.h index 8200ef173da..a8df0db14a6 100644 --- a/string/bits/string2.h +++ b/string/bits/string2.h @@ -905,77 +905,10 @@ __stpcpy_small (char *__dest, /* Return the length of the initial segment of S which consists entirely of characters not in REJECT. */ -#if !defined _HAVE_STRING_ARCH_strcspn || defined _FORCE_INLINES -# ifndef _HAVE_STRING_ARCH_strcspn -# if __GNUC_PREREQ (3, 2) -# define strcspn(s, reject) \ - __extension__ \ - ({ char __r0, __r1, __r2; \ - (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ - ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ - ? __builtin_strcspn (s, reject) \ - : ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \ - ? strlen (s) \ - : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \ - ? __strcspn_c1 (s, __r0) \ - : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ - ? __strcspn_c2 (s, __r0, __r1) \ - : (((const char *) (reject))[3] == '\0' \ - ? __strcspn_c3 (s, __r0, __r1, __r2) \ - : __builtin_strcspn (s, reject)))))) \ - : __builtin_strcspn (s, reject)); }) -# else -# define strcspn(s, reject) \ - __extension__ \ - ({ char __r0, __r1, __r2; \ - (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ - ? ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \ - ? strlen (s) \ - : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \ - ? __strcspn_c1 (s, __r0) \ - : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ - ? __strcspn_c2 (s, __r0, __r1) \ - : (((const char *) (reject))[3] == '\0' \ - ? __strcspn_c3 (s, __r0, __r1, __r2) \ - : strcspn (s, reject))))) \ - : strcspn (s, reject)); }) -# endif +#ifndef _HAVE_STRING_ARCH_strcspn +# if __GNUC_PREREQ (3, 2) +# define strcspn(s, reject) __builtin_strcspn (s, reject) # endif - -__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject); -__STRING_INLINE size_t -__strcspn_c1 (const char *__s, int __reject) -{ - size_t __result = 0; - while (__s[__result] != '\0' && __s[__result] != __reject) - ++__result; - return __result; -} - -__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1, - int __reject2); -__STRING_INLINE size_t -__strcspn_c2 (const char *__s, int __reject1, int __reject2) -{ - size_t __result = 0; - while (__s[__result] != '\0' && __s[__result] != __reject1 - && __s[__result] != __reject2) - ++__result; - return __result; -} - -__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1, - int __reject2, int __reject3); -__STRING_INLINE size_t -__strcspn_c3 (const char *__s, int __reject1, int __reject2, - int __reject3) -{ - size_t __result = 0; - while (__s[__result] != '\0' && __s[__result] != __reject1 - && __s[__result] != __reject2 && __s[__result] != __reject3) - ++__result; - return __result; -} #endif diff --git a/string/strcspn.c b/string/strcspn.c index 88889196397..c535dd1c3dd 100644 --- a/string/strcspn.c +++ b/string/strcspn.c @@ -16,6 +16,7 @@ . */ #include +#include #undef strcspn @@ -26,16 +27,45 @@ /* Return the length of the maximum initial segment of S which contains no characters from REJECT. */ size_t -STRCSPN (const char *s, const char *reject) +STRCSPN (const char *str, const char *reject) { - size_t count = 0; + if (__glibc_unlikely (reject[0] == '\0') || + __glibc_unlikely (reject[1] == '\0')) + return __strchrnul (str, reject [0]) - str; - while (*s != '\0') - if (strchr (reject, *s++) == NULL) - ++count; - else - return count; + /* Use multiple small memsets to enable inlining on most targets. */ + unsigned char table[256]; + unsigned char *p = memset (table, 0, 64); + memset (p + 64, 0, 64); + memset (p + 128, 0, 64); + memset (p + 192, 0, 64); - return count; + unsigned char *s = (unsigned char*) reject; + unsigned char tmp; + do + p[tmp = *s++] = 1; + while (tmp); + + s = (unsigned char*) str; + if (p[s[0]]) return 0; + if (p[s[1]]) return 1; + if (p[s[2]]) return 2; + if (p[s[3]]) return 3; + + s = (unsigned char *) ((uintptr_t)(s) & ~3); + + unsigned int c0, c1, c2, c3; + do + { + s += 4; + c0 = p[s[0]]; + c1 = p[s[1]]; + c2 = p[s[2]]; + c3 = p[s[3]]; + } + while ((c0 | c1 | c2 | c3) == 0); + + size_t count = s - (unsigned char *) str; + return (c0 | c1) != 0 ? count - c0 + 1 : count - c2 + 3; } libc_hidden_builtin_def (strcspn) diff --git a/string/string-inlines.c b/string/string-inlines.c index 16db3ea308a..83bdd6c1853 100644 --- a/string/string-inlines.c +++ b/string/string-inlines.c @@ -32,3 +32,43 @@ #undef __NO_INLINE__ #include #include + +#include "shlib-compat.h" + +#if SHLIB_COMPAT (libc, GLIBC_2_1_1, GLIBC_2_24) +/* The inline functions are not used from GLIBC 2.24 and forward, however + they are required to provide the symbols through string-inlines.c + (if inlining is not possible for compatibility reasons). */ +size_t +__old_strcspn_c1 (const char *__s, int __reject) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject) + ++__result; + return __result; +} +compat_symbol (libc, __old_strcspn_c1, __strcspn_c1, GLIBC_2_1_1); + +size_t +__old_strcspn_c2 (const char *__s, int __reject1, int __reject2) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject1 + && __s[__result] != __reject2) + ++__result; + return __result; +} +compat_symbol (libc, __old_strcspn_c2, __strcspn_c2, GLIBC_2_1_1); + +size_t +__old_strcspn_c3 (const char *__s, int __reject1, int __reject2, + int __reject3) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject1 + && __s[__result] != __reject2 && __s[__result] != __reject3) + ++__result; + return __result; +} +compat_symbol (libc, __old_strcspn_c3, __strcspn_c3, GLIBC_2_1_1); +#endif diff --git a/sysdeps/i386/string-inlines.c b/sysdeps/i386/string-inlines.c index c7de270c0a2..64d80e8eef5 100644 --- a/sysdeps/i386/string-inlines.c +++ b/sysdeps/i386/string-inlines.c @@ -15,27 +15,10 @@ License along with the GNU C Library; if not, see . */ -/* and declare some extern inline - functions. These functions are declared additionally here if - inlining is not possible. */ - -#undef __USE_STRING_INLINES -#define __USE_STRING_INLINES -#define _FORCE_INLINES -#define __STRING_INLINE /* empty */ -#define __NO_INLINE__ - /* This is to avoid PLT entries for the x86 version. */ #define __memcpy_g __memcpy_g_internal #define __strchr_g __strchr_g_internal - -#include -#undef index -#undef rindex - -#undef __NO_INLINE__ -#include -#include +#include void * (__memcpy_c) (void *d, const void *s, size_t n)