Fixed bug 3468 - _allshr in SDL_stdlib.c is not working properly

Mark Pizzolato

On Windows with Visual Studio, when building SDL as a static library using the x86 (32bit) mode, several intrinsic operations are implemented in code in SDL_stdlib.c.

One of these, _allshr() is not properly implemented and fails for some input.  As a result, some operations on 64bit data elements (long long) don't always work.

I classified this bug as a blocker since things absolutely don't work when the affected code is invoked.  The affected code is only invoked when SDL is compiled in x86 mode on Visual Studio when building a SDL as a static library.  This build environment isn't common, and hence the bug hasn't been noticed previously.

I reopened #2537 and mentioned this problem and provided a fix.  That fix is provided again here along with test code which could be added to some of the SDL test code.  This test code verifies that the x86 intrinsic routines produce the same results as the native x64 instructions which these routines emulate under the Microsoft compiler.  The point of the tests is to make sure that Visual Studio x86 code produces the same results as Visual Studio x64 code.  Some of the arguments (or boundary conditions) may produce different results on other compiler environments, so the tests really shouldn't be run on all compilers.  The test driver only actually exercised code when the compiler defines _MSC_VER, so the driver can generically be invoked without issue.
This commit is contained in:
Sam Lantinga 2016-11-06 10:01:08 -08:00
parent d780031277
commit 40b571c91e
2 changed files with 250 additions and 35 deletions

View File

@ -914,8 +914,8 @@ _allshr()
{
/* *INDENT-OFF* */
__asm {
cmp cl,40h
jae RETZERO
cmp cl,3Fh
jae RETSIGN
cmp cl,20h
jae MORE32
shrd eax,edx,cl
@ -923,13 +923,13 @@ _allshr()
ret
MORE32:
mov eax,edx
xor edx,edx
sar edx,1Fh
and cl,1Fh
sar eax,cl
ret
RETZERO:
xor eax,eax
xor edx,edx
RETSIGN:
sar edx,1Fh
mov eax,edx
ret
}
/* *INDENT-ON* */

View File

@ -128,6 +128,220 @@ TestEndian(SDL_bool verbose)
return (error ? 1 : 0);
}
static int TST_allmul (void *a, void *b, int arg, void *result, void *expected)
{
(*(long long *)result) = ((*(long long *)a) * (*(long long *)b));
return (*(long long *)result) == (*(long long *)expected);
}
static int TST_alldiv (void *a, void *b, int arg, void *result, void *expected)
{
(*(long long *)result) = ((*(long long *)a) / (*(long long *)b));
return (*(long long *)result) == (*(long long *)expected);
}
static int TST_allrem (void *a, void *b, int arg, void *result, void *expected)
{
(*(long long *)result) = ((*(long long *)a) % (*(long long *)b));
return (*(long long *)result) == (*(long long *)expected);
}
static int TST_ualldiv (void *a, void *b, int arg, void *result, void *expected)
{
(*(unsigned long long *)result) = ((*(unsigned long long *)a) / (*(unsigned long long *)b));
return (*(unsigned long long *)result) == (*(unsigned long long *)expected);
}
static int TST_uallrem (void *a, void *b, int arg, void *result, void *expected)
{
(*(unsigned long long *)result) = ((*(unsigned long long *)a) % (*(unsigned long long *)b));
return (*(unsigned long long *)result) == (*(unsigned long long *)expected);
}
static int TST_allshl (void *a, void *b, int arg, void *result, void *expected)
{
(*(long long *)result) = (*(long long *)a) << arg;
return (*(long long *)result) == (*(long long *)expected);
}
static int TST_aullshl (void *a, void *b, int arg, void *result, void *expected)
{
(*(unsigned long long *)result) = (*(unsigned long long *)a) << arg;
return (*(unsigned long long *)result) == (*(unsigned long long *)expected);
}
static int TST_allshr (void *a, void *b, int arg, void *result, void *expected)
{
(*(long long *)result) = (*(long long *)a) >> arg;
return (*(long long *)result) == (*(long long *)expected);
}
static int TST_aullshr (void *a, void *b, int arg, void *result, void *expected)
{
(*(unsigned long long *)result) = (*(unsigned long long *)a) >> arg;
return (*(unsigned long long *)result) == (*(unsigned long long *)expected);
}
typedef int (*LL_Intrinsic)(void *a, void *b, int arg, void *result, void *expected);
typedef struct {
const char *operation;
LL_Intrinsic routine;
unsigned long long a, b;
int arg;
unsigned long long expected_result;
} LL_Test;
static LL_Test LL_Tests[] =
{
{"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 65, 0x0000000000000000ll},
{"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 1, 0xFFFFFFFFFFFFFFFEll},
{"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 32, 0xFFFFFFFF00000000ll},
{"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 33, 0xFFFFFFFE00000000ll},
{"_allshl", &TST_allshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 0, 0xFFFFFFFFFFFFFFFFll},
{"_allshr", &TST_allshr, 0xAAAAAAAA55555555ll, 0ll, 63, 0xFFFFFFFFFFFFFFFFll},
{"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 65, 0xFFFFFFFFFFFFFFFFll},
{"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 1, 0xFFFFFFFFFFFFFFFFll},
{"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 32, 0xFFFFFFFFFFFFFFFFll},
{"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 33, 0xFFFFFFFFFFFFFFFFll},
{"_allshr", &TST_allshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 0, 0xFFFFFFFFFFFFFFFFll},
{"_allshr", &TST_allshr, 0x5F5F5F5F5F5F5F5Fll, 0ll, 65, 0x0000000000000000ll},
{"_allshr", &TST_allshr, 0x5F5F5F5F5F5F5F5Fll, 0ll, 1, 0x2FAFAFAFAFAFAFAFll},
{"_allshr", &TST_allshr, 0x5F5F5F5F5F5F5F5Fll, 0ll, 32, 0x000000005F5F5F5Fll},
{"_allshr", &TST_allshr, 0x5F5F5F5F5F5F5F5Fll, 0ll, 33, 0x000000002FAFAFAFll},
{"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 65, 0x0000000000000000ll},
{"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 1, 0xFFFFFFFFFFFFFFFEll},
{"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 32, 0xFFFFFFFF00000000ll},
{"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 33, 0xFFFFFFFE00000000ll},
{"_aullshl", &TST_aullshl, 0xFFFFFFFFFFFFFFFFll, 0ll, 0, 0xFFFFFFFFFFFFFFFFll},
{"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 65, 0x0000000000000000ll},
{"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 1, 0x7FFFFFFFFFFFFFFFll},
{"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 32, 0x00000000FFFFFFFFll},
{"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 33, 0x000000007FFFFFFFll},
{"_aullshr", &TST_aullshr, 0xFFFFFFFFFFFFFFFFll, 0ll, 0, 0xFFFFFFFFFFFFFFFFll},
{"_allmul", &TST_allmul, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000000ll, 0, 0x0000000000000000ll},
{"_allmul", &TST_allmul, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x000000000FFFFFFFll},
{"_allmul", &TST_allmul, 0x0000000000000001ll, 0x000000000FFFFFFFll, 0, 0x000000000FFFFFFFll},
{"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000000000010ll, 0, 0x00000000FFFFFFF0ll},
{"_allmul", &TST_allmul, 0x0000000000000010ll, 0x000000000FFFFFFFll, 0, 0x00000000FFFFFFF0ll},
{"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000000000100ll, 0, 0x0000000FFFFFFF00ll},
{"_allmul", &TST_allmul, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000FFFFFFF00ll},
{"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000010000000ll, 0, 0x00FFFFFFF0000000ll},
{"_allmul", &TST_allmul, 0x0000000010000000ll, 0x000000000FFFFFFFll, 0, 0x00FFFFFFF0000000ll},
{"_allmul", &TST_allmul, 0x000000000FFFFFFFll, 0x0000000080000000ll, 0, 0x07FFFFFF80000000ll},
{"_allmul", &TST_allmul, 0x0000000080000000ll, 0x000000000FFFFFFFll, 0, 0x07FFFFFF80000000ll},
{"_allmul", &TST_allmul, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0xFFFFFFFF00000000ll},
{"_allmul", &TST_allmul, 0x0000000080000000ll, 0xFFFFFFFFFFFFFFFEll, 0, 0xFFFFFFFF00000000ll},
{"_allmul", &TST_allmul, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000008ll, 0, 0xFFFFFFFEFFFFFFF0ll},
{"_allmul", &TST_allmul, 0x0000000080000008ll, 0xFFFFFFFFFFFFFFFEll, 0, 0xFFFFFFFEFFFFFFF0ll},
{"_allmul", &TST_allmul, 0x00000000FFFFFFFFll, 0x00000000FFFFFFFFll, 0, 0xFFFFFFFE00000001ll},
{"_alldiv", &TST_alldiv, 0x0000000000000000ll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_alldiv", &TST_alldiv, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_alldiv", &TST_alldiv, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0xFFFFFFFFFFFFFFFFll},
{"_alldiv", &TST_alldiv, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000001ll, 0, 0xFFFFFFFFFFFFFFFFll},
{"_alldiv", &TST_alldiv, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0xFFFFFFFFFFFFFFFFll},
{"_alldiv", &TST_alldiv, 0x0000000000000001ll, 0x0000000000000001ll, 0, 0x0000000000000001ll},
{"_alldiv", &TST_alldiv, 0xFFFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000001ll},
{"_alldiv", &TST_alldiv, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x000000000FFFFFFFll},
{"_alldiv", &TST_alldiv, 0x0000000FFFFFFFFFll, 0x0000000000000010ll, 0, 0x00000000FFFFFFFFll},
{"_alldiv", &TST_alldiv, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000000000000ll},
{"_alldiv", &TST_alldiv, 0x00FFFFFFF0000000ll, 0x0000000010000000ll, 0, 0x000000000FFFFFFFll},
{"_alldiv", &TST_alldiv, 0x07FFFFFF80000000ll, 0x0000000080000000ll, 0, 0x000000000FFFFFFFll},
{"_alldiv", &TST_alldiv, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0x0000000000000000ll},
{"_alldiv", &TST_alldiv, 0xFFFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000080000008ll},
{"_alldiv", &TST_alldiv, 0x7FFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0xC000000080000008ll},
{"_alldiv", &TST_alldiv, 0x7FFFFFFEFFFFFFF0ll, 0x0000FFFFFFFFFFFEll, 0, 0x0000000000007FFFll},
{"_alldiv", &TST_alldiv, 0x7FFFFFFEFFFFFFF0ll, 0x7FFFFFFEFFFFFFF0ll, 0, 0x0000000000000001ll},
{"_allrem", &TST_allrem, 0x0000000000000000ll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x0000000000000001ll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0xFFFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x0000000FFFFFFFFFll, 0x0000000000000010ll, 0, 0x000000000000000Fll},
{"_allrem", &TST_allrem, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000000000100ll},
{"_allrem", &TST_allrem, 0x00FFFFFFF0000000ll, 0x0000000010000000ll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x07FFFFFF80000000ll, 0x0000000080000000ll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0xFFFFFFFFFFFFFFFEll},
{"_allrem", &TST_allrem, 0xFFFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x7FFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000000000000ll},
{"_allrem", &TST_allrem, 0x7FFFFFFEFFFFFFF0ll, 0x0000FFFFFFFFFFFEll, 0, 0x0000FFFF0000FFEEll},
{"_allrem", &TST_allrem, 0x7FFFFFFEFFFFFFF0ll, 0x7FFFFFFEFFFFFFF0ll, 0, 0x0000000000000000ll},
{"_ualldiv", &TST_ualldiv, 0x0000000000000000ll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_ualldiv", &TST_ualldiv, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_ualldiv", &TST_ualldiv, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_ualldiv", &TST_ualldiv, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000001ll, 0, 0xFFFFFFFFFFFFFFFFll},
{"_ualldiv", &TST_ualldiv, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_ualldiv", &TST_ualldiv, 0x0000000000000001ll, 0x0000000000000001ll, 0, 0x0000000000000001ll},
{"_ualldiv", &TST_ualldiv, 0xFFFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000001ll},
{"_ualldiv", &TST_ualldiv, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x000000000FFFFFFFll},
{"_ualldiv", &TST_ualldiv, 0x0000000FFFFFFFFFll, 0x0000000000000010ll, 0, 0x00000000FFFFFFFFll},
{"_ualldiv", &TST_ualldiv, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000000000000ll},
{"_ualldiv", &TST_ualldiv, 0x00FFFFFFF0000000ll, 0x0000000010000000ll, 0, 0x000000000FFFFFFFll},
{"_ualldiv", &TST_ualldiv, 0x07FFFFFF80000000ll, 0x0000000080000000ll, 0, 0x000000000FFFFFFFll},
{"_ualldiv", &TST_ualldiv, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0x00000001FFFFFFFFll},
{"_ualldiv", &TST_ualldiv, 0xFFFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000000000000ll},
{"_ualldiv", &TST_ualldiv, 0x7FFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x0000000000000000ll},
{"_ualldiv", &TST_ualldiv, 0x7FFFFFFEFFFFFFF0ll, 0x0000FFFFFFFFFFFEll, 0, 0x0000000000007FFFll},
{"_ualldiv", &TST_ualldiv, 0x7FFFFFFEFFFFFFF0ll, 0x7FFFFFFEFFFFFFF0ll, 0, 0x0000000000000001ll},
{"_uallrem", &TST_uallrem, 0x0000000000000000ll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_uallrem", &TST_uallrem, 0x0000000000000000ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_uallrem", &TST_uallrem, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000001ll},
{"_uallrem", &TST_uallrem, 0xFFFFFFFFFFFFFFFFll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_uallrem", &TST_uallrem, 0x0000000000000001ll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000001ll},
{"_uallrem", &TST_uallrem, 0x0000000000000001ll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_uallrem", &TST_uallrem, 0xFFFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, 0, 0x0000000000000000ll},
{"_uallrem", &TST_uallrem, 0x000000000FFFFFFFll, 0x0000000000000001ll, 0, 0x0000000000000000ll},
{"_uallrem", &TST_uallrem, 0x0000000FFFFFFFFFll, 0x0000000000000010ll, 0, 0x000000000000000Fll},
{"_uallrem", &TST_uallrem, 0x0000000000000100ll, 0x000000000FFFFFFFll, 0, 0x0000000000000100ll},
{"_uallrem", &TST_uallrem, 0x00FFFFFFF0000000ll, 0x0000000010000000ll, 0, 0x0000000000000000ll},
{"_uallrem", &TST_uallrem, 0x07FFFFFF80000000ll, 0x0000000080000000ll, 0, 0x0000000000000000ll},
{"_uallrem", &TST_uallrem, 0xFFFFFFFFFFFFFFFEll, 0x0000000080000000ll, 0, 0x000000007FFFFFFEll},
{"_uallrem", &TST_uallrem, 0xFFFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0xFFFFFFFEFFFFFFF0ll},
{"_uallrem", &TST_uallrem, 0x7FFFFFFEFFFFFFF0ll, 0xFFFFFFFFFFFFFFFEll, 0, 0x7FFFFFFEFFFFFFF0ll},
{"_uallrem", &TST_uallrem, 0x7FFFFFFEFFFFFFF0ll, 0x0000FFFFFFFFFFFEll, 0, 0x0000FFFF0000FFEEll},
{"_uallrem", &TST_uallrem, 0x7FFFFFFEFFFFFFF0ll, 0x7FFFFFFEFFFFFFF0ll, 0, 0x0000000000000000ll},
{NULL}
};
int
Test64Bit (SDL_bool verbose)
{
LL_Test *t;
int failed = 0;
for (t = LL_Tests; t->routine != NULL; t++) {
unsigned long long result = 0;
unsigned int *al = (unsigned int *)&t->a;
unsigned int *bl = (unsigned int *)&t->b;
unsigned int *el = (unsigned int *)&t->expected_result;
unsigned int *rl = (unsigned int *)&result;
if (!t->routine(&t->a, &t->b, t->arg, &result, &t->expected_result)) {
if (verbose)
SDL_Log("%s(0x%08X%08X, 0x%08X%08X, %3d, produced: 0x%08X%08X, expected: 0x%08X%08X\n",
t->operation, al[1], al[0], bl[1], bl[0], t->arg, rl[1], rl[0], el[1], el[0]);
++failed;
}
}
if (verbose && (failed == 0))
SDL_Log("All 64bit instrinsic tests passed\n");
return (failed ? 1 : 0);
}
int
TestCPUInfo(SDL_bool verbose)
@ -198,6 +412,7 @@ main(int argc, char *argv[])
status += TestTypes(verbose);
status += TestEndian(verbose);
status += Test64Bit(verbose);
status += TestCPUInfo(verbose);
status += TestAssertions(verbose);