diff options
-rw-r--r-- | c/Makefile | 4 | ||||
-rw-r--r-- | c/blake3_dispatch.c | 18 |
2 files changed, 10 insertions, 12 deletions
@@ -33,7 +33,7 @@ all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
 blake3_sse41.o: blake3_sse41.c
 	$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@ -msse4.1
 
-blake3_avx2.o: blake3_avx2.c # blake3_sse41.c
+blake3_avx2.o: blake3_avx2.c
 	$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@ -mavx2
 
 blake3_avx512.o: blake3_avx512.c
@@ -42,7 +42,7 @@ blake3_avx512.o: blake3_avx512.c
 blake3_neon.o: blake3_neon.c
 	$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@
 
-test: CFLAGS += -DBLAKE3_TESTING -fsanitize=address -fsanitize=undefined
+test: CFLAGS += -DBLAKE3_TESTING -fsanitize=address,undefined
 test: all
 	./test.py
 
diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c
index 7daf43e..85169d2 100644
--- a/c/blake3_dispatch.c
+++ b/c/blake3_dispatch.c
@@ -97,8 +97,10 @@ static void cpuid(uint32_t out[4], uint32_t id) {
   __cpuid((int *)out, id);
 #else
 #if defined(__i386__) || defined(_M_IX86)
-  __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx"
-                       : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3])
+  __asm__ __volatile__("movl %%ebx, %1\n"
+                       "cpuid\n"
+                       "xchgl %1, %%ebx\n"
+                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                        : "a"(id));
 #else
   __asm__ __volatile__("cpuid\n"
@@ -112,16 +114,12 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
 #if defined(_MSC_VER)
   __cpuidex((int *)out, id, sid);
 #else
-#if defined(__i386__) || defined(_M_IX86)
-  __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx"
-                       : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3])
-                       : "a"(id), "c"(sid));
-#else
-  __asm__ __volatile__("cpuid\n"
-                       : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+  __asm__ __volatile__("movl %%ebx, %1\n"
+                       "cpuid\n"
+                       "xchgl %1, %%ebx\n"
+                       : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
                        : "a"(id), "c"(sid));
 #endif
-#endif
 }
 #endif
 