summary | refs | log | tree | commit | diff
path: root/c
diff options
context:
space:
mode:
author: Samuel Neves <sneves@dei.uc.pt> 2020-02-02 18:47:38 +0000
committer: Samuel Neves <sneves@dei.uc.pt> 2020-02-02 18:47:38 +0000
commit: a1c4c4efb58aa57596f4c92e0a01914a584c8613 (patch)
tree: 002cd07d1368214151ac2ba4e42564fdefac22f7 /c
parent: 58926046ca31df80b8806e64d9edd13b8dd503dd (diff)
Fix #51.
Thanks to bit4 for spotting this bug.
Diffstat (limited to 'c')
-rw-r--r-- c/Makefile 4
-rw-r--r-- c/blake3_dispatch.c 18
2 files changed, 10 insertions, 12 deletions
diff --git a/c/Makefile b/c/Makefile
index 8043ce8..37273b0 100644
--- a/c/Makefile
+++ b/c/Makefile
@@ -33,7 +33,7 @@ all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
blake3_sse41.o: blake3_sse41.c
$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@ -msse4.1
-blake3_avx2.o: blake3_avx2.c # blake3_sse41.c
+blake3_avx2.o: blake3_avx2.c
$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@ -mavx2
blake3_avx512.o: blake3_avx512.c
@@ -42,7 +42,7 @@ blake3_avx512.o: blake3_avx512.c
blake3_neon.o: blake3_neon.c
$(CC) $(CFLAGS) $(EXTRAFLAGS) -c $^ -o $@
-test: CFLAGS += -DBLAKE3_TESTING -fsanitize=address -fsanitize=undefined
+test: CFLAGS += -DBLAKE3_TESTING -fsanitize=address,undefined
test: all
./test.py
diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c
index 7daf43e..85169d2 100644
--- a/c/blake3_dispatch.c
+++ b/c/blake3_dispatch.c
@@ -97,8 +97,10 @@ static void cpuid(uint32_t out[4], uint32_t id) {
__cpuid((int *)out, id);
#else
#if defined(__i386__) || defined(_M_IX86)
- __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx"
- : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3])
+ __asm__ __volatile__("movl %%ebx, %1\n"
+ "cpuid\n"
+ "xchgl %1, %%ebx\n"
+ : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id));
#else
__asm__ __volatile__("cpuid\n"
@@ -112,16 +114,12 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
__cpuidex((int *)out, id, sid);
#else
-#if defined(__i386__) || defined(_M_IX86)
- __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx"
- : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3])
- : "a"(id), "c"(sid));
-#else
- __asm__ __volatile__("cpuid\n"
- : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+ __asm__ __volatile__("movl %%ebx, %1\n"
+ "cpuid\n"
+ "xchgl %1, %%ebx\n"
+ : "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id), "c"(sid));
#endif
-#endif
}
#endif