diff options
author | Alexey Radul <axch@google.com> | 2023-07-06 17:31:35 -0400 |
---|---|---|
committer | Alexey Radul <axch@google.com> | 2023-07-06 17:43:12 -0400 |
commit | c8c0ae3b2d8393ec8e4460a8ca9880fca4a9d960 (patch) | |
tree | 59542abc9f72683a0e1259d5625c6348a5cf7bce | |
parent | c489415792c1491882df0235f589a36879e306db (diff) |
Better control pointer alignments.
It seems that either llvm-hs or LLVM itself assumes that a pointer
load or store that doesn't indicate an explicit alignment is aligned
to the size of the pointee. But this is wrong for vector loads from
multidimensional Dex arrays---the minor dimension need only be aligned
on the size of a single array element, not the whole vector.
-rw-r--r-- | src/lib/ImpToLLVM.hs | 23 | ||||
-rw-r--r-- | tests/opt-tests.dx | 19 |
2 files changed, 39 insertions, 3 deletions
diff --git a/src/lib/ImpToLLVM.hs b/src/lib/ImpToLLVM.hs index 19905424..3d1dd429 100644 --- a/src/lib/ImpToLLVM.hs +++ b/src/lib/ImpToLLVM.hs @@ -902,16 +902,33 @@ withWidthOfFP x template = case typeOf template of L.FloatingPointType L.FloatFP -> litVal $ Float32Lit $ realToFrac x _ -> error $ "Unsupported floating point type: " ++ show (typeOf template) +-- If we are accessing a `L.Type` from a Dex array, what memory alignment (in +-- bytes) can we guarantee? This is probably better expressed in Dex types, but +-- we would need to plumb them to do it that way. 1-byte alignment should +-- always be safe, but we can promise higher-performance alignments for some +-- types. +dexAlignment :: L.Type -> Word32 +dexAlignment = \case + L.IntegerType bits | bits `mod` 8 == 0 -> bits `div` 8 + L.IntegerType _ -> 1 + L.PointerType _ _ -> 4 + L.FloatingPointType L.FloatFP -> 4 + L.FloatingPointType L.DoubleFP -> 8 + L.VectorType _ eltTy -> dexAlignment eltTy + _ -> 1 + store :: LLVMBuilder m => Operand -> Operand -> m () -store ptr x = addInstr $ L.Do $ L.Store False ptr x Nothing 0 [] +store ptr x = addInstr $ L.Do $ L.Store False ptr x Nothing alignment [] where + alignment = dexAlignment $ typeOf x load :: LLVMBuilder m => L.Type -> Operand -> m Operand load pointeeTy ptr = #if MIN_VERSION_llvm_hs(15,0,0) - emitInstr pointeeTy $ L.Load False pointeeTy ptr Nothing 0 [] + emitInstr pointeeTy $ L.Load False pointeeTy ptr Nothing alignment [] #else - emitInstr pointeeTy $ L.Load False ptr Nothing 0 [] + emitInstr pointeeTy $ L.Load False ptr Nothing alignment [] #endif + where alignment = dexAlignment pointeeTy ilt :: LLVMBuilder m => Operand -> Operand -> m Operand ilt x y = emitInstr i1 $ L.ICmp IP.SLT x y [] diff --git a/tests/opt-tests.dx b/tests/opt-tests.dx index b5376200..71996962 100644 --- a/tests/opt-tests.dx +++ b/tests/opt-tests.dx @@ -227,3 +227,22 @@ _ = yield_accum (AddMonoid Int32) \result. 
-- CHECK: [[xsix:v#[0-9]+]]:<16xInt32> = -- CHECK-NEXT: vslice -- CHECK: extend [[refix]] [[xsix]] + +"Non-aligned" +-- CHECK-LABEL: Non-aligned + +-- This is a regression test. We are checking that Dex-side +-- vectorization does not end up assuming that arrays are aligned on +-- the size of the vectors, only on the size of the underlying +-- scalars. + +non_aligned = for i:(Fin 7). for j:(Fin 257). +0 + +%passes llvm +_ = yield_accum (AddMonoid Int32) \result. + tile((Fin 257), 32) \set. + for_ i:set. + ix = inject(i, to=(Fin 257)) + result!(6@(Fin 7))!ix += non_aligned[6@_][ix] +-- CHECK: load <16 x i32>, <16 x i32>* %"v#{{[0-9]+}}", align 4 +-- CHECK: store <16 x i32> %"v#{{[0-9]+}}", <16 x i32>* %"v#{{[0-9]+}}", align 4 |