summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Radul <axch@google.com>2023-07-06 17:31:35 -0400
committerAlexey Radul <axch@google.com>2023-07-06 17:43:12 -0400
commitc8c0ae3b2d8393ec8e4460a8ca9880fca4a9d960 (patch)
tree59542abc9f72683a0e1259d5625c6348a5cf7bce
parentc489415792c1491882df0235f589a36879e306db (diff)
Better control of pointer alignments.
It seems that either llvm-hs or LLVM itself assumes that a pointer load or store that doesn't indicate an explicit alignment is aligned to the size of pointee. But this is wrong for vector loads from multidimensional Dex arrays---the minor dimension need only be aligned on the size of a single array element, not the whole vector.
-rw-r--r--src/lib/ImpToLLVM.hs23
-rw-r--r--tests/opt-tests.dx19
2 files changed, 39 insertions, 3 deletions
diff --git a/src/lib/ImpToLLVM.hs b/src/lib/ImpToLLVM.hs
index 19905424..3d1dd429 100644
--- a/src/lib/ImpToLLVM.hs
+++ b/src/lib/ImpToLLVM.hs
@@ -902,16 +902,33 @@ withWidthOfFP x template = case typeOf template of
L.FloatingPointType L.FloatFP -> litVal $ Float32Lit $ realToFrac x
_ -> error $ "Unsupported floating point type: " ++ show (typeOf template)
+-- If we are accessing a `L.Type` from a Dex array, what memory alignment (in
+-- bytes) can we guarantee? This is probably better expressed in Dex types, but
+-- we would need to plumb them to do it that way. 1-byte alignment should
+-- always be safe, but we can promise higher-performance alignments for some
+-- types.
+dexAlignment :: L.Type -> Word32
+dexAlignment = \case
+ L.IntegerType bits | bits `mod` 8 == 0 -> bits `div` 8
+ L.IntegerType _ -> 1
+ L.PointerType _ _ -> 4
+ L.FloatingPointType L.FloatFP -> 4
+ L.FloatingPointType L.DoubleFP -> 8
+ L.VectorType _ eltTy -> dexAlignment eltTy
+ _ -> 1
+
store :: LLVMBuilder m => Operand -> Operand -> m ()
-store ptr x = addInstr $ L.Do $ L.Store False ptr x Nothing 0 []
+store ptr x = addInstr $ L.Do $ L.Store False ptr x Nothing alignment [] where
+ alignment = dexAlignment $ typeOf x
load :: LLVMBuilder m => L.Type -> Operand -> m Operand
load pointeeTy ptr =
#if MIN_VERSION_llvm_hs(15,0,0)
- emitInstr pointeeTy $ L.Load False pointeeTy ptr Nothing 0 []
+ emitInstr pointeeTy $ L.Load False pointeeTy ptr Nothing alignment []
#else
- emitInstr pointeeTy $ L.Load False ptr Nothing 0 []
+ emitInstr pointeeTy $ L.Load False ptr Nothing alignment []
#endif
+ where alignment = dexAlignment pointeeTy
ilt :: LLVMBuilder m => Operand -> Operand -> m Operand
ilt x y = emitInstr i1 $ L.ICmp IP.SLT x y []
diff --git a/tests/opt-tests.dx b/tests/opt-tests.dx
index b5376200..71996962 100644
--- a/tests/opt-tests.dx
+++ b/tests/opt-tests.dx
@@ -227,3 +227,22 @@ _ = yield_accum (AddMonoid Int32) \result.
-- CHECK: [[xsix:v#[0-9]+]]:<16xInt32> =
-- CHECK-NEXT: vslice
-- CHECK: extend [[refix]] [[xsix]]
+
+"Non-aligned"
+-- CHECK-LABEL: Non-aligned
+
+-- This is a regression test. We are checking that Dex-side
+-- vectorization does not end up assuming that arrays are aligned on
+-- the size of the vectors, only on the size of the underlying
+-- scalars.
+
+non_aligned = for i:(Fin 7). for j:(Fin 257). +0
+
+%passes llvm
+_ = yield_accum (AddMonoid Int32) \result.
+ tile((Fin 257), 32) \set.
+ for_ i:set.
+ ix = inject(i, to=(Fin 257))
+ result!(6@(Fin 7))!ix += non_aligned[6@_][ix]
+-- CHECK: load <16 x i32>, <16 x i32>* %"v#{{[0-9]+}}", align 4
+-- CHECK: store <16 x i32> %"v#{{[0-9]+}}", <16 x i32>* %"v#{{[0-9]+}}", align 4