summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexey Radul <axch@google.com>2023-07-06 17:31:35 -0400
committerAlexey Radul <axch@google.com>2023-07-06 17:43:12 -0400
commitc8c0ae3b2d8393ec8e4460a8ca9880fca4a9d960 (patch)
tree59542abc9f72683a0e1259d5625c6348a5cf7bce
parentc489415792c1491882df0235f589a36879e306db (diff)
Better control of pointer alignments.
It seems that either llvm-hs or LLVM itself assumes that a pointer load or store that doesn't indicate an explicit alignment is aligned to the size of pointee. But this is wrong for vector loads from multidimensional Dex arrays---the minor dimension need only be aligned on the size of a single array element, not the whole vector.
-rw-r--r--src/lib/ImpToLLVM.hs23
-rw-r--r--tests/opt-tests.dx19
2 files changed, 39 insertions, 3 deletions
diff --git a/src/lib/ImpToLLVM.hs b/src/lib/ImpToLLVM.hs
index 19905424..3d1dd429 100644
--- a/src/lib/ImpToLLVM.hs
+++ b/src/lib/ImpToLLVM.hs
@@ -902,16 +902,33 @@ withWidthOfFP x template = case typeOf template of
L.FloatingPointType L.FloatFP -> litVal $ Float32Lit $ realToFrac x
_ -> error $ "Unsupported floating point type: " ++ show (typeOf template)
+-- If we are accessing a `L.Type` from a Dex array, what memory alignment (in
+-- bytes) can we guarantee? This is probably better expressed in Dex types, but
+-- we would need to plumb them to do it that way. 1-byte alignment should
+-- always be safe, but we can promise higher-performance alignments for some
+-- types.
+dexAlignment :: L.Type -> Word32
+dexAlignment = \case
+ L.IntegerType bits | bits `mod` 8 == 0 -> bits `div` 8
+ L.IntegerType _ -> 1
+ L.PointerType _ _ -> 4
+ L.FloatingPointType L.FloatFP -> 4
+ L.FloatingPointType L.DoubleFP -> 8
+ L.VectorType _ eltTy -> dexAlignment eltTy
+ _ -> 1
+
store :: LLVMBuilder m => Operand -> Operand -> m ()
-store ptr x = addInstr $ L.Do $ L.Store False ptr x Nothing 0 []
+store ptr x = addInstr $ L.Do $ L.Store False ptr x Nothing alignment [] where
+ alignment = dexAlignment $ typeOf x
load :: LLVMBuilder m => L.Type -> Operand -> m Operand
load pointeeTy ptr =
#if MIN_VERSION_llvm_hs(15,0,0)
- emitInstr pointeeTy $ L.Load False pointeeTy ptr Nothing 0 []
+ emitInstr pointeeTy $ L.Load False pointeeTy ptr Nothing alignment []
#else
- emitInstr pointeeTy $ L.Load False ptr Nothing 0 []
+ emitInstr pointeeTy $ L.Load False ptr Nothing alignment []
#endif
+ where alignment = dexAlignment pointeeTy
ilt :: LLVMBuilder m => Operand -> Operand -> m Operand
ilt x y = emitInstr i1 $ L.ICmp IP.SLT x y []
diff --git a/tests/opt-tests.dx b/tests/opt-tests.dx
index b5376200..71996962 100644
--- a/tests/opt-tests.dx
+++ b/tests/opt-tests.dx
@@ -227,3 +227,22 @@ _ = yield_accum (AddMonoid Int32) \result.
-- CHECK: [[xsix:v#[0-9]+]]:<16xInt32> =
-- CHECK-NEXT: vslice
-- CHECK: extend [[refix]] [[xsix]]
+
+"Non-aligned"
+-- CHECK-LABEL: Non-aligned
+
+-- This is a regression test. We are checking that Dex-side
+-- vectorization does not end up assuming that arrays are aligned on
+-- the size of the vectors, only on the size of the underlying
+-- scalars.
+
+non_aligned = for i:(Fin 7). for j:(Fin 257). +0
+
+%passes llvm
+_ = yield_accum (AddMonoid Int32) \result.
+ tile((Fin 257), 32) \set.
+ for_ i:set.
+ ix = inject(i, to=(Fin 257))
+ result!(6@(Fin 7))!ix += non_aligned[6@_][ix]
+-- CHECK: load <16 x i32>, <16 x i32>* %"v#{{[0-9]+}}", align 4
+-- CHECK: store <16 x i32> %"v#{{[0-9]+}}", <16 x i32>* %"v#{{[0-9]+}}", align 4