blob: f293065c6727e0cca6b0fdbaff3c1434d851792a (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
|
-- Table over `Fin 5` whose entry at index `i` is the ordinal of `i` as a float.
x = for i:(Fin 5). i_to_f (ordinal i)

-- Print the table itself.
x
> [0., 1., 2., 3., 4.]

-- Adding the table to itself doubles every entry.
x + x
> [0., 2., 4., 6., 8.]
-- TODO: Make it a FileCheck test
-- For every `i` in `Fin 10`, builds a 2000-element float table `q` with
-- `q.j = i_to_f (ordinal i * ordinal j)` and returns the pair
-- `(2.0 .* q, 4.0 .* q)`.
-- NOTE(review): judging by the name, the inner `for` is presumably meant to
-- exercise parallelism nested inside the outer `for` -- confirm.
testNestedParallelism =
  for i:(Fin 10).
    x = ordinal i
    q = for j:(Fin 2000). i_to_f $ x * ordinal j
    (2.0 .* q, 4.0 .* q)

-- First component at i = 2, j = 5: 2.0 * (2 * 5) = 20.
(fst testNestedParallelism.(2@_)).(5@_)
> 20.
-- TODO: Make it a FileCheck test
-- 10 x 20 integer table whose entry at (i, j) is `ordinal i * ordinal j`,
-- built with two directly nested `for` loops.
testNestedLoops =
  for i:(Fin 10).
    for j:(Fin 20).
      ordinal i * ordinal j

-- Entry at i = 4, j = 5: 4 * 5 = 20.
testNestedLoops.(4@_).(5@_)
> 20
-- (Currently disabled: the whole test below is commented out.)
-- The state is large enough that it should not fit on the stack of a single
-- GPU thread; it should get lifted to a top-level allocation instead.
-- allocationLiftingTest =
--   for i:(Fin 100).
--     yieldState (for j:(Fin 1000). ordinal i) $ \s.
--       s!(0@_) := get s!(0@_) + 1
-- (allocationLiftingTest.(4@_).(0@_), allocationLiftingTest.(4@_).(1@_))
-- > (5, 4)
|