1 // A stream in CUDA speak
2 module dcompute.driver.cuda.queue;
3 
4 import dcompute.driver.cuda;
/**
 * Thin wrapper around a CUDA driver stream handle.
 *
 * Every method stores the driver call's result in `status` and then calls
 * `checkErrors()`; both come from `dcompute.driver.cuda`.
 */
struct Queue
{
    /// Stream creation flag values as defined by the CUDA driver API:
    /// CU_STREAM_DEFAULT (0) and CU_STREAM_NON_BLOCKING (1).
    private enum uint streamDefault     = 0;
    private enum uint streamNonBlocking = 1; /// ditto

    /// Raw driver stream handle.
    void* raw;

    /**
     * Creates a stream.
     *
     * Params:
     *   async = when true the stream is created with the non-blocking flag,
     *           otherwise with the default flag. This matches what the
     *           `async` property reports back.
     */
    this (bool async)
    {
        const uint flags = async ? streamNonBlocking : streamDefault;
        status = cast(Status)cuStreamCreate(&raw, flags);
        checkErrors();
    }

    /**
     * Creates a stream with an explicit priority.
     *
     * Params:
     *   async    = when true the stream is created with the non-blocking
     *              flag, otherwise with the default flag.
     *   priority = value forwarded unchanged to cuStreamCreateWithPriority.
     */
    this (bool async, int priority)
    {
        const uint flags = async ? streamNonBlocking : streamDefault;
        status = cast(Status)cuStreamCreateWithPriority(&raw, flags, priority);
        checkErrors();
    }

    /// Returns: true if the stream's flags have the non-blocking bit set.
    @property bool async()
    {
        uint flags;
        status = cast(Status)cuStreamGetFlags(raw, &flags);
        checkErrors();
        return (flags & streamNonBlocking) != 0;
    }

    /// Returns: the priority reported by cuStreamGetPriority for this stream.
    @property int priority()
    {
        int ret;
        status = cast(Status)cuStreamGetPriority(raw, &ret);
        checkErrors();
        return ret;
    }

    /**
     * Makes this stream wait on an event.
     *
     * Params:
     *   e     = event whose raw handle is passed to cuStreamWaitEvent.
     *   flags = forwarded unchanged to cuStreamWaitEvent; defaults to 0.
     */
    void wait(Event e, uint flags = 0)
    {
        status = cast(Status)cuStreamWaitEvent(raw, e.raw, flags);
        checkErrors();
    }

    // cuMemcpy.*Async and friends
    // TODO: implement this properly
    /*template copy(T, CopySource from, CopySource to, int dimensions = 1,
                  Flag!"peer" _peer = No.peer)
    {
        auto copy(Memory to)
        {
            status = cast(Status)cuMemcpy(to.ptr.raw,ptr.raw,length);
            checkErrors();
        }
    }*/


    /*void addCallback(void delegate(Queue,Status) dg)
    {
        static CUstreamCallback cb = (void* ,Status void*) =>
        cuStreamAddCallback
    }*/

    /**
     * Prepares a launch of kernel `k` on this queue.
     *
     * The returned object captures the launch configuration; calling it with
     * the kernel's host-side arguments performs the launch via cuLaunchKernel.
     *
     * Params:
     *   k          = the kernel symbol to launch.
     *   _grid      = grid dimensions (x, y, z), passed to cuLaunchKernel.
     *   _block     = block dimensions (x, y, z), passed to cuLaunchKernel.
     *   _sharedMem = shared memory size argument passed to cuLaunchKernel.
     *
     * Returns: a callable `Call` bound to a copy of this queue.
     */
    auto enqueue(alias k)(uint[3] _grid, uint[3] _block, uint _sharedMem = 0)
    {
        static struct Call
        {
            Queue q;
            uint[3] grid, block;
            uint sharedMem;

            this(Queue _q, uint[3] _grid, uint[3] _block, uint _sharedMem)
            {
                q = _q;
                grid = _grid;
                block = _block;
                sharedMem = _sharedMem;
            }

            //TODO integrate events into this.
            /// Launches `k` with `args` using the captured configuration.
            void opCall(HostArgsOf!(typeof(k)) args)
            {
                auto kernel = Program.globalProgram.getKernel!k();

                // cuLaunchKernel takes an array holding the address of each
                // argument, so collect a pointer to every parameter.
                void*[typeof(args).length] vargs;
                foreach(i, ref a; args)
                {
                    vargs[i] = cast(void*)&a;
                }

                status = cast(Status)
                        cuLaunchKernel(kernel.raw,
                                       grid[0],  grid[1],  grid[2],
                                       block[0], block[1], block[2],
                                       sharedMem,
                                       q.raw,
                                       vargs.ptr,
                                       null);
                checkErrors();
            }
        }

        return Call(this, _grid, _block, _sharedMem);
    }
}