// A stream in CUDA speak
module dcompute.driver.cuda.queue;

import dcompute.driver.cuda;

/**
 * Thin wrapper around a CUDA driver stream handle.
 *
 * Every member forwards to the corresponding `cuStream*` / `cuLaunchKernel`
 * driver call, stores the result in the module-level `status`, and then
 * calls `checkErrors()`.
 *
 * NOTE(review): the handle is never released by this struct. `Queue` is
 * copied by value (e.g. into `enqueue`'s `Call`), so a destructor would
 * double-free; ownership needs to be decided before adding one.
 */
struct Queue
{
    /// Raw driver stream handle, filled in by the constructors.
    void* raw;

    // Stream creation flags as defined by the CUDA driver API
    // (CU_STREAM_DEFAULT / CU_STREAM_NON_BLOCKING).
    private enum : uint
    {
        streamDefault     = 0,
        streamNonBlocking = 1,
    }

    // Maps the user-facing `async` switch to driver flags. This must stay the
    // inverse of the `async` getter below: a non-blocking stream is "async".
    private static uint creationFlags(bool async)
    {
        return async ? streamNonBlocking : streamDefault;
    }

    /**
     * Creates a stream.
     *
     * Params:
     *  async = `true` requests a non-blocking stream, `false` a default one.
     */
    this (bool async)
    {
        status = cast(Status)cuStreamCreate(&raw, creationFlags(async));
        checkErrors();
    }

    /**
     * Creates a stream with an explicit scheduling priority.
     *
     * Params:
     *  async    = `true` requests a non-blocking stream, `false` a default one.
     *  priority = forwarded unchanged to `cuStreamCreateWithPriority`.
     */
    this (bool async, int priority)
    {
        status = cast(Status)cuStreamCreateWithPriority(&raw, creationFlags(async), priority);
        checkErrors();
    }

    /// Returns: `true` when the stream was created with the non-blocking flag.
    @property bool async()
    {
        uint flags;
        status = cast(Status)cuStreamGetFlags(raw, &flags);
        checkErrors();
        return (flags & streamNonBlocking) != 0;
    }

    /// Returns: the priority the driver reports for this stream.
    @property int priority()
    {
        int ret;
        status = cast(Status)cuStreamGetPriority(raw, &ret);
        checkErrors();
        return ret;
    }

    /**
     * Makes work submitted to this stream after the call wait for `e`.
     *
     * Params:
     *  e     = event to wait on.
     *  flags = forwarded to `cuStreamWaitEvent`; 0 is the driver's default.
     */
    void wait(Event e, uint flags = 0)
    {
        status = cast(Status)cuStreamWaitEvent(raw, e.raw, flags);
        checkErrors();
    }

    // cuMemcpy.*Async and friends
    // TODO: implement this properly
    /*template copy(T, CopySource from, CopySource to, int dimentions = 1,
                  Flag!"peer" _peer = No.peer)
    {
        auto copy(Memory to)
        {
            status = cast(Status)cuMemcpy(to.ptr.raw,ptr.raw,length);
            checkErrors();
        }
    }*/


    /*void addCallback(void delegate(Queue,Status) dg)
    {
        static CUstreamCallback cb = (void* ,Status void*) =>
        cuStreamAddCallback
    }*/

    /**
     * Prepares a launch of kernel `k` on this queue.
     *
     * Params:
     *  _grid      = grid dimensions (x, y, z) passed to `cuLaunchKernel`.
     *  _block     = block dimensions (x, y, z) passed to `cuLaunchKernel`.
     *  _sharedMem = dynamic shared memory size in bytes passed to
     *               `cuLaunchKernel`.
     *
     * Returns: a callable object; invoking it with the kernel's host-side
     *          arguments performs the actual launch.
     */
    auto enqueue(alias k)(uint[3] _grid, uint[3] _block, uint _sharedMem = 0)
    {
        static struct Call
        {
            Queue q;
            uint[3] grid, block;
            uint sharedMem;

            this(Queue _q, uint[3] _grid, uint[3] _block, uint _sharedMem)
            {
                q         = _q;
                grid      = _grid;
                block     = _block;
                sharedMem = _sharedMem;
            }

            // TODO: integrate events into this.
            /// Launches `k` with `args` on the captured queue.
            void opCall(HostArgsOf!(typeof(k)) args)
            {
                auto kernel = Program.globalProgram.getKernel!k();

                // cuLaunchKernel takes an array of pointers, one per kernel
                // parameter; point each slot at the matching local argument.
                void*[typeof(args).length] vargs;
                foreach (i, ref a; args)
                {
                    vargs[i] = cast(void*)&a;
                }

                status = cast(Status)
                    cuLaunchKernel(kernel.raw,
                                   grid[0],  grid[1],  grid[2],
                                   block[0], block[1], block[2],
                                   sharedMem,
                                   q.raw,
                                   vargs.ptr,
                                   null);
                checkErrors();
            }
        }

        return Call(this, _grid, _block, _sharedMem);
    }
}