NifTK  16.4.1 - 0798f20
CMIC's Translational Medical Imaging Platform
niftkCUDAManager.h
Go to the documentation of this file.
1 /*=============================================================================
2 
3  NifTK: A software platform for medical image computing.
4 
5  Copyright (c) University College London (UCL). All rights reserved.
6 
7  This software is distributed WITHOUT ANY WARRANTY; without even
8  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
9  PURPOSE.
10 
11  See LICENSE.txt in the top level directory for details.
12 
13 =============================================================================*/
14 
15 #ifndef niftkCUDAManager_h
16 #define niftkCUDAManager_h
17 
18 #include "niftkCUDAExports.h"
21 #include <QThread>
22 #include <QMutex>
23 #include <cuda.h>
24 #include <cuda_runtime_api.h>
25 #include <vector>
26 #include <list>
27 #include <set>
28 #include <string>
29 #include <boost/lockfree/queue.hpp>
30 
31 namespace niftk
32 {
33 
34 // FIXME: not yet implemented
36 {
37  ScopedCUDADevice(int dev);
39 };
40 
41 
47 {
48  const void* m_DevicePointer;
49  std::size_t m_SizeInBytes;
50  unsigned int m_BytePitch;
51  unsigned int m_PixelWidth;
52  unsigned int m_PixelHeight; // obviously the unit is lines of pixels
53  int m_FIXME_pixeltype; // still haven't thought about this one...
54 
55  unsigned int m_Id;
56  cudaEvent_t m_ReadyEvent;
57 };
58 
59 
66 {
68  std::size_t m_SizeInBytes;
69  unsigned int m_BytePitch;
70  unsigned int m_PixelWidth;
71  unsigned int m_PixelHeight; // obviously the unit is lines of pixels
72  int m_FIXME_pixeltype; // still haven't thought about this one...
73 
74  unsigned int m_Id;
75  cudaEvent_t m_ReadyEvent;
76 };
77 
78 
79 // forward-decl
// Forward declarations of implementation-detail types; their definitions are not
// visible in this listing.
// StreamCallbackReleasePOD is used (by pointer) as the element type of
// CUDAManager::m_AutoreleaseQueue.
// NOTE(review): ModuleCleanup is not referenced by any line visible in this listing —
// confirm where it is used (possibly a friend declaration that the listing dropped).
80 namespace impldetail
81 {
82 struct ModuleCleanup;
83 struct StreamCallbackReleasePOD;
84 }
85 
86 
// CUDAManager: QThread-derived class (exported via NIFTKCUDA_EXPORT) that hands out
// CUDA streams and read/write accessors for LightweightCUDAImage objects.
// It is reached through the static GetInstance(); the constructor and destructor are
// protected and the copy operations are declared private, so client code can neither
// construct nor copy an instance.
// NOTE(review): the original doc comments are absent from this listing. The notes below
// are derived from the declarations only; anything behavioural is marked as an
// assumption to be confirmed against the implementation file.
120 class NIFTKCUDA_EXPORT CUDAManager : public QThread
121 {
     // LightweightCUDAImage is a friend and can therefore reach the protected/private
     // members. Presumably this is so it can call AllRefsDropped() — TODO confirm.
122  friend class LightweightCUDAImage;
124 
125 public:
     // Returns a pointer to the manager (see s_Instance below).
     // Whether the instance is created lazily on first call is not visible here.
130  static CUDAManager* GetInstance();
131 
132 
133  // FIXME: not yet implemented
134  ScopedCUDADevice ActivateDevice(int dev);
135 
     // Returns a cudaStream_t for the given name.
     // Presumably backed by m_Streams (std::string -> cudaStream_t); whether an unknown
     // name creates a new stream is not visible here — TODO confirm.
136  cudaStream_t GetStream(const std::string& name);
137 
     // Returns a ReadAccessor for the given image. According to the comment on
     // m_ValidImages, the images held in that map are the ones that can be requested.
141  ReadAccessor RequestReadAccess(const LightweightCUDAImage& lwci);
142 
     // Returns a WriteAccessor for an output image of the given width and height.
     // The meaning of FIXME_pixeltype is undecided (plain int placeholder).
     // NOTE(review): presumably served from m_AvailableImagePool when possible — confirm.
143  WriteAccessor RequestOutputImage(unsigned int width, unsigned int height, int FIXME_pixeltype);
144 
145  // when done with queueing commands to fill output image, call this.
146  // it will give you a LightweightCUDAImage that can be stuffed in CUDAImage,
147  // which in turn can go to a DataNode.
148  LightweightCUDAImage Finalise(WriteAccessor& writeAccessor, cudaStream_t stream);
149 
     // Takes a write accessor, a read accessor and a stream, and returns a
     // LightweightCUDAImage like Finalise() does.
     // NOTE(review): presumably Finalise(writeAccessor, stream) combined with
     // Autorelease(readAccessor, stream) — TODO confirm.
153  LightweightCUDAImage FinaliseAndAutorelease(
154  WriteAccessor& writeAccessor,
155  ReadAccessor& readAccessor,
156  cudaStream_t stream);
157 
     // Autorelease overload for a ReadAccessor, associated with the given stream.
     // NOTE(review): presumably the release is deferred until the work queued on the
     // stream has completed (see AutoReleaseStreamCallback) — TODO confirm.
163  void Autorelease(ReadAccessor& readAccessor, cudaStream_t stream);
164 
165 
     // Autorelease overload for a WriteAccessor, associated with the given stream.
166  void Autorelease(WriteAccessor& writeAccessor, cudaStream_t stream);
167 
168 
169 protected:
     // Construction and destruction are not available to client code.
170  CUDAManager();
171  virtual ~CUDAManager();
172 
173 
     // NOTE(review): presumably invoked by LightweightCUDAImage (a friend) once the last
     // reference to an image has gone away — TODO confirm against its implementation.
178  void AllRefsDropped(LightweightCUDAImage& lwci);
179 
180 
183 private:
     // Copy construction and copy assignment are declared private, so outside code
     // cannot copy the manager. Whether they are defined anywhere is not visible here.
184  CUDAManager(const CUDAManager& copyme);
185  CUDAManager& operator=(const CUDAManager& assignme);
187 
188 
     // Conversions between a tier index and a size; m_AvailableImagePool is indexed by
     // size tier. The actual tier <-> size mapping lives in the implementation.
     // NOTE(review): sizes are presumably in bytes — confirm.
190  std::size_t TierToSize(unsigned int tier) const;
191 
193  unsigned int SizeToTier(std::size_t size) const;
194 
195 
     // Static function with the (cudaStream_t, cudaError_t, void*) CUDART_CB signature,
     // i.e. the shape of a CUDA runtime stream callback. Per the note on
     // m_AutoreleaseQueue below, this callback must not acquire s_Lock.
     // NOTE(review): userData is presumably an impldetail::StreamCallbackReleasePOD* — confirm.
203  static void CUDART_CB AutoReleaseStreamCallback(cudaStream_t stream, cudaError_t status, void* userData);
204 
205 
     // NOTE(review): id is presumably the m_Id carried by the accessor/image — confirm.
211  void ReleaseReadAccess(unsigned int id);
212 
     // NOTE(review): presumably drains m_AutoreleaseQueue on the manager side, where
     // taking s_Lock is allowed — TODO confirm.
217  void ProcessAutoreleaseQueue();
218 
219 
220  static CUDAManager* s_Instance;
221  // there's only one instance of our class (singleton), so a single mutex is ok too.
222  static QMutex s_Lock;
223 
     // NOTE(review): presumably the most recently issued value for the m_Id fields — confirm.
224  unsigned int m_LastIssuedId;
225 
226  // vector is a size tier, followed by linked list for that tier.
227  std::vector<std::list<LightweightCUDAImage> > m_AvailableImagePool;
228 
     // NOTE(review): std::map is used below, but <map> is not among the includes visible
     // in this listing; confirm it is included (directly or transitively).
229  // images currently in use via WriteAccessor, i.e. work is being queued.
230  std::map<unsigned int, LightweightCUDAImage> m_InFlightOutputImages;
231 
232  // images that can be requested with RequestReadAccess.
233  std::map<unsigned int, LightweightCUDAImage> m_ValidImages;
234 
     // Stream handles keyed by name; the key type matches GetStream()'s name parameter.
235  std::map<std::string, cudaStream_t> m_Streams;
236 
237  // the auto-release callback cannot acquire s_Lock because that will deadlock within the cuda driver.
238  boost::lockfree::queue<impldetail::StreamCallbackReleasePOD*> m_AutoreleaseQueue;
239 };
240 
241 } // end namespace
242 #endif
const void * m_DevicePointer
Definition: niftkCUDAManager.h:48
unsigned int m_PixelHeight
Definition: niftkCUDAManager.h:71
unsigned int m_PixelWidth
Definition: niftkCUDAManager.h:70
int m_FIXME_pixeltype
Definition: niftkCUDAManager.h:72
unsigned int m_PixelWidth
Definition: niftkCUDAManager.h:51
unsigned int m_BytePitch
Definition: niftkCUDAManager.h:50
Definition: niftkCUDAManager.cxx:71
GLuint GLuint stream
Definition: glew.h:6079
unsigned int m_Id
Definition: niftkCUDAManager.h:55
std::size_t m_SizeInBytes
Definition: niftkCUDAManager.h:68
cudaEvent_t m_ReadyEvent
Definition: niftkCUDAManager.h:56
unsigned int m_PixelHeight
Definition: niftkCUDAManager.h:52
cudaEvent_t m_ReadyEvent
Definition: niftkCUDAManager.h:75
unsigned int m_BytePitch
Definition: niftkCUDAManager.h:69
unsigned int m_Id
Definition: niftkCUDAManager.h:74
Definition: niftkCUDAManager.h:65
void * m_DevicePointer
Definition: niftkCUDAManager.h:67
std::size_t m_SizeInBytes
Definition: niftkCUDAManager.h:49
Definition: niftkCUDAManager.h:120
GLint GLint GLint GLint GLint GLint GLsizei GLsizei height
Definition: glew.h:1236
GLuint const GLchar * name
Definition: glew.h:1798
Definition: niftkCUDAManager.h:35
GLsizeiptr size
Definition: glew.h:1665
GLint GLint GLint GLint GLint GLint GLsizei width
Definition: glew.h:1236
Definition: niftkCUDAManager.h:46
int m_FIXME_pixeltype
Definition: niftkCUDAManager.h:53
Definition: niftkLightweightCUDAImage.h:33
Definition: niftkExceptionObject.h:21
GLsizei const GLcharARB ** string
Definition: glew.h:5194