gpufilter
GPU-Efficient Recursive Filtering and Summed-Area Tables
|
00001 00008 #ifndef GPUDEFS_H 00009 #define GPUDEFS_H 00010 00011 //== INCLUDES ================================================================= 00012 00013 #include <cuda_runtime.h> 00014 00015 #include <defs.h> 00016 #include <extension.h> 00017 00018 //== DEFINITIONS =============================================================== 00019 00020 #define HWS 16 // Half Warp Size 00021 #define DW 8 // Default number of warps (computational block height) 00022 #define CHW 7 // Carry-heavy number of warps (computational block height for some kernels) 00023 #define OW 6 // Optimized number of warps (computational block height for some kernels) 00024 #define DNB 6 // Default number of blocks per SM (minimum blocks per SM launch bounds) 00025 #define ONB 5 // Optimized number of blocks per SM (minimum blocks per SM for some kernels) 00026 #define MTS 192 // Maximum number of threads per block with 8 blocks per SM 00027 #define MBO 8 // Maximum number of blocks per SM using optimize or maximum warps 00028 #define CHB 7 // Carry-heavy number of blocks per SM using default number of warps 00029 #define MW 6 // Maximum number of warps per block with 8 blocks per SM (with all warps computing) 00030 #define SOW 5 // Dual-scheduler optimized number of warps per block (with 8 blocks per SM and to use the dual scheduler with 1 computing warp) 00031 #define MBH 3 // Maximum number of blocks per SM using half-warp size 00032 00033 //== NAMESPACES =============================================================== 00034 00035 namespace gpufilter { 00036 00037 //== CLASS DEFINITION ========================================================= 00038 00044 typedef struct _alg_setup { 00045 int width, 00046 height, 00047 m_size, 00048 n_size, 00049 last_m, 00050 last_n, 00051 border, 00052 carry_height, 00053 carry_width; 00054 float inv_width, 00055 inv_height; 00056 } alg_setup; 00057 00058 //== PROTOTYPES =============================================================== 00059 00072 extern 00073 void calc_alg_setup( alg_setup& algs, 00074 const int& w, 00075 const int& h ); 00076 00092 extern 00093 void calc_alg_setup( alg_setup& algs, 00094 const int& w, 00095 const int& h, 00096 const int& extb ); 00097 00107 extern 00108 void up_alg_setup( const alg_setup& algs ); 00109 00120 extern 00121 void up_constants_coefficients1( const float& b0, 00122 const float& a1 ); 00123 00136 extern 00137 void up_constants_coefficients2( const float& b0, 00138 const float& a1, 00139 const float& a2 ); 00140 00141 //============================================================================= 00142 } // namespace gpufilter 00143 //============================================================================= 00144 #endif // GPUDEFS_H 00145 //=============================================================================