gpufilter
GPU-Efficient Recursive Filtering and Summed-Area Tables
include/gpudefs.h
00001 
00008 #ifndef GPUDEFS_H
00009 #define GPUDEFS_H
00010 
00011 //== INCLUDES =================================================================
00012 
00013 #include <cuda_runtime.h>
00014 
00015 #include <defs.h>
00016 #include <extension.h>
00017 
00018 //== DEFINITIONS ===============================================================
00019 
// Compile-time tuning constants (preprocessor macros): warp counts per block,
// block counts per SM and thread limits, as described by each trailing comment.
// NOTE(review): the kernels that consume these values are not visible in this
// header — confirm usage before changing any of them.
00020 #define HWS 16 // Half Warp Size (half of a 32-thread warp)
00021 #define DW 8 // Default number of warps (computational block height)
00022 #define CHW 7 // Carry-heavy number of warps (computational block height for some kernels)
00023 #define OW 6 // Optimized number of warps (computational block height for some kernels)
00024 #define DNB 6 // Default number of blocks per SM (minimum blocks per SM launch bounds)
00025 #define ONB 5 // Optimized number of blocks per SM (minimum blocks per SM for some kernels)
00026 #define MTS 192 // Maximum number of threads per block with 8 blocks per SM
00027 #define MBO 8 // Maximum number of blocks per SM using optimized or maximum warps
00028 #define CHB 7 // Carry-heavy number of blocks per SM using default number of warps
00029 #define MW 6 // Maximum number of warps per block with 8 blocks per SM (with all warps computing)
00030 #define SOW 5 // Dual-scheduler optimized number of warps per block (with 8 blocks per SM and to use the dual scheduler with 1 computing warp)
00031 #define MBH 3 // Maximum number of blocks per SM using half-warp size
00032 
00033 //== NAMESPACES ===============================================================
00034 
00035 namespace gpufilter {
00036 
00037 //== CLASS DEFINITION =========================================================
00038 
// Plain aggregate of nine int fields and two float fields, exposed under the
// typedef name alg_setup. In this header it is taken by non-const reference by
// calc_alg_setup() and by const reference by up_alg_setup().
// NOTE(review): the per-field notes below are inferred from the field names
// only; the code that fills them is not visible here — confirm against the
// definition of calc_alg_setup().
00044 typedef struct _alg_setup {
00045     int width, // presumably the image width — TODO confirm
00046         height, // presumably the image height — TODO confirm
00047         m_size, // presumably a block count along one image dimension — TODO confirm
00048         n_size, // presumably a block count along the other image dimension — TODO confirm
00049         last_m, // presumably the index of the last block along the m dimension — TODO confirm
00050         last_n, // presumably the index of the last block along the n dimension — TODO confirm
00051         border, // presumably the border extension size (units unknown) — TODO confirm
00052         carry_height, // presumably the height of an auxiliary carry buffer — TODO confirm
00053         carry_width; // presumably the width of an auxiliary carry buffer — TODO confirm
00054     float inv_width, // presumably 1/width — TODO confirm
00055         inv_height; // presumably 1/height — TODO confirm
00056 } alg_setup; 
00057 
00058 //== PROTOTYPES ===============================================================
00059 
// Declaration only; the definition lives in another translation unit.
// Three-argument overload of calc_alg_setup.
//   algs - alg_setup taken by non-const reference; presumably the output that
//          is filled in — TODO confirm against the definition.
//   w, h - ints taken by const reference; presumably the image width and
//          height — TODO confirm.
00072 extern
00073 void calc_alg_setup( alg_setup& algs,
00074                      const int& w,
00075                      const int& h );
00076 
// Declaration only; the definition lives in another translation unit.
// Four-argument overload of calc_alg_setup: same leading parameters as the
// three-argument overload plus extb.
//   algs - alg_setup taken by non-const reference; presumably the output that
//          is filled in — TODO confirm against the definition.
//   w, h - ints taken by const reference; presumably the image width and
//          height — TODO confirm.
//   extb - int taken by const reference; presumably the border extension
//          (units unknown from here) — TODO confirm.
00092 extern
00093 void calc_alg_setup( alg_setup& algs,
00094                      const int& w,
00095                      const int& h,
00096                      const int& extb );
00097 
// Declaration only; the definition lives in another translation unit.
//   algs - alg_setup taken by const reference, so it is read-only through
//          this parameter.
// NOTE(review): "up" presumably means "upload" (e.g. copying the setup values
// to device-side constants) — confirm against the definition.
00107 extern
00108 void up_alg_setup( const alg_setup& algs );
00109 
// Declaration only; the definition lives in another translation unit.
//   b0, a1 - floats taken by const reference.
// NOTE(review): presumably uploads the coefficients of a first-order recursive
// filter (b0 feedforward, a1 feedback) to device constants — confirm against
// the definition.
00120 extern
00121 void up_constants_coefficients1( const float& b0,
00122                                  const float& a1 );
00123 
// Declaration only; the definition lives in another translation unit.
//   b0, a1, a2 - floats taken by const reference.
// NOTE(review): presumably uploads the coefficients of a second-order recursive
// filter (b0 feedforward, a1 and a2 feedback) to device constants — confirm
// against the definition.
00136 extern
00137 void up_constants_coefficients2( const float& b0,
00138                                  const float& a1,
00139                                  const float& a2 );
00140 
00141 //=============================================================================
00142 } // namespace gpufilter
00143 //=============================================================================
00144 #endif // GPUDEFS_H
00145 //=============================================================================