gpufilter
GPU-Efficient Recursive Filtering and Summed-Area Tables
|
Main namespace for the gpufilter library. More...
Classes | |
class | dvector |
Device Vector class. More... | |
struct | _clamp |
Access pattern: clamp-to-border. More... | |
struct | _repeat |
Access pattern: repeat. More... | |
struct | _mirror |
Access pattern: mirror. More... | |
struct | _alg_setup |
Algorithm setup to configure the GPU to run. More... | |
class | base_timer |
Base timer class. More... | |
class | gpu_timer |
GPU specialization of the timer class. More... | |
class | cpu_timer |
CPU specialization of the timer class. More... | |
class | scoped_timer_stop |
Scope-limited timer class. More... | |
class | timer_pool |
Pool of timers. More... | |
class | Vector |
Vector class. More... | |
class | Matrix |
Matrix class. More... | |
Typedefs | |
typedef struct gpufilter::_alg_setup | alg_setup |
Enumerations | |
enum | initcond { zero, clamp, repeat, mirror } |
Enumerates possible initial conditions for 2D-image access. More... | |
Functions | |
__global__ void | alg4_stage1 (float2 *g_transp_pybar, float2 *g_transp_ezhat) |
Algorithm 4 stage 1. | |
__global__ void | alg4_stage2_3 (float2 *g_transp_pybar, float2 *g_transp_ezhat) |
Algorithm 4 stages 2 and 3 (fused) | |
__global__ void | alg4_stage4 (float *g_transp_out, float2 *g_transp_py, float2 *g_transp_ez, float2 *g_pubar, float2 *g_evhat, int out_stride) |
Algorithm 4 stage 4. | |
__global__ void | alg4_stage5_6 (float2 *g_transp_pybar, float2 *g_transp_ezhat) |
Algorithm 4 stages 5 and 6 (fused) | |
__global__ void | alg4_stage7 (float *g_out, float2 *g_transp_py, float2 *g_transp_ez, int out_stride) |
Algorithm 4 stage 7. | |
__global__ void | alg5_stage1 (float *g_transp_pybar, float *g_transp_ezhat, float *g_ptucheck, float *g_etvtilde) |
Algorithm 5 stage 1. | |
__global__ void | alg5_stage2_3 (float *g_transp_pybar, float *g_transp_ezhat) |
Algorithm 5 stages 2 and 3 (fused) | |
__global__ void | alg5_stage4_5 (float *g_ptucheck, float *g_etvtilde, const float *g_transp_py, const float *g_transp_ez) |
Algorithm 5 stages 4 and 5 (fused) | |
__global__ void | alg5_stage6 (float *g_out, const float *g_transp_py, const float *g_transp_ez, const float *g_ptu, const float *g_etv) |
Algorithm 5 stage 6. | |
template<class T > | |
T * | cuda_new (size_t elements) |
Allocates a new memory space in the GPU. | |
template<class T > | |
void | cuda_delete (T *d_ptr) |
Deallocates a memory space in the GPU. | |
template<class T > | |
void | extend_image (T *&ext_img, int &ext_w, int &ext_h, const T *img, const int &w, const int &h, const int &extb, const initcond &ic) |
Extend an image so that the initial condition outside its borders is taken into account. | |
template<class T > | |
void | extract_image (T *img, const int &w, const int &h, T *&ext_img, const int &ext_w, const int &extb) |
Extract an image from an extended image. | |
template<class T > | |
void | rcfr (T *inout, const int &w, const int &h, const T &b0, const T &a1, const bool &ff=false) |
Compute first-order recursive filtering on columns forward and reverse. | |
template<class T > | |
void | rrfr (T *inout, const int &w, const int &h, const T &b0, const T &a1, const bool &ff=false) |
Compute first-order recursive filtering on rows forward and reverse. | |
template<class T > | |
void | r (T *inout, const int &w, const int &h, const T &b0, const T &a1, const bool &ff=false, const int &extb=0, const initcond &ic=zero) |
Compute first-order recursive filtering. | |
template<class T > | |
void | rcfr (T *inout, const int &w, const int &h, const T &b0, const T &a1, const T &a2, const bool &ff=false) |
Compute second-order recursive filtering on columns forward and reverse. | |
template<class T > | |
void | rrfr (T *inout, const int &w, const int &h, const T &b0, const T &a1, const T &a2, const bool &ff=false) |
Compute second-order recursive filtering on rows forward and reverse. | |
template<class T > | |
void | r (T *inout, const int &w, const int &h, const T &b0, const T &a1, const T &a2, const bool &ff=false, const int &extb=0, const initcond &ic=zero) |
Compute second-order recursive filtering. | |
template<class T > | |
void | sat_cpu (T *in, const int &w, const int &h) |
Compute the Summed-area Table of an image in the CPU. | |
template<class T > | |
void | gaussian_cpu (T **in, const int &w, const int &h, const int &depth, const T &s, const int &extb=1, const initcond &ic=clamp) |
Gaussian blur an image in the CPU. | |
template<class T > | |
void | gaussian_cpu (T *in, const int &w, const int &h, const T &s, const int &extb=1, const initcond &ic=clamp) |
Gaussian blur a single-channel image in the CPU. | |
template<class T > | |
void | bspline3i_cpu (T **in, const int &w, const int &h, const int &depth, const int &extb=1, const initcond &ic=mirror) |
Compute the Bicubic B-Spline interpolation of an image in the CPU. | |
template<class T > | |
void | bspline3i_cpu (T *in, const int &w, const int &h, const int &extb=1, const initcond &ic=mirror) |
Compute the Bicubic B-Spline interpolation of a single-channel image in the CPU. | |
void | calc_borders (int &left, int &top, int &right, int &bottom, const int &w, const int &h, const int &extb) |
Calculate image borders. | |
bool | extend (const int &w, const int &h, const int &extb) |
Verify if an image needs to be extended. | |
void | cuda_error (const std::string &msg) |
Check error in device. | |
template<class T > | |
T | lookat (const T *in, const int &i, const int &n, const initcond &ic, const int &p=1) |
Look in an input at a given index range. | |
template<class T > | |
T | lookat (const T *img, const int &i, const int &j, const int &h, const int &w, const initcond &ic) |
Look in an image at a given position. | |
void | calc_alg_setup (alg_setup &algs, const int &w, const int &h) |
Calculate algorithm setup values. | |
void | calc_alg_setup (alg_setup &algs, const int &w, const int &h, const int &extb) |
Calculate algorithm setup values (overload taking the image extension parameter extb). | |
void | up_alg_setup (const alg_setup &algs) |
Upload algorithm setup values. | |
void | up_constants_coefficients1 (const float &b0, const float &a1) |
Upload device constants first-order coefficients. | |
void | up_constants_coefficients2 (const float &b0, const float &a1, const float &a2) |
Upload device constants second-order coefficients. | |
template<class T > | |
T | qs (const T &s) |
Compute recursive filtering scaling factor. | |
template<class T > | |
std::complex< T > | ds (const std::complex< T > &d, const T &s) |
Rescale poles of the recursive filtering z-transform. | |
template<class T > | |
T | ds (const T &d, const T &s) |
Rescale poles on the real axis of the recursive filtering z-transform. | |
template<class T1 , class T2 > | |
void | weights1 (const T1 &s, T2 &b0, T2 &a1) |
Compute first-order weights. | |
template<class T1 , class T2 > | |
void | weights2 (const T1 &s, T2 &b0, T2 &a1, T2 &a2) |
Compute first- and second-order weights. | |
void | prepare_algSAT (alg_setup &algs, dvector< float > &d_inout, dvector< float > &d_ybar, dvector< float > &d_vhat, dvector< float > &d_ysum, const float *h_in, const int &w, const int &h) |
Prepare for Algorithm SAT. | |
void | algSAT (dvector< float > &d_out, dvector< float > &d_ybar, dvector< float > &d_vhat, dvector< float > &d_ysum, const dvector< float > &d_in, const alg_setup &algs) |
Compute Algorithm SAT. | |
void | algSAT (dvector< float > &d_inout, dvector< float > &d_ybar, dvector< float > &d_vhat, dvector< float > &d_ysum, const alg_setup &algs) |
Compute Algorithm SAT. | |
void | algSAT (float *inout, const int &w, const int &h) |
Compute Algorithm SAT. | |
void | prepare_alg4 (alg_setup &algs, alg_setup &algs_transp, dvector< float > &d_out, dvector< float > &d_transp_out, dvector< float2 > &d_transp_pybar, dvector< float2 > &d_transp_ezhat, dvector< float2 > &d_pubar, dvector< float2 > &d_evhat, cudaArray *&a_in, const float *h_in, const int &w, const int &h, const float &b0, const float &a1, const float &a2, const int &extb=0, const initcond &ic=zero) |
Prepare for Algorithm 4. | |
void | alg4 (dvector< float > &d_out, dvector< float > &d_transp_out, dvector< float2 > &d_transp_pybar, dvector< float2 > &d_transp_ezhat, dvector< float2 > &d_pubar, dvector< float2 > &d_evhat, const cudaArray *a_in, const alg_setup &algs, const alg_setup &algs_transp) |
Compute Algorithm 4 (second-order) | |
void | alg4 (float *h_inout, const int &w, const int &h, const float &b0, const float &a1, const float &a2, const int &extb=0, const initcond &ic=zero) |
Compute Algorithm 4 (second-order) | |
void | prepare_alg5 (alg_setup &algs, dvector< float > &d_out, dvector< float > &d_transp_pybar, dvector< float > &d_transp_ezhat, dvector< float > &d_ptucheck, dvector< float > &d_etvtilde, cudaArray *&a_in, const float *h_in, const int &w, const int &h, const float &b0, const float &a1, const int &extb=0, const initcond &ic=zero) |
Prepare for Algorithm 5. | |
void | alg5 (dvector< float > &d_out, dvector< float > &d_transp_pybar, dvector< float > &d_transp_ezhat, dvector< float > &d_ptucheck, dvector< float > &d_etvtilde, const cudaArray *a_in, const alg_setup &algs) |
Compute Algorithm 5 (first-order) | |
void | alg5 (float *h_inout, const int &w, const int &h, const float &b0, const float &a1, const int &extb=0, const initcond &ic=zero) |
Compute Algorithm 5 (first-order) | |
void | gaussian_gpu (float **inout, const int &w, const int &h, const int &d, const float &s, const int &extb=1, const initcond &ic=clamp) |
Gaussian blur an image in the GPU. | |
void | gaussian_gpu (float *inout, const int &w, const int &h, const float &s, const int &extb=1, const initcond &ic=clamp) |
Gaussian blur a single-channel image in the GPU. | |
void | bspline3i_gpu (float **inout, const int &w, const int &h, const int &d, const int &extb=1, const initcond &ic=mirror) |
Compute the Bicubic B-Spline interpolation of an image in the GPU. | |
void | bspline3i_gpu (float *inout, const int &w, const int &h, const int &extb=1, const initcond &ic=mirror) |
Compute the Bicubic B-Spline interpolation of a single-channel image in the GPU. | |
__host__ void | up_texture (cudaArray *&a_in, const float *h_in, const int &w, const int &h, const initcond &ic) |
Upload input image as a texture in device. | |
__global__ void | algSAT_stage1 (const float *g_in, float *g_ybar, float *g_vhat) |
Algorithm SAT stage 1. | |
__global__ void | algSAT_stage2 (float *g_ybar, float *g_ysum) |
Algorithm SAT stage 2. | |
__global__ void | algSAT_stage3 (const float *g_ysum, float *g_vhat) |
Algorithm SAT stage 3. | |
__global__ void | algSAT_stage4 (float *g_inout, const float *g_y, const float *g_v) |
Algorithm SAT stage 4. | |
__global__ void | algSAT_stage4 (float *g_out, const float *g_in, const float *g_y, const float *g_v) |
Algorithm SAT stage 4 (not-in-place computation) | |
template<class T > | |
void | copy_to_symbol (const std::string &name, const T &value) |
Copy value(s) to symbol. | |
void | copy_to_symbol (const std::string &name, unsigned long value) |
void | copy_to_symbol (const std::string &name, long value) |
template<class T > | |
void | copy_to_symbol (const std::string &name, const std::string &size_name, const std::vector< T > &items) |
template<class T > | |
void | copy_to_symbol (const std::string &name, const std::vector< T > &items) |
Variables | |
timer_pool | timers |
Global pool of timers. |
Main namespace for the gpufilter library.
typedef struct gpufilter::_alg_setup gpufilter::alg_setup |