gpufilter
GPU-Efficient Recursive Filtering and Summed-Area Tables
include/gpufilter.h
Go to the documentation of this file.
00001 
00012 #ifndef GPUFILTER_H
00013 #define GPUFILTER_H
00014 
00015 //== MAIN DOCUMENTATION =======================================================
00016 
00112 //== INCLUDES =================================================================
00113 
00114 #include <cmath>
00115 #include <complex>
00116 
00117 #include <gpudefs.h>
00118 
00119 #include <dvector.h>
00120 #include <extension.h>
00121 
00122 //== NAMESPACES ===============================================================
00123 
00128 namespace gpufilter {
00129 
00130 //== IMPLEMENTATION ===========================================================
00131 
/**
 * @brief Evaluate the affine scaling factor q for a given s.
 *
 * Returns 0.00399341 + 0.4715161 * s, with both constants converted to T
 * before the arithmetic.  Both ds() overloads in this header use the
 * result as the divisor of their exponent (1/q).
 *
 * @param s Input value (presumably the Gaussian sigma -- confirm with callers)
 * @return The scaling factor q, in type T
 * @tparam T Arithmetic type used for the computation
 */
template< class T >
T qs( const T& s ) {
    const T offset = (T)0.00399341;
    const T slope = (T)0.4715161;
    return offset + slope * s;
}
00148 
/**
 * @brief Rescale a complex value d according to s.
 *
 * With q = qs(s), the result has magnitude |d|^(1/q) and phase arg(d)/q,
 * assembled with std::polar.
 *
 * @param d Complex value to rescale (presumably a filter pole -- confirm)
 * @param s Value forwarded to qs() to obtain the scaling factor q
 * @return The rescaled complex value
 * @tparam T Real type underlying the complex value
 */
template< class T >
std::complex<T> ds( const std::complex<T>& d,
                    const T& s ) {
    const T q = qs(s);
    const T magnitude = std::pow(std::abs(d), (T)1 / q);
    const T phase = std::arg(d) / q;
    return std::polar(magnitude, phase);
}
00169 
/**
 * @brief Rescale a real value d according to s.
 *
 * Returns d raised to the power 1/qs(s).
 *
 * @param d Real value to rescale (presumably a filter pole -- confirm)
 * @param s Value forwarded to qs() to obtain the scaling factor
 * @return d^(1/qs(s))
 * @tparam T Arithmetic type used for the computation
 */
template< class T >
T ds( const T& d,
      const T& s ) {
    const T exponent = (T)1 / qs(s);
    return std::pow(d, exponent);
}
00189 
/**
 * @brief Compute the two coefficients b0 and a1 from s.
 *
 * The real constant 1.86543 is rescaled through ds() to a value d, and
 * then:
 *   - b0 = -(1 - d) / d
 *   - a1 = -1 / d
 * The arithmetic is carried out in T1 and each result is converted to T2
 * on assignment.
 *
 * @param s Input value forwarded to ds() (presumably the Gaussian sigma -- confirm)
 * @param[out] b0 Receives -(1 - d) / d
 * @param[out] a1 Receives -1 / d
 * @tparam T1 Type of s and of the intermediate arithmetic
 * @tparam T2 Type of the output coefficients
 */
template< class T1, class T2 >
void weights1( const T1& s,
               T2& b0,
               T2& a1 ) {
    const T1 base = (T1)1.86543;
    const T1 scaled = ds(base, s);
    const T1 one = (T1)1;
    b0 = static_cast<T2>(-(one - scaled) / scaled);
    a1 = static_cast<T2>((T1)-1 / scaled);
}
00213 
/**
 * @brief Compute the three coefficients b0, a1 and a2 from s.
 *
 * The complex constant (1.41650, 1.00829) is rescaled through ds() to a
 * value d.  With n2 = |d|^2 and re = Re(d):
 *   - b0 = (1 - 2*re + n2) / n2
 *   - a1 = -2*re / n2
 *   - a2 = 1 / n2
 * The arithmetic is carried out in T1 and each result is converted to T2
 * on assignment.
 *
 * @param s Input value forwarded to ds() (presumably the Gaussian sigma -- confirm)
 * @param[out] b0 Receives (1 - 2*re + n2) / n2
 * @param[out] a1 Receives -2*re / n2
 * @param[out] a2 Receives 1 / n2
 * @tparam T1 Type of s and of the intermediate arithmetic
 * @tparam T2 Type of the output coefficients
 */
template< class T1, class T2 >
void weights2( const T1& s,
               T2& b0,
               T2& a1,
               T2& a2 ) {
    const std::complex<T1> base((T1)1.41650, (T1)1.00829);
    const std::complex<T1> scaled = ds(base, s);
    // Squared magnitude is obtained by squaring std::abs, not via std::norm.
    const T1 magnitude = std::abs(scaled);
    const T1 n2 = magnitude * magnitude;
    const T1 re = std::real(scaled);
    b0 = static_cast<T2>(((T1)1 - (T1)2 * re + n2) / n2);
    a1 = static_cast<T2>((T1)-2 * re / n2);
    a2 = static_cast<T2>((T1)1 / n2);
}
00251 
00252 //== EXTERNS ==================================================================
00253 
00254 //-- SAT ----------------------------------------------------------------------
00255 
00276 extern
00277 void prepare_algSAT( alg_setup& algs,
00278                      dvector<float>& d_inout,
00279                      dvector<float>& d_ybar,
00280                      dvector<float>& d_vhat,
00281                      dvector<float>& d_ysum,
00282                      const float *h_in,
00283                      const int& w,
00284                      const int& h );
00310 extern
00311 void algSAT( dvector<float>& d_out,
00312              dvector<float>& d_ybar,
00313              dvector<float>& d_vhat,
00314              dvector<float>& d_ysum,
00315              const dvector<float>& d_in,
00316              const alg_setup& algs );
00317 
00333 extern
00334 void algSAT( dvector<float>& d_inout,
00335              dvector<float>& d_ybar,
00336              dvector<float>& d_vhat,
00337              dvector<float>& d_ysum,
00338              const alg_setup& algs );
00339 
/**
 * @brief Declaration of the summed-area table (SAT) routine operating on
 *        a raw float pointer.
 *
 * Only the signature is visible in this header.  Parameter roles below
 * are inferred from names -- TODO confirm against the definition.
 *
 * @param inout Pointer to floats (non-const; presumably a host image
 *              overwritten with the result)
 * @param w Integer passed by const reference (presumably the image width)
 * @param h Integer passed by const reference (presumably the image height)
 */
extern void algSAT(
    float *inout,
    const int& w,
    const int& h );
00381 //-- Alg4 ---------------------------------------------------------------------
00382 
00412 extern
00413 void prepare_alg4( alg_setup& algs,
00414                    alg_setup& algs_transp,
00415                    dvector<float>& d_out,
00416                    dvector<float>& d_transp_out,
00417                    dvector<float2>& d_transp_pybar,
00418                    dvector<float2>& d_transp_ezhat,
00419                    dvector<float2>& d_pubar,
00420                    dvector<float2>& d_evhat,
00421                    cudaArray *& a_in,
00422                    const float *h_in,
00423                    const int& w,
00424                    const int& h,
00425                    const float& b0,
00426                    const float& a1,
00427                    const float& a2,
00428                    const int& extb = 0,
00429                    const initcond& ic = zero );
00458 extern
00459 void alg4( dvector<float>& d_out,
00460            dvector<float>& d_transp_out,
00461            dvector<float2>& d_transp_pybar,
00462            dvector<float2>& d_transp_ezhat,
00463            dvector<float2>& d_pubar,
00464            dvector<float2>& d_evhat,
00465            const cudaArray *a_in,
00466            const alg_setup& algs,
00467            const alg_setup& algs_transp );
00468 
00489 extern
00490 void alg4( float *h_inout,
00491            const int& w,
00492            const int& h,
00493            const float& b0,
00494            const float& a1,
00495            const float& a2,
00496            const int& extb = 0,
00497            const initcond& ic = zero );
00508 //-- Alg5 ---------------------------------------------------------------------
00509 
00536 extern
00537 void prepare_alg5( alg_setup& algs,
00538                    dvector<float>& d_out,
00539                    dvector<float>& d_transp_pybar,
00540                    dvector<float>& d_transp_ezhat,
00541                    dvector<float>& d_ptucheck,
00542                    dvector<float>& d_etvtilde,
00543                    cudaArray *& a_in,
00544                    const float *h_in,
00545                    const int& w,
00546                    const int& h,
00547                    const float& b0,
00548                    const float& a1,
00549                    const int& extb = 0,
00550                    const initcond& ic = zero );
00577 extern
00578 void alg5( dvector<float>& d_out,
00579            dvector<float>& d_transp_pybar,
00580            dvector<float>& d_transp_ezhat,
00581            dvector<float>& d_ptucheck,
00582            dvector<float>& d_etvtilde,
00583            const cudaArray *a_in,
00584            const alg_setup& algs );
00585 
00617 extern
00618 void alg5( float *h_inout,
00619            const int& w,
00620            const int& h,
00621            const float& b0,
00622            const float& a1,
00623            const int& extb = 0,
00624            const initcond& ic = zero );
00635 //-- Gaussian -----------------------------------------------------------------
00636 
00654 extern
00655 void gaussian_gpu( float **inout,
00656                    const int& w,
00657                    const int& h,
00658                    const int& d,
00659                    const float& s,
00660                    const int& extb = 1,
00661                    const initcond& ic = clamp );
00662 
00675 extern
00676 void gaussian_gpu( float *inout,
00677                    const int& w,
00678                    const int& h,
00679                    const float& s,
00680                    const int& extb = 1,
00681                    const initcond& ic = clamp );
00682 
00683 //-- BSpline ------------------------------------------------------------------
00684 
00700 extern
00701 void bspline3i_gpu( float **inout,
00702                     const int& w,
00703                     const int& h,
00704                     const int& d,
00705                     const int& extb = 1,
00706                     const initcond& ic = mirror );
00707 
00719 extern
00720 void bspline3i_gpu( float *inout,
00721                     const int& w,
00722                     const int& h,
00723                     const int& extb = 1,
00724                     const initcond& ic = mirror );
00725 
00726 //=============================================================================
00727 } // namespace gpufilter
00728 //=============================================================================
00729 
00730 //== SECONDARY DOCUMENTATION ==================================================
00731 
00923 //=============================================================================
00924 #endif // GPUFILTER_H
00925 //=============================================================================