// gpufilter
// GPU-Efficient Recursive Filtering and Summed-Area Tables
00001 00012 #ifndef GPUFILTER_H 00013 #define GPUFILTER_H 00014 00015 //== MAIN DOCUMENTATION ======================================================= 00016 00112 //== INCLUDES ================================================================= 00113 00114 #include <cmath> 00115 #include <complex> 00116 00117 #include <gpudefs.h> 00118 00119 #include <dvector.h> 00120 #include <extension.h> 00121 00122 //== NAMESPACES =============================================================== 00123 00128 namespace gpufilter { 00129 00130 //== IMPLEMENTATION =========================================================== 00131 00144 template< class T > 00145 T qs( const T& s ) { 00146 return (T)0.00399341 + (T)0.4715161 * s; 00147 } 00148 00163 template< class T > 00164 std::complex<T> ds( const std::complex<T>& d, 00165 const T& s ) { 00166 T q = qs(s); 00167 return std::polar(std::pow(std::abs(d),(T)1/q), std::arg(d)/q); 00168 } 00169 00184 template< class T > 00185 T ds( const T& d, 00186 const T& s ) { 00187 return std::pow(d, (T)1/qs(s)); 00188 } 00189 00204 template< class T1, class T2 > 00205 void weights1( const T1& s, 00206 T2& b0, 00207 T2& a1 ) { 00208 const T1 d3 = (T1)1.86543; 00209 T1 d = ds(d3, s); 00210 b0 = static_cast<T2>(-((T1)1-d)/d); 00211 a1 = static_cast<T2>((T1)-1/d); 00212 } 00213 00237 template< class T1, class T2 > 00238 void weights2( const T1& s, 00239 T2& b0, 00240 T2& a1, 00241 T2& a2 ) { 00242 const std::complex<T1> d1((T1)1.41650, (T1)1.00829); 00243 std::complex<T1> d = ds(d1, s); 00244 T1 n2 = std::abs(d); 00245 n2 *= n2; 00246 T1 re = std::real(d); 00247 b0 = static_cast<T2>(((T1)1-(T1)2*re+n2)/n2); 00248 a1 = static_cast<T2>((T1)-2*re/n2); 00249 a2 = static_cast<T2>((T1)1/n2); 00250 } 00251 00252 //== EXTERNS ================================================================== 00253 00254 //-- SAT ---------------------------------------------------------------------- 00255 00276 extern 00277 void prepare_algSAT( alg_setup& algs, 00278 dvector<float>& 
d_inout, 00279 dvector<float>& d_ybar, 00280 dvector<float>& d_vhat, 00281 dvector<float>& d_ysum, 00282 const float *h_in, 00283 const int& w, 00284 const int& h ); 00310 extern 00311 void algSAT( dvector<float>& d_out, 00312 dvector<float>& d_ybar, 00313 dvector<float>& d_vhat, 00314 dvector<float>& d_ysum, 00315 const dvector<float>& d_in, 00316 const alg_setup& algs ); 00317 00333 extern 00334 void algSAT( dvector<float>& d_inout, 00335 dvector<float>& d_ybar, 00336 dvector<float>& d_vhat, 00337 dvector<float>& d_ysum, 00338 const alg_setup& algs ); 00339 00369 extern 00370 void algSAT( float *inout, 00371 const int& w, 00372 const int& h ); 00381 //-- Alg4 --------------------------------------------------------------------- 00382 00412 extern 00413 void prepare_alg4( alg_setup& algs, 00414 alg_setup& algs_transp, 00415 dvector<float>& d_out, 00416 dvector<float>& d_transp_out, 00417 dvector<float2>& d_transp_pybar, 00418 dvector<float2>& d_transp_ezhat, 00419 dvector<float2>& d_pubar, 00420 dvector<float2>& d_evhat, 00421 cudaArray *& a_in, 00422 const float *h_in, 00423 const int& w, 00424 const int& h, 00425 const float& b0, 00426 const float& a1, 00427 const float& a2, 00428 const int& extb = 0, 00429 const initcond& ic = zero ); 00458 extern 00459 void alg4( dvector<float>& d_out, 00460 dvector<float>& d_transp_out, 00461 dvector<float2>& d_transp_pybar, 00462 dvector<float2>& d_transp_ezhat, 00463 dvector<float2>& d_pubar, 00464 dvector<float2>& d_evhat, 00465 const cudaArray *a_in, 00466 const alg_setup& algs, 00467 const alg_setup& algs_transp ); 00468 00489 extern 00490 void alg4( float *h_inout, 00491 const int& w, 00492 const int& h, 00493 const float& b0, 00494 const float& a1, 00495 const float& a2, 00496 const int& extb = 0, 00497 const initcond& ic = zero ); 00508 //-- Alg5 --------------------------------------------------------------------- 00509 00536 extern 00537 void prepare_alg5( alg_setup& algs, 00538 dvector<float>& d_out, 00539 
dvector<float>& d_transp_pybar, 00540 dvector<float>& d_transp_ezhat, 00541 dvector<float>& d_ptucheck, 00542 dvector<float>& d_etvtilde, 00543 cudaArray *& a_in, 00544 const float *h_in, 00545 const int& w, 00546 const int& h, 00547 const float& b0, 00548 const float& a1, 00549 const int& extb = 0, 00550 const initcond& ic = zero ); 00577 extern 00578 void alg5( dvector<float>& d_out, 00579 dvector<float>& d_transp_pybar, 00580 dvector<float>& d_transp_ezhat, 00581 dvector<float>& d_ptucheck, 00582 dvector<float>& d_etvtilde, 00583 const cudaArray *a_in, 00584 const alg_setup& algs ); 00585 00617 extern 00618 void alg5( float *h_inout, 00619 const int& w, 00620 const int& h, 00621 const float& b0, 00622 const float& a1, 00623 const int& extb = 0, 00624 const initcond& ic = zero ); 00635 //-- Gaussian ----------------------------------------------------------------- 00636 00654 extern 00655 void gaussian_gpu( float **inout, 00656 const int& w, 00657 const int& h, 00658 const int& d, 00659 const float& s, 00660 const int& extb = 1, 00661 const initcond& ic = clamp ); 00662 00675 extern 00676 void gaussian_gpu( float *inout, 00677 const int& w, 00678 const int& h, 00679 const float& s, 00680 const int& extb = 1, 00681 const initcond& ic = clamp ); 00682 00683 //-- BSpline ------------------------------------------------------------------ 00684 00700 extern 00701 void bspline3i_gpu( float **inout, 00702 const int& w, 00703 const int& h, 00704 const int& d, 00705 const int& extb = 1, 00706 const initcond& ic = mirror ); 00707 00719 extern 00720 void bspline3i_gpu( float *inout, 00721 const int& w, 00722 const int& h, 00723 const int& extb = 1, 00724 const initcond& ic = mirror ); 00725 00726 //============================================================================= 00727 } // namespace gpufilter 00728 //============================================================================= 00729 00730 //== SECONDARY DOCUMENTATION 
================================================== 00731 00923 //============================================================================= 00924 #endif // GPUFILTER_H 00925 //=============================================================================