gpufilter
GPU-Efficient Recursive Filtering and Summed-Area Tables
|
00001 00009 #ifndef UTIL_H 00010 #define UTIL_H 00011 00012 //== INCLUDES ================================================================= 00013 00014 #include <cassert> 00015 #include <iostream> 00016 00017 #ifdef __CUDA_ARCH__ 00018 # ifdef assert 00019 # undef assert 00020 # endif 00021 # define assert (void) 00022 #endif 00023 00024 //== NAMESPACES =============================================================== 00025 00026 namespace gpufilter { 00027 00028 //== CLASS DEFINITION ========================================================= 00029 00043 template <class T, int N> 00044 class Vector { 00045 00046 public: 00047 00052 std::vector<T> to_vector() const { 00053 return std::vector<T>(&m_data[0], &m_data[0]+size()); 00054 } 00055 00061 __host__ __device__ const T& operator [] ( int i ) const { 00062 assert(i >= 0 && i < size()); 00063 return m_data[i]; 00064 } 00065 00071 __host__ __device__ T& operator [] ( int i ) { 00072 assert(i >= 0 && i < size()); 00073 return m_data[i]; 00074 } 00075 00080 __host__ __device__ int size() const { return N; } 00081 00088 friend std::ostream& operator << ( std::ostream& out, 00089 const Vector& v ) { 00090 out << '['; 00091 for (int i=0; i<v.size(); ++i) { 00092 out << v[i]; 00093 if (i < v.size()-1) out << ','; 00094 } 00095 return out << ']'; 00096 } 00097 00103 __host__ __device__ Vector& operator += ( const Vector& v ) { 00104 assert(size() == v.size()); 00105 #pragma unroll 00106 for (int j=0; j<size(); ++j) 00107 m_data[j] += v[j]; 00108 return *this; 00109 } 00110 00116 __host__ __device__ Vector operator + ( const Vector& v ) const { 00117 return Vector(*this) += v; 00118 } 00119 00125 __host__ __device__ Vector &operator *= ( const T& v ) { 00126 #pragma unroll 00127 for (int j=0; j<size(); ++j) 00128 m_data[j] *= v; 00129 return *this; 00130 } 00131 00137 __host__ __device__ Vector operator * ( const T& v ) const { 00138 return Vector(*this) *= v; 00139 } 00140 00146 __host__ __device__ Vector& operator /= ( const T& v ) { 00147 #pragma unroll 00148 for (int j=0; j<size(); ++j) 00149 m_data[j] /= v; 00150 return *this; 00151 } 00152 00158 __host__ __device__ Vector operator / ( const T& v ) const { 00159 return Vector(*this) /= v; 00160 } 00161 00166 __host__ __device__ operator const T* () const { return &m_data[0]; } 00167 00172 __host__ __device__ operator T* () { return &m_data[0]; } 00173 00174 private: 00175 00176 T m_data[N]; 00177 00178 }; 00179 00180 //== CLASS DEFINITION ========================================================= 00181 00195 template <class T, int M, int N=M> 00196 class Matrix { 00197 00198 public: 00199 00204 __host__ __device__ int rows() const { return M; } 00205 00210 __host__ __device__ int cols() const { return N; } 00211 00217 __host__ __device__ const Vector<T,N>& operator [] ( int i ) const { 00218 assert(i >= 0 && i < rows()); 00219 return m_data[i]; 00220 } 00221 00227 __host__ __device__ Vector<T,N>& operator [] ( int i ) { 00228 assert(i >= 0 && i < rows()); 00229 return m_data[i]; 00230 } 00231 00238 friend std::ostream& operator << ( std::ostream& out, 00239 const Matrix& m ) { 00240 out << '['; 00241 for (int i=0; i<m.rows(); ++i) { 00242 for (int j=0; j<m.cols(); ++j) { 00243 out << m[i][j]; 00244 if (j < m.cols()-1) out << ','; 00245 } 00246 if (i < m.rows()-1) out << ";\n"; 00247 } 00248 return out << ']'; 00249 } 00250 00258 template <int P, int Q> 00259 __host__ __device__ Matrix<T,M,Q> operator * ( const Matrix<T,P,Q>& rhs ) const { 00260 assert(cols()==rhs.rows()); 00261 Matrix<T,M,Q> r; 00262 for (int i=0; i<r.rows(); ++i) { 00263 for (int j=0; j<r.cols(); ++j) { 00264 r[i][j] = m_data[i][0]*rhs[0][j]; 00265 for (int k=1; k<cols(); ++k) 00266 r[i][j] += m_data[i][k]*rhs[k][j]; 00267 } 00268 } 00269 return r; 00270 } 00271 00277 __host__ __device__ Matrix& operator *= ( T val ) { 00278 #pragma unroll 00279 for (int i=0; i<rows(); ++i) 00280 #pragma unroll 00281 for (int j=0; j<cols(); ++j) 00282 m_data[i][j] *= val; 00283 return *this; 00284 } 00285 00291 __host__ __device__ Vector<T,M> col( int j ) const { 00292 Vector<T,M> c; 00293 #pragma unroll 00294 for (int i=0; i<rows(); ++i) 00295 c[i] = m_data[i][j]; 00296 return c; 00297 } 00298 00304 __host__ __device__ void set_col( int j, 00305 const Vector<T,M>& c ) { 00306 #pragma unroll 00307 for (int i=0; i<rows(); ++i) 00308 m_data[i][j] = c[i]; 00309 } 00310 00317 __host__ __device__ friend Matrix operator * ( const Matrix& m, 00318 T val ) { 00319 return Matrix(m) *= val; 00320 } 00321 00328 __host__ __device__ friend Matrix operator * ( T val, 00329 const Matrix& m ) { 00330 return operator*(m,val); 00331 } 00332 00338 __host__ __device__ Matrix& operator += ( const Matrix& rhs ) { 00339 #pragma unroll 00340 for (int i=0; i<rows(); ++i) 00341 #pragma unroll 00342 for (int j=0; j<cols(); ++j) 00343 m_data[i][j] += rhs[i][j]; 00344 return *this; 00345 } 00346 00353 __host__ __device__ friend Matrix operator + ( const Matrix& lhs, 00354 const Matrix& rhs ) { 00355 return Matrix(lhs) += rhs; 00356 } 00357 00363 __host__ __device__ Matrix& operator -= ( const Matrix& rhs ) { 00364 #pragma unroll 00365 for (int i=0; i<rows(); ++i) 00366 #pragma unroll 00367 for (int j=0; j<cols(); ++j) 00368 m_data[i][j] -= rhs[i][j]; 00369 return *this; 00370 } 00371 00378 __host__ __device__ friend Matrix operator - ( const Matrix& lhs, 00379 const Matrix& rhs ) { 00380 return Matrix(lhs) -= rhs; 00381 } 00382 00383 private: 00384 00385 Vector<T,N> m_data[M]; 00386 00387 }; 00388 00389 //== IMPLEMENTATION =========================================================== 00390 00391 //-- Multiply ----------------------------------------------------------------- 00392 00400 template <class T, int M, int N> 00401 __host__ __device__ Vector<T,N> operator * ( const Vector<T,M>& v, 00402 const Matrix<T,M,N>& m ) { 00403 assert(v.size() == m.rows()); 00404 Vector<T,N> r; 00405 #pragma unroll 00406 for (int j=0; j<m.cols(); ++j) { 00407 r[j] = v[0]*m[0][j]; 00408 #pragma unroll 00409 for (int i=1; i<m.rows(); ++i) 00410 r[j] += v[i]*m[i][j]; 00411 } 00412 return r; 00413 } 00414 00415 //-- Basics ------------------------------------------------------------------- 00416 00423 template <class T, int N> 00424 __host__ __device__ Vector<T,N> zeros() { 00425 Vector<T,N> v; 00426 if (N>0) 00427 { 00428 #if __CUDA_ARCH__ 00429 #pragma unroll 00430 for (int j=0; j<v.size(); ++j) 00431 v[j] = T(); 00432 #else 00433 std::fill(&v[0], &v[N-1]+1, T()); 00434 #endif 00435 } 00436 return v; // I'm hoping that RVO will kick in 00437 } 00438 00446 template <class T, int M, int N> 00447 __host__ __device__ Matrix<T,M,N> zeros() { 00448 Matrix<T,M,N> mat; 00449 if (M>0 && N>0) 00450 { 00451 #if __CUDA_ARCH__ 00452 #pragma unroll 00453 for (int i=0; i<mat.rows(); ++i) 00454 #pragma unroll 00455 for (int j=0; j<mat.cols(); ++j) 00456 mat[i][j] = T(); 00457 #else 00458 std::fill(&mat[0][0], &mat[M-1][N-1]+1, T()); 00459 #endif 00460 } 00461 return mat; // I'm hoping that RVO will kick in 00462 } 00463 00471 template <class T, int M, int N> 00472 Matrix<T,M,N> identity() { 00473 Matrix<T,M,N> mat; 00474 for (int i=0; i<M; ++i) 00475 for (int j=0; j<N; ++j) 00476 mat[i][j] = i==j ? 1 : 0; 00477 return mat; 00478 } 00479 00488 template <class T, int M, int N> 00489 __host__ __device__ Matrix<T,N,M> transp( const Matrix<T,M,N>& m ) { 00490 Matrix<T,N,M> tm; 00491 #pragma unroll 00492 for (int i=0; i<m.rows(); ++i) 00493 #pragma unroll 00494 for (int j=0; j<m.cols(); ++j) 00495 tm[j][i] = m[i][j]; 00496 return tm; 00497 } 00498 00499 //-- Forward ------------------------------------------------------------------ 00500 00514 template <class T, int N, int R> 00515 void fwd_inplace( const Vector<T,R>& p, 00516 Vector<T,N>& b, 00517 const Vector<T,R+1>& w ) { 00518 for (int j=0; j<b.size(); ++j) { 00519 b[j] *= w[0]; 00520 for (int k=1; k<w.size(); ++k) { 00521 if (j-k < 0) 00522 b[j] -= p[p.size()+j-k]*w[k]; // use data from prologue 00523 else 00524 b[j] -= b[j-k]*w[k]; 00525 } 00526 } 00527 } 00528 00543 template <class T, int M, int N, int R> 00544 void fwd_inplace( const Matrix<T,M,R>& p, 00545 Matrix<T,M,N>& b, 00546 const Vector<T,R+1>& w ) { 00547 for (int i=0; i<b.rows(); ++i) 00548 fwd_inplace(p[i], b[i], w); 00549 } 00550 00571 template <class T, int M, int N, int R> 00572 Matrix<T,M,N> fwd( const Matrix<T,M,R>& p, 00573 const Matrix<T,M,N>& b, 00574 const Vector<T,R+1>& w) { 00575 Matrix<T,M,N> fb = b; 00576 fwd_inplace(p, fb, w); 00577 return fb; 00578 } 00579 00595 template <class T, int M, int N, int R> 00596 void fwd_inplace( const Matrix<T,R,N>& p, 00597 Matrix<T,M,N>& b, 00598 const Vector<T,R+1>& w ) { 00599 b = fwd(p, b, w); 00600 } 00601 00623 template <class T, int M, int N, int R> 00624 Matrix<T,M,N> fwdT( const Matrix<T,R,N>& pT, 00625 const Matrix<T,M,N>& b, 00626 const Vector<T,R+1>& w ) { 00627 return transp(fwd(transp(pT), transp(b), w)); 00628 } 00629 00630 //-- Reverse ------------------------------------------------------------------ 00631 00645 template <class T, int N, int R> 00646 void rev_inplace( Vector<T,N>& b, 00647 const Vector<T,R>& e, 00648 const Vector<T,R+1>& w ) { 00649 for (int j=b.size()-1; j>=0; --j) { 00650 b[j] *= w[0]; 00651 for (int k=1; k<w.size(); ++k) { 00652 if (j+k >= b.size()) 00653 b[j] -= e[j+k-b.size()]*w[k]; // use data from epilogue 00654 else 00655 b[j] -= b[j+k]*w[k]; 00656 } 00657 } 00658 } 00659 00674 template <class T, int M, int N, int R> 00675 void rev_inplace( Matrix<T,M,N>& b, 00676 const Matrix<T,M,R>& e, 00677 const Vector<T,R+1>& w ) { 00678 for (int i=0; i<b.rows(); ++i) 00679 rev_inplace(b[i], e[i], w); 00680 } 00681 00703 template <class T, int M, int N, int R> 00704 Matrix<T,M,N> rev( const Matrix<T,M,N>& b, 00705 const Matrix<T,M,R>& e, 00706 const Vector<T,R+1>& w ) { 00707 Matrix<T,M,N> rb = b; 00708 rev_inplace(rb, e, w); 00709 return rb; 00710 } 00711 00727 template <class T, int M, int N, int R> 00728 void rev_inplace( Matrix<T,M,N>& b, 00729 const Matrix<T,R,N>& e, 00730 const Vector<T,R+1>& w ) { 00731 b = rev(b, e, w); 00732 } 00733 00755 template <class T, int M, int N, int R> 00756 Matrix<T,M,N> revT( const Matrix<T,M,N>& b, 00757 const Matrix<T,R,N>& eT, 00758 const Vector<T,R+1>& w ) { 00759 return transp(rev(transp(b), transp(eT), w)); 00760 } 00761 00762 //-- Head --------------------------------------------------------------------- 00763 00790 template <int R, int M, int N, class T> 00791 Matrix<T,M,R> head( const Matrix<T,M,N>& mat ) { 00792 assert(mat.cols() >= R); 00793 Matrix<T,M,R> h; 00794 for (int j=0; j<R; ++j) 00795 for (int i=0; i<mat.rows(); ++i) 00796 h[i][j] = mat[i][j]; 00797 return h; 00798 } 00799 00822 template <int R, int M, int N, class T> 00823 Matrix<T,R,N> headT( const Matrix<T,M,N>& mat ) { 00824 return transp(head<R>(transp(mat))); 00825 } 00826 00827 //-- Tail --------------------------------------------------------------------- 00828 00851 template <int R, int M, int N, class T> 00852 Matrix<T,M,R> tail( const Matrix<T,M,N>& mat ) { 00853 assert(mat.cols() >= R); 00854 Matrix<T,M,R> t; 00855 for (int j=0; j<R; ++j) 00856 for (int i=0; i<mat.rows(); ++i) 00857 t[i][j] = mat[i][mat.cols()-R+j]; 00858 return t; 00859 } 00860 00883 template <int R, int M, int N, class T> 00884 Matrix<T,R,N> tailT( const Matrix<T,M,N>& mat ) { 00885 return transp(tail<R>(transp(mat))); 00886 } 00887 00888 //============================================================================= 00889 } // namespace gpufilter 00890 //============================================================================= 00891 #endif // UTIL_H 00892 //=============================================================================