gpufilter
GPU-Efficient Recursive Filtering and Summed-Area Tables
include/util.h
Go to the documentation of this file.
00001 
00009 #ifndef UTIL_H
00010 #define UTIL_H
00011 
00012 //== INCLUDES =================================================================
00013 
00014 #include <cassert>
00015 #include <iostream>
00016 
00017 #ifdef  __CUDA_ARCH__
00018 #   ifdef assert
00019 #       undef assert
00020 #   endif
00021 #   define assert (void)
00022 #endif
00023 
00024 //== NAMESPACES ===============================================================
00025 
00026 namespace gpufilter {
00027 
00028 //== CLASS DEFINITION =========================================================
00029 
00043 template <class T, int N>
00044 class Vector {
00045     
00046 public:
00047 
00052     std::vector<T> to_vector() const {
00053         return std::vector<T>(&m_data[0], &m_data[0]+size());
00054     }
00055 
00061     __host__ __device__ const T& operator [] ( int i ) const {
00062         assert(i >= 0 && i < size());
00063         return m_data[i];
00064     }
00065 
00071     __host__ __device__ T& operator [] ( int i ) {
00072         assert(i >= 0 && i < size());
00073         return m_data[i];
00074     }
00075 
00080     __host__ __device__ int size() const { return N; }
00081 
00088     friend std::ostream& operator << ( std::ostream& out,
00089                                        const Vector& v ) {
00090         out << '[';
00091         for (int i=0; i<v.size(); ++i) {
00092             out << v[i];
00093             if (i < v.size()-1) out << ',';
00094         }
00095         return out << ']';
00096     }
00097 
00103     __host__ __device__ Vector& operator += ( const Vector& v ) {
00104         assert(size() == v.size());
00105 #pragma unroll
00106         for (int j=0; j<size(); ++j)
00107             m_data[j] += v[j];
00108         return *this;
00109     }
00110 
00116     __host__ __device__ Vector operator + ( const Vector& v ) const {
00117         return Vector(*this) += v;
00118     }
00119 
00125     __host__ __device__ Vector &operator *= ( const T& v ) {
00126 #pragma unroll
00127         for (int j=0; j<size(); ++j)
00128             m_data[j] *= v;
00129         return *this;
00130     }
00131 
00137     __host__ __device__ Vector operator * ( const T& v ) const {
00138         return Vector(*this) *= v;
00139     }
00140 
00146     __host__ __device__ Vector& operator /= ( const T& v ) {
00147 #pragma unroll
00148         for (int j=0; j<size(); ++j)
00149             m_data[j] /= v;
00150         return *this;
00151     }
00152 
00158     __host__ __device__ Vector operator / ( const T& v ) const {
00159         return Vector(*this) /= v;
00160     }
00161 
00166     __host__ __device__ operator const T* () const { return &m_data[0]; }
00167 
00172     __host__ __device__ operator T* () { return &m_data[0]; }
00173 
00174 private:
00175 
00176     T m_data[N]; 
00177 
00178 };
00179 
00180 //== CLASS DEFINITION =========================================================
00181 
00195 template <class T, int M, int N=M>
00196 class Matrix {
00197 
00198 public:
00199 
00204     __host__ __device__ int rows() const { return M; }
00205 
00210     __host__ __device__ int cols() const { return N; }
00211 
00217     __host__ __device__ const Vector<T,N>& operator [] ( int i ) const {
00218         assert(i >= 0 && i < rows());
00219         return m_data[i];
00220     }
00221 
00227     __host__ __device__ Vector<T,N>& operator [] ( int i ) {
00228         assert(i >= 0 && i < rows());
00229         return m_data[i];
00230     }
00231 
00238     friend std::ostream& operator << ( std::ostream& out,
00239                                        const Matrix& m ) {
00240         out << '[';
00241         for (int i=0; i<m.rows(); ++i) {
00242             for (int j=0; j<m.cols(); ++j) {
00243                 out << m[i][j];
00244                 if (j < m.cols()-1) out << ',';
00245             }
00246             if (i < m.rows()-1) out << ";\n";
00247         }
00248         return out << ']';
00249     }
00250 
00258     template <int P, int Q>
00259     __host__ __device__ Matrix<T,M,Q> operator * ( const Matrix<T,P,Q>& rhs ) const {
00260         assert(cols()==rhs.rows());
00261         Matrix<T,M,Q> r;
00262         for (int i=0; i<r.rows(); ++i) {
00263             for (int j=0; j<r.cols(); ++j) {
00264                 r[i][j] = m_data[i][0]*rhs[0][j];
00265                 for (int k=1; k<cols(); ++k)
00266                     r[i][j] += m_data[i][k]*rhs[k][j];
00267             }
00268         }
00269         return r;
00270     }
00271 
00277     __host__ __device__ Matrix& operator *= ( T val ) {
00278 #pragma unroll
00279         for (int i=0; i<rows(); ++i)
00280 #pragma unroll
00281             for (int j=0; j<cols(); ++j)
00282                 m_data[i][j] *= val;
00283         return *this;
00284     }
00285 
00291     __host__ __device__ Vector<T,M> col( int j ) const {
00292         Vector<T,M> c;
00293 #pragma unroll
00294         for (int i=0; i<rows(); ++i)
00295             c[i] = m_data[i][j];
00296         return c;
00297     }
00298 
00304     __host__ __device__ void set_col( int j,
00305                                       const Vector<T,M>& c ) {
00306 #pragma unroll
00307         for (int i=0; i<rows(); ++i)
00308             m_data[i][j] = c[i];
00309     }
00310 
00317     __host__ __device__ friend Matrix operator * ( const Matrix& m,
00318                                                    T val ) {
00319         return Matrix(m) *= val;
00320     }
00321 
00328     __host__ __device__ friend Matrix operator * ( T val,
00329                                                    const Matrix& m ) {
00330         return operator*(m,val);
00331     }
00332 
00338     __host__ __device__ Matrix& operator += ( const Matrix& rhs ) {
00339 #pragma unroll
00340         for (int i=0; i<rows(); ++i)
00341 #pragma unroll
00342             for (int j=0; j<cols(); ++j)
00343                 m_data[i][j] += rhs[i][j];
00344         return *this;
00345     }
00346 
00353     __host__ __device__ friend Matrix operator + ( const Matrix& lhs,
00354                                                    const Matrix& rhs ) {
00355         return Matrix(lhs) += rhs;
00356     }
00357 
00363     __host__ __device__ Matrix& operator -= ( const Matrix& rhs ) {
00364 #pragma unroll
00365         for (int i=0; i<rows(); ++i)
00366 #pragma unroll
00367             for (int j=0; j<cols(); ++j)
00368                 m_data[i][j] -= rhs[i][j];
00369         return *this;
00370     }
00371 
00378     __host__ __device__ friend Matrix operator - ( const Matrix& lhs,
00379                                                    const Matrix& rhs ) {
00380         return Matrix(lhs) -= rhs;
00381     }
00382 
00383 private:
00384 
00385     Vector<T,N> m_data[M]; 
00386 
00387 };
00388 
00389 //== IMPLEMENTATION ===========================================================
00390 
00391 //-- Multiply -----------------------------------------------------------------
00392 
00400 template <class T, int M, int N>
00401 __host__ __device__ Vector<T,N> operator * ( const Vector<T,M>& v,
00402                                              const Matrix<T,M,N>& m ) {
00403     assert(v.size() == m.rows());
00404     Vector<T,N> r;
00405 #pragma unroll
00406     for (int j=0; j<m.cols(); ++j) {
00407         r[j] = v[0]*m[0][j];
00408 #pragma unroll
00409         for (int i=1; i<m.rows(); ++i)
00410             r[j] += v[i]*m[i][j];
00411     }
00412     return r;
00413 }
00414 
00415 //-- Basics -------------------------------------------------------------------
00416 
00423 template <class T, int N>
00424 __host__ __device__ Vector<T,N> zeros() {
00425     Vector<T,N> v;
00426     if (N>0)
00427     {
00428 #if __CUDA_ARCH__
00429 #pragma unroll
00430         for (int j=0; j<v.size(); ++j)
00431             v[j] = T();
00432 #else
00433         std::fill(&v[0], &v[N-1]+1, T());
00434 #endif
00435     }
00436     return v; // I'm hoping that RVO will kick in
00437 }
00438 
00446 template <class T, int M, int N>
00447 __host__ __device__ Matrix<T,M,N> zeros() {
00448     Matrix<T,M,N> mat;
00449     if (M>0 && N>0)
00450     {
00451 #if __CUDA_ARCH__
00452 #pragma unroll
00453         for (int i=0; i<mat.rows(); ++i)
00454 #pragma unroll
00455             for (int j=0; j<mat.cols(); ++j)
00456                 mat[i][j] = T();
00457 #else
00458         std::fill(&mat[0][0], &mat[M-1][N-1]+1, T());
00459 #endif
00460     }
00461     return mat; // I'm hoping that RVO will kick in
00462 }
00463 
00471 template <class T, int M, int N>
00472 Matrix<T,M,N> identity() {
00473     Matrix<T,M,N> mat;
00474     for (int i=0; i<M; ++i)
00475         for (int j=0; j<N; ++j)
00476             mat[i][j] = i==j ? 1 : 0;
00477     return mat;
00478 }
00479 
00488 template <class T, int M, int N>
00489 __host__ __device__ Matrix<T,N,M> transp( const Matrix<T,M,N>& m ) {
00490     Matrix<T,N,M> tm;
00491 #pragma unroll
00492     for (int i=0; i<m.rows(); ++i)
00493 #pragma unroll
00494         for (int j=0; j<m.cols(); ++j)
00495             tm[j][i] = m[i][j];
00496     return tm;
00497 }
00498 
00499 //-- Forward ------------------------------------------------------------------
00500 
00514 template <class T, int N, int R>
00515 void fwd_inplace( const Vector<T,R>& p,
00516                   Vector<T,N>& b,
00517                   const Vector<T,R+1>& w ) {
00518     for (int j=0; j<b.size(); ++j) {
00519         b[j] *= w[0];
00520         for (int k=1; k<w.size(); ++k) {
00521             if (j-k < 0)
00522                 b[j] -= p[p.size()+j-k]*w[k]; // use data from prologue
00523             else
00524                 b[j] -= b[j-k]*w[k];
00525         }
00526     }
00527 }
00528 
00543 template <class T, int M, int N, int R>
00544 void fwd_inplace( const Matrix<T,M,R>& p,
00545                   Matrix<T,M,N>& b,
00546                   const Vector<T,R+1>& w ) {
00547     for (int i=0; i<b.rows(); ++i)
00548         fwd_inplace(p[i], b[i], w);
00549 }
00550 
00571 template <class T, int M, int N, int R>
00572 Matrix<T,M,N> fwd( const Matrix<T,M,R>& p,
00573                    const Matrix<T,M,N>& b, 
00574                    const Vector<T,R+1>& w) {
00575     Matrix<T,M,N> fb = b;
00576     fwd_inplace(p, fb, w);
00577     return fb;
00578 }
00579 
00595 template <class T, int M, int N, int R>
00596 void fwd_inplace( const Matrix<T,R,N>& p,
00597                   Matrix<T,M,N>& b,
00598                   const Vector<T,R+1>& w ) {
00599     b = fwd(p, b, w);
00600 }
00601 
00623 template <class T, int M, int N, int R>
00624 Matrix<T,M,N> fwdT( const Matrix<T,R,N>& pT,
00625                     const Matrix<T,M,N>& b, 
00626                     const Vector<T,R+1>& w ) {
00627     return transp(fwd(transp(pT), transp(b), w));
00628 }
00629 
00630 //-- Reverse ------------------------------------------------------------------
00631 
00645 template <class T, int N, int R>
00646 void rev_inplace( Vector<T,N>& b,
00647                   const Vector<T,R>& e,
00648                   const Vector<T,R+1>& w ) {
00649     for (int j=b.size()-1; j>=0; --j) {
00650         b[j] *= w[0];
00651         for (int k=1; k<w.size(); ++k) {
00652             if (j+k >= b.size())
00653                 b[j] -= e[j+k-b.size()]*w[k]; // use data from epilogue
00654             else
00655                 b[j] -= b[j+k]*w[k];
00656         }
00657     }
00658 }
00659 
00674 template <class T, int M, int N, int R>
00675 void rev_inplace( Matrix<T,M,N>& b,
00676                   const Matrix<T,M,R>& e,
00677                   const Vector<T,R+1>& w ) {
00678     for (int i=0; i<b.rows(); ++i)
00679         rev_inplace(b[i], e[i], w);
00680 }
00681 
00703 template <class T, int M, int N, int R>
00704 Matrix<T,M,N> rev( const Matrix<T,M,N>& b,
00705                    const Matrix<T,M,R>& e, 
00706                    const Vector<T,R+1>& w ) {
00707     Matrix<T,M,N> rb = b;
00708     rev_inplace(rb, e, w);
00709     return rb;
00710 }
00711 
00727 template <class T, int M, int N, int R>
00728 void rev_inplace( Matrix<T,M,N>& b,
00729                   const Matrix<T,R,N>& e,
00730                   const Vector<T,R+1>& w ) {
00731     b = rev(b, e, w);
00732 }
00733 
00755 template <class T, int M, int N, int R>
00756 Matrix<T,M,N> revT( const Matrix<T,M,N>& b,
00757                     const Matrix<T,R,N>& eT,
00758                     const Vector<T,R+1>& w ) {
00759     return transp(rev(transp(b), transp(eT), w));
00760 }
00761 
00762 //-- Head ---------------------------------------------------------------------
00763 
00790 template <int R, int M, int N, class T>
00791 Matrix<T,M,R> head( const Matrix<T,M,N>& mat ) {
00792     assert(mat.cols() >= R);
00793     Matrix<T,M,R> h;
00794     for (int j=0; j<R; ++j)
00795         for (int i=0; i<mat.rows(); ++i)
00796             h[i][j] = mat[i][j];
00797     return h;
00798 }
00799 
00822 template <int R, int M, int N, class T>
00823 Matrix<T,R,N> headT( const Matrix<T,M,N>& mat ) {
00824     return transp(head<R>(transp(mat)));
00825 }
00826 
00827 //-- Tail ---------------------------------------------------------------------
00828 
00851 template <int R, int M, int N, class T>
00852 Matrix<T,M,R> tail( const Matrix<T,M,N>& mat ) {
00853     assert(mat.cols() >= R);
00854     Matrix<T,M,R> t;
00855     for (int j=0; j<R; ++j)
00856         for (int i=0; i<mat.rows(); ++i)
00857             t[i][j] = mat[i][mat.cols()-R+j];
00858     return t;
00859 }
00860 
00883 template <int R, int M, int N, class T>
00884 Matrix<T,R,N> tailT( const Matrix<T,M,N>& mat ) {
00885     return transp(tail<R>(transp(mat)));
00886 }
00887 
00888 //=============================================================================
00889 } // namespace gpufilter
00890 //=============================================================================
00891 #endif // UTIL_H
00892 //=============================================================================