Apache SINGA
A distributed deep learning platform.
 All Classes Namespaces Files Functions Variables Typedefs Enumerator Macros
tensor_expr_engine-inl.hpp
Go to the documentation of this file.
1 #ifndef MSHADOW_TENSOR_EXPR_ENGINE_INL_HPP
2 #define MSHADOW_TENSOR_EXPR_ENGINE_INL_HPP
3 
8 #include "tensor_expr.h"
9 #include "tensor.h"
10 
11 namespace mshadow{
12  namespace expr{
19  template<typename SubType, typename SrcExp, int dim>
20  struct MakeTensorExp: public Exp< MakeTensorExp<SubType,SrcExp,dim>, type::kMapper >{
24  inline const SubType& real_self( void ) const{
25  return *static_cast<const SubType*>(this);
26  }
27  };
28  };
29 
30  namespace expr{
32  template<typename ExpType>
33  class Plan{
34  public:
39  MSHADOW_XINLINE real_t Eval( index_t y, index_t x ) const;
40  };
41 
42  template <typename Device, int dim>
43  class Plan< Tensor<Device,dim> >{
44  public:
45  Plan( const Tensor<Device,dim> &t )
46  :dptr_(t.dptr),stride_(t.shape.stride_){}
47  MSHADOW_XINLINE real_t Eval( index_t y, index_t x ) const{
48  return dptr_[ y * stride_ + x ];
49  }
50  private:
51  const real_t *dptr_;
52  index_t stride_;
53  };
54  // special evaluation case for 1d tensor
55  template <typename Device>
56  class Plan< Tensor<Device,1> >{
57  public:
58  Plan( const Tensor<Device,1> &t ):dptr_(t.dptr){}
59  MSHADOW_XINLINE real_t Eval( index_t y, index_t x ) const{
60  return dptr_[ x ];
61  }
62  private:
63  const real_t *dptr_;
64  };
65 
66  template<>
67  class Plan<ScalarExp>{
68  public:
69  Plan( real_t scalar ):scalar_(scalar){}
71  MSHADOW_XINLINE real_t Eval( index_t y, index_t x ) const{
72  return scalar_;
73  }
74  private:
75  real_t scalar_;
76  };
77 
78  template<typename OP, typename TA, typename TB,int etype>
79  class Plan< BinaryMapExp<OP,TA,TB,etype> >{
80  public:
81  Plan( const Plan<TA> &lhs, const Plan<TB> &rhs )
82  :lhs_(lhs), rhs_(rhs){}
83  MSHADOW_XINLINE real_t Eval( index_t y, index_t x ) const{
84  return OP::Map( lhs_.Eval( y, x ), rhs_.Eval( y, x ) );
85  }
86  private:
87  Plan<TA> lhs_;
88  Plan<TB> rhs_;
89  };
90 
91  template<typename OP, typename TA, int etype>
92  class Plan< UnaryMapExp<OP,TA,etype> >{
93  public:
94  Plan( const Plan<TA> &src ):src_(src){}
95  MSHADOW_XINLINE real_t Eval( index_t y, index_t x ) const{
96  return OP::Map( src_.Eval( y, x ) );
97  }
98  private:
99  Plan<TA> src_;
100  };
101 
102 
103  template<typename SubType, typename SrcExp, int dim>
104  struct Plan< MakeTensorExp<SubType,SrcExp,dim> >{
105  public:
106  Plan( const Plan<SubType> &src ):src_(src){}
107  MSHADOW_XINLINE real_t Eval( index_t y, index_t x ) const{
108  return src_.Eval( y, x );
109  }
110  private:
111  Plan<SubType> src_;
112  };
113 
114  // allow UnaryMap see the plan
115  template<typename OP, typename TA, typename TB, int etype>
117 
118  // translate from exp to execution plan
119  inline Plan<ScalarExp> MakePlan( const ScalarExp &e ){
120  return Plan<ScalarExp>( e.scalar_ );
121  }
122 
123  template<typename T>
124  inline Plan<T> MakePlan( const ContainerExp<T> &e ){
125  return Plan<T>( e.self() );
126  }
127 
128  template<typename T, typename SrcExp, int dim>
129  inline Plan< T > MakePlan( const MakeTensorExp<T,SrcExp,dim> &e ){
130  return Plan< T >( e.real_self() );
131  }
132 
133  template<typename OP, typename TA, int etype>
134  inline Plan< UnaryMapExp<OP,TA,etype> > MakePlan( const UnaryMapExp<OP,TA,etype> &e ){
135  return Plan< UnaryMapExp<OP,TA,etype> >( MakePlan(e.src_) );
136  }
137 
138  template<typename OP, typename TA, typename TB, int etype>
139  inline Plan< BinaryMapExp<OP,TA,TB,etype> > MakePlan( const BinaryMapExp<OP,TA,TB,etype> &e ){
140  return Plan< BinaryMapExp<OP,TA,TB,etype> >( MakePlan(e.lhs_), MakePlan(e.rhs_) );
141  }
142  }; // namespace expr
143 
144  namespace expr{
/*!
 * \brief static type inference template, used to get the dimension of
 *        each expression; kDim == -1 marks an expression with no valid
 *        dimension, kDevMask == 0 one with no valid device
 */
template<typename E>
struct ExpInfo{
    const static int kDim = -1;
    const static int kDevMask = 0;
};
157  template<>
159  const static int kDim = 0;
160  const static int kDevMask = 0xffff;
161  };
162  template<typename Device, int dim>
163  struct ExpInfo< Tensor<Device,dim> >{
164  const static int kDim = dim;
165  const static int kDevMask = Device::kDevMask;
166  };
167  template<typename T, typename SrcExp, int dim>
168  struct ExpInfo< MakeTensorExp<T,SrcExp,dim> >{
169  const static int kDimSrc = ExpInfo<SrcExp>::kDim;
170  const static int kDim = kDimSrc >= 0 ? dim : -1;
171  const static int kDevMask = ExpInfo<SrcExp>::kDevMask;
172  };
173  template<typename OP, typename TA, int etype>
174  struct ExpInfo< UnaryMapExp<OP,TA,etype> >{
175  const static int kDim = ExpInfo<TA>::kDim;
176  const static int kDevMask = ExpInfo<TA>::kDevMask;
177  };
178  template<typename OP, typename TA, typename TB, int etype>
179  struct ExpInfo< BinaryMapExp<OP,TA,TB,etype> >{
180  const static int kDimLhs = ExpInfo<TA>::kDim;
181  const static int kDimRhs = ExpInfo<TB>::kDim;
182  const static int kDim = (kDimLhs>=0 && kDimRhs >= 0) ? \
183  ( kDimLhs==0 ? kDimRhs : ( (kDimRhs==0||kDimLhs==kDimRhs) ? kDimLhs : -1 ) ):-1;
184  const static int kDevMask = ExpInfo<TA>::kDevMask & ExpInfo<TB>::kDevMask;
185  };
186 
188  template<typename Device, int dim, typename E>
189  struct TypeCheck{
191  const static int kExpDim = ExpInfo<E>::kDim;
193  const static bool kDevPass = (ExpInfo<E>::kDevMask & Device::kDevMask) != 0;
195  const static bool kMapPass = (kExpDim == 0 || kExpDim == dim) && kDevPass;
197  const static bool kRedPass = (kExpDim > dim) && kDevPass;
198  };
199 
/*!
 * \brief compile-time check helper: only the <true> case defines the
 *        error-named members, so calling one of them on a failed check
 *        turns the member name into a readable compiler error
 */
template<bool kPass>
struct TypeCheckPass;
template<>
struct TypeCheckPass<false>{};
template<>
struct TypeCheckPass<true>{
    inline static void Error_All_Tensor_in_Exp_Must_Have_Same_Type( void ){}
    inline static void Error_TypeCheck_Not_Pass_For_Reduce_Exp( void ){}
    inline static void Error_Expression_Does_Not_Meet_Dimension_Req( void ){}
};
210  }; // namespace expr
211 
212  namespace expr{
213  // check shape consistency
214  template<int dim,typename E>
215  struct ShapeCheck{
216  inline static Shape<dim> Check( const E &t );
217  };
218 
219  template<int dim>
220  struct ShapeCheck<dim,ScalarExp>{
221  inline static Shape<dim> Check( const ScalarExp &exp ){
222  // use lowest dimension to mark scalar exp
223  Shape<dim> shape; shape[0] = 0;
224  return shape;
225  }
226  };
227  template<int dim,typename Device>
228  struct ShapeCheck<dim,Tensor<Device,dim> >{
229  inline static Shape<dim> Check( const Tensor<Device,dim> &t ){
230  return t.shape;
231  }
232  };
233  template<int dim,typename SrcExp,typename T>
234  struct ShapeCheck<dim,MakeTensorExp<T,SrcExp,dim> >{
235  inline static Shape<dim> Check( const MakeTensorExp<T,SrcExp,dim> &t ){
236  return t.shape_;
237  }
238  };
239  template<int dim, typename OP, typename TA, int etype>
240  struct ShapeCheck< dim,UnaryMapExp<OP,TA,etype> >{
241  inline static Shape<dim> Check( const UnaryMapExp<OP,TA,etype> &t ){
243  return s;
244  }
245  };
246  template<int dim, typename OP, typename TA, typename TB, int etype>
247  struct ShapeCheck< dim, BinaryMapExp<OP,TA,TB,etype> >{
248  inline static Shape<dim> Check( const BinaryMapExp<OP,TA,TB,etype> &t ){
251  if( shape1[0] == 0 ) return shape2;
252  if( shape2[0] == 0 ) return shape1;
253  utils::Assert( shape1 == shape2, "BinaryMapExp: Shapes of two tensors in BinaryMapExp expression is not the same");
254  return shape1;
255  }
256  };
257  }; // namespace expr
258 
259  // the matrix OP depends on BLAS
260  namespace expr{
261  template<typename SV,typename Device, int ddim, int ldim, int rdim, bool ltrans, bool rtrans>
262  struct DotEngine{
263  inline static void Eval( Tensor<Device,ddim> &dst, const Tensor<Device,ldim> &lhs, const Tensor<Device,rdim> &rhs, real_t scale );
264  };
265 
// handles the dot: per-device BLAS wrapper, specialized below
template<typename Device>
struct BLASEngine;
269 
#if (MSHADOW_USE_CBLAS||MSHADOW_USE_MKL)
/*! \brief CPU implementation: forwards to CBLAS/MKL, always column-major */
template<>
struct BLASEngine<cpu>{
    /*! \brief map a bool transpose flag to the CBLAS enum */
    inline static CBLAS_TRANSPOSE GetT( bool t ){
        return t ? CblasTrans : CblasNoTrans;
    }
    /*! \brief C = alpha * op(A) * op(B) + beta * C, single precision */
    inline static void gemm( bool transa, bool transb, int m, int n, int k, float alpha, \
                             const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc ){
        cblas_sgemm(CblasColMajor, GetT(transa), GetT(transb), m,n,k,alpha,A,lda,B,ldb,beta,C,ldc);
    }
    /*! \brief C = alpha * op(A) * op(B) + beta * C, double precision */
    inline static void gemm( bool transa, bool transb, int m, int n, int k, double alpha, \
                             const double *A, int lda, const double *B, int ldb, double beta, double *C, int ldc ){
        cblas_dgemm(CblasColMajor, GetT(transa), GetT(transb), m,n,k,alpha,A,lda,B,ldb,beta,C,ldc);
    }
    /*! \brief Y = alpha * op(A) * X + beta * Y, single precision */
    inline static void gemv( bool trans, int m, int n, float alpha, const float *A, int lda, \
                             const float *X, int incX, float beta, float *Y, int incY ){
        cblas_sgemv(CblasColMajor, GetT(trans), m,n,alpha,A,lda,X,incX,beta,Y,incY);
    }
    /*! \brief Y = alpha * op(A) * X + beta * Y, double precision */
    inline static void gemv( bool trans, int m, int n, double alpha, const double *A, int lda, \
                             const double *X, int incX, double beta, double *Y, int incY ){
        cblas_dgemv(CblasColMajor, GetT(trans), m,n,alpha,A,lda,X,incX,beta,Y,incY);
    }
    /*! \brief A = alpha * X * Y^T + A (rank-1 update), single precision */
    inline static void ger( int m, int n, float alpha, const float *X, int incX, const float *Y, int incY, float *A, int lda ){
        cblas_sger(CblasColMajor,m,n,alpha,X,incX,Y,incY,A,lda);
    }
    /*! \brief A = alpha * X * Y^T + A (rank-1 update), double precision */
    inline static void ger( int m, int n, double alpha, const double *X, int incX, const double *Y, int incY, double *A, int lda ){
        cblas_dger(CblasColMajor,m,n,alpha,X,incX,Y,incY,A,lda);
    }
};
#endif // MSHADOW_USE_CBLAS || MSHADOW_USE_MKL
300 
#if MSHADOW_USE_CUDA
// All CuBLAS goes to here, use legacy API: not threadsafe
/*! \brief GPU implementation: forwards to the legacy cuBLAS API */
template<>
struct BLASEngine<gpu>{
    /*! \brief map a bool transpose flag to cuBLAS's character code */
    inline static char GetT( bool t ){
        return t ? 'T' : 'N';
    }
    /*! \brief C = alpha * op(A) * op(B) + beta * C, single precision */
    inline static void gemm( bool transa, bool transb, int m, int n, int k, float alpha,
                             const float *A, int lda, const float *B, int ldb, float beta, float *C, int ldc ){
        cublasSgemm(GetT(transa),GetT(transb),m,n,k,alpha,A,lda,B,ldb,beta,C,ldc);
    }
    /*! \brief C = alpha * op(A) * op(B) + beta * C, double precision */
    inline static void gemm( bool transa, bool transb, int m, int n, int k, double alpha,
                             const double *A, int lda, const double *B, int ldb, double beta, double *C, int ldc ){
        cublasDgemm(GetT(transa),GetT(transb),m,n,k,alpha,A,lda,B,ldb,beta,C,ldc);
    }
    /*! \brief Y = alpha * op(A) * X + beta * Y, single precision */
    inline static void gemv( bool trans, int m, int n, float alpha, const float *A, int lda, \
                             const float *X, int incX, float beta, float *Y, int incY ){
        cublasSgemv(GetT(trans), m,n,alpha,A,lda,X,incX,beta,Y,incY);
    }
    /*! \brief Y = alpha * op(A) * X + beta * Y, double precision */
    inline static void gemv( bool trans, int m, int n, double alpha, const double *A, int lda, \
                             const double *X, int incX, double beta, double *Y, int incY ){
        cublasDgemv(GetT(trans), m,n,alpha,A,lda,X,incX,beta,Y,incY);
    }
    /*! \brief A = alpha * X * Y^T + A (rank-1 update), single precision */
    inline static void ger( int m, int n, float alpha, const float *X, int incX, const float *Y, int incY, float *A, int lda ){
        cublasSger(m,n,alpha,X,incX,Y,incY,A,lda);
    }
    /*! \brief A = alpha * X * Y^T + A (rank-1 update), double precision */
    inline static void ger( int m, int n, double alpha, const double *X, int incX, const double *Y, int incY, double *A, int lda ){
        cublasDger(m,n,alpha,X,incX,Y,incY,A,lda);
    }
};
#endif
332 
333  // helper function to decide which shape we are in
334  inline static Shape<2> GetShape( const Shape<2> &shape, bool transpose ){
335  return transpose ? Shape2(shape[0],shape[1]) : shape;
336  }
337  // dst = dot( lhs[.T], rhs[.T] )
338  template<typename SV, typename xpu, bool transpose_left, bool transpose_right>
339  struct DotEngine<SV,xpu,2,2,2,transpose_left,transpose_right>{
340  inline static void Eval( Tensor<xpu,2> &dst, const Tensor<xpu,2> &lhs, const Tensor<xpu,2> &rhs, real_t scale ) {
341  Shape<2> sleft = GetShape( lhs.shape, transpose_left );
342  Shape<2> sright = GetShape( rhs.shape, transpose_right );
343  utils::Assert( dst.shape[1] == sleft[1] && dst.shape[0] == sright[0] \
344  && sleft[0] == sright[1] , "dot-gemm: matrix shape mismatch" );
345  // use column major argument to compatible with most BLAS
347  ( transpose_right , transpose_left,
348  transpose_right ? rhs.shape[1] : rhs.shape[0],
349  transpose_left ? lhs.shape[0] : lhs.shape[1],
350  transpose_right ? rhs.shape[0] : rhs.shape[1],
351  scale * SV::kAlphaBLAS,
352  rhs.dptr, rhs.shape.stride_,
353  lhs.dptr, lhs.shape.stride_,
354  SV::kBetaBLAS,
355  dst.dptr, dst.shape.stride_ );
356  }
357  };
358  template<typename SV, typename xpu, bool transpose_right>
359  struct DotEngine<SV,xpu,1,1,2,false,transpose_right>{
360  inline static void Eval( Tensor<xpu,1> &dst, const Tensor<xpu,1> &lhs, const Tensor<xpu,2> &rhs, real_t scale ) {
361  Shape<2> sright = GetShape( rhs.shape, transpose_right );
362  utils::Assert( dst.shape[0] == sright[0] && lhs.shape[0] == sright[1], "dot-gemv: matrix shape mismatch");
364  ( transpose_right,
365  rhs.shape[0], rhs.shape[1], scale * SV::kAlphaBLAS,
366  rhs.dptr, rhs.shape.stride_,
367  lhs.dptr, 1, SV::kBetaBLAS,
368  dst.dptr, 1 );
369  }
370  };
371  template<typename SV, typename xpu>
372  struct DotEngine<SV,xpu,2,1,1,true,false>{
373  inline static void Eval( Tensor<xpu,2> &dst, const Tensor<xpu,1> &lhs, const Tensor<xpu,1> &rhs, real_t scale ) {
374  utils::Assert( dst.shape[1] == lhs.shape[0] && dst.shape[0] == rhs.shape[0], "dot-ger: matrix shape mismatch" );
375  if( SV::kBetaBLAS < 1e-6f ){
377  ( rhs.shape[0], lhs.shape[0], scale * SV::kAlphaBLAS,
378  rhs.dptr, 1, lhs.dptr, 1, dst.dptr, dst.shape.stride_ );
379  }else{
380  DotEngine<SV,xpu,2,2,2,true,false>::Eval( dst, lhs.FlatTo2D(), rhs.FlatTo2D(), scale );
381  }
382  }
383  };
384 
385  }; // namespace expr
386 
387  namespace expr{
389  template<typename SV, typename Device, int dim, typename E>
391  inline static void Eval( Tensor<Device,dim>& dst, const E &exp );
392  };
393  template<typename SV, typename Device, int dim>
394  struct ExpEngine<SV, Tensor<Device,dim> >{
395  template<typename E>
396  inline static void Eval( Tensor<Device,dim>& dst, const Exp<E,type::kMapper> &exp ){
397  MapExp<SV,dim,E>( dst, exp );
398  }
399  template<typename E>
400  inline static void Eval( Tensor<Device,dim>& dst, const Exp<E,type::kContainer> &exp ){
401  MapExp<SV,dim,E>( dst, exp );
402  }
403  template<typename E>
404  inline static void Eval( Tensor<Device,dim>& dst, const Exp<E,type::kComplex> &exp ){
406  }
407  };
408  template<typename SV, typename Device, int dim, int ldim,int rdim,bool ltrans,bool rtrans>
409  struct ExpComplexEngine< SV, Device, dim, DotExp< Tensor<Device,ldim>, Tensor<Device,rdim>, ltrans, rtrans > >{
410  inline static void Eval( Tensor<Device,dim> &dst, const DotExp< Tensor<Device,ldim>, Tensor<Device,rdim>, ltrans, rtrans > &exp ){
411  DotEngine<SV,Device,dim,ldim,rdim,ltrans,rtrans>::Eval( dst, exp.lhs_, exp.rhs_, exp.scale_ );
412  }
413  };
414  }; // namespace expr
415 };
416 #endif
static const bool kMapPass
whether the expression can be mapped to expression of dim
Definition: tensor_expr_engine-inl.hpp:195
unsigned index_t
type that will be used for index
Definition: tensor_base.h:123
Shape< dim > shape_
the shape of this expression
Definition: tensor_expr_engine-inl.hpp:22
Definition: tensor_expr_engine-inl.hpp:67
This part of code gives plan that can be used to carry out execution.
Definition: tensor_expr_engine-inl.hpp:33
Definition: tensor_expr_engine-inl.hpp:201
template to do type check
Definition: tensor_expr_engine-inl.hpp:189
binary map expression lhs [op] rhs
Definition: tensor_expr.h:225
void Assert(bool exp)
assert an expression is true
Definition: tensor_base.h:285
static const bool kRedPass
whether the expression can be reduced to expression of dim
Definition: tensor_expr_engine-inl.hpp:197
const SubType & real_self(void) const
true self of subtype
Definition: tensor_expr_engine-inl.hpp:24
static const int kExpDim
dimension of expression
Definition: tensor_expr_engine-inl.hpp:191
Definition: tensor_expr_engine-inl.hpp:262
float real_t
type that will be used for content
Definition: tensor_base.h:118
const SubType & self(void) const
Definition: tensor_expr.h:52
header file of tensor data structure and functions convention: this lib requires explicit memory alloc...
device name CPU
Definition: tensor.h:185
device name GPU
Definition: tensor.h:192
const TA & src_
source expression
Definition: tensor_expr.h:342
static type inference template, used to get the dimension of each expression, if ExpInfo<E>::kDim == ...
Definition: tensor_expr_engine-inl.hpp:153
MSHADOW_XINLINE Shape< 2 > Shape2(index_t s1, index_t s0)
construct a two dimension shape, stride will equal s0
Definition: tensor.h:152
Definition: tensor.h:276
MSHADOW_XINLINE real_t Eval(index_t y, index_t x) const
evaluate the expression at index [y][x] to be implemented by SubType
Definition: tensor_expr_engine-inl.hpp:215
static const bool kDevPass
whether the expression device type matches
Definition: tensor_expr_engine-inl.hpp:193
some engine that evaluate complex expression
Definition: tensor_expr_engine-inl.hpp:390
real_t * dptr
pointer to the data
Definition: tensor.h:215
MSHADOW_XINLINE Tensor< Device, 2 > FlatTo2D(void) const
flatten the tensor to 2 dimension, collapse the higher dimensions together
Definition: tensor.h:229
MSHADOW_XINLINE real_t Eval(index_t y, index_t x) const
evaluate at [y][x]
Definition: tensor_expr_engine-inl.hpp:71
unary map expression op(src)
Definition: tensor_expr.h:340
matrix multiplication expression dot( lhs[.T], rhs[.T] )
Definition: tensor_expr.h:172
Shape< dimension > shape
shape of the tensor
Definition: tensor.h:217
scalar expression
Definition: tensor_expr.h:62
base class for expression
Definition: tensor_expr.h:49
const TA & lhs_
left operand
Definition: tensor_expr.h:227
a general class that allows extension that makes tensors of some shape
Definition: tensor_expr_engine-inl.hpp:20
definitions of abstract expressions and expressions template
expression engine that actually interprets these expressions this is a function template that needed ...
Definition: tensor_expr.h:34
real_t scalar_
scalar value
Definition: tensor_expr.h:64
Definition: tensor_expr_engine-inl.hpp:268
general tensor
Definition: tensor.h:206
const TB & rhs_
right operand
Definition: tensor_expr.h:229