| [658] | 1 | #ifndef BZ_ARRAYSTENCIL_CC
 | 
|---|
 | 2 | #define BZ_ARRAYSTENCIL_CC
 | 
|---|
 | 3 | 
 | 
|---|
 | 4 | #ifndef BZ_ARRAYSTENCIL_H
 | 
|---|
 | 5 |  #error <blitz/array/stencil.cc> must be included via <blitz/array/stencil.h>
 | 
|---|
 | 6 | #endif
 | 
|---|
 | 7 | 
 | 
|---|
 | 8 | BZ_NAMESPACE(blitz)
 | 
|---|
 | 9 | 
 | 
|---|
 | 10 | // NEEDS_WORK:
 | 
|---|
 | 11 | // o Need to allow scalar arguments as well as arrays
 | 
|---|
 | 12 | // o Unit stride optimization
 | 
|---|
 | 13 | // o Tiling
 | 
|---|
 | 14 | // o Pass coordinate vector to stencil, so that where-like constructs
 | 
|---|
 | 15 | //   can depend on location
 | 
|---|
 | 16 | // o Maybe allow expression templates to be passed as
 | 
|---|
 | 17 | //   array parameters?
 | 
|---|
 | 18 | 
 | 
|---|
 | 19 | /*
 | 
|---|
 | 20 |  * There are a lot of kludges in this code to work around the fact that
 | 
|---|
 | 21 |  * you can't have default template parameters with function templates.
 | 
|---|
 | 22 |  * Ideally, one would implement applyStencil(..) as:
 | 
|---|
 | 23 |  *
 | 
|---|
 | 24 |  * template<class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
 | 25 |  *    class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 26 |  *    class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
 | 27 |  *    class T_array11>
 | 
|---|
 | 28 |  * void applyStencil(const T_stencil& stencil, Array<T_numtype1,3>& A,
 | 
|---|
 | 29 |  *    T_array2& B = _dummyArray, T_array3& C = _dummyArray, ......)
 | 
|---|
 | 30 |  *
 | 
|---|
 | 31 |  * and allow for up to (say) 11 arrays to be passed.  But this doesn't
 | 
|---|
 | 32 |  * appear to be legal C++.  Instead, 11 versions of applyStencil are
 | 
|---|
 | 33 |  * provided, each one with a different number of array parameters,
 | 
|---|
 | 34 |  * and these stubs fill in the _dummyArray parameters and invoke
 | 
|---|
 | 35 |  * applyStencil_imp().
 | 
|---|
 | 36 |  */
 | 
|---|
 | 37 | 
 | 
|---|
 | 38 | template<int N_rank, class T_numtype1, class T_array2,
 | 
|---|
 | 39 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 40 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
 | 41 |     class T_array11>
 | 
|---|
 | 42 | inline void checkShapes(const Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 43 |     const T_array2& B, const T_array3& C, const T_array4& D, 
 | 
|---|
 | 44 |     const T_array5& E, const T_array6& F, const T_array7& G, 
 | 
|---|
 | 45 |     const T_array8& H, const T_array9& I, const T_array10& J, 
 | 
|---|
 | 46 |     const T_array11& K)
 | 
|---|
 | 47 | {
 | 
|---|
 | 48 |     BZPRECONDITION(areShapesConformable(A.shape(),B.shape())
 | 
|---|
 | 49 |         && areShapesConformable(A.shape(),C.shape())
 | 
|---|
 | 50 |         && areShapesConformable(A.shape(),D.shape())
 | 
|---|
 | 51 |         && areShapesConformable(A.shape(),E.shape())
 | 
|---|
 | 52 |         && areShapesConformable(A.shape(),F.shape())
 | 
|---|
 | 53 |         && areShapesConformable(A.shape(),G.shape())
 | 
|---|
 | 54 |         && areShapesConformable(A.shape(),H.shape())
 | 
|---|
 | 55 |         && areShapesConformable(A.shape(),I.shape())
 | 
|---|
 | 56 |         && areShapesConformable(A.shape(),J.shape())
 | 
|---|
 | 57 |         && areShapesConformable(A.shape(),K.shape()));
 | 
|---|
 | 58 | }
 | 
|---|
 | 59 | 
 | 
|---|
 | 60 | template<class T_extent, int N_rank, 
 | 
|---|
 | 61 |     class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
 | 62 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 63 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
 | 64 |     class T_array11>
 | 
|---|
 | 65 | inline void calcStencilExtent(T_extent& At, const T_stencil& stencil, 
 | 
|---|
 | 66 |     const Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 67 |     const T_array2& B, const T_array3& C, const T_array4& D, const T_array5& E, 
 | 
|---|
 | 68 |     const T_array6& F, const T_array7& G, const T_array8& H, const T_array9& I, 
 | 
|---|
 | 69 |     const T_array10& J, const T_array11& K)
 | 
|---|
 | 70 | {
 | 
|---|
 | 71 |     // Interrogate the stencil to find out its extent
 | 
|---|
 | 72 |     stencilExtent_traits<T_array2>::T_stencilExtent Bt;
 | 
|---|
 | 73 |     stencilExtent_traits<T_array3>::T_stencilExtent Ct;
 | 
|---|
 | 74 |     stencilExtent_traits<T_array4>::T_stencilExtent Dt;
 | 
|---|
 | 75 |     stencilExtent_traits<T_array5>::T_stencilExtent Et;
 | 
|---|
 | 76 |     stencilExtent_traits<T_array6>::T_stencilExtent Ft;
 | 
|---|
 | 77 |     stencilExtent_traits<T_array7>::T_stencilExtent Gt;
 | 
|---|
 | 78 |     stencilExtent_traits<T_array8>::T_stencilExtent Ht;
 | 
|---|
 | 79 |     stencilExtent_traits<T_array9>::T_stencilExtent It;
 | 
|---|
 | 80 |     stencilExtent_traits<T_array10>::T_stencilExtent Jt;
 | 
|---|
 | 81 |     stencilExtent_traits<T_array11>::T_stencilExtent Kt;
 | 
|---|
 | 82 | 
 | 
|---|
 | 83 |     stencil.apply(At, Bt, Ct, Dt, Et, Ft, Gt, Ht, It, Jt, Kt);
 | 
|---|
 | 84 |     At.combine(Bt);
 | 
|---|
 | 85 |     At.combine(Ct);
 | 
|---|
 | 86 |     At.combine(Dt);
 | 
|---|
 | 87 |     At.combine(Et);
 | 
|---|
 | 88 |     At.combine(Ft);
 | 
|---|
 | 89 |     At.combine(Gt);
 | 
|---|
 | 90 |     At.combine(Ht);
 | 
|---|
 | 91 |     At.combine(It);
 | 
|---|
 | 92 |     At.combine(Jt);
 | 
|---|
 | 93 |     At.combine(Kt);
 | 
|---|
 | 94 | }
 | 
|---|
 | 95 | 
 | 
|---|
 | 96 | template<int N_rank, class T_stencil, class T_numtype1, class T_array2>
 | 
|---|
 | 97 | inline RectDomain<N_rank> interiorDomain(const T_stencil& stencil,
 | 
|---|
 | 98 |     const Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 99 |     const T_array2& B)
 | 
|---|
 | 100 | {
 | 
|---|
 | 101 |     RectDomain<N_rank> domain = A.domain();
 | 
|---|
 | 102 | 
 | 
|---|
 | 103 |     // Interrogate the stencil to find out its extent
 | 
|---|
 | 104 |     stencilExtent<3, T_numtype1> At;
 | 
|---|
 | 105 |     calcStencilExtent(At, stencil, A, B, _dummyArray, _dummyArray, 
 | 
|---|
 | 106 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray, 
 | 
|---|
 | 107 |         _dummyArray, _dummyArray);
 | 
|---|
 | 108 | 
 | 
|---|
 | 109 |     // Shrink the domain according to the stencil size
 | 
|---|
 | 110 |     TinyVector<int,N_rank> lbound, ubound;
 | 
|---|
 | 111 |     lbound = domain.lbound() - At.min();
 | 
|---|
 | 112 |     ubound = domain.ubound() - At.max();
 | 
|---|
 | 113 |     return RectDomain<N_rank>(lbound,ubound);
 | 
|---|
 | 114 | }
 | 
|---|
 | 115 | 
 | 
|---|
 | 116 | /*
 | 
|---|
 | 117 |  * This version applies a stencil to a set of 3D arrays.  Up to 11 arrays
 | 
|---|
 | 118 |  * may be used.  Any unused arrays are turned into dummyArray objects.
 | 
|---|
 | 119 |  * Operations on dummyArray objects are translated into no-ops.
 | 
|---|
 | 120 |  */
 | 
|---|
 | 121 | template<class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
 | 122 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 123 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
 | 124 |     class T_array11>
 | 
|---|
 | 125 | void applyStencil_imp(const T_stencil& stencil, Array<T_numtype1,3>& A,
 | 
|---|
 | 126 |     T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
 | 127 |     T_array7& G, T_array8& H, T_array9& I, T_array10& J, T_array11& K)
 | 
|---|
 | 128 | {
 | 
|---|
 | 129 |     checkShapes(A,B,C,D,E,F,G,H,I,J,K);
 | 
|---|
 | 130 |   
 | 
|---|
 | 131 |     // Interrogate the stencil to find out its extent
 | 
|---|
 | 132 |     stencilExtent<3, T_numtype1> At;
 | 
|---|
 | 133 |     calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
 | 
|---|
 | 134 | 
 | 
|---|
 | 135 |     // Now determine the subdomain over which the stencil
 | 
|---|
 | 136 |     // can be applied without worrying about overrunning the
 | 
|---|
 | 137 |     // boundaries of the array
 | 
|---|
 | 138 |     int stencil_lbound0 = At.min(0);
 | 
|---|
 | 139 |     int stencil_lbound1 = At.min(1);
 | 
|---|
 | 140 |     int stencil_lbound2 = At.min(2);
 | 
|---|
 | 141 | 
 | 
|---|
 | 142 |     int stencil_ubound0 = At.max(0);
 | 
|---|
 | 143 |     int stencil_ubound1 = At.max(1);
 | 
|---|
 | 144 |     int stencil_ubound2 = At.max(2);
 | 
|---|
 | 145 | 
 | 
|---|
 | 146 |     int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
 | 
|---|
 | 147 |     int lbound1 = max(A.lbound(1), A.lbound(1) - stencil_lbound1);
 | 
|---|
 | 148 |     int lbound2 = max(A.lbound(2), A.lbound(2) - stencil_lbound2);
 | 
|---|
 | 149 | 
 | 
|---|
 | 150 |     int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
 | 
|---|
 | 151 |     int ubound1 = min(A.ubound(1), A.ubound(1) - stencil_ubound1);
 | 
|---|
 | 152 |     int ubound2 = min(A.ubound(2), A.ubound(2) - stencil_ubound2);
 | 
|---|
 | 153 | 
 | 
|---|
 | 154 | #if 0
 | 
|---|
 | 155 |     cout << "Stencil bounds are:" << endl
 | 
|---|
 | 156 |      << lbound0 << '\t' << ubound0 << endl
 | 
|---|
 | 157 |      << lbound1 << '\t' << ubound1 << endl
 | 
|---|
 | 158 |      << lbound2 << '\t' << ubound2 << endl;
 | 
|---|
 | 159 | #endif
 | 
|---|
 | 160 | 
 | 
|---|
 | 161 |     // Now do the actual loop
 | 
|---|
 | 162 |     ArrayIterator<T_numtype1,3> Aiter(A);
 | 
|---|
 | 163 |     _bz_typename T_array2::T_iterator Biter(B);
 | 
|---|
 | 164 |     _bz_typename T_array3::T_iterator Citer(C);
 | 
|---|
 | 165 |     _bz_typename T_array4::T_iterator Diter(D);
 | 
|---|
 | 166 |     _bz_typename T_array5::T_iterator Eiter(E);
 | 
|---|
 | 167 |     _bz_typename T_array6::T_iterator Fiter(F);
 | 
|---|
 | 168 |     _bz_typename T_array7::T_iterator Giter(G);
 | 
|---|
 | 169 |     _bz_typename T_array8::T_iterator Hiter(H);
 | 
|---|
 | 170 |     _bz_typename T_array9::T_iterator Iiter(I);
 | 
|---|
 | 171 |     _bz_typename T_array10::T_iterator Jiter(J);
 | 
|---|
 | 172 |     _bz_typename T_array11::T_iterator Kiter(K);
 | 
|---|
 | 173 | 
 | 
|---|
 | 174 |     // Load the strides for the innermost loop
 | 
|---|
 | 175 |     Aiter.loadStride(2);
 | 
|---|
 | 176 |     Biter.loadStride(2);
 | 
|---|
 | 177 |     Citer.loadStride(2);
 | 
|---|
 | 178 |     Diter.loadStride(2);
 | 
|---|
 | 179 |     Eiter.loadStride(2);
 | 
|---|
 | 180 |     Fiter.loadStride(2);
 | 
|---|
 | 181 |     Giter.loadStride(2);
 | 
|---|
 | 182 |     Hiter.loadStride(2);
 | 
|---|
 | 183 |     Iiter.loadStride(2);
 | 
|---|
 | 184 |     Jiter.loadStride(2);
 | 
|---|
 | 185 |     Kiter.loadStride(2);
 | 
|---|
 | 186 | 
 | 
|---|
 | 187 |     for (int i=lbound0; i <= ubound0; ++i)
 | 
|---|
 | 188 |     {
 | 
|---|
 | 189 |       for (int j=lbound1; j <= ubound1; ++j)
 | 
|---|
 | 190 |       {
 | 
|---|
 | 191 |         Aiter.moveTo(i,j,lbound2);
 | 
|---|
 | 192 |         Biter.moveTo(i,j,lbound2);
 | 
|---|
 | 193 |         Citer.moveTo(i,j,lbound2);
 | 
|---|
 | 194 |         Diter.moveTo(i,j,lbound2);
 | 
|---|
 | 195 |         Eiter.moveTo(i,j,lbound2);
 | 
|---|
 | 196 |         Fiter.moveTo(i,j,lbound2);
 | 
|---|
 | 197 |         Giter.moveTo(i,j,lbound2);
 | 
|---|
 | 198 |         Hiter.moveTo(i,j,lbound2);
 | 
|---|
 | 199 |         Iiter.moveTo(i,j,lbound2);
 | 
|---|
 | 200 |         Jiter.moveTo(i,j,lbound2);
 | 
|---|
 | 201 |         Kiter.moveTo(i,j,lbound2);
 | 
|---|
 | 202 | 
 | 
|---|
 | 203 |         for (int k=lbound2; k <= ubound2; ++k)
 | 
|---|
 | 204 |         {
 | 
|---|
 | 205 |             stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
 | 
|---|
 | 206 |                 Hiter, Iiter, Jiter, Kiter);
 | 
|---|
 | 207 | 
 | 
|---|
 | 208 |             Aiter.advance();
 | 
|---|
 | 209 |             Biter.advance();
 | 
|---|
 | 210 |             Citer.advance();
 | 
|---|
 | 211 |             Diter.advance();
 | 
|---|
 | 212 |             Eiter.advance();
 | 
|---|
 | 213 |             Fiter.advance();
 | 
|---|
 | 214 |             Giter.advance();
 | 
|---|
 | 215 |             Hiter.advance();
 | 
|---|
 | 216 |             Iiter.advance();
 | 
|---|
 | 217 |             Jiter.advance();
 | 
|---|
 | 218 |             Kiter.advance();
 | 
|---|
 | 219 |         }
 | 
|---|
 | 220 |       }
 | 
|---|
 | 221 |     }
 | 
|---|
 | 222 | }
 | 
|---|
 | 223 | 
 | 
|---|
 | 224 | /*
 | 
|---|
 | 225 |  * This version applies a stencil to a set of 2D arrays.  Up to 11 arrays
 | 
|---|
 | 226 |  * may be used.  Any unused arrays are turned into dummyArray objects.
 | 
|---|
 | 227 |  * Operations on dummyArray objects are translated into no-ops.
 | 
|---|
 | 228 |  */
 | 
|---|
 | 229 | template<class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
 | 230 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 231 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
 | 232 |     class T_array11>
 | 
|---|
 | 233 | void applyStencil_imp(const T_stencil& stencil, const Array<T_numtype1,2>& A,
 | 
|---|
 | 234 |     const T_array2& B, const T_array3& C, const T_array4& D, 
 | 
|---|
 | 235 |     const T_array5& E, const T_array6& F, const T_array7& G, 
 | 
|---|
 | 236 |     const T_array8& H, const T_array9& I, const T_array10& J, 
 | 
|---|
 | 237 |     const T_array11& K)
 | 
|---|
 | 238 | {
 | 
|---|
 | 239 |     checkShapes(A,B,C,D,E,F,G,H,I,J,K);
 | 
|---|
 | 240 | 
 | 
|---|
 | 241 |     // Interrogate the stencil to find out its extent
 | 
|---|
 | 242 |     stencilExtent<2, T_numtype1> At;
 | 
|---|
 | 243 |     calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
 | 
|---|
 | 244 | 
 | 
|---|
 | 245 |     // Now determine the subdomain over which the stencil
 | 
|---|
 | 246 |     // can be applied without worrying about overrunning the
 | 
|---|
 | 247 |     // boundaries of the array
 | 
|---|
 | 248 |     int stencil_lbound0 = At.min(0);
 | 
|---|
 | 249 |     int stencil_lbound1 = At.min(1);
 | 
|---|
 | 250 | 
 | 
|---|
 | 251 |     int stencil_ubound0 = At.max(0);
 | 
|---|
 | 252 |     int stencil_ubound1 = At.max(1);
 | 
|---|
 | 253 | 
 | 
|---|
 | 254 |     int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
 | 
|---|
 | 255 |     int lbound1 = max(A.lbound(1), A.lbound(1) - stencil_lbound1);
 | 
|---|
 | 256 | 
 | 
|---|
 | 257 |     int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
 | 
|---|
 | 258 |     int ubound1 = min(A.ubound(1), A.ubound(1) - stencil_ubound1);
 | 
|---|
 | 259 | 
 | 
|---|
 | 260 | #if 0
 | 
|---|
 | 261 |     cout << "Stencil bounds are:" << endl
 | 
|---|
 | 262 |      << lbound0 << '\t' << ubound0 << endl
 | 
|---|
 | 263 |      << lbound1 << '\t' << ubound1 << endl;
 | 
|---|
 | 264 | #endif 
 | 
|---|
 | 265 | 
 | 
|---|
 | 266 |     // Now do the actual loop
 | 
|---|
 | 267 |     ArrayIterator<T_numtype1,2> Aiter(A);
 | 
|---|
 | 268 |     _bz_typename T_array2::T_iterator Biter(B);
 | 
|---|
 | 269 |     _bz_typename T_array3::T_iterator Citer(C);
 | 
|---|
 | 270 |     _bz_typename T_array4::T_iterator Diter(D);
 | 
|---|
 | 271 |     _bz_typename T_array5::T_iterator Eiter(E);
 | 
|---|
 | 272 |     _bz_typename T_array6::T_iterator Fiter(F);
 | 
|---|
 | 273 |     _bz_typename T_array7::T_iterator Giter(G);
 | 
|---|
 | 274 |     _bz_typename T_array8::T_iterator Hiter(H);
 | 
|---|
 | 275 |     _bz_typename T_array9::T_iterator Iiter(I);
 | 
|---|
 | 276 |     _bz_typename T_array10::T_iterator Jiter(J);
 | 
|---|
 | 277 |     _bz_typename T_array11::T_iterator Kiter(K);
 | 
|---|
 | 278 | 
 | 
|---|
 | 279 |     // Load the strides for the innermost loop
 | 
|---|
 | 280 |     Aiter.loadStride(1);
 | 
|---|
 | 281 |     Biter.loadStride(1);
 | 
|---|
 | 282 |     Citer.loadStride(1);
 | 
|---|
 | 283 |     Diter.loadStride(1);
 | 
|---|
 | 284 |     Eiter.loadStride(1);
 | 
|---|
 | 285 |     Fiter.loadStride(1);
 | 
|---|
 | 286 |     Giter.loadStride(1);
 | 
|---|
 | 287 |     Hiter.loadStride(1);
 | 
|---|
 | 288 |     Iiter.loadStride(1);
 | 
|---|
 | 289 |     Jiter.loadStride(1);
 | 
|---|
 | 290 |     Kiter.loadStride(1);
 | 
|---|
 | 291 | 
 | 
|---|
 | 292 |     for (int i=lbound0; i <= ubound0; ++i)
 | 
|---|
 | 293 |     {
 | 
|---|
 | 294 |         Aiter.moveTo(i,lbound1);
 | 
|---|
 | 295 |         Biter.moveTo(i,lbound1);
 | 
|---|
 | 296 |         Citer.moveTo(i,lbound1);
 | 
|---|
 | 297 |         Diter.moveTo(i,lbound1);
 | 
|---|
 | 298 |         Eiter.moveTo(i,lbound1);
 | 
|---|
 | 299 |         Fiter.moveTo(i,lbound1);
 | 
|---|
 | 300 |         Giter.moveTo(i,lbound1);
 | 
|---|
 | 301 |         Hiter.moveTo(i,lbound1);
 | 
|---|
 | 302 |         Iiter.moveTo(i,lbound1);
 | 
|---|
 | 303 |         Jiter.moveTo(i,lbound1);
 | 
|---|
 | 304 |         Kiter.moveTo(i,lbound1);
 | 
|---|
 | 305 | 
 | 
|---|
 | 306 |         for (int k=lbound1; k <= ubound1; ++k)
 | 
|---|
 | 307 |         {
 | 
|---|
 | 308 |             stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
 | 
|---|
 | 309 |                 Hiter, Iiter, Jiter, Kiter);
 | 
|---|
 | 310 | 
 | 
|---|
 | 311 |             Aiter.advance();
 | 
|---|
 | 312 |             Biter.advance();
 | 
|---|
 | 313 |             Citer.advance();
 | 
|---|
 | 314 |             Diter.advance();
 | 
|---|
 | 315 |             Eiter.advance();
 | 
|---|
 | 316 |             Fiter.advance();
 | 
|---|
 | 317 |             Giter.advance();
 | 
|---|
 | 318 |             Hiter.advance();
 | 
|---|
 | 319 |             Iiter.advance();
 | 
|---|
 | 320 |             Jiter.advance();
 | 
|---|
 | 321 |             Kiter.advance();
 | 
|---|
 | 322 |         }
 | 
|---|
 | 323 |     }
 | 
|---|
 | 324 | }
 | 
|---|
 | 325 | 
 | 
|---|
 | 326 | /*
 | 
|---|
 | 327 |  * This version applies a stencil to a set of 1D arrays.  Up to 11 arrays
 | 
|---|
 | 328 |  * may be used.  Any unused arrays are turned into dummyArray objects.
 | 
|---|
 | 329 |  * Operations on dummyArray objects are translated into no-ops.
 | 
|---|
 | 330 |  */
 | 
|---|
 | 331 | template<class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
 | 332 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 333 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
 | 334 |     class T_array11>
 | 
|---|
 | 335 | void applyStencil_imp(const T_stencil& stencil, const Array<T_numtype1,1>& A,
 | 
|---|
 | 336 |     const T_array2& B, const T_array3& C, const T_array4& D,
 | 
|---|
 | 337 |     const T_array5& E, const T_array6& F, const T_array7& G,
 | 
|---|
 | 338 |     const T_array8& H, const T_array9& I, const T_array10& J,
 | 
|---|
 | 339 |     const T_array11& K)
 | 
|---|
 | 340 | {
 | 
|---|
 | 341 |     checkShapes(A,B,C,D,E,F,G,H,I,J,K);
 | 
|---|
 | 342 | 
 | 
|---|
 | 343 |     // Interrogate the stencil to find out its extent
 | 
|---|
 | 344 |     stencilExtent<1, T_numtype1> At;
 | 
|---|
 | 345 |     calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
 | 
|---|
 | 346 | 
 | 
|---|
 | 347 |     // Now determine the subdomain over which the stencil
 | 
|---|
 | 348 |     // can be applied without worrying about overrunning the
 | 
|---|
 | 349 |     // boundaries of the array
 | 
|---|
 | 350 |     int stencil_lbound0 = At.min(0);
 | 
|---|
 | 351 | 
 | 
|---|
 | 352 |     int stencil_ubound0 = At.max(0);
 | 
|---|
 | 353 | 
 | 
|---|
 | 354 |     int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
 | 
|---|
 | 355 |     int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
 | 
|---|
 | 356 | 
 | 
|---|
 | 357 | #if 0
 | 
|---|
 | 358 |     cout << "Stencil bounds are:" << endl
 | 
|---|
 | 359 |      << lbound0 << '\t' << ubound0 << endl;
 | 
|---|
 | 360 | #endif
 | 
|---|
 | 361 | 
 | 
|---|
 | 362 |     // Now do the actual loop
 | 
|---|
 | 363 |     ArrayIterator<T_numtype1,1> Aiter(A);
 | 
|---|
 | 364 |     _bz_typename T_array2::T_iterator Biter(B);
 | 
|---|
 | 365 |     _bz_typename T_array3::T_iterator Citer(C);
 | 
|---|
 | 366 |     _bz_typename T_array4::T_iterator Diter(D);
 | 
|---|
 | 367 |     _bz_typename T_array5::T_iterator Eiter(E);
 | 
|---|
 | 368 |     _bz_typename T_array6::T_iterator Fiter(F);
 | 
|---|
 | 369 |     _bz_typename T_array7::T_iterator Giter(G);
 | 
|---|
 | 370 |     _bz_typename T_array8::T_iterator Hiter(H);
 | 
|---|
 | 371 |     _bz_typename T_array9::T_iterator Iiter(I);
 | 
|---|
 | 372 |     _bz_typename T_array10::T_iterator Jiter(J);
 | 
|---|
 | 373 |     _bz_typename T_array11::T_iterator Kiter(K);
 | 
|---|
 | 374 | 
 | 
|---|
 | 375 |     // Load the strides for the innermost loop
 | 
|---|
 | 376 |     Aiter.loadStride(0);
 | 
|---|
 | 377 |     Biter.loadStride(0);
 | 
|---|
 | 378 |     Citer.loadStride(0);
 | 
|---|
 | 379 |     Diter.loadStride(0);
 | 
|---|
 | 380 |     Eiter.loadStride(0);
 | 
|---|
 | 381 |     Fiter.loadStride(0);
 | 
|---|
 | 382 |     Giter.loadStride(0);
 | 
|---|
 | 383 |     Hiter.loadStride(0);
 | 
|---|
 | 384 |     Iiter.loadStride(0);
 | 
|---|
 | 385 |     Jiter.loadStride(0);
 | 
|---|
 | 386 |     Kiter.loadStride(0);
 | 
|---|
 | 387 | 
 | 
|---|
 | 388 |     for (int i=lbound0; i <= ubound0; ++i)
 | 
|---|
 | 389 |     {
 | 
|---|
 | 390 |         stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
 | 
|---|
 | 391 |             Hiter, Iiter, Jiter, Kiter);
 | 
|---|
 | 392 | 
 | 
|---|
 | 393 |         Aiter.advance();
 | 
|---|
 | 394 |         Biter.advance();
 | 
|---|
 | 395 |         Citer.advance();
 | 
|---|
 | 396 |         Diter.advance();
 | 
|---|
 | 397 |         Eiter.advance();
 | 
|---|
 | 398 |         Fiter.advance();
 | 
|---|
 | 399 |         Giter.advance();
 | 
|---|
 | 400 |         Hiter.advance();
 | 
|---|
 | 401 |         Iiter.advance();
 | 
|---|
 | 402 |         Jiter.advance();
 | 
|---|
 | 403 |         Kiter.advance();
 | 
|---|
 | 404 |     }
 | 
|---|
 | 405 | }
 | 
|---|
 | 406 | 
 | 
|---|
 | 407 | /*
 | 
|---|
 | 408 |  * These 11 versions of applyStencil handle from 1 to 11 array parameters.
 | 
|---|
 | 409 |  * They pad their argument list with enough dummyArray objects to call
 | 
|---|
 | 410 |  * applyStencil_imp with 11 array parameters.
 | 
|---|
 | 411 |  */
 | 
|---|
 | 412 | template<class T_stencil, class T_numtype1, int N_rank>
 | 
|---|
 | 413 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A)
 | 
|---|
 | 414 | {
 | 
|---|
 | 415 |     applyStencil_imp(stencil, A, _dummyArray, _dummyArray,
 | 
|---|
 | 416 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray,
 | 
|---|
 | 417 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
 | 418 | }
 | 
|---|
 | 419 | 
 | 
|---|
 | 420 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2>
 | 
|---|
 | 421 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 422 |     T_array2& B)
 | 
|---|
 | 423 | {
 | 
|---|
 | 424 |     applyStencil_imp(stencil, A, B, _dummyArray, _dummyArray,
 | 
|---|
 | 425 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray,
 | 
|---|
 | 426 |         _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
 | 427 | }
 | 
|---|
 | 428 | 
 | 
|---|
 | 429 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 430 |     class T_array3>
 | 
|---|
 | 431 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 432 |     T_array2& B, T_array3& C)
 | 
|---|
 | 433 | {
 | 
|---|
 | 434 |     applyStencil_imp(stencil, A, B, C, _dummyArray, _dummyArray,
 | 
|---|
 | 435 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray,
 | 
|---|
 | 436 |         _dummyArray);
 | 
|---|
 | 437 | }
 | 
|---|
 | 438 | 
 | 
|---|
 | 439 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 440 |     class T_array3, class T_array4>
 | 
|---|
 | 441 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 442 |     T_array2& B, T_array3& C, T_array4& D)
 | 
|---|
 | 443 | {
 | 
|---|
 | 444 |     applyStencil_imp(stencil, A, B, C, D, _dummyArray, _dummyArray,
 | 
|---|
 | 445 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
 | 446 | }
 | 
|---|
 | 447 | 
 | 
|---|
 | 448 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 449 |     class T_array3, class T_array4, class T_array5>
 | 
|---|
 | 450 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 451 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E)
 | 
|---|
 | 452 | {
 | 
|---|
 | 453 |     applyStencil_imp(stencil, A, B, C, D, E, _dummyArray,
 | 
|---|
 | 454 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
 | 455 | }
 | 
|---|
 | 456 | 
 | 
|---|
 | 457 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 458 |     class T_array3, class T_array4, class T_array5, class T_array6>
 | 
|---|
 | 459 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 460 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F)
 | 
|---|
 | 461 | {
 | 
|---|
 | 462 |     applyStencil_imp(stencil, A, B, C, D, E, F,
 | 
|---|
 | 463 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
 | 464 | }
 | 
|---|
 | 465 | 
 | 
|---|
 | 466 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 467 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 468 |     class T_array7>
 | 
|---|
 | 469 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 470 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
 | 471 |    T_array7& G)
 | 
|---|
 | 472 | {
 | 
|---|
 | 473 |     applyStencil_imp(stencil, A, B, C, D, E, F, G,
 | 
|---|
 | 474 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
 | 475 | }
 | 
|---|
 | 476 | 
 | 
|---|
 | 477 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 478 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 479 |     class T_array7, class T_array8>
 | 
|---|
 | 480 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 481 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
 | 482 |    T_array7& G, T_array8& H)
 | 
|---|
 | 483 | {
 | 
|---|
 | 484 |     applyStencil_imp(stencil, A, B, C, D, E, F, G, H,
 | 
|---|
 | 485 |         _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
 | 486 | }
 | 
|---|
 | 487 | 
 | 
|---|
 | 488 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 489 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 490 |     class T_array7, class T_array8, class T_array9>
 | 
|---|
 | 491 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 492 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
 | 493 |    T_array7& G, T_array8& H, T_array9& I)
 | 
|---|
 | 494 | {
 | 
|---|
 | 495 |     applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I,
 | 
|---|
 | 496 |         _dummyArray, _dummyArray);
 | 
|---|
 | 497 | }
 | 
|---|
 | 498 | 
 | 
|---|
 | 499 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 500 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 501 |     class T_array7, class T_array8, class T_array9, class T_array10>
 | 
|---|
 | 502 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 503 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
 | 504 |    T_array7& G, T_array8& H, T_array9& I, T_array10& J)
 | 
|---|
 | 505 | {
 | 
|---|
 | 506 |     applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I, J,
 | 
|---|
 | 507 |         _dummyArray);
 | 
|---|
 | 508 | }
 | 
|---|
 | 509 | 
 | 
|---|
 | 510 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
 | 511 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
 | 512 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
 | 513 |     class T_array11>
 | 
|---|
 | 514 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
 | 515 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
 | 516 |    T_array7& G, T_array8& H, T_array9& I, T_array10& J, T_array11& K)
 | 
|---|
 | 517 | {
 | 
|---|
 | 518 |     applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I, J, K);
 | 
|---|
 | 519 | }
 | 
|---|
 | 520 | 
 | 
|---|
 | 521 | BZ_NAMESPACE_END
 | 
|---|
 | 522 | 
 | 
|---|
 | 523 | #endif // BZ_ARRAYSTENCIL_CC
 | 
|---|