| 1 | #ifndef BZ_ARRAYSTENCIL_CC
 | 
|---|
| 2 | #define BZ_ARRAYSTENCIL_CC
 | 
|---|
| 3 | 
 | 
|---|
| 4 | #ifndef BZ_ARRAYSTENCIL_H
 | 
|---|
| 5 |  #error <blitz/array/stencil.cc> must be included via <blitz/array/stencil.h>
 | 
|---|
| 6 | #endif
 | 
|---|
| 7 | 
 | 
|---|
| 8 | BZ_NAMESPACE(blitz)
 | 
|---|
| 9 | 
 | 
|---|
| 10 | // NEEDS_WORK:
 | 
|---|
| 11 | // o Need to allow scalar arguments as well as arrays
 | 
|---|
| 12 | // o Unit stride optimization
 | 
|---|
| 13 | // o Tiling
 | 
|---|
| 14 | // o Pass coordinate vector to stencil, so that where-like constructs
 | 
|---|
| 15 | //   can depend on location
 | 
|---|
| 16 | // o Maybe allow expression templates to be passed as
 | 
|---|
| 17 | //   array parameters?
 | 
|---|
| 18 | 
 | 
|---|
| 19 | /*
 | 
|---|
| 20 |  * There are a lot of kludges in this code to work around the fact that
 | 
|---|
| 21 |  * you can't have default template parameters with function templates.
 | 
|---|
| 22 |  * Ideally, one would implement applyStencil(..) as:
 | 
|---|
| 23 |  *
 | 
|---|
| 24 |  * template<class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
| 25 |  *    class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 26 |  *    class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
| 27 |  *    class T_array11>
 | 
|---|
| 28 |  * void applyStencil(const T_stencil& stencil, Array<T_numtype1,3>& A,
 | 
|---|
| 29 |  *    T_array2& B = _dummyArray, T_array3& C = _dummyArray, ......)
 | 
|---|
| 30 |  *
 | 
|---|
| 31 |  * and allow for up to (say) 11 arrays to be passed.  But this doesn't
 | 
|---|
| 32 |  * appear to be legal C++.  Instead, 11 versions of applyStencil are
 | 
|---|
| 33 |  * provided, each one with a different number of array parameters,
 | 
|---|
| 34 |  * and these stubs fill in the _dummyArray parameters and invoke
 | 
|---|
| 35 |  * applyStencil_imp().
 | 
|---|
| 36 |  */
 | 
|---|
| 37 | 
 | 
|---|
| 38 | template<int N_rank, class T_numtype1, class T_array2,
 | 
|---|
| 39 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 40 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
| 41 |     class T_array11>
 | 
|---|
| 42 | inline void checkShapes(const Array<T_numtype1,N_rank>& A,
 | 
|---|
| 43 |     const T_array2& B, const T_array3& C, const T_array4& D, 
 | 
|---|
| 44 |     const T_array5& E, const T_array6& F, const T_array7& G, 
 | 
|---|
| 45 |     const T_array8& H, const T_array9& I, const T_array10& J, 
 | 
|---|
| 46 |     const T_array11& K)
 | 
|---|
| 47 | {
 | 
|---|
| 48 |     BZPRECONDITION(areShapesConformable(A.shape(),B.shape())
 | 
|---|
| 49 |         && areShapesConformable(A.shape(),C.shape())
 | 
|---|
| 50 |         && areShapesConformable(A.shape(),D.shape())
 | 
|---|
| 51 |         && areShapesConformable(A.shape(),E.shape())
 | 
|---|
| 52 |         && areShapesConformable(A.shape(),F.shape())
 | 
|---|
| 53 |         && areShapesConformable(A.shape(),G.shape())
 | 
|---|
| 54 |         && areShapesConformable(A.shape(),H.shape())
 | 
|---|
| 55 |         && areShapesConformable(A.shape(),I.shape())
 | 
|---|
| 56 |         && areShapesConformable(A.shape(),J.shape())
 | 
|---|
| 57 |         && areShapesConformable(A.shape(),K.shape()));
 | 
|---|
| 58 | }
 | 
|---|
| 59 | 
 | 
|---|
| 60 | template<class T_extent, int N_rank, 
 | 
|---|
| 61 |     class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
| 62 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 63 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
| 64 |     class T_array11>
 | 
|---|
| 65 | inline void calcStencilExtent(T_extent& At, const T_stencil& stencil, 
 | 
|---|
| 66 |     const Array<T_numtype1,N_rank>& A,
 | 
|---|
| 67 |     const T_array2& B, const T_array3& C, const T_array4& D, const T_array5& E, 
 | 
|---|
| 68 |     const T_array6& F, const T_array7& G, const T_array8& H, const T_array9& I, 
 | 
|---|
| 69 |     const T_array10& J, const T_array11& K)
 | 
|---|
| 70 | {
 | 
|---|
| 71 |     // Interrogate the stencil to find out its extent
 | 
|---|
| 72 |     stencilExtent_traits<T_array2>::T_stencilExtent Bt;
 | 
|---|
| 73 |     stencilExtent_traits<T_array3>::T_stencilExtent Ct;
 | 
|---|
| 74 |     stencilExtent_traits<T_array4>::T_stencilExtent Dt;
 | 
|---|
| 75 |     stencilExtent_traits<T_array5>::T_stencilExtent Et;
 | 
|---|
| 76 |     stencilExtent_traits<T_array6>::T_stencilExtent Ft;
 | 
|---|
| 77 |     stencilExtent_traits<T_array7>::T_stencilExtent Gt;
 | 
|---|
| 78 |     stencilExtent_traits<T_array8>::T_stencilExtent Ht;
 | 
|---|
| 79 |     stencilExtent_traits<T_array9>::T_stencilExtent It;
 | 
|---|
| 80 |     stencilExtent_traits<T_array10>::T_stencilExtent Jt;
 | 
|---|
| 81 |     stencilExtent_traits<T_array11>::T_stencilExtent Kt;
 | 
|---|
| 82 | 
 | 
|---|
| 83 |     stencil.apply(At, Bt, Ct, Dt, Et, Ft, Gt, Ht, It, Jt, Kt);
 | 
|---|
| 84 |     At.combine(Bt);
 | 
|---|
| 85 |     At.combine(Ct);
 | 
|---|
| 86 |     At.combine(Dt);
 | 
|---|
| 87 |     At.combine(Et);
 | 
|---|
| 88 |     At.combine(Ft);
 | 
|---|
| 89 |     At.combine(Gt);
 | 
|---|
| 90 |     At.combine(Ht);
 | 
|---|
| 91 |     At.combine(It);
 | 
|---|
| 92 |     At.combine(Jt);
 | 
|---|
| 93 |     At.combine(Kt);
 | 
|---|
| 94 | }
 | 
|---|
| 95 | 
 | 
|---|
| 96 | template<int N_rank, class T_stencil, class T_numtype1, class T_array2>
 | 
|---|
| 97 | inline RectDomain<N_rank> interiorDomain(const T_stencil& stencil,
 | 
|---|
| 98 |     const Array<T_numtype1,N_rank>& A,
 | 
|---|
| 99 |     const T_array2& B)
 | 
|---|
| 100 | {
 | 
|---|
| 101 |     RectDomain<N_rank> domain = A.domain();
 | 
|---|
| 102 | 
 | 
|---|
| 103 |     // Interrogate the stencil to find out its extent
 | 
|---|
| 104 |     stencilExtent<3, T_numtype1> At;
 | 
|---|
| 105 |     calcStencilExtent(At, stencil, A, B, _dummyArray, _dummyArray, 
 | 
|---|
| 106 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray, 
 | 
|---|
| 107 |         _dummyArray, _dummyArray);
 | 
|---|
| 108 | 
 | 
|---|
| 109 |     // Shrink the domain according to the stencil size
 | 
|---|
| 110 |     TinyVector<int,N_rank> lbound, ubound;
 | 
|---|
| 111 |     lbound = domain.lbound() - At.min();
 | 
|---|
| 112 |     ubound = domain.ubound() - At.max();
 | 
|---|
| 113 |     return RectDomain<N_rank>(lbound,ubound);
 | 
|---|
| 114 | }
 | 
|---|
| 115 | 
 | 
|---|
| 116 | /*
 | 
|---|
| 117 |  * This version applies a stencil to a set of 3D arrays.  Up to 11 arrays
 | 
|---|
| 118 |  * may be used.  Any unused arrays are turned into dummyArray objects.
 | 
|---|
| 119 |  * Operations on dummyArray objects are translated into no-ops.
 | 
|---|
| 120 |  */
 | 
|---|
| 121 | template<class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
| 122 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 123 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
| 124 |     class T_array11>
 | 
|---|
| 125 | void applyStencil_imp(const T_stencil& stencil, Array<T_numtype1,3>& A,
 | 
|---|
| 126 |     T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
| 127 |     T_array7& G, T_array8& H, T_array9& I, T_array10& J, T_array11& K)
 | 
|---|
| 128 | {
 | 
|---|
| 129 |     checkShapes(A,B,C,D,E,F,G,H,I,J,K);
 | 
|---|
| 130 |   
 | 
|---|
| 131 |     // Interrogate the stencil to find out its extent
 | 
|---|
| 132 |     stencilExtent<3, T_numtype1> At;
 | 
|---|
| 133 |     calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
 | 
|---|
| 134 | 
 | 
|---|
| 135 |     // Now determine the subdomain over which the stencil
 | 
|---|
| 136 |     // can be applied without worrying about overrunning the
 | 
|---|
| 137 |     // boundaries of the array
 | 
|---|
| 138 |     int stencil_lbound0 = At.min(0);
 | 
|---|
| 139 |     int stencil_lbound1 = At.min(1);
 | 
|---|
| 140 |     int stencil_lbound2 = At.min(2);
 | 
|---|
| 141 | 
 | 
|---|
| 142 |     int stencil_ubound0 = At.max(0);
 | 
|---|
| 143 |     int stencil_ubound1 = At.max(1);
 | 
|---|
| 144 |     int stencil_ubound2 = At.max(2);
 | 
|---|
| 145 | 
 | 
|---|
| 146 |     int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
 | 
|---|
| 147 |     int lbound1 = max(A.lbound(1), A.lbound(1) - stencil_lbound1);
 | 
|---|
| 148 |     int lbound2 = max(A.lbound(2), A.lbound(2) - stencil_lbound2);
 | 
|---|
| 149 | 
 | 
|---|
| 150 |     int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
 | 
|---|
| 151 |     int ubound1 = min(A.ubound(1), A.ubound(1) - stencil_ubound1);
 | 
|---|
| 152 |     int ubound2 = min(A.ubound(2), A.ubound(2) - stencil_ubound2);
 | 
|---|
| 153 | 
 | 
|---|
| 154 | #if 0
 | 
|---|
| 155 |     cout << "Stencil bounds are:" << endl
 | 
|---|
| 156 |      << lbound0 << '\t' << ubound0 << endl
 | 
|---|
| 157 |      << lbound1 << '\t' << ubound1 << endl
 | 
|---|
| 158 |      << lbound2 << '\t' << ubound2 << endl;
 | 
|---|
| 159 | #endif
 | 
|---|
| 160 | 
 | 
|---|
| 161 |     // Now do the actual loop
 | 
|---|
| 162 |     ArrayIterator<T_numtype1,3> Aiter(A);
 | 
|---|
| 163 |     _bz_typename T_array2::T_iterator Biter(B);
 | 
|---|
| 164 |     _bz_typename T_array3::T_iterator Citer(C);
 | 
|---|
| 165 |     _bz_typename T_array4::T_iterator Diter(D);
 | 
|---|
| 166 |     _bz_typename T_array5::T_iterator Eiter(E);
 | 
|---|
| 167 |     _bz_typename T_array6::T_iterator Fiter(F);
 | 
|---|
| 168 |     _bz_typename T_array7::T_iterator Giter(G);
 | 
|---|
| 169 |     _bz_typename T_array8::T_iterator Hiter(H);
 | 
|---|
| 170 |     _bz_typename T_array9::T_iterator Iiter(I);
 | 
|---|
| 171 |     _bz_typename T_array10::T_iterator Jiter(J);
 | 
|---|
| 172 |     _bz_typename T_array11::T_iterator Kiter(K);
 | 
|---|
| 173 | 
 | 
|---|
| 174 |     // Load the strides for the innermost loop
 | 
|---|
| 175 |     Aiter.loadStride(2);
 | 
|---|
| 176 |     Biter.loadStride(2);
 | 
|---|
| 177 |     Citer.loadStride(2);
 | 
|---|
| 178 |     Diter.loadStride(2);
 | 
|---|
| 179 |     Eiter.loadStride(2);
 | 
|---|
| 180 |     Fiter.loadStride(2);
 | 
|---|
| 181 |     Giter.loadStride(2);
 | 
|---|
| 182 |     Hiter.loadStride(2);
 | 
|---|
| 183 |     Iiter.loadStride(2);
 | 
|---|
| 184 |     Jiter.loadStride(2);
 | 
|---|
| 185 |     Kiter.loadStride(2);
 | 
|---|
| 186 | 
 | 
|---|
| 187 |     for (int i=lbound0; i <= ubound0; ++i)
 | 
|---|
| 188 |     {
 | 
|---|
| 189 |       for (int j=lbound1; j <= ubound1; ++j)
 | 
|---|
| 190 |       {
 | 
|---|
| 191 |         Aiter.moveTo(i,j,lbound2);
 | 
|---|
| 192 |         Biter.moveTo(i,j,lbound2);
 | 
|---|
| 193 |         Citer.moveTo(i,j,lbound2);
 | 
|---|
| 194 |         Diter.moveTo(i,j,lbound2);
 | 
|---|
| 195 |         Eiter.moveTo(i,j,lbound2);
 | 
|---|
| 196 |         Fiter.moveTo(i,j,lbound2);
 | 
|---|
| 197 |         Giter.moveTo(i,j,lbound2);
 | 
|---|
| 198 |         Hiter.moveTo(i,j,lbound2);
 | 
|---|
| 199 |         Iiter.moveTo(i,j,lbound2);
 | 
|---|
| 200 |         Jiter.moveTo(i,j,lbound2);
 | 
|---|
| 201 |         Kiter.moveTo(i,j,lbound2);
 | 
|---|
| 202 | 
 | 
|---|
| 203 |         for (int k=lbound2; k <= ubound2; ++k)
 | 
|---|
| 204 |         {
 | 
|---|
| 205 |             stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
 | 
|---|
| 206 |                 Hiter, Iiter, Jiter, Kiter);
 | 
|---|
| 207 | 
 | 
|---|
| 208 |             Aiter.advance();
 | 
|---|
| 209 |             Biter.advance();
 | 
|---|
| 210 |             Citer.advance();
 | 
|---|
| 211 |             Diter.advance();
 | 
|---|
| 212 |             Eiter.advance();
 | 
|---|
| 213 |             Fiter.advance();
 | 
|---|
| 214 |             Giter.advance();
 | 
|---|
| 215 |             Hiter.advance();
 | 
|---|
| 216 |             Iiter.advance();
 | 
|---|
| 217 |             Jiter.advance();
 | 
|---|
| 218 |             Kiter.advance();
 | 
|---|
| 219 |         }
 | 
|---|
| 220 |       }
 | 
|---|
| 221 |     }
 | 
|---|
| 222 | }
 | 
|---|
| 223 | 
 | 
|---|
| 224 | /*
 | 
|---|
| 225 |  * This version applies a stencil to a set of 2D arrays.  Up to 11 arrays
 | 
|---|
| 226 |  * may be used.  Any unused arrays are turned into dummyArray objects.
 | 
|---|
| 227 |  * Operations on dummyArray objects are translated into no-ops.
 | 
|---|
| 228 |  */
 | 
|---|
| 229 | template<class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
| 230 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 231 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
| 232 |     class T_array11>
 | 
|---|
| 233 | void applyStencil_imp(const T_stencil& stencil, const Array<T_numtype1,2>& A,
 | 
|---|
| 234 |     const T_array2& B, const T_array3& C, const T_array4& D, 
 | 
|---|
| 235 |     const T_array5& E, const T_array6& F, const T_array7& G, 
 | 
|---|
| 236 |     const T_array8& H, const T_array9& I, const T_array10& J, 
 | 
|---|
| 237 |     const T_array11& K)
 | 
|---|
| 238 | {
 | 
|---|
| 239 |     checkShapes(A,B,C,D,E,F,G,H,I,J,K);
 | 
|---|
| 240 | 
 | 
|---|
| 241 |     // Interrogate the stencil to find out its extent
 | 
|---|
| 242 |     stencilExtent<2, T_numtype1> At;
 | 
|---|
| 243 |     calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
 | 
|---|
| 244 | 
 | 
|---|
| 245 |     // Now determine the subdomain over which the stencil
 | 
|---|
| 246 |     // can be applied without worrying about overrunning the
 | 
|---|
| 247 |     // boundaries of the array
 | 
|---|
| 248 |     int stencil_lbound0 = At.min(0);
 | 
|---|
| 249 |     int stencil_lbound1 = At.min(1);
 | 
|---|
| 250 | 
 | 
|---|
| 251 |     int stencil_ubound0 = At.max(0);
 | 
|---|
| 252 |     int stencil_ubound1 = At.max(1);
 | 
|---|
| 253 | 
 | 
|---|
| 254 |     int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
 | 
|---|
| 255 |     int lbound1 = max(A.lbound(1), A.lbound(1) - stencil_lbound1);
 | 
|---|
| 256 | 
 | 
|---|
| 257 |     int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
 | 
|---|
| 258 |     int ubound1 = min(A.ubound(1), A.ubound(1) - stencil_ubound1);
 | 
|---|
| 259 | 
 | 
|---|
| 260 | #if 0
 | 
|---|
| 261 |     cout << "Stencil bounds are:" << endl
 | 
|---|
| 262 |      << lbound0 << '\t' << ubound0 << endl
 | 
|---|
| 263 |      << lbound1 << '\t' << ubound1 << endl;
 | 
|---|
| 264 | #endif 
 | 
|---|
| 265 | 
 | 
|---|
| 266 |     // Now do the actual loop
 | 
|---|
| 267 |     ArrayIterator<T_numtype1,2> Aiter(A);
 | 
|---|
| 268 |     _bz_typename T_array2::T_iterator Biter(B);
 | 
|---|
| 269 |     _bz_typename T_array3::T_iterator Citer(C);
 | 
|---|
| 270 |     _bz_typename T_array4::T_iterator Diter(D);
 | 
|---|
| 271 |     _bz_typename T_array5::T_iterator Eiter(E);
 | 
|---|
| 272 |     _bz_typename T_array6::T_iterator Fiter(F);
 | 
|---|
| 273 |     _bz_typename T_array7::T_iterator Giter(G);
 | 
|---|
| 274 |     _bz_typename T_array8::T_iterator Hiter(H);
 | 
|---|
| 275 |     _bz_typename T_array9::T_iterator Iiter(I);
 | 
|---|
| 276 |     _bz_typename T_array10::T_iterator Jiter(J);
 | 
|---|
| 277 |     _bz_typename T_array11::T_iterator Kiter(K);
 | 
|---|
| 278 | 
 | 
|---|
| 279 |     // Load the strides for the innermost loop
 | 
|---|
| 280 |     Aiter.loadStride(1);
 | 
|---|
| 281 |     Biter.loadStride(1);
 | 
|---|
| 282 |     Citer.loadStride(1);
 | 
|---|
| 283 |     Diter.loadStride(1);
 | 
|---|
| 284 |     Eiter.loadStride(1);
 | 
|---|
| 285 |     Fiter.loadStride(1);
 | 
|---|
| 286 |     Giter.loadStride(1);
 | 
|---|
| 287 |     Hiter.loadStride(1);
 | 
|---|
| 288 |     Iiter.loadStride(1);
 | 
|---|
| 289 |     Jiter.loadStride(1);
 | 
|---|
| 290 |     Kiter.loadStride(1);
 | 
|---|
| 291 | 
 | 
|---|
| 292 |     for (int i=lbound0; i <= ubound0; ++i)
 | 
|---|
| 293 |     {
 | 
|---|
| 294 |         Aiter.moveTo(i,lbound1);
 | 
|---|
| 295 |         Biter.moveTo(i,lbound1);
 | 
|---|
| 296 |         Citer.moveTo(i,lbound1);
 | 
|---|
| 297 |         Diter.moveTo(i,lbound1);
 | 
|---|
| 298 |         Eiter.moveTo(i,lbound1);
 | 
|---|
| 299 |         Fiter.moveTo(i,lbound1);
 | 
|---|
| 300 |         Giter.moveTo(i,lbound1);
 | 
|---|
| 301 |         Hiter.moveTo(i,lbound1);
 | 
|---|
| 302 |         Iiter.moveTo(i,lbound1);
 | 
|---|
| 303 |         Jiter.moveTo(i,lbound1);
 | 
|---|
| 304 |         Kiter.moveTo(i,lbound1);
 | 
|---|
| 305 | 
 | 
|---|
| 306 |         for (int k=lbound1; k <= ubound1; ++k)
 | 
|---|
| 307 |         {
 | 
|---|
| 308 |             stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
 | 
|---|
| 309 |                 Hiter, Iiter, Jiter, Kiter);
 | 
|---|
| 310 | 
 | 
|---|
| 311 |             Aiter.advance();
 | 
|---|
| 312 |             Biter.advance();
 | 
|---|
| 313 |             Citer.advance();
 | 
|---|
| 314 |             Diter.advance();
 | 
|---|
| 315 |             Eiter.advance();
 | 
|---|
| 316 |             Fiter.advance();
 | 
|---|
| 317 |             Giter.advance();
 | 
|---|
| 318 |             Hiter.advance();
 | 
|---|
| 319 |             Iiter.advance();
 | 
|---|
| 320 |             Jiter.advance();
 | 
|---|
| 321 |             Kiter.advance();
 | 
|---|
| 322 |         }
 | 
|---|
| 323 |     }
 | 
|---|
| 324 | }
 | 
|---|
| 325 | 
 | 
|---|
| 326 | /*
 | 
|---|
| 327 |  * This version applies a stencil to a set of 1D arrays.  Up to 11 arrays
 | 
|---|
| 328 |  * may be used.  Any unused arrays are turned into dummyArray objects.
 | 
|---|
| 329 |  * Operations on dummyArray objects are translated into no-ops.
 | 
|---|
| 330 |  */
 | 
|---|
| 331 | template<class T_stencil, class T_numtype1, class T_array2,
 | 
|---|
| 332 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 333 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
| 334 |     class T_array11>
 | 
|---|
| 335 | void applyStencil_imp(const T_stencil& stencil, const Array<T_numtype1,1>& A,
 | 
|---|
| 336 |     const T_array2& B, const T_array3& C, const T_array4& D,
 | 
|---|
| 337 |     const T_array5& E, const T_array6& F, const T_array7& G,
 | 
|---|
| 338 |     const T_array8& H, const T_array9& I, const T_array10& J,
 | 
|---|
| 339 |     const T_array11& K)
 | 
|---|
| 340 | {
 | 
|---|
| 341 |     checkShapes(A,B,C,D,E,F,G,H,I,J,K);
 | 
|---|
| 342 | 
 | 
|---|
| 343 |     // Interrogate the stencil to find out its extent
 | 
|---|
| 344 |     stencilExtent<1, T_numtype1> At;
 | 
|---|
| 345 |     calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
 | 
|---|
| 346 | 
 | 
|---|
| 347 |     // Now determine the subdomain over which the stencil
 | 
|---|
| 348 |     // can be applied without worrying about overrunning the
 | 
|---|
| 349 |     // boundaries of the array
 | 
|---|
| 350 |     int stencil_lbound0 = At.min(0);
 | 
|---|
| 351 | 
 | 
|---|
| 352 |     int stencil_ubound0 = At.max(0);
 | 
|---|
| 353 | 
 | 
|---|
| 354 |     int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
 | 
|---|
| 355 |     int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
 | 
|---|
| 356 | 
 | 
|---|
| 357 | #if 0
 | 
|---|
| 358 |     cout << "Stencil bounds are:" << endl
 | 
|---|
| 359 |      << lbound0 << '\t' << ubound0 << endl;
 | 
|---|
| 360 | #endif
 | 
|---|
| 361 | 
 | 
|---|
| 362 |     // Now do the actual loop
 | 
|---|
| 363 |     ArrayIterator<T_numtype1,1> Aiter(A);
 | 
|---|
| 364 |     _bz_typename T_array2::T_iterator Biter(B);
 | 
|---|
| 365 |     _bz_typename T_array3::T_iterator Citer(C);
 | 
|---|
| 366 |     _bz_typename T_array4::T_iterator Diter(D);
 | 
|---|
| 367 |     _bz_typename T_array5::T_iterator Eiter(E);
 | 
|---|
| 368 |     _bz_typename T_array6::T_iterator Fiter(F);
 | 
|---|
| 369 |     _bz_typename T_array7::T_iterator Giter(G);
 | 
|---|
| 370 |     _bz_typename T_array8::T_iterator Hiter(H);
 | 
|---|
| 371 |     _bz_typename T_array9::T_iterator Iiter(I);
 | 
|---|
| 372 |     _bz_typename T_array10::T_iterator Jiter(J);
 | 
|---|
| 373 |     _bz_typename T_array11::T_iterator Kiter(K);
 | 
|---|
| 374 | 
 | 
|---|
| 375 |     // Load the strides for the innermost loop
 | 
|---|
| 376 |     Aiter.loadStride(0);
 | 
|---|
| 377 |     Biter.loadStride(0);
 | 
|---|
| 378 |     Citer.loadStride(0);
 | 
|---|
| 379 |     Diter.loadStride(0);
 | 
|---|
| 380 |     Eiter.loadStride(0);
 | 
|---|
| 381 |     Fiter.loadStride(0);
 | 
|---|
| 382 |     Giter.loadStride(0);
 | 
|---|
| 383 |     Hiter.loadStride(0);
 | 
|---|
| 384 |     Iiter.loadStride(0);
 | 
|---|
| 385 |     Jiter.loadStride(0);
 | 
|---|
| 386 |     Kiter.loadStride(0);
 | 
|---|
| 387 | 
 | 
|---|
| 388 |     for (int i=lbound0; i <= ubound0; ++i)
 | 
|---|
| 389 |     {
 | 
|---|
| 390 |         stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
 | 
|---|
| 391 |             Hiter, Iiter, Jiter, Kiter);
 | 
|---|
| 392 | 
 | 
|---|
| 393 |         Aiter.advance();
 | 
|---|
| 394 |         Biter.advance();
 | 
|---|
| 395 |         Citer.advance();
 | 
|---|
| 396 |         Diter.advance();
 | 
|---|
| 397 |         Eiter.advance();
 | 
|---|
| 398 |         Fiter.advance();
 | 
|---|
| 399 |         Giter.advance();
 | 
|---|
| 400 |         Hiter.advance();
 | 
|---|
| 401 |         Iiter.advance();
 | 
|---|
| 402 |         Jiter.advance();
 | 
|---|
| 403 |         Kiter.advance();
 | 
|---|
| 404 |     }
 | 
|---|
| 405 | }
 | 
|---|
| 406 | 
 | 
|---|
| 407 | /*
 | 
|---|
| 408 |  * These 11 versions of applyStencil handle from 1 to 11 array parameters.
 | 
|---|
| 409 |  * They pad their argument list with enough dummyArray objects to call
 | 
|---|
| 410 |  * applyStencil_imp with 11 array parameters.
 | 
|---|
| 411 |  */
 | 
|---|
| 412 | template<class T_stencil, class T_numtype1, int N_rank>
 | 
|---|
| 413 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A)
 | 
|---|
| 414 | {
 | 
|---|
| 415 |     applyStencil_imp(stencil, A, _dummyArray, _dummyArray,
 | 
|---|
| 416 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray,
 | 
|---|
| 417 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
| 418 | }
 | 
|---|
| 419 | 
 | 
|---|
| 420 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2>
 | 
|---|
| 421 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 422 |     T_array2& B)
 | 
|---|
| 423 | {
 | 
|---|
| 424 |     applyStencil_imp(stencil, A, B, _dummyArray, _dummyArray,
 | 
|---|
| 425 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray,
 | 
|---|
| 426 |         _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
| 427 | }
 | 
|---|
| 428 | 
 | 
|---|
| 429 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 430 |     class T_array3>
 | 
|---|
| 431 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 432 |     T_array2& B, T_array3& C)
 | 
|---|
| 433 | {
 | 
|---|
| 434 |     applyStencil_imp(stencil, A, B, C, _dummyArray, _dummyArray,
 | 
|---|
| 435 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray,
 | 
|---|
| 436 |         _dummyArray);
 | 
|---|
| 437 | }
 | 
|---|
| 438 | 
 | 
|---|
| 439 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 440 |     class T_array3, class T_array4>
 | 
|---|
| 441 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 442 |     T_array2& B, T_array3& C, T_array4& D)
 | 
|---|
| 443 | {
 | 
|---|
| 444 |     applyStencil_imp(stencil, A, B, C, D, _dummyArray, _dummyArray,
 | 
|---|
| 445 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
| 446 | }
 | 
|---|
| 447 | 
 | 
|---|
| 448 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 449 |     class T_array3, class T_array4, class T_array5>
 | 
|---|
| 450 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 451 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E)
 | 
|---|
| 452 | {
 | 
|---|
| 453 |     applyStencil_imp(stencil, A, B, C, D, E, _dummyArray,
 | 
|---|
| 454 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
| 455 | }
 | 
|---|
| 456 | 
 | 
|---|
| 457 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 458 |     class T_array3, class T_array4, class T_array5, class T_array6>
 | 
|---|
| 459 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 460 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F)
 | 
|---|
| 461 | {
 | 
|---|
| 462 |     applyStencil_imp(stencil, A, B, C, D, E, F,
 | 
|---|
| 463 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
| 464 | }
 | 
|---|
| 465 | 
 | 
|---|
| 466 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 467 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 468 |     class T_array7>
 | 
|---|
| 469 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 470 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
| 471 |    T_array7& G)
 | 
|---|
| 472 | {
 | 
|---|
| 473 |     applyStencil_imp(stencil, A, B, C, D, E, F, G,
 | 
|---|
| 474 |         _dummyArray, _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
| 475 | }
 | 
|---|
| 476 | 
 | 
|---|
| 477 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 478 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 479 |     class T_array7, class T_array8>
 | 
|---|
| 480 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 481 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
| 482 |    T_array7& G, T_array8& H)
 | 
|---|
| 483 | {
 | 
|---|
| 484 |     applyStencil_imp(stencil, A, B, C, D, E, F, G, H,
 | 
|---|
| 485 |         _dummyArray, _dummyArray, _dummyArray);
 | 
|---|
| 486 | }
 | 
|---|
| 487 | 
 | 
|---|
| 488 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 489 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 490 |     class T_array7, class T_array8, class T_array9>
 | 
|---|
| 491 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 492 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
| 493 |    T_array7& G, T_array8& H, T_array9& I)
 | 
|---|
| 494 | {
 | 
|---|
| 495 |     applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I,
 | 
|---|
| 496 |         _dummyArray, _dummyArray);
 | 
|---|
| 497 | }
 | 
|---|
| 498 | 
 | 
|---|
| 499 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 500 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 501 |     class T_array7, class T_array8, class T_array9, class T_array10>
 | 
|---|
| 502 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 503 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
| 504 |    T_array7& G, T_array8& H, T_array9& I, T_array10& J)
 | 
|---|
| 505 | {
 | 
|---|
| 506 |     applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I, J,
 | 
|---|
| 507 |         _dummyArray);
 | 
|---|
| 508 | }
 | 
|---|
| 509 | 
 | 
|---|
| 510 | template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
 | 
|---|
| 511 |     class T_array3, class T_array4, class T_array5, class T_array6,
 | 
|---|
| 512 |     class T_array7, class T_array8, class T_array9, class T_array10,
 | 
|---|
| 513 |     class T_array11>
 | 
|---|
| 514 | inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
 | 
|---|
| 515 |    T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
 | 
|---|
| 516 |    T_array7& G, T_array8& H, T_array9& I, T_array10& J, T_array11& K)
 | 
|---|
| 517 | {
 | 
|---|
| 518 |     applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I, J, K);
 | 
|---|
| 519 | }
 | 
|---|
| 520 | 
 | 
|---|
| 521 | BZ_NAMESPACE_END
 | 
|---|
| 522 | 
 | 
|---|
| 523 | #endif // BZ_ARRAYSTENCIL_CC
 | 
|---|