source: Sophya/trunk/SophyaExt/Blitz/blitz/array/stencil.cc@ 3776

Last change on this file since 3776 was 221, checked in by ansari, 27 years ago

Creation module DPC/Blitz (blitz 0.4) Reza 09/04/99

File size: 18.8 KB
Line 
1#ifndef BZ_ARRAYSTENCIL_CC
2#define BZ_ARRAYSTENCIL_CC
3
4#ifndef BZ_ARRAYSTENCIL_H
5 #error <blitz/array/stencil.cc> must be included via <blitz/array/stencil.h>
6#endif
7
8BZ_NAMESPACE(blitz)
9
10// NEEDS_WORK:
11// o Need to allow scalar arguments as well as arrays
12// o Unit stride optimization
13// o Tiling
14// o Pass coordinate vector to stencil, so that where-like constructs
15// can depend on location
16// o Maybe allow expression templates to be passed as
17// array parameters?
18
19/*
20 * There are a lot of kludges in this code to work around the fact that
21 * you can't have default template parameters with function templates.
22 * Ideally, one would implement applyStencil(..) as:
23 *
24 * template<class T_stencil, class T_numtype1, class T_array2,
25 * class T_array3, class T_array4, class T_array5, class T_array6,
26 * class T_array7, class T_array8, class T_array9, class T_array10,
27 * class T_array11>
28 * void applyStencil(const T_stencil& stencil, Array<T_numtype1,3>& A,
29 * T_array2& B = _dummyArray, T_array3& C = _dummyArray, ......)
30 *
31 * and allow for up to (say) 11 arrays to be passed. But this doesn't
32 * appear to be legal C++. Instead, 11 versions of applyStencil are
33 * provided, each one with a different number of array parameters,
34 * and these stubs fill in the _dummyArray parameters and invoke
35 * applyStencil_imp().
36 */
37
38template<int N_rank, class T_numtype1, class T_array2,
39 class T_array3, class T_array4, class T_array5, class T_array6,
40 class T_array7, class T_array8, class T_array9, class T_array10,
41 class T_array11>
42inline void checkShapes(const Array<T_numtype1,N_rank>& A,
43 const T_array2& B, const T_array3& C, const T_array4& D,
44 const T_array5& E, const T_array6& F, const T_array7& G,
45 const T_array8& H, const T_array9& I, const T_array10& J,
46 const T_array11& K)
47{
48 BZPRECONDITION(areShapesConformable(A.shape(),B.shape())
49 && areShapesConformable(A.shape(),C.shape())
50 && areShapesConformable(A.shape(),D.shape())
51 && areShapesConformable(A.shape(),E.shape())
52 && areShapesConformable(A.shape(),F.shape())
53 && areShapesConformable(A.shape(),G.shape())
54 && areShapesConformable(A.shape(),H.shape())
55 && areShapesConformable(A.shape(),I.shape())
56 && areShapesConformable(A.shape(),J.shape())
57 && areShapesConformable(A.shape(),K.shape()));
58}
59
60template<class T_extent, int N_rank,
61 class T_stencil, class T_numtype1, class T_array2,
62 class T_array3, class T_array4, class T_array5, class T_array6,
63 class T_array7, class T_array8, class T_array9, class T_array10,
64 class T_array11>
65inline void calcStencilExtent(T_extent& At, const T_stencil& stencil,
66 const Array<T_numtype1,N_rank>& A,
67 const T_array2& B, const T_array3& C, const T_array4& D, const T_array5& E,
68 const T_array6& F, const T_array7& G, const T_array8& H, const T_array9& I,
69 const T_array10& J, const T_array11& K)
70{
71 // Interrogate the stencil to find out its extent
72 stencilExtent_traits<T_array2>::T_stencilExtent Bt;
73 stencilExtent_traits<T_array3>::T_stencilExtent Ct;
74 stencilExtent_traits<T_array4>::T_stencilExtent Dt;
75 stencilExtent_traits<T_array5>::T_stencilExtent Et;
76 stencilExtent_traits<T_array6>::T_stencilExtent Ft;
77 stencilExtent_traits<T_array7>::T_stencilExtent Gt;
78 stencilExtent_traits<T_array8>::T_stencilExtent Ht;
79 stencilExtent_traits<T_array9>::T_stencilExtent It;
80 stencilExtent_traits<T_array10>::T_stencilExtent Jt;
81 stencilExtent_traits<T_array11>::T_stencilExtent Kt;
82
83 stencil.apply(At, Bt, Ct, Dt, Et, Ft, Gt, Ht, It, Jt, Kt);
84 At.combine(Bt);
85 At.combine(Ct);
86 At.combine(Dt);
87 At.combine(Et);
88 At.combine(Ft);
89 At.combine(Gt);
90 At.combine(Ht);
91 At.combine(It);
92 At.combine(Jt);
93 At.combine(Kt);
94}
95
96template<int N_rank, class T_stencil, class T_numtype1, class T_array2>
97inline RectDomain<N_rank> interiorDomain(const T_stencil& stencil,
98 const Array<T_numtype1,N_rank>& A,
99 const T_array2& B)
100{
101 RectDomain<N_rank> domain = A.domain();
102
103 // Interrogate the stencil to find out its extent
104 stencilExtent<3, T_numtype1> At;
105 calcStencilExtent(At, stencil, A, B, _dummyArray, _dummyArray,
106 _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray,
107 _dummyArray, _dummyArray);
108
109 // Shrink the domain according to the stencil size
110 TinyVector<int,N_rank> lbound, ubound;
111 lbound = domain.lbound() - At.min();
112 ubound = domain.ubound() - At.max();
113 return RectDomain<N_rank>(lbound,ubound);
114}
115
116/*
117 * This version applies a stencil to a set of 3D arrays. Up to 11 arrays
118 * may be used. Any unused arrays are turned into dummyArray objects.
119 * Operations on dummyArray objects are translated into no-ops.
120 */
121template<class T_stencil, class T_numtype1, class T_array2,
122 class T_array3, class T_array4, class T_array5, class T_array6,
123 class T_array7, class T_array8, class T_array9, class T_array10,
124 class T_array11>
125void applyStencil_imp(const T_stencil& stencil, Array<T_numtype1,3>& A,
126 T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
127 T_array7& G, T_array8& H, T_array9& I, T_array10& J, T_array11& K)
128{
129 checkShapes(A,B,C,D,E,F,G,H,I,J,K);
130
131 // Interrogate the stencil to find out its extent
132 stencilExtent<3, T_numtype1> At;
133 calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
134
135 // Now determine the subdomain over which the stencil
136 // can be applied without worrying about overrunning the
137 // boundaries of the array
138 int stencil_lbound0 = At.min(0);
139 int stencil_lbound1 = At.min(1);
140 int stencil_lbound2 = At.min(2);
141
142 int stencil_ubound0 = At.max(0);
143 int stencil_ubound1 = At.max(1);
144 int stencil_ubound2 = At.max(2);
145
146 int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
147 int lbound1 = max(A.lbound(1), A.lbound(1) - stencil_lbound1);
148 int lbound2 = max(A.lbound(2), A.lbound(2) - stencil_lbound2);
149
150 int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
151 int ubound1 = min(A.ubound(1), A.ubound(1) - stencil_ubound1);
152 int ubound2 = min(A.ubound(2), A.ubound(2) - stencil_ubound2);
153
154#if 0
155 cout << "Stencil bounds are:" << endl
156 << lbound0 << '\t' << ubound0 << endl
157 << lbound1 << '\t' << ubound1 << endl
158 << lbound2 << '\t' << ubound2 << endl;
159#endif
160
161 // Now do the actual loop
162 ArrayIterator<T_numtype1,3> Aiter(A);
163 _bz_typename T_array2::T_iterator Biter(B);
164 _bz_typename T_array3::T_iterator Citer(C);
165 _bz_typename T_array4::T_iterator Diter(D);
166 _bz_typename T_array5::T_iterator Eiter(E);
167 _bz_typename T_array6::T_iterator Fiter(F);
168 _bz_typename T_array7::T_iterator Giter(G);
169 _bz_typename T_array8::T_iterator Hiter(H);
170 _bz_typename T_array9::T_iterator Iiter(I);
171 _bz_typename T_array10::T_iterator Jiter(J);
172 _bz_typename T_array11::T_iterator Kiter(K);
173
174 // Load the strides for the innermost loop
175 Aiter.loadStride(2);
176 Biter.loadStride(2);
177 Citer.loadStride(2);
178 Diter.loadStride(2);
179 Eiter.loadStride(2);
180 Fiter.loadStride(2);
181 Giter.loadStride(2);
182 Hiter.loadStride(2);
183 Iiter.loadStride(2);
184 Jiter.loadStride(2);
185 Kiter.loadStride(2);
186
187 for (int i=lbound0; i <= ubound0; ++i)
188 {
189 for (int j=lbound1; j <= ubound1; ++j)
190 {
191 Aiter.moveTo(i,j,lbound2);
192 Biter.moveTo(i,j,lbound2);
193 Citer.moveTo(i,j,lbound2);
194 Diter.moveTo(i,j,lbound2);
195 Eiter.moveTo(i,j,lbound2);
196 Fiter.moveTo(i,j,lbound2);
197 Giter.moveTo(i,j,lbound2);
198 Hiter.moveTo(i,j,lbound2);
199 Iiter.moveTo(i,j,lbound2);
200 Jiter.moveTo(i,j,lbound2);
201 Kiter.moveTo(i,j,lbound2);
202
203 for (int k=lbound2; k <= ubound2; ++k)
204 {
205 stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
206 Hiter, Iiter, Jiter, Kiter);
207
208 Aiter.advance();
209 Biter.advance();
210 Citer.advance();
211 Diter.advance();
212 Eiter.advance();
213 Fiter.advance();
214 Giter.advance();
215 Hiter.advance();
216 Iiter.advance();
217 Jiter.advance();
218 Kiter.advance();
219 }
220 }
221 }
222}
223
224/*
225 * This version applies a stencil to a set of 2D arrays. Up to 11 arrays
226 * may be used. Any unused arrays are turned into dummyArray objects.
227 * Operations on dummyArray objects are translated into no-ops.
228 */
229template<class T_stencil, class T_numtype1, class T_array2,
230 class T_array3, class T_array4, class T_array5, class T_array6,
231 class T_array7, class T_array8, class T_array9, class T_array10,
232 class T_array11>
233void applyStencil_imp(const T_stencil& stencil, const Array<T_numtype1,2>& A,
234 const T_array2& B, const T_array3& C, const T_array4& D,
235 const T_array5& E, const T_array6& F, const T_array7& G,
236 const T_array8& H, const T_array9& I, const T_array10& J,
237 const T_array11& K)
238{
239 checkShapes(A,B,C,D,E,F,G,H,I,J,K);
240
241 // Interrogate the stencil to find out its extent
242 stencilExtent<2, T_numtype1> At;
243 calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
244
245 // Now determine the subdomain over which the stencil
246 // can be applied without worrying about overrunning the
247 // boundaries of the array
248 int stencil_lbound0 = At.min(0);
249 int stencil_lbound1 = At.min(1);
250
251 int stencil_ubound0 = At.max(0);
252 int stencil_ubound1 = At.max(1);
253
254 int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
255 int lbound1 = max(A.lbound(1), A.lbound(1) - stencil_lbound1);
256
257 int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
258 int ubound1 = min(A.ubound(1), A.ubound(1) - stencil_ubound1);
259
260#if 0
261 cout << "Stencil bounds are:" << endl
262 << lbound0 << '\t' << ubound0 << endl
263 << lbound1 << '\t' << ubound1 << endl;
264#endif
265
266 // Now do the actual loop
267 ArrayIterator<T_numtype1,2> Aiter(A);
268 _bz_typename T_array2::T_iterator Biter(B);
269 _bz_typename T_array3::T_iterator Citer(C);
270 _bz_typename T_array4::T_iterator Diter(D);
271 _bz_typename T_array5::T_iterator Eiter(E);
272 _bz_typename T_array6::T_iterator Fiter(F);
273 _bz_typename T_array7::T_iterator Giter(G);
274 _bz_typename T_array8::T_iterator Hiter(H);
275 _bz_typename T_array9::T_iterator Iiter(I);
276 _bz_typename T_array10::T_iterator Jiter(J);
277 _bz_typename T_array11::T_iterator Kiter(K);
278
279 // Load the strides for the innermost loop
280 Aiter.loadStride(1);
281 Biter.loadStride(1);
282 Citer.loadStride(1);
283 Diter.loadStride(1);
284 Eiter.loadStride(1);
285 Fiter.loadStride(1);
286 Giter.loadStride(1);
287 Hiter.loadStride(1);
288 Iiter.loadStride(1);
289 Jiter.loadStride(1);
290 Kiter.loadStride(1);
291
292 for (int i=lbound0; i <= ubound0; ++i)
293 {
294 Aiter.moveTo(i,lbound1);
295 Biter.moveTo(i,lbound1);
296 Citer.moveTo(i,lbound1);
297 Diter.moveTo(i,lbound1);
298 Eiter.moveTo(i,lbound1);
299 Fiter.moveTo(i,lbound1);
300 Giter.moveTo(i,lbound1);
301 Hiter.moveTo(i,lbound1);
302 Iiter.moveTo(i,lbound1);
303 Jiter.moveTo(i,lbound1);
304 Kiter.moveTo(i,lbound1);
305
306 for (int k=lbound1; k <= ubound1; ++k)
307 {
308 stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
309 Hiter, Iiter, Jiter, Kiter);
310
311 Aiter.advance();
312 Biter.advance();
313 Citer.advance();
314 Diter.advance();
315 Eiter.advance();
316 Fiter.advance();
317 Giter.advance();
318 Hiter.advance();
319 Iiter.advance();
320 Jiter.advance();
321 Kiter.advance();
322 }
323 }
324}
325
326/*
327 * This version applies a stencil to a set of 1D arrays. Up to 11 arrays
328 * may be used. Any unused arrays are turned into dummyArray objects.
329 * Operations on dummyArray objects are translated into no-ops.
330 */
331template<class T_stencil, class T_numtype1, class T_array2,
332 class T_array3, class T_array4, class T_array5, class T_array6,
333 class T_array7, class T_array8, class T_array9, class T_array10,
334 class T_array11>
335void applyStencil_imp(const T_stencil& stencil, const Array<T_numtype1,1>& A,
336 const T_array2& B, const T_array3& C, const T_array4& D,
337 const T_array5& E, const T_array6& F, const T_array7& G,
338 const T_array8& H, const T_array9& I, const T_array10& J,
339 const T_array11& K)
340{
341 checkShapes(A,B,C,D,E,F,G,H,I,J,K);
342
343 // Interrogate the stencil to find out its extent
344 stencilExtent<1, T_numtype1> At;
345 calcStencilExtent(At, stencil, A, B, C, D, E, F, G, H, I, J, K);
346
347 // Now determine the subdomain over which the stencil
348 // can be applied without worrying about overrunning the
349 // boundaries of the array
350 int stencil_lbound0 = At.min(0);
351
352 int stencil_ubound0 = At.max(0);
353
354 int lbound0 = max(A.lbound(0), A.lbound(0) - stencil_lbound0);
355 int ubound0 = min(A.ubound(0), A.ubound(0) - stencil_ubound0);
356
357#if 0
358 cout << "Stencil bounds are:" << endl
359 << lbound0 << '\t' << ubound0 << endl;
360#endif
361
362 // Now do the actual loop
363 ArrayIterator<T_numtype1,1> Aiter(A);
364 _bz_typename T_array2::T_iterator Biter(B);
365 _bz_typename T_array3::T_iterator Citer(C);
366 _bz_typename T_array4::T_iterator Diter(D);
367 _bz_typename T_array5::T_iterator Eiter(E);
368 _bz_typename T_array6::T_iterator Fiter(F);
369 _bz_typename T_array7::T_iterator Giter(G);
370 _bz_typename T_array8::T_iterator Hiter(H);
371 _bz_typename T_array9::T_iterator Iiter(I);
372 _bz_typename T_array10::T_iterator Jiter(J);
373 _bz_typename T_array11::T_iterator Kiter(K);
374
375 // Load the strides for the innermost loop
376 Aiter.loadStride(0);
377 Biter.loadStride(0);
378 Citer.loadStride(0);
379 Diter.loadStride(0);
380 Eiter.loadStride(0);
381 Fiter.loadStride(0);
382 Giter.loadStride(0);
383 Hiter.loadStride(0);
384 Iiter.loadStride(0);
385 Jiter.loadStride(0);
386 Kiter.loadStride(0);
387
388 for (int i=lbound0; i <= ubound0; ++i)
389 {
390 stencil.apply(Aiter, Biter, Citer, Diter, Eiter, Fiter, Giter,
391 Hiter, Iiter, Jiter, Kiter);
392
393 Aiter.advance();
394 Biter.advance();
395 Citer.advance();
396 Diter.advance();
397 Eiter.advance();
398 Fiter.advance();
399 Giter.advance();
400 Hiter.advance();
401 Iiter.advance();
402 Jiter.advance();
403 Kiter.advance();
404 }
405}
406
407/*
408 * These 11 versions of applyStencil handle from 1 to 11 array parameters.
409 * They pad their argument list with enough dummyArray objects to call
410 * applyStencil_imp with 11 array parameters.
411 */
412template<class T_stencil, class T_numtype1, int N_rank>
413inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A)
414{
415 applyStencil_imp(stencil, A, _dummyArray, _dummyArray,
416 _dummyArray, _dummyArray, _dummyArray, _dummyArray,
417 _dummyArray, _dummyArray, _dummyArray, _dummyArray);
418}
419
420template<class T_stencil, class T_numtype1, int N_rank, class T_array2>
421inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
422 T_array2& B)
423{
424 applyStencil_imp(stencil, A, B, _dummyArray, _dummyArray,
425 _dummyArray, _dummyArray, _dummyArray, _dummyArray,
426 _dummyArray, _dummyArray, _dummyArray);
427}
428
429template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
430 class T_array3>
431inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
432 T_array2& B, T_array3& C)
433{
434 applyStencil_imp(stencil, A, B, C, _dummyArray, _dummyArray,
435 _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray,
436 _dummyArray);
437}
438
439template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
440 class T_array3, class T_array4>
441inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
442 T_array2& B, T_array3& C, T_array4& D)
443{
444 applyStencil_imp(stencil, A, B, C, D, _dummyArray, _dummyArray,
445 _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
446}
447
448template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
449 class T_array3, class T_array4, class T_array5>
450inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
451 T_array2& B, T_array3& C, T_array4& D, T_array5& E)
452{
453 applyStencil_imp(stencil, A, B, C, D, E, _dummyArray,
454 _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
455}
456
457template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
458 class T_array3, class T_array4, class T_array5, class T_array6>
459inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
460 T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F)
461{
462 applyStencil_imp(stencil, A, B, C, D, E, F,
463 _dummyArray, _dummyArray, _dummyArray, _dummyArray, _dummyArray);
464}
465
466template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
467 class T_array3, class T_array4, class T_array5, class T_array6,
468 class T_array7>
469inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
470 T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
471 T_array7& G)
472{
473 applyStencil_imp(stencil, A, B, C, D, E, F, G,
474 _dummyArray, _dummyArray, _dummyArray, _dummyArray);
475}
476
477template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
478 class T_array3, class T_array4, class T_array5, class T_array6,
479 class T_array7, class T_array8>
480inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
481 T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
482 T_array7& G, T_array8& H)
483{
484 applyStencil_imp(stencil, A, B, C, D, E, F, G, H,
485 _dummyArray, _dummyArray, _dummyArray);
486}
487
488template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
489 class T_array3, class T_array4, class T_array5, class T_array6,
490 class T_array7, class T_array8, class T_array9>
491inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
492 T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
493 T_array7& G, T_array8& H, T_array9& I)
494{
495 applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I,
496 _dummyArray, _dummyArray);
497}
498
499template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
500 class T_array3, class T_array4, class T_array5, class T_array6,
501 class T_array7, class T_array8, class T_array9, class T_array10>
502inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
503 T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
504 T_array7& G, T_array8& H, T_array9& I, T_array10& J)
505{
506 applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I, J,
507 _dummyArray);
508}
509
510template<class T_stencil, class T_numtype1, int N_rank, class T_array2,
511 class T_array3, class T_array4, class T_array5, class T_array6,
512 class T_array7, class T_array8, class T_array9, class T_array10,
513 class T_array11>
514inline void applyStencil(const T_stencil& stencil, Array<T_numtype1,N_rank>& A,
515 T_array2& B, T_array3& C, T_array4& D, T_array5& E, T_array6& F,
516 T_array7& G, T_array8& H, T_array9& I, T_array10& J, T_array11& K)
517{
518 applyStencil_imp(stencil, A, B, C, D, E, F, G, H, I, J, K);
519}
520
521BZ_NAMESPACE_END
522
523#endif // BZ_ARRAYSTENCIL_CC
Note: See TracBrowser for help on using the repository browser.