| 1 | /*************************************************************************** | 
|---|
| 2 | * blitz/tuning.h      Platform-specific code tuning | 
|---|
| 3 | * | 
|---|
| 4 | * $Id: tuning.h,v 1.1.1.1 1999-11-26 16:37:05 ansari Exp $ | 
|---|
| 5 | * | 
|---|
| 6 | * Copyright (C) 1997,1998 Todd Veldhuizen <tveldhui@seurat.uwaterloo.ca> | 
|---|
| 7 | * | 
|---|
| 8 | * This program is free software; you can redistribute it and/or | 
|---|
| 9 | * modify it under the terms of the GNU General Public License | 
|---|
| 10 | * as published by the Free Software Foundation; either version 2 | 
|---|
| 11 | * of the License, or (at your option) any later version. | 
|---|
| 12 | * | 
|---|
| 13 | * This program is distributed in the hope that it will be useful, | 
|---|
| 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 16 | * GNU General Public License for more details. | 
|---|
| 17 | * | 
|---|
| 18 | * Suggestions:          blitz-suggest@cybervision.com | 
|---|
| 19 | * Bugs:                 blitz-bugs@cybervision.com | 
|---|
| 20 | * | 
|---|
| 21 | * For more information, please see the Blitz++ Home Page: | 
|---|
| 22 | *    http://seurat.uwaterloo.ca/blitz/ | 
|---|
| 23 | * | 
|---|
| 24 | *************************************************************************** | 
|---|
| 25 | * $Log: not supported by cvs2svn $ | 
|---|
| 26 | * Revision 1.1.1.1  1999/04/09  17:58:59  ansari | 
|---|
| 27 | * Creation module DPC/Blitz (blitz 0.4) Reza 09/04/99 | 
|---|
| 28 | * | 
|---|
| 29 | * Revision 1.6  1998/03/14 00:04:47  tveldhui | 
|---|
| 30 | * 0.2-alpha-05 | 
|---|
| 31 | * | 
|---|
| 32 | * Revision 1.5  1997/07/16 14:51:20  tveldhui | 
|---|
| 33 | * Update: Alpha release 0.2 (Arrays) | 
|---|
| 34 | * | 
|---|
| 35 | * Revision 1.4  1997/01/24 14:42:00  tveldhui | 
|---|
| 36 | * Periodic RCS update | 
|---|
| 37 | * | 
|---|
| 38 | * Revision 1.3  1997/01/23 03:28:28  tveldhui | 
|---|
| 39 | * Periodic RCS update | 
|---|
| 40 | * | 
|---|
| 41 | * Revision 1.2  1997/01/13 22:19:58  tveldhui | 
|---|
| 42 | * Periodic RCS update | 
|---|
| 43 | * | 
|---|
| 44 | * Revision 1.1  1996/11/11 17:29:13  tveldhui | 
|---|
| 45 | * Initial revision | 
|---|
| 46 | * | 
|---|
| 47 | * | 
|---|
| 48 | */ | 
|---|
| 49 |  | 
|---|
#ifndef BZ_TUNING_H
#define BZ_TUNING_H

/*
 * Cache size estimates.  These should be conservative (i.e. they
 * should underestimate the real cache sizes).
 */
#define BZ_L1_CACHE_ESTIMATED_SIZE    8192
#define BZ_L2_CACHE_ESTIMATED_SIZE    65536


/*
 * Default tuning switches.  A switch is enabled with #define and
 * disabled with #undef.  The platform-specific sections below may
 * override some of these defaults.
 */
#undef  BZ_PARTIAL_LOOP_UNROLL
#define BZ_PASS_EXPR_BY_VALUE
#undef  BZ_PTR_INC_FASTER_THAN_INDIRECTION
#define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
#undef  BZ_KCC_COPY_PROPAGATION_KLUDGE
#undef  BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
#undef  BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
#define BZ_INLINE_GROUP1
#define BZ_INLINE_GROUP2
#define BZ_COLLAPSE_LOOPS
#define BZ_USE_FAST_READ_ARRAY_EXPR
#define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
#undef  BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#undef  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#define BZ_ARRAY_2D_STENCIL_TILING
#define BZ_ARRAY_2D_STENCIL_TILE_SIZE       128
#undef  BZ_INTERLACE_ARRAYS


/*
 * Platform-specific tuning
 */

#ifdef _CRAYT3E
 // The backend compiler on the T3E does a better job of
 // loop unrolling.
 #undef BZ_PARTIAL_LOOP_UNROLL
 #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
 #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

#ifdef __GNUC__
 // The egcs compiler does a good job of loop unrolling, if
 // -funroll-loops is used.
 #undef BZ_PARTIAL_LOOP_UNROLL
 #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
 #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

// Defining BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE forces the kludge off.
#ifdef  BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE
 #undef BZ_KCC_COPY_PROPAGATION_KLUDGE
#endif

// _bz_inline1 expands to `inline` when BZ_INLINE_GROUP1 is defined,
// and to nothing otherwise.
#ifdef  BZ_INLINE_GROUP1
 #define _bz_inline1 inline
#else
 #define _bz_inline1
#endif

// _bz_inline2 expands to `inline` when BZ_INLINE_GROUP2 is defined,
// and to nothing otherwise.
#ifdef  BZ_INLINE_GROUP2
 #define _bz_inline2 inline
#else
 #define _bz_inline2
#endif

#ifdef __DECCXX
 // The DEC cxx compiler has problems with loop unrolling
 // because of aliasing.  Loop unrolling and anti-aliasing
 // are therefore done by Blitz++ itself.

 #define  BZ_PARTIAL_LOOP_UNROLL
 #define  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
 #define  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

/*
 * BZ_NO_PROPAGATE(X) prevents the compiler from performing
 * copy propagation on a variable.  This is used for loop
 * unrolling to prevent KAI C++ from rearranging the
 * ordering of memory accesses.
 *
 * By default the macro simply yields its argument.  The expansion is
 * parenthesized so that an argument containing operators keeps its
 * grouping inside a larger expression.
 */

#define BZ_NO_PROPAGATE(X)   (X)

#ifdef __KCC
#ifdef BZ_USE_NO_PROPAGATE
 extern "C" int __kai_apply(const char*, ...);

 // #undef takes only the macro name; a parameter list after it is
 // extra tokens that conforming preprocessors diagnose.
 #undef  BZ_NO_PROPAGATE
 #define BZ_NO_PROPAGATE(X)  __kai_apply("(%a)",&(X))
#endif
#endif

#endif // BZ_TUNING_H
|---|