| 1 | /** | 
|---|
| 2 | * @file dSFMT.c | 
|---|
| 3 | * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT) | 
|---|
| 4 | * based on IEEE 754 format. | 
|---|
| 5 | * | 
|---|
| 6 | * @author Mutsuo Saito (Hiroshima University) | 
|---|
| 7 | * @author Makoto Matsumoto (Hiroshima University) | 
|---|
| 8 | * | 
|---|
| 9 | * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima | 
|---|
| 10 | * University. All rights reserved. | 
|---|
| 11 | * | 
|---|
| 12 | * The new BSD License is applied to this software, see LICENSE.txt | 
|---|
| 13 | */ | 
|---|
| 14 | #include <stdio.h> | 
|---|
| 15 | #include <string.h> | 
|---|
| 16 | #include <stdlib.h> | 
|---|
| 17 | #include "dsfmtflags.h" | 
|---|
| 18 | #include "dSFMT-params.h" | 
|---|
| 19 |  | 
|---|
| 20 | /** dsfmt internal state vector */ | 
|---|
| 21 | dsfmt_t dsfmt_global_data; | 
|---|
| 22 | /** dsfmt mexp for check */ | 
|---|
| 23 | static const int dsfmt_mexp = DSFMT_MEXP; | 
|---|
| 24 |  | 
|---|
| 25 | /*---------------- | 
|---|
| 26 | STATIC FUNCTIONS | 
|---|
| 27 | ----------------*/ | 
|---|
| 28 | inline static uint32_t ini_func1(uint32_t x); | 
|---|
| 29 | inline static uint32_t ini_func2(uint32_t x); | 
|---|
| 30 | inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, | 
|---|
| 31 | int size); | 
|---|
| 32 | inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, | 
|---|
| 33 | int size); | 
|---|
| 34 | inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, | 
|---|
| 35 | int size); | 
|---|
| 36 | inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, | 
|---|
| 37 | int size); | 
|---|
| 38 | inline static int idxof(int i); | 
|---|
| 39 | static void initial_mask(dsfmt_t *dsfmt); | 
|---|
| 40 | static void period_certification(dsfmt_t *dsfmt); | 
|---|
| 41 |  | 
|---|
| 42 | #if defined(HAVE_SSE2) | 
|---|
| 43 | #  include <emmintrin.h> | 
|---|
| 44 | /** mask data for sse2 */ | 
|---|
| 45 | static __m128i sse2_param_mask; | 
|---|
| 46 | /** 1 in 64bit for sse2 */ | 
|---|
| 47 | static __m128i sse2_int_one; | 
|---|
| 48 | /** 2.0 double for sse2 */ | 
|---|
| 49 | static __m128d sse2_double_two; | 
|---|
| 50 | /** -1.0 double for sse2 */ | 
|---|
| 51 | static __m128d sse2_double_m_one; | 
|---|
| 52 |  | 
|---|
| 53 | static void setup_const(void); | 
|---|
| 54 | #endif | 
|---|
| 55 |  | 
|---|
| 56 | /** | 
|---|
| 57 | * This function simulate a 32-bit array index overlapped to 64-bit | 
|---|
| 58 | * array of LITTLE ENDIAN in BIG ENDIAN machine. | 
|---|
| 59 | */ | 
|---|
| 60 | #if defined(DSFMT_BIG_ENDIAN) | 
|---|
| 61 | inline static int idxof(int i) { | 
|---|
| 62 | return i ^ 1; | 
|---|
| 63 | } | 
|---|
| 64 | #else | 
|---|
| 65 | inline static int idxof(int i) { | 
|---|
| 66 | return i; | 
|---|
| 67 | } | 
|---|
| 68 | #endif | 
|---|
| 69 |  | 
|---|
| 70 | /** | 
|---|
| 71 | * This function represents the recursion formula. | 
|---|
| 72 | * @param r output | 
|---|
| 73 | * @param a a 128-bit part of the internal state array | 
|---|
| 74 | * @param b a 128-bit part of the internal state array | 
|---|
| 75 | * @param lung a 128-bit part of the internal state array | 
|---|
| 76 | */ | 
|---|
| 77 | #if defined(HAVE_ALTIVEC) | 
|---|
| 78 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, | 
|---|
| 79 | w128_t *lung) { | 
|---|
| 80 | const vector unsigned char sl1 = ALTI_SL1; | 
|---|
| 81 | const vector unsigned char sl1_perm = ALTI_SL1_PERM; | 
|---|
| 82 | const vector unsigned int sl1_msk = ALTI_SL1_MSK; | 
|---|
| 83 | const vector unsigned char sr1 = ALTI_SR; | 
|---|
| 84 | const vector unsigned char sr1_perm = ALTI_SR_PERM; | 
|---|
| 85 | const vector unsigned int sr1_msk = ALTI_SR_MSK; | 
|---|
| 86 | const vector unsigned char perm = ALTI_PERM; | 
|---|
| 87 | const vector unsigned int msk1 = ALTI_MSK; | 
|---|
| 88 | vector unsigned int w, x, y, z; | 
|---|
| 89 |  | 
|---|
| 90 | z = a->s; | 
|---|
| 91 | w = lung->s; | 
|---|
| 92 | x = vec_perm(w, (vector unsigned int)perm, perm); | 
|---|
| 93 | y = vec_perm(z, sl1_perm, sl1_perm); | 
|---|
| 94 | y = vec_sll(y, sl1); | 
|---|
| 95 | y = vec_and(y, sl1_msk); | 
|---|
| 96 | w = vec_xor(x, b->s); | 
|---|
| 97 | w = vec_xor(w, y); | 
|---|
| 98 | x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); | 
|---|
| 99 | x = vec_srl(x, sr1); | 
|---|
| 100 | x = vec_and(x, sr1_msk); | 
|---|
| 101 | y = vec_and(w, msk1); | 
|---|
| 102 | z = vec_xor(z, y); | 
|---|
| 103 | r->s = vec_xor(z, x); | 
|---|
| 104 | lung->s = w; | 
|---|
| 105 | } | 
|---|
| 106 | #elif defined(HAVE_SSE2) | 
|---|
| 107 | /** | 
|---|
| 108 | * This function setup some constant variables for SSE2. | 
|---|
| 109 | */ | 
|---|
| 110 | static void setup_const(void) { | 
|---|
| 111 | static int first = 1; | 
|---|
| 112 | if (!first) { | 
|---|
| 113 | return; | 
|---|
| 114 | } | 
|---|
| 115 | sse2_param_mask = _mm_set_epi32(DSFMT_MSK32_3, DSFMT_MSK32_4, | 
|---|
| 116 | DSFMT_MSK32_1, DSFMT_MSK32_2); | 
|---|
| 117 | sse2_int_one = _mm_set_epi32(0, 1, 0, 1); | 
|---|
| 118 | sse2_double_two = _mm_set_pd(2.0, 2.0); | 
|---|
| 119 | sse2_double_m_one = _mm_set_pd(-1.0, -1.0); | 
|---|
| 120 | first = 0; | 
|---|
| 121 | } | 
|---|
| 122 |  | 
|---|
| 123 | /** | 
|---|
| 124 | * This function represents the recursion formula. | 
|---|
| 125 | * @param r output 128-bit | 
|---|
| 126 | * @param a a 128-bit part of the internal state array | 
|---|
| 127 | * @param b a 128-bit part of the internal state array | 
|---|
| 128 | * @param d a 128-bit part of the internal state array (I/O) | 
|---|
| 129 | */ | 
|---|
| 130 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) { | 
|---|
| 131 | __m128i v, w, x, y, z; | 
|---|
| 132 |  | 
|---|
| 133 | x = a->si; | 
|---|
| 134 | z = _mm_slli_epi64(x, DSFMT_SL1); | 
|---|
| 135 | y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); | 
|---|
| 136 | z = _mm_xor_si128(z, b->si); | 
|---|
| 137 | y = _mm_xor_si128(y, z); | 
|---|
| 138 |  | 
|---|
| 139 | v = _mm_srli_epi64(y, DSFMT_SR); | 
|---|
| 140 | w = _mm_and_si128(y, sse2_param_mask); | 
|---|
| 141 | v = _mm_xor_si128(v, x); | 
|---|
| 142 | v = _mm_xor_si128(v, w); | 
|---|
| 143 | r->si = v; | 
|---|
| 144 | u->si = y; | 
|---|
| 145 | } | 
|---|
| 146 | #else /* standard C */ | 
|---|
| 147 | /** | 
|---|
| 148 | * This function represents the recursion formula. | 
|---|
| 149 | * @param r output 128-bit | 
|---|
| 150 | * @param a a 128-bit part of the internal state array | 
|---|
| 151 | * @param b a 128-bit part of the internal state array | 
|---|
| 152 | * @param lung a 128-bit part of the internal state array (I/O) | 
|---|
| 153 | */ | 
|---|
| 154 | inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, | 
|---|
| 155 | w128_t *lung) { | 
|---|
| 156 | uint64_t t0, t1, L0, L1; | 
|---|
| 157 |  | 
|---|
| 158 | t0 = a->u[0]; | 
|---|
| 159 | t1 = a->u[1]; | 
|---|
| 160 | L0 = lung->u[0]; | 
|---|
| 161 | L1 = lung->u[1]; | 
|---|
| 162 | lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; | 
|---|
| 163 | lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; | 
|---|
| 164 | r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; | 
|---|
| 165 | r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; | 
|---|
| 166 | } | 
|---|
| 167 | #endif | 
|---|
| 168 |  | 
|---|
| 169 | #if defined(HAVE_SSE2) | 
|---|
| 170 | /** | 
|---|
| 171 | * This function converts the double precision floating point numbers which | 
|---|
| 172 | * distribute uniformly in the range [1, 2) to those which distribute uniformly | 
|---|
| 173 | * in the range [0, 1). | 
|---|
| 174 | * @param w 128bit stracture of double precision floating point numbers (I/O) | 
|---|
| 175 | */ | 
|---|
| 176 | inline static void convert_c0o1(w128_t *w) { | 
|---|
| 177 | w->sd = _mm_add_pd(w->sd, sse2_double_m_one); | 
|---|
| 178 | } | 
|---|
| 179 |  | 
|---|
| 180 | /** | 
|---|
| 181 | * This function converts the double precision floating point numbers which | 
|---|
| 182 | * distribute uniformly in the range [1, 2) to those which distribute uniformly | 
|---|
| 183 | * in the range (0, 1]. | 
|---|
| 184 | * @param w 128bit stracture of double precision floating point numbers (I/O) | 
|---|
| 185 | */ | 
|---|
| 186 | inline static void convert_o0c1(w128_t *w) { | 
|---|
| 187 | w->sd = _mm_sub_pd(sse2_double_two, w->sd); | 
|---|
| 188 | } | 
|---|
| 189 |  | 
|---|
| 190 | /** | 
|---|
| 191 | * This function converts the double precision floating point numbers which | 
|---|
| 192 | * distribute uniformly in the range [1, 2) to those which distribute uniformly | 
|---|
| 193 | * in the range (0, 1). | 
|---|
| 194 | * @param w 128bit stracture of double precision floating point numbers (I/O) | 
|---|
| 195 | */ | 
|---|
| 196 | inline static void convert_o0o1(w128_t *w) { | 
|---|
| 197 | w->si = _mm_or_si128(w->si, sse2_int_one); | 
|---|
| 198 | w->sd = _mm_add_pd(w->sd, sse2_double_m_one); | 
|---|
| 199 | } | 
|---|
| 200 | #else /* standard C and altivec */ | 
|---|
| 201 | /** | 
|---|
| 202 | * This function converts the double precision floating point numbers which | 
|---|
| 203 | * distribute uniformly in the range [1, 2) to those which distribute uniformly | 
|---|
| 204 | * in the range [0, 1). | 
|---|
| 205 | * @param w 128bit stracture of double precision floating point numbers (I/O) | 
|---|
| 206 | */ | 
|---|
| 207 | inline static void convert_c0o1(w128_t *w) { | 
|---|
| 208 | w->d[0] -= 1.0; | 
|---|
| 209 | w->d[1] -= 1.0; | 
|---|
| 210 | } | 
|---|
| 211 |  | 
|---|
| 212 | /** | 
|---|
| 213 | * This function converts the double precision floating point numbers which | 
|---|
| 214 | * distribute uniformly in the range [1, 2) to those which distribute uniformly | 
|---|
| 215 | * in the range (0, 1]. | 
|---|
| 216 | * @param w 128bit stracture of double precision floating point numbers (I/O) | 
|---|
| 217 | */ | 
|---|
| 218 | inline static void convert_o0c1(w128_t *w) { | 
|---|
| 219 | w->d[0] = 2.0 - w->d[0]; | 
|---|
| 220 | w->d[1] = 2.0 - w->d[1]; | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|
| 223 | /** | 
|---|
| 224 | * This function converts the double precision floating point numbers which | 
|---|
| 225 | * distribute uniformly in the range [1, 2) to those which distribute uniformly | 
|---|
| 226 | * in the range (0, 1). | 
|---|
| 227 | * @param w 128bit stracture of double precision floating point numbers (I/O) | 
|---|
| 228 | */ | 
|---|
| 229 | inline static void convert_o0o1(w128_t *w) { | 
|---|
| 230 | w->u[0] |= 1; | 
|---|
| 231 | w->u[1] |= 1; | 
|---|
| 232 | w->d[0] -= 1.0; | 
|---|
| 233 | w->d[1] -= 1.0; | 
|---|
| 234 | } | 
|---|
| 235 | #endif | 
|---|
| 236 |  | 
|---|
| 237 | /** | 
|---|
| 238 | * This function fills the user-specified array with double precision | 
|---|
| 239 | * floating point pseudorandom numbers of the IEEE 754 format. | 
|---|
| 240 | * @param dsfmt dsfmt state vector. | 
|---|
| 241 | * @param array an 128-bit array to be filled by pseudorandom numbers. | 
|---|
| 242 | * @param size number of 128-bit pseudorandom numbers to be generated. | 
|---|
| 243 | */ | 
|---|
| 244 | inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, | 
|---|
| 245 | int size) { | 
|---|
| 246 | int i, j; | 
|---|
| 247 | w128_t lung; | 
|---|
| 248 |  | 
|---|
| 249 | lung = dsfmt->status[DSFMT_N]; | 
|---|
| 250 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], | 
|---|
| 251 | &lung); | 
|---|
| 252 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { | 
|---|
| 253 | do_recursion(&array[i], &dsfmt->status[i], | 
|---|
| 254 | &dsfmt->status[i + DSFMT_POS1], &lung); | 
|---|
| 255 | } | 
|---|
| 256 | for (; i < DSFMT_N; i++) { | 
|---|
| 257 | do_recursion(&array[i], &dsfmt->status[i], | 
|---|
| 258 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 259 | } | 
|---|
| 260 | for (; i < size - DSFMT_N; i++) { | 
|---|
| 261 | do_recursion(&array[i], &array[i - DSFMT_N], | 
|---|
| 262 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 263 | } | 
|---|
| 264 | for (j = 0; j < 2 * DSFMT_N - size; j++) { | 
|---|
| 265 | dsfmt->status[j] = array[j + size - DSFMT_N]; | 
|---|
| 266 | } | 
|---|
| 267 | for (; i < size; i++, j++) { | 
|---|
| 268 | do_recursion(&array[i], &array[i - DSFMT_N], | 
|---|
| 269 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 270 | dsfmt->status[j] = array[i]; | 
|---|
| 271 | } | 
|---|
| 272 | dsfmt->status[DSFMT_N] = lung; | 
|---|
| 273 | } | 
|---|
| 274 |  | 
|---|
| 275 | /** | 
|---|
| 276 | * This function fills the user-specified array with double precision | 
|---|
| 277 | * floating point pseudorandom numbers of the IEEE 754 format. | 
|---|
| 278 | * @param dsfmt dsfmt state vector. | 
|---|
| 279 | * @param array an 128-bit array to be filled by pseudorandom numbers. | 
|---|
| 280 | * @param size number of 128-bit pseudorandom numbers to be generated. | 
|---|
| 281 | */ | 
|---|
| 282 | inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, | 
|---|
| 283 | int size) { | 
|---|
| 284 | int i, j; | 
|---|
| 285 | w128_t lung; | 
|---|
| 286 |  | 
|---|
| 287 | lung = dsfmt->status[DSFMT_N]; | 
|---|
| 288 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], | 
|---|
| 289 | &lung); | 
|---|
| 290 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { | 
|---|
| 291 | do_recursion(&array[i], &dsfmt->status[i], | 
|---|
| 292 | &dsfmt->status[i + DSFMT_POS1], &lung); | 
|---|
| 293 | } | 
|---|
| 294 | for (; i < DSFMT_N; i++) { | 
|---|
| 295 | do_recursion(&array[i], &dsfmt->status[i], | 
|---|
| 296 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 297 | } | 
|---|
| 298 | for (; i < size - DSFMT_N; i++) { | 
|---|
| 299 | do_recursion(&array[i], &array[i - DSFMT_N], | 
|---|
| 300 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 301 | convert_c0o1(&array[i - DSFMT_N]); | 
|---|
| 302 | } | 
|---|
| 303 | for (j = 0; j < 2 * DSFMT_N - size; j++) { | 
|---|
| 304 | dsfmt->status[j] = array[j + size - DSFMT_N]; | 
|---|
| 305 | } | 
|---|
| 306 | for (; i < size; i++, j++) { | 
|---|
| 307 | do_recursion(&array[i], &array[i - DSFMT_N], | 
|---|
| 308 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 309 | dsfmt->status[j] = array[i]; | 
|---|
| 310 | convert_c0o1(&array[i - DSFMT_N]); | 
|---|
| 311 | } | 
|---|
| 312 | for (i = size - DSFMT_N; i < size; i++) { | 
|---|
| 313 | convert_c0o1(&array[i]); | 
|---|
| 314 | } | 
|---|
| 315 | dsfmt->status[DSFMT_N] = lung; | 
|---|
| 316 | } | 
|---|
| 317 |  | 
|---|
| 318 | /** | 
|---|
| 319 | * This function fills the user-specified array with double precision | 
|---|
| 320 | * floating point pseudorandom numbers of the IEEE 754 format. | 
|---|
| 321 | * @param dsfmt dsfmt state vector. | 
|---|
| 322 | * @param array an 128-bit array to be filled by pseudorandom numbers. | 
|---|
| 323 | * @param size number of 128-bit pseudorandom numbers to be generated. | 
|---|
| 324 | */ | 
|---|
| 325 | inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, | 
|---|
| 326 | int size) { | 
|---|
| 327 | int i, j; | 
|---|
| 328 | w128_t lung; | 
|---|
| 329 |  | 
|---|
| 330 | lung = dsfmt->status[DSFMT_N]; | 
|---|
| 331 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], | 
|---|
| 332 | &lung); | 
|---|
| 333 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { | 
|---|
| 334 | do_recursion(&array[i], &dsfmt->status[i], | 
|---|
| 335 | &dsfmt->status[i + DSFMT_POS1], &lung); | 
|---|
| 336 | } | 
|---|
| 337 | for (; i < DSFMT_N; i++) { | 
|---|
| 338 | do_recursion(&array[i], &dsfmt->status[i], | 
|---|
| 339 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 340 | } | 
|---|
| 341 | for (; i < size - DSFMT_N; i++) { | 
|---|
| 342 | do_recursion(&array[i], &array[i - DSFMT_N], | 
|---|
| 343 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 344 | convert_o0o1(&array[i - DSFMT_N]); | 
|---|
| 345 | } | 
|---|
| 346 | for (j = 0; j < 2 * DSFMT_N - size; j++) { | 
|---|
| 347 | dsfmt->status[j] = array[j + size - DSFMT_N]; | 
|---|
| 348 | } | 
|---|
| 349 | for (; i < size; i++, j++) { | 
|---|
| 350 | do_recursion(&array[i], &array[i - DSFMT_N], | 
|---|
| 351 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 352 | dsfmt->status[j] = array[i]; | 
|---|
| 353 | convert_o0o1(&array[i - DSFMT_N]); | 
|---|
| 354 | } | 
|---|
| 355 | for (i = size - DSFMT_N; i < size; i++) { | 
|---|
| 356 | convert_o0o1(&array[i]); | 
|---|
| 357 | } | 
|---|
| 358 | dsfmt->status[DSFMT_N] = lung; | 
|---|
| 359 | } | 
|---|
| 360 |  | 
|---|
| 361 | /** | 
|---|
| 362 | * This function fills the user-specified array with double precision | 
|---|
| 363 | * floating point pseudorandom numbers of the IEEE 754 format. | 
|---|
| 364 | * @param dsfmt dsfmt state vector. | 
|---|
| 365 | * @param array an 128-bit array to be filled by pseudorandom numbers. | 
|---|
| 366 | * @param size number of 128-bit pseudorandom numbers to be generated. | 
|---|
| 367 | */ | 
|---|
| 368 | inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, | 
|---|
| 369 | int size) { | 
|---|
| 370 | int i, j; | 
|---|
| 371 | w128_t lung; | 
|---|
| 372 |  | 
|---|
| 373 | lung = dsfmt->status[DSFMT_N]; | 
|---|
| 374 | do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], | 
|---|
| 375 | &lung); | 
|---|
| 376 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { | 
|---|
| 377 | do_recursion(&array[i], &dsfmt->status[i], | 
|---|
| 378 | &dsfmt->status[i + DSFMT_POS1], &lung); | 
|---|
| 379 | } | 
|---|
| 380 | for (; i < DSFMT_N; i++) { | 
|---|
| 381 | do_recursion(&array[i], &dsfmt->status[i], | 
|---|
| 382 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 383 | } | 
|---|
| 384 | for (; i < size - DSFMT_N; i++) { | 
|---|
| 385 | do_recursion(&array[i], &array[i - DSFMT_N], | 
|---|
| 386 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 387 | convert_o0c1(&array[i - DSFMT_N]); | 
|---|
| 388 | } | 
|---|
| 389 | for (j = 0; j < 2 * DSFMT_N - size; j++) { | 
|---|
| 390 | dsfmt->status[j] = array[j + size - DSFMT_N]; | 
|---|
| 391 | } | 
|---|
| 392 | for (; i < size; i++, j++) { | 
|---|
| 393 | do_recursion(&array[i], &array[i - DSFMT_N], | 
|---|
| 394 | &array[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 395 | dsfmt->status[j] = array[i]; | 
|---|
| 396 | convert_o0c1(&array[i - DSFMT_N]); | 
|---|
| 397 | } | 
|---|
| 398 | for (i = size - DSFMT_N; i < size; i++) { | 
|---|
| 399 | convert_o0c1(&array[i]); | 
|---|
| 400 | } | 
|---|
| 401 | dsfmt->status[DSFMT_N] = lung; | 
|---|
| 402 | } | 
|---|
| 403 |  | 
|---|
| 404 | /** | 
|---|
| 405 | * This function represents a function used in the initialization | 
|---|
| 406 | * by init_by_array | 
|---|
| 407 | * @param x 32-bit integer | 
|---|
| 408 | * @return 32-bit integer | 
|---|
| 409 | */ | 
|---|
| 410 | static uint32_t ini_func1(uint32_t x) { | 
|---|
| 411 | return (x ^ (x >> 27)) * (uint32_t)1664525UL; | 
|---|
| 412 | } | 
|---|
| 413 |  | 
|---|
| 414 | /** | 
|---|
| 415 | * This function represents a function used in the initialization | 
|---|
| 416 | * by init_by_array | 
|---|
| 417 | * @param x 32-bit integer | 
|---|
| 418 | * @return 32-bit integer | 
|---|
| 419 | */ | 
|---|
| 420 | static uint32_t ini_func2(uint32_t x) { | 
|---|
| 421 | return (x ^ (x >> 27)) * (uint32_t)1566083941UL; | 
|---|
| 422 | } | 
|---|
| 423 |  | 
|---|
| 424 | /** | 
|---|
| 425 | * This function initializes the internal state array to fit the IEEE | 
|---|
| 426 | * 754 format. | 
|---|
| 427 | * @param dsfmt dsfmt state vector. | 
|---|
| 428 | */ | 
|---|
| 429 | static void initial_mask(dsfmt_t *dsfmt) { | 
|---|
| 430 | int i; | 
|---|
| 431 | uint64_t *psfmt; | 
|---|
| 432 |  | 
|---|
| 433 | psfmt = &dsfmt->status[0].u[0]; | 
|---|
| 434 | for (i = 0; i < DSFMT_N * 2; i++) { | 
|---|
| 435 | psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST; | 
|---|
| 436 | } | 
|---|
| 437 | } | 
|---|
| 438 |  | 
|---|
| 439 | /** | 
|---|
| 440 | * This function certificate the period of 2^{SFMT_MEXP}-1. | 
|---|
| 441 | * @param dsfmt dsfmt state vector. | 
|---|
| 442 | */ | 
|---|
| 443 | static void period_certification(dsfmt_t *dsfmt) { | 
|---|
| 444 | uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2}; | 
|---|
| 445 | uint64_t tmp[2]; | 
|---|
| 446 | uint64_t inner; | 
|---|
| 447 | int i; | 
|---|
| 448 | #if (DSFMT_PCV2 & 1) != 1 | 
|---|
| 449 | int j; | 
|---|
| 450 | uint64_t work; | 
|---|
| 451 | #endif | 
|---|
| 452 |  | 
|---|
| 453 | tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1); | 
|---|
| 454 | tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2); | 
|---|
| 455 |  | 
|---|
| 456 | inner = tmp[0] & pcv[0]; | 
|---|
| 457 | inner ^= tmp[1] & pcv[1]; | 
|---|
| 458 | for (i = 32; i > 0; i >>= 1) { | 
|---|
| 459 | inner ^= inner >> i; | 
|---|
| 460 | } | 
|---|
| 461 | inner &= 1; | 
|---|
| 462 | /* check OK */ | 
|---|
| 463 | if (inner == 1) { | 
|---|
| 464 | return; | 
|---|
| 465 | } | 
|---|
| 466 | /* check NG, and modification */ | 
|---|
| 467 | #if (DSFMT_PCV2 & 1) == 1 | 
|---|
| 468 | dsfmt->status[DSFMT_N].u[1] ^= 1; | 
|---|
| 469 | #else | 
|---|
| 470 | for (i = 1; i >= 0; i--) { | 
|---|
| 471 | work = 1; | 
|---|
| 472 | for (j = 0; j < 64; j++) { | 
|---|
| 473 | if ((work & pcv[i]) != 0) { | 
|---|
| 474 | dsfmt->status[DSFMT_N].u[i] ^= work; | 
|---|
| 475 | return; | 
|---|
| 476 | } | 
|---|
| 477 | work = work << 1; | 
|---|
| 478 | } | 
|---|
| 479 | } | 
|---|
| 480 | #endif | 
|---|
| 481 | return; | 
|---|
| 482 | } | 
|---|
| 483 |  | 
|---|
| 484 | /*---------------- | 
|---|
| 485 | PUBLIC FUNCTIONS | 
|---|
| 486 | ----------------*/ | 
|---|
| 487 | /** | 
|---|
| 488 | * This function returns the identification string.  The string shows | 
|---|
| 489 | * the Mersenne exponent, and all parameters of this generator. | 
|---|
| 490 | * @return id string. | 
|---|
| 491 | */ | 
|---|
| 492 | const char *dsfmt_get_idstring(void) { | 
|---|
| 493 | return DSFMT_IDSTR; | 
|---|
| 494 | } | 
|---|
| 495 |  | 
|---|
| 496 | /** | 
|---|
| 497 | * This function returns the minimum size of array used for \b | 
|---|
| 498 | * fill_array functions. | 
|---|
| 499 | * @return minimum size of array used for fill_array functions. | 
|---|
| 500 | */ | 
|---|
| 501 | int dsfmt_get_min_array_size(void) { | 
|---|
| 502 | return DSFMT_N64; | 
|---|
| 503 | } | 
|---|
| 504 |  | 
|---|
| 505 | /** | 
|---|
| 506 | * This function fills the internal state array with double precision | 
|---|
| 507 | * floating point pseudorandom numbers of the IEEE 754 format. | 
|---|
| 508 | * @param dsfmt dsfmt state vector. | 
|---|
| 509 | */ | 
|---|
| 510 | void dsfmt_gen_rand_all(dsfmt_t *dsfmt) { | 
|---|
| 511 | int i; | 
|---|
| 512 | w128_t lung; | 
|---|
| 513 |  | 
|---|
| 514 | lung = dsfmt->status[DSFMT_N]; | 
|---|
| 515 | do_recursion(&dsfmt->status[0], &dsfmt->status[0], | 
|---|
| 516 | &dsfmt->status[DSFMT_POS1], &lung); | 
|---|
| 517 | for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { | 
|---|
| 518 | do_recursion(&dsfmt->status[i], &dsfmt->status[i], | 
|---|
| 519 | &dsfmt->status[i + DSFMT_POS1], &lung); | 
|---|
| 520 | } | 
|---|
| 521 | for (; i < DSFMT_N; i++) { | 
|---|
| 522 | do_recursion(&dsfmt->status[i], &dsfmt->status[i], | 
|---|
| 523 | &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung); | 
|---|
| 524 | } | 
|---|
| 525 | dsfmt->status[DSFMT_N] = lung; | 
|---|
| 526 | } | 
|---|
| 527 |  | 
|---|
| 528 | /** | 
|---|
| 529 | * This function generates double precision floating point | 
|---|
| 530 | * pseudorandom numbers which distribute in the range [1, 2) to the | 
|---|
| 531 | * specified array[] by one call. The number of pseudorandom numbers | 
|---|
| 532 | * is specified by the argument \b size, which must be at least (SFMT_MEXP | 
|---|
| 533 | * / 128) * 2 and a multiple of two.  The function | 
|---|
| 534 | * get_min_array_size() returns this minimum size.  The generation by | 
|---|
| 535 | * this function is much faster than the following fill_array_xxx functions. | 
|---|
| 536 | * | 
|---|
| 537 | * For initialization, init_gen_rand() or init_by_array() must be called | 
|---|
| 538 | * before the first call of this function. This function can not be | 
|---|
| 539 | * used after calling genrand_xxx functions, without initialization. | 
|---|
| 540 | * | 
|---|
| 541 | * @param dsfmt dsfmt state vector. | 
|---|
| 542 | * @param array an array where pseudorandom numbers are filled | 
|---|
| 543 | * by this function.  The pointer to the array must be "aligned" | 
|---|
| 544 | * (namely, must be a multiple of 16) in the SIMD version, since it | 
|---|
| 545 | * refers to the address of a 128-bit integer.  In the standard C | 
|---|
| 546 | * version, the pointer is arbitrary. | 
|---|
| 547 | * | 
|---|
| 548 | * @param size the number of 64-bit pseudorandom integers to be | 
|---|
| 549 | * generated.  size must be a multiple of 2, and greater than or equal | 
|---|
| 550 | * to (SFMT_MEXP / 128) * 2. | 
|---|
| 551 | * | 
|---|
| 552 | * @note \b memalign or \b posix_memalign is available to get aligned | 
|---|
| 553 | * memory. Mac OSX doesn't have these functions, but \b malloc of OSX | 
|---|
| 554 | * returns the pointer to the aligned memory block. | 
|---|
| 555 | */ | 
|---|
| 556 | void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) { | 
|---|
| 557 | assert(size % 2 == 0); | 
|---|
| 558 | assert(size >= DSFMT_N64); | 
|---|
| 559 | gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2); | 
|---|
| 560 | } | 
|---|
| 561 |  | 
|---|
| 562 | /** | 
|---|
| 563 | * This function generates double precision floating point | 
|---|
| 564 | * pseudorandom numbers which distribute in the range (0, 1] to the | 
|---|
| 565 | * specified array[] by one call. This function is the same as | 
|---|
| 566 | * fill_array_close1_open2() except the distribution range. | 
|---|
| 567 | * | 
|---|
| 568 | * @param dsfmt dsfmt state vector. | 
|---|
| 569 | * @param array an array where pseudorandom numbers are filled | 
|---|
| 570 | * by this function. | 
|---|
| 571 | * @param size the number of pseudorandom numbers to be generated. | 
|---|
| 572 | * see also \sa fill_array_close1_open2() | 
|---|
| 573 | */ | 
|---|
| 574 | void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) { | 
|---|
| 575 | assert(size % 2 == 0); | 
|---|
| 576 | assert(size >= DSFMT_N64); | 
|---|
| 577 | gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2); | 
|---|
| 578 | } | 
|---|
| 579 |  | 
|---|
| 580 | /** | 
|---|
| 581 | * This function generates double precision floating point | 
|---|
| 582 | * pseudorandom numbers which distribute in the range [0, 1) to the | 
|---|
| 583 | * specified array[] by one call. This function is the same as | 
|---|
| 584 | * fill_array_close1_open2() except the distribution range. | 
|---|
| 585 | * | 
|---|
| 586 | * @param array an array where pseudorandom numbers are filled | 
|---|
| 587 | * by this function. | 
|---|
| 588 | * @param dsfmt dsfmt state vector. | 
|---|
| 589 | * @param size the number of pseudorandom numbers to be generated. | 
|---|
| 590 | * see also \sa fill_array_close1_open2() | 
|---|
| 591 | */ | 
|---|
| 592 | void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) { | 
|---|
| 593 | assert(size % 2 == 0); | 
|---|
| 594 | assert(size >= DSFMT_N64); | 
|---|
| 595 | gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2); | 
|---|
| 596 | } | 
|---|
| 597 |  | 
|---|
| 598 | /** | 
|---|
| 599 | * This function generates double precision floating point | 
|---|
| 600 | * pseudorandom numbers which distribute in the range (0, 1) to the | 
|---|
| 601 | * specified array[] by one call. This function is the same as | 
|---|
| 602 | * fill_array_close1_open2() except the distribution range. | 
|---|
| 603 | * | 
|---|
| 604 | * @param dsfmt dsfmt state vector. | 
|---|
| 605 | * @param array an array where pseudorandom numbers are filled | 
|---|
| 606 | * by this function. | 
|---|
| 607 | * @param size the number of pseudorandom numbers to be generated. | 
|---|
| 608 | * see also \sa fill_array_close1_open2() | 
|---|
| 609 | */ | 
|---|
| 610 | void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) { | 
|---|
| 611 | assert(size % 2 == 0); | 
|---|
| 612 | assert(size >= DSFMT_N64); | 
|---|
| 613 | gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2); | 
|---|
| 614 | } | 
|---|
| 615 |  | 
|---|
| 616 | #if defined(__INTEL_COMPILER) | 
|---|
| 617 | #  pragma warning(disable:981) | 
|---|
| 618 | #endif | 
|---|
| 619 | /** | 
|---|
| 620 | * This function initializes the internal state array with a 32-bit | 
|---|
| 621 | * integer seed. | 
|---|
| 622 | * @param dsfmt dsfmt state vector. | 
|---|
| 623 | * @param seed a 32-bit integer used as the seed. | 
|---|
| 624 | * @param mexp caller's mersenne expornent | 
|---|
| 625 | */ | 
|---|
| 626 | void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) { | 
|---|
| 627 | int i; | 
|---|
| 628 | uint32_t *psfmt; | 
|---|
| 629 |  | 
|---|
| 630 | /* make sure caller program is compiled with the same MEXP */ | 
|---|
| 631 | if (mexp != dsfmt_mexp) { | 
|---|
| 632 | fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); | 
|---|
| 633 | exit(1); | 
|---|
| 634 | } | 
|---|
| 635 | psfmt = &dsfmt->status[0].u32[0]; | 
|---|
| 636 | psfmt[idxof(0)] = seed; | 
|---|
| 637 | for (i = 1; i < (DSFMT_N + 1) * 4; i++) { | 
|---|
| 638 | psfmt[idxof(i)] = 1812433253UL | 
|---|
| 639 | * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i; | 
|---|
| 640 | } | 
|---|
| 641 | initial_mask(dsfmt); | 
|---|
| 642 | period_certification(dsfmt); | 
|---|
| 643 | dsfmt->idx = DSFMT_N64; | 
|---|
| 644 | #if defined(HAVE_SSE2) | 
|---|
| 645 | setup_const(); | 
|---|
| 646 | #endif | 
|---|
| 647 | } | 
|---|
| 648 |  | 
|---|
| 649 | /** | 
|---|
| 650 | * This function initializes the internal state array, | 
|---|
| 651 | * with an array of 32-bit integers used as the seeds | 
|---|
| 652 | * @param dsfmt dsfmt state vector. | 
|---|
| 653 | * @param init_key the array of 32-bit integers, used as a seed. | 
|---|
| 654 | * @param key_length the length of init_key. | 
|---|
| 655 | * @param mexp caller's mersenne expornent | 
|---|
| 656 | */ | 
|---|
| 657 | void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], | 
|---|
| 658 | int key_length, int mexp) { | 
|---|
| 659 | int i, j, count; | 
|---|
| 660 | uint32_t r; | 
|---|
| 661 | uint32_t *psfmt32; | 
|---|
| 662 | int lag; | 
|---|
| 663 | int mid; | 
|---|
| 664 | int size = (DSFMT_N + 1) * 4;       /* pulmonary */ | 
|---|
| 665 |  | 
|---|
| 666 | /* make sure caller program is compiled with the same MEXP */ | 
|---|
| 667 | if (mexp != dsfmt_mexp) { | 
|---|
| 668 | fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); | 
|---|
| 669 | exit(1); | 
|---|
| 670 | } | 
|---|
| 671 | if (size >= 623) { | 
|---|
| 672 | lag = 11; | 
|---|
| 673 | } else if (size >= 68) { | 
|---|
| 674 | lag = 7; | 
|---|
| 675 | } else if (size >= 39) { | 
|---|
| 676 | lag = 5; | 
|---|
| 677 | } else { | 
|---|
| 678 | lag = 3; | 
|---|
| 679 | } | 
|---|
| 680 | mid = (size - lag) / 2; | 
|---|
| 681 |  | 
|---|
| 682 | psfmt32 = &dsfmt->status[0].u32[0]; | 
|---|
| 683 | memset(dsfmt->status, 0x8b, sizeof(dsfmt->status)); | 
|---|
| 684 | if (key_length + 1 > size) { | 
|---|
| 685 | count = key_length + 1; | 
|---|
| 686 | } else { | 
|---|
| 687 | count = size; | 
|---|
| 688 | } | 
|---|
| 689 | r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)] | 
|---|
| 690 | ^ psfmt32[idxof((size - 1) % size)]); | 
|---|
| 691 | psfmt32[idxof(mid % size)] += r; | 
|---|
| 692 | r += key_length; | 
|---|
| 693 | psfmt32[idxof((mid + lag) % size)] += r; | 
|---|
| 694 | psfmt32[idxof(0)] = r; | 
|---|
| 695 | count--; | 
|---|
| 696 | for (i = 1, j = 0; (j < count) && (j < key_length); j++) { | 
|---|
| 697 | r = ini_func1(psfmt32[idxof(i)] | 
|---|
| 698 | ^ psfmt32[idxof((i + mid) % size)] | 
|---|
| 699 | ^ psfmt32[idxof((i + size - 1) % size)]); | 
|---|
| 700 | psfmt32[idxof((i + mid) % size)] += r; | 
|---|
| 701 | r += init_key[j] + i; | 
|---|
| 702 | psfmt32[idxof((i + mid + lag) % size)] += r; | 
|---|
| 703 | psfmt32[idxof(i)] = r; | 
|---|
| 704 | i = (i + 1) % size; | 
|---|
| 705 | } | 
|---|
| 706 | for (; j < count; j++) { | 
|---|
| 707 | r = ini_func1(psfmt32[idxof(i)] | 
|---|
| 708 | ^ psfmt32[idxof((i + mid) % size)] | 
|---|
| 709 | ^ psfmt32[idxof((i + size - 1) % size)]); | 
|---|
| 710 | psfmt32[idxof((i + mid) % size)] += r; | 
|---|
| 711 | r += i; | 
|---|
| 712 | psfmt32[idxof((i + mid + lag) % size)] += r; | 
|---|
| 713 | psfmt32[idxof(i)] = r; | 
|---|
| 714 | i = (i + 1) % size; | 
|---|
| 715 | } | 
|---|
| 716 | for (j = 0; j < size; j++) { | 
|---|
| 717 | r = ini_func2(psfmt32[idxof(i)] | 
|---|
| 718 | + psfmt32[idxof((i + mid) % size)] | 
|---|
| 719 | + psfmt32[idxof((i + size - 1) % size)]); | 
|---|
| 720 | psfmt32[idxof((i + mid) % size)] ^= r; | 
|---|
| 721 | r -= i; | 
|---|
| 722 | psfmt32[idxof((i + mid + lag) % size)] ^= r; | 
|---|
| 723 | psfmt32[idxof(i)] = r; | 
|---|
| 724 | i = (i + 1) % size; | 
|---|
| 725 | } | 
|---|
| 726 | initial_mask(dsfmt); | 
|---|
| 727 | period_certification(dsfmt); | 
|---|
| 728 | dsfmt->idx = DSFMT_N64; | 
|---|
| 729 | #if defined(HAVE_SSE2) | 
|---|
| 730 | setup_const(); | 
|---|
| 731 | #endif | 
|---|
| 732 | } | 
|---|
| 733 | #if defined(__INTEL_COMPILER) | 
|---|
| 734 | #  pragma warning(default:981) | 
|---|
| 735 | #endif | 
|---|