1 /++ 2 This module contains summation algorithms. 3 4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 5 6 Authors: Ilya Yaroshenko 7 8 Copyright: 2020 Ilya Yaroshenko, Kaleidic Associates Advisory Limited, Symmetry Investments 9 +/ 10 module mir.math.sum; 11 12 /// 13 version(mir_test) 14 unittest 15 { 16 import mir.ndslice.slice: sliced; 17 import mir.ndslice.topology: map; 18 auto ar = [1, 1e100, 1, -1e100].sliced.map!"a * 10_000"; 19 const r = 20_000; 20 assert(r == ar.sum!"kbn"); 21 assert(r == ar.sum!"kb2"); 22 assert(r == ar.sum!"precise"); 23 assert(r == ar.sum!"decimal"); 24 } 25 26 /// Decimal precise summation 27 version(mir_test) 28 unittest 29 { 30 auto ar = [777.7, -777]; 31 assert(ar.sum!"decimal" == 0.7); 32 assert(sum!"decimal"(777.7, -777) == 0.7); 33 34 // The exact binary reuslt is 0.7000000000000455 35 assert(ar[0] + ar[1] == 0.7000000000000455); 36 assert(ar.sum!"fast" == 0.7000000000000455); 37 assert(ar.sum!"kahan" == 0.7000000000000455); 38 assert(ar.sum!"kbn" == 0.7000000000000455); 39 assert(ar.sum!"kb2" == 0.7000000000000455); 40 assert(ar.sum!"precise" == 0.7000000000000455); 41 } 42 43 /// 44 version(mir_test) 45 unittest 46 { 47 import mir.ndslice.slice: sliced, slicedField; 48 import mir.ndslice.topology: map, iota, retro; 49 import mir.ndslice.concatenation: concatenation; 50 import mir.math.common; 51 auto ar = 1000 52 .iota 53 .map!(n => 1.7L.pow(n+1) - 1.7L.pow(n)) 54 ; 55 real d = 1.7L.pow(1000); 56 assert(sum!"precise"(concatenation(ar, [-d].sliced).slicedField) == -1); 57 assert(sum!"precise"(ar.retro, -d) == -1); 58 } 59 60 /++ 61 `Naive`, `Pairwise` and `Kahan` algorithms can be used for user defined types. 62 +/ 63 version(mir_test) 64 unittest 65 { 66 import mir.internal.utility: isFloatingPoint; 67 static struct Quaternion(F) 68 if (isFloatingPoint!F) 69 { 70 F[4] rijk; 71 72 /// + and - operator overloading 73 Quaternion opBinary(string op)(auto ref const Quaternion rhs) const 74 if (op == "+" || op == "-") 75 { 76 Quaternion ret ; 77 foreach (i, ref e; ret.rijk) 78 mixin("e = rijk[i] "~op~" rhs.rijk[i];"); 79 return ret; 80 } 81 82 /// += and -= operator overloading 83 Quaternion opOpAssign(string op)(auto ref const Quaternion rhs) 84 if (op == "+" || op == "-") 85 { 86 foreach (i, ref e; rijk) 87 mixin("e "~op~"= rhs.rijk[i];"); 88 return this; 89 } 90 91 ///constructor with single FP argument 92 this(F f) 93 { 94 rijk[] = f; 95 } 96 97 ///assigment with single FP argument 98 void opAssign(F f) 99 { 100 rijk[] = f; 101 } 102 } 103 104 Quaternion!double q, p, r; 105 q.rijk = [0, 1, 2, 4]; 106 p.rijk = [3, 4, 5, 9]; 107 r.rijk = [3, 5, 7, 13]; 108 109 assert(r == [p, q].sum!"naive"); 110 assert(r == [p, q].sum!"pairwise"); 111 assert(r == [p, q].sum!"kahan"); 112 } 113 114 /++ 115 All summation algorithms available for complex numbers. 116 +/ 117 version(mir_builtincomplex_test) 118 unittest 119 { 120 cdouble[] ar = [1.0 + 2i, 2 + 3i, 3 + 4i, 4 + 5i]; 121 cdouble r = 10 + 14i; 122 assert(r == ar.sum!"fast"); 123 assert(r == ar.sum!"naive"); 124 assert(r == ar.sum!"pairwise"); 125 assert(r == ar.sum!"kahan"); 126 version(LDC) // DMD Internal error: backend/cgxmm.c 628 127 { 128 assert(r == ar.sum!"kbn"); 129 assert(r == ar.sum!"kb2"); 130 } 131 assert(r == ar.sum!"precise"); 132 assert(r == ar.sum!"decimal"); 133 } 134 135 /// 136 version(mir_test) 137 unittest 138 { 139 import std.complex: Complex; 140 141 auto ar = [Complex!double(1.0, 2), Complex!double(2.0, 3), Complex!double(3.0, 4), Complex!double(4.0, 5)]; 142 Complex!double r = Complex!double(10.0, 14); 143 assert(r == ar.sum!"fast"); 144 assert(r == ar.sum!"naive"); 145 assert(r == ar.sum!"pairwise"); 146 assert(r == ar.sum!"kahan"); 147 version(LDC) // DMD Internal error: backend/cgxmm.c 628 148 { 149 assert(r == ar.sum!"kbn"); 150 assert(r == ar.sum!"kb2"); 151 } 152 assert(r == ar.sum!"precise"); 153 assert(r == ar.sum!"decimal"); 154 } 155 156 /// 157 version(mir_test) 158 @safe pure nothrow unittest 159 { 160 import mir.ndslice.topology: repeat, iota; 161 162 //simple integral summation 163 assert(sum([ 1, 2, 3, 4]) == 10); 164 165 //with initial value 166 assert(sum([ 1, 2, 3, 4], 5) == 15); 167 168 //with integral promotion 169 assert(sum([false, true, true, false, true]) == 3); 170 assert(sum(ubyte.max.repeat(100)) == 25_500); 171 172 //The result may overflow 173 assert(uint.max.repeat(3).sum == 4_294_967_293U ); 174 //But a seed can be used to change the summation primitive 175 assert(uint.max.repeat(3).sum(ulong.init) == 12_884_901_885UL); 176 177 //Floating point summation 178 assert(sum([1.0, 2.0, 3.0, 4.0]) == 10); 179 180 //Type overriding 181 static assert(is(typeof(sum!double([1F, 2F, 3F, 4F])) == double)); 182 static assert(is(typeof(sum!double([1F, 2F, 3F, 4F], 5F)) == double)); 183 assert(sum([1F, 2, 3, 4]) == 10); 184 assert(sum([1F, 2, 3, 4], 5F) == 15); 185 186 //Force pair-wise floating point summation on large integers 187 import mir.math : approxEqual; 188 assert(iota!long([4096], uint.max / 2).sum(0.0) 189 .approxEqual((uint.max / 2) * 4096.0 + 4096.0 * 4096.0 / 2)); 190 } 191 192 /// Precise summation 193 version(mir_test) 194 nothrow @nogc unittest 195 { 196 import mir.ndslice.topology: iota, map; 197 import core.stdc.tgmath: pow; 198 assert(iota(1000).map!(n => 1.7L.pow(real(n)+1) - 1.7L.pow(real(n))) 199 .sum!"precise" == -1 + 1.7L.pow(1000.0L)); 200 } 201 202 /// Precise summation with output range 203 version(mir_test) 204 nothrow @nogc unittest 205 { 206 import mir.ndslice.topology: iota, map; 207 import mir.math.common; 208 auto r = iota(1000).map!(n => 1.7L.pow(n+1) - 1.7L.pow(n)); 209 Summator!(real, Summation.precise) s = 0.0; 210 s.put(r); 211 s -= 1.7L.pow(1000); 212 assert(s.sum == -1); 213 } 214 215 /// Precise summation with output range 216 version(mir_test) 217 nothrow @nogc unittest 218 { 219 import mir.math.common; 220 float M = 2.0f ^^ (float.max_exp-1); 221 double N = 2.0 ^^ (float.max_exp-1); 222 auto s = Summator!(float, Summation.precise)(0); 223 s += M; 224 s += M; 225 assert(float.infinity == s.sum); //infinity 226 auto e = cast(Summator!(double, Summation.precise)) s; 227 assert(e.sum < double.infinity); 228 assert(N+N == e.sum()); //finite number 229 } 230 231 /// Moving mean 232 version(mir_test) 233 @safe pure nothrow @nogc 234 unittest 235 { 236 import mir.internal.utility: isFloatingPoint; 237 import mir.math.sum; 238 import mir.ndslice.topology: linspace; 239 import mir.rc.array: rcarray; 240 241 struct MovingAverage(T) 242 if (isFloatingPoint!T) 243 { 244 import mir.math.stat: MeanAccumulator; 245 246 MeanAccumulator!(T, Summation.precise) meanAccumulator; 247 double[] circularBuffer; 248 size_t frontIndex; 249 250 @disable this(this); 251 252 auto avg() @property const 253 { 254 return meanAccumulator.mean; 255 } 256 257 this(double[] buffer) 258 { 259 assert(buffer.length); 260 circularBuffer = buffer; 261 meanAccumulator.put(buffer); 262 } 263 264 ///operation without rounding 265 void put(T x) 266 { 267 import mir.utility: swap; 268 meanAccumulator.summator += x; 269 swap(circularBuffer[frontIndex++], x); 270 frontIndex = frontIndex == circularBuffer.length ? 0 : frontIndex; 271 meanAccumulator.summator -= x; 272 } 273 } 274 275 /// ma always keeps precise average of last 1000 elements 276 auto x = linspace!double([1000], [0.0, 999]).rcarray; 277 auto ma = MovingAverage!double(x[]); 278 assert(ma.avg == (1000 * 999 / 2) / 1000.0); 279 /// move by 10 elements 280 foreach(e; linspace!double([10], [1000.0, 1009.0])) 281 ma.put(e); 282 assert(ma.avg == (1010 * 1009 / 2 - 10 * 9 / 2) / 1000.0); 283 } 284 285 /// Arbitrary sum 286 version(mir_test) 287 @safe pure nothrow 288 unittest 289 { 290 assert(sum(1, 2, 3, 4) == 10); 291 assert(sum!float(1, 2, 3, 4) == 10f); 292 assert(sum(1f, 2, 3, 4) == 10f); 293 assert(sum(1.0 + 2i, 2 + 3i, 3 + 4i, 4 + 5i) == (10 + 14i)); 294 } 295 296 version(X86) 297 version = X86_Any; 298 version(X86_64) 299 version = X86_Any; 300 301 /++ 302 SIMD Vectors 303 Bugs: ICE 1662 (dmd only) 304 +/ 305 version(LDC) 306 version(X86_Any) 307 version(mir_test) 308 unittest 309 { 310 import core.simd; 311 import std.meta : AliasSeq; 312 double2 a = 1, b = 2, c = 3, d = 6; 313 with(Summation) 314 { 315 foreach (algo; AliasSeq!(naive, fast, pairwise, kahan)) 316 { 317 assert([a, b, c].sum!algo.array == d.array); 318 assert([a, b].sum!algo(c).array == d.array); 319 } 320 } 321 } 322 323 import std.traits; 324 private alias AliasSeq(T...) = T; 325 import mir.internal.utility: Iota, isComplex; 326 import mir.math.common: fabs; 327 328 private alias isNaN = x => x != x; 329 private alias isFinite = x => x.fabs < x.infinity; 330 private alias isInfinity = x => x.fabs == x.infinity; 331 332 333 private template chainSeq(size_t n) 334 { 335 static if (n) 336 alias chainSeq = AliasSeq!(n, chainSeq!(n / 2)); 337 else 338 alias chainSeq = AliasSeq!(); 339 } 340 341 /++ 342 Summation algorithms. 343 +/ 344 enum Summation 345 { 346 /++ 347 Performs `pairwise` summation for floating point based types and `fast` summation for integral based types. 348 +/ 349 appropriate, 350 351 /++ 352 $(WEB en.wikipedia.org/wiki/Pairwise_summation, Pairwise summation) algorithm. 353 +/ 354 pairwise, 355 356 /++ 357 Precise summation algorithm. 358 The value of the sum is rounded to the nearest representable 359 floating-point number using the $(LUCKY round-half-to-even rule). 360 The result can differ from the exact value on 32bit `x86`, `nextDown(proir) <= result && result <= nextUp(proir)`. 361 The current implementation re-establish special value semantics across iterations (i.e. handling ±inf). 362 363 References: $(LINK2 http://www.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps, 364 "Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric Predicates", Jonathan Richard Shewchuk), 365 $(LINK2 http://bugs.python.org/file10357/msum4.py, Mark Dickinson's post at bugs.python.org). 366 +/ 367 368 /+ 369 Precise summation function as msum() by Raymond Hettinger in 370 <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>, 371 enhanced with the exact partials sum and roundoff from Mark 372 Dickinson's post at <http://bugs.python.org/file10357/msum4.py>. 373 See those links for more details, proofs and other references. 374 IEEE 754R floating point semantics are assumed. 375 +/ 376 precise, 377 378 /++ 379 Precise decimal summation algorithm. 380 381 The elements of the sum are converted to a shortest decimal representation that being converted back would result the same floating-point number. 382 The resulting decimal elements are summed without rounding. 383 The decimal sum is converted back to a binary floating point representation using round-half-to-even rule. 384 385 See_also: The $(HTTPS github.com/ulfjack/ryu, Ryu algorithm) 386 +/ 387 decimal, 388 389 /++ 390 $(WEB en.wikipedia.org/wiki/Kahan_summation, Kahan summation) algorithm. 391 +/ 392 /+ 393 --------------------- 394 s := x[1] 395 c := 0 396 FOR k := 2 TO n DO 397 y := x[k] - c 398 t := s + y 399 c := (t - s) - y 400 s := t 401 END DO 402 --------------------- 403 +/ 404 kahan, 405 406 /++ 407 $(LUCKY Kahan-Babuška-Neumaier summation algorithm). `KBN` gives more accurate results then `Kahan`. 408 +/ 409 /+ 410 --------------------- 411 s := x[1] 412 c := 0 413 FOR i := 2 TO n DO 414 t := s + x[i] 415 IF ABS(s) >= ABS(x[i]) THEN 416 c := c + ((s-t)+x[i]) 417 ELSE 418 c := c + ((x[i]-t)+s) 419 END IF 420 s := t 421 END DO 422 s := s + c 423 --------------------- 424 +/ 425 kbn, 426 427 /++ 428 $(LUCKY Generalized Kahan-Babuška summation algorithm), order 2. `KB2` gives more accurate results then `Kahan` and `KBN`. 429 +/ 430 /+ 431 --------------------- 432 s := 0 ; cs := 0 ; ccs := 0 433 FOR j := 1 TO n DO 434 t := s + x[i] 435 IF ABS(s) >= ABS(x[i]) THEN 436 c := (s-t) + x[i] 437 ELSE 438 c := (x[i]-t) + s 439 END IF 440 s := t 441 t := cs + c 442 IF ABS(cs) >= ABS(c) THEN 443 cc := (cs-t) + c 444 ELSE 445 cc := (c-t) + cs 446 END IF 447 cs := t 448 ccs := ccs + cc 449 END FOR 450 RETURN s+cs+ccs 451 --------------------- 452 +/ 453 kb2, 454 455 /++ 456 Naive algorithm (one by one). 457 +/ 458 naive, 459 460 /++ 461 SIMD optimized summation algorithm. 462 +/ 463 fast, 464 } 465 466 /++ 467 Output range for summation. 468 +/ 469 struct Summator(T, Summation summation) 470 if (isMutable!T) 471 { 472 import mir.internal.utility: isComplex; 473 static if (is(T == class) || is(T == interface) || hasElaborateAssign!T && !isComplex!T) 474 static assert (summation == Summation.naive, 475 "Classes, interfaces, and structures with " 476 ~ "elaborate constructor support only naive summation."); 477 478 static if (summation == Summation.fast) 479 { 480 version (LDC) 481 { 482 import ldc.attributes: fastmath; 483 alias attr = fastmath; 484 } 485 else 486 { 487 alias attr = AliasSeq!(); 488 } 489 } 490 else 491 { 492 alias attr = AliasSeq!(); 493 } 494 495 @attr: 496 497 static if (summation == Summation.pairwise) { 498 private enum bool fastPairwise = 499 is(F == float) || 500 is(F == double) || 501 is(F == cfloat) || 502 is(F == cdouble) || 503 (isComplex!F && F.sizeof <= 16) || 504 is(F : __vector(W[N]), W, size_t N); 505 //false; 506 } 507 508 alias F = T; 509 510 static if (summation == Summation.precise) 511 { 512 import std.internal.scopebuffer; 513 import mir.appender; 514 import mir.math.ieee: signbit; 515 private: 516 enum F M = (cast(F)(2)) ^^ (T.max_exp - 1); 517 auto partials = scopedBuffer!(F, 8 * T.sizeof); 518 //sum for NaN and infinity. 519 F s = summationInitValue!F; 520 //Overflow Degree. Count of 2^^F.max_exp minus count of -(2^^F.max_exp) 521 sizediff_t o; 522 523 524 /++ 525 Compute the sum of a list of nonoverlapping floats. 526 On input, partials is a list of nonzero, nonspecial, 527 nonoverlapping floats, strictly increasing in magnitude, but 528 possibly not all having the same sign. 529 On output, the sum of partials gives the error in the returned 530 result, which is correctly rounded (using the round-half-to-even 531 rule). 532 Two floating point values x and y are non-overlapping if the least significant nonzero 533 bit of x is more significant than the most significant nonzero bit of y, or vice-versa. 534 +/ 535 static F partialsReduce(F s, in F[] partials) 536 in 537 { 538 debug(numeric) assert(!partials.length || .isFinite(s)); 539 } 540 do 541 { 542 bool _break; 543 foreach_reverse (i, y; partials) 544 { 545 s = partialsReducePred(s, y, i ? partials[i-1] : 0, _break); 546 if (_break) 547 break; 548 debug(numeric) assert(.isFinite(s)); 549 } 550 return s; 551 } 552 553 static F partialsReducePred(F s, F y, F z, out bool _break) 554 out(result) 555 { 556 debug(numeric) assert(.isFinite(result)); 557 } 558 do 559 { 560 F x = s; 561 s = x + y; 562 F d = s - x; 563 F l = y - d; 564 debug(numeric) 565 { 566 assert(.isFinite(x)); 567 assert(.isFinite(y)); 568 assert(.isFinite(s)); 569 assert(fabs(y) < fabs(x)); 570 } 571 if (l) 572 { 573 //Make half-even rounding work across multiple partials. 574 //Needed so that sum([1e-16, 1, 1e16]) will round-up the last 575 //digit to two instead of down to zero (the 1e-16 makes the 1 576 //slightly closer to two). Can guarantee commutativity. 577 if (z && !signbit(l * z)) 578 { 579 l *= 2; 580 x = s + l; 581 F t = x - s; 582 if (l == t) 583 s = x; 584 } 585 _break = true; 586 } 587 return s; 588 } 589 590 //Returns corresponding infinity if is overflow and 0 otherwise. 591 F overflow()() const 592 { 593 if (o == 0) 594 return 0; 595 if (partials.length && (o == -1 || o == 1) && signbit(o * partials.data[$-1])) 596 { 597 // problem case: decide whether result is representable 598 F x = o * M; 599 F y = partials.data[$-1] / 2; 600 F h = x + y; 601 F d = h - x; 602 F l = (y - d) * 2; 603 y = h * 2; 604 d = h + l; 605 F t = d - h; 606 version(X86) 607 { 608 if (!.isInfinity(cast(T)y) || !.isInfinity(sum())) 609 return 0; 610 } 611 else 612 { 613 if (!.isInfinity(cast(T)y) || 614 ((partials.length > 1 && !signbit(l * partials.data[$-2])) && t == l)) 615 return 0; 616 } 617 } 618 return F.infinity * o; 619 } 620 } 621 else 622 static if (summation == Summation.kb2) 623 { 624 F s = summationInitValue!F; 625 F cs = summationInitValue!F; 626 F ccs = summationInitValue!F; 627 } 628 else 629 static if (summation == Summation.kbn) 630 { 631 F s = summationInitValue!F; 632 F c = summationInitValue!F; 633 } 634 else 635 static if (summation == Summation.kahan) 636 { 637 F s = summationInitValue!F; 638 F c = summationInitValue!F; 639 F y = summationInitValue!F; // do not declare in the loop/put (algo can be used for matrixes and etc) 640 F t = summationInitValue!F; // ditto 641 } 642 else 643 static if (summation == Summation.pairwise) 644 { 645 package size_t counter; 646 size_t index; 647 static if (fastPairwise) 648 { 649 enum registersCount= 16; 650 F[size_t.sizeof * 8] partials; 651 } 652 else 653 { 654 F[size_t.sizeof * 8] partials; 655 } 656 } 657 else 658 static if (summation == Summation.naive) 659 { 660 F s = summationInitValue!F; 661 } 662 else 663 static if (summation == Summation.fast) 664 { 665 F s = summationInitValue!F; 666 } 667 else 668 static if (summation == Summation.decimal) 669 { 670 import mir.bignum.decimal; 671 Decimal!256 s; 672 T ss = 0; 673 } 674 else 675 static assert(0, "Unsupported summation type for std.numeric.Summator."); 676 677 678 public: 679 680 /// 681 this()(T n) 682 { 683 static if (summation == Summation.precise) 684 { 685 s = 0.0; 686 o = 0; 687 if (n) put(n); 688 } 689 else 690 static if (summation == Summation.kb2) 691 { 692 s = n; 693 static if (isComplex!T) 694 { 695 static if (is(T : cfloat)) { 696 cs = 0 + 0fi; 697 ccs = 0 + 0fi; 698 } else { 699 cs = Complex!float(0, 0); 700 ccs = Complex!float(0, 0); 701 } 702 } 703 else 704 { 705 cs = 0.0; 706 ccs = 0.0; 707 } 708 } 709 else 710 static if (summation == Summation.kbn) 711 { 712 s = n; 713 static if (isComplex!T) { 714 static if (is(T : cfloat)) { 715 c = 0 + 0fi; 716 } else { 717 c = Complex!float(0, 0); 718 } 719 } else 720 c = 0.0; 721 } 722 else 723 static if (summation == Summation.kahan) 724 { 725 s = n; 726 static if (isComplex!T) { 727 static if (is(T : cfloat)) { 728 c = 0 + 0fi; 729 } else { 730 c = Complex!float(0, 0); 731 } 732 } else 733 c = 0.0; 734 } 735 else 736 static if (summation == Summation.pairwise) 737 { 738 counter = index = 1; 739 partials[0] = n; 740 } 741 else 742 static if (summation == Summation.naive) 743 { 744 s = n; 745 } 746 else 747 static if (summation == Summation.fast) 748 { 749 s = n; 750 } 751 else 752 static if (summation == Summation.decimal) 753 { 754 ss = 0; 755 if (!(-n.infinity < n && n < n.infinity)) 756 { 757 ss = n; 758 n = 0; 759 } 760 s = n; 761 } 762 else 763 static assert(0); 764 } 765 766 ///Adds `n` to the internal partial sums. 767 void put(N)(N n) 768 if (__traits(compiles, {T a = n; a = n; a += n;})) 769 { 770 static if (isCompesatorAlgorithm!summation) 771 F x = n; 772 static if (summation == Summation.precise) 773 { 774 if (.isFinite(x)) 775 { 776 size_t i; 777 auto partials_data = partials.data; 778 foreach (y; partials_data[]) 779 { 780 F h = x + y; 781 if (.isInfinity(cast(T)h)) 782 { 783 if (fabs(x) < fabs(y)) 784 { 785 F t = x; x = y; y = t; 786 } 787 //h == -F.infinity 788 if (signbit(h)) 789 { 790 x += M; 791 x += M; 792 o--; 793 } 794 //h == +F.infinity 795 else 796 { 797 x -= M; 798 x -= M; 799 o++; 800 } 801 debug(numeric) assert(x.isFinite); 802 h = x + y; 803 } 804 debug(numeric) assert(h.isFinite); 805 F l; 806 if (fabs(x) < fabs(y)) 807 { 808 F t = h - y; 809 l = x - t; 810 } 811 else 812 { 813 F t = h - x; 814 l = y - t; 815 } 816 debug(numeric) assert(l.isFinite); 817 if (l) 818 { 819 partials_data[i++] = l; 820 } 821 x = h; 822 } 823 partials.shrinkTo(i); 824 if (x) 825 { 826 partials.put(x); 827 } 828 } 829 else 830 { 831 s += x; 832 } 833 } 834 else 835 static if (summation == Summation.kb2) 836 { 837 static if (isFloatingPoint!F) 838 { 839 F t = s + x; 840 F c = 0; 841 if (fabs(s) >= fabs(x)) 842 { 843 F d = s - t; 844 c = d + x; 845 } 846 else 847 { 848 F d = x - t; 849 c = d + s; 850 } 851 s = t; 852 t = cs + c; 853 if (fabs(cs) >= fabs(c)) 854 { 855 F d = cs - t; 856 d += c; 857 ccs += d; 858 } 859 else 860 { 861 F d = c - t; 862 d += cs; 863 ccs += d; 864 } 865 cs = t; 866 } 867 else 868 { 869 F t = s + x; 870 if (fabs(s.re) < fabs(x.re)) 871 { 872 auto s_re = s.re; 873 auto x_re = x.re; 874 static if (is(F : cfloat)) { 875 s = x_re + s.im * 1fi; 876 x = s_re + x.im * 1fi; 877 } else { 878 s = F(x_re, s.im); 879 x = F(s_re, x.im); 880 } 881 } 882 if (fabs(s.im) < fabs(x.im)) 883 { 884 auto s_im = s.im; 885 auto x_im = x.im; 886 static if (is(F : cfloat)) { 887 s = s.re + x_im * 1fi; 888 x = x.re + s_im * 1fi; 889 } else { 890 s = F(s.re, x_im); 891 x = F(x.re, s_im); 892 } 893 } 894 F c = (s-t)+x; 895 s = t; 896 if (fabs(cs.re) < fabs(c.re)) 897 { 898 auto c_re = c.re; 899 auto cs_re = cs.re; 900 static if (is(F : cfloat)) { 901 c = cs_re + c.im * 1fi; 902 cs = c_re + cs.im * 1fi; 903 } else { 904 c = F(cs_re, c.im); 905 cs = F(c_re, cs.im); 906 } 907 } 908 if (fabs(cs.im) < fabs(c.im)) 909 { 910 auto c_im = c.im; 911 auto cs_im = cs.im; 912 static if (is(F : cfloat)) { 913 c = c.re + cs_im * 1fi; 914 cs = cs.re + c_im * 1fi; 915 } else { 916 c = F(c.re, cs_im); 917 cs = F(cs.re, c_im); 918 } 919 } 920 F d = cs - t; 921 d += c; 922 ccs += d; 923 cs = t; 924 } 925 } 926 else 927 static if (summation == Summation.kbn) 928 { 929 static if (isFloatingPoint!F) 930 { 931 F t = s + x; 932 if (fabs(s) >= fabs(x)) 933 { 934 F d = s - t; 935 d += x; 936 c += d; 937 } 938 else 939 { 940 F d = x - t; 941 d += s; 942 c += d; 943 } 944 s = t; 945 } 946 else 947 { 948 F t = s + x; 949 if (fabs(s.re) < fabs(x.re)) 950 { 951 auto s_re = s.re; 952 auto x_re = x.re; 953 static if (is(F : cfloat)) { 954 s = x_re + s.im * 1fi; 955 x = s_re + x.im * 1fi; 956 } else { 957 s = F(x_re, s.im); 958 x = F(s_re, x.im); 959 } 960 } 961 if (fabs(s.im) < fabs(x.im)) 962 { 963 auto s_im = s.im; 964 auto x_im = x.im; 965 static if (is(F : cfloat)) { 966 s = s.re + x_im * 1fi; 967 x = x.re + s_im * 1fi; 968 } else { 969 s = F(s.re, x_im); 970 x = F(x.re, s_im); 971 } 972 } 973 F d = s - t; 974 d += x; 975 c += d; 976 s = t; 977 } 978 } 979 else 980 static if (summation == Summation.kahan) 981 { 982 y = x - c; 983 t = s + y; 984 c = t - s; 985 c -= y; 986 s = t; 987 } 988 else 989 static if (summation == Summation.pairwise) 990 { 991 import mir.bitop: cttz; 992 ++counter; 993 partials[index] = n; 994 foreach (_; 0 .. cttz(counter)) 995 { 996 immutable newIndex = index - 1; 997 partials[newIndex] += partials[index]; 998 index = newIndex; 999 } 1000 ++index; 1001 } 1002 else 1003 static if (summation == Summation.naive) 1004 { 1005 s += n; 1006 } 1007 else 1008 static if (summation == Summation.fast) 1009 { 1010 s += n; 1011 } 1012 else 1013 static if (summation == summation.decimal) 1014 { 1015 import mir.bignum.internal.ryu.generic_128: genericBinaryToDecimal; 1016 if (-n.infinity < n && n < n.infinity) 1017 { 1018 auto decimal = genericBinaryToDecimal(n); 1019 s += decimal; 1020 } 1021 else 1022 { 1023 ss += n; 1024 } 1025 } 1026 else 1027 static assert(0); 1028 } 1029 1030 ///ditto 1031 void put(Range)(Range r) 1032 if (isIterable!Range && !is(Range : __vector(V[N]), V, size_t N)) 1033 { 1034 static if (summation == Summation.pairwise && fastPairwise && isDynamicArray!Range) 1035 { 1036 F[registersCount] v; 1037 foreach (i, n; chainSeq!registersCount) 1038 { 1039 if (r.length >= n * 2) do 1040 { 1041 foreach (j; Iota!n) 1042 v[j] = cast(F) r[j]; 1043 foreach (j; Iota!n) 1044 v[j] += cast(F) r[n + j]; 1045 foreach (m; chainSeq!(n / 2)) 1046 foreach (j; Iota!m) 1047 v[j] += v[m + j]; 1048 put(v[0]); 1049 r = r[n * 2 .. $]; 1050 } 1051 while (!i && r.length >= n * 2); 1052 } 1053 if (r.length) 1054 { 1055 put(cast(F) r[0]); 1056 r = r[1 .. $]; 1057 } 1058 assert(r.length == 0); 1059 } 1060 else 1061 static if (summation == Summation.fast) 1062 { 1063 static if (isComplex!F) { 1064 static if (is(F : cfloat)) { 1065 F s0 = 0 + 0fi; 1066 } else { 1067 F s0 = F(0, 0f); 1068 } 1069 } else 1070 F s0 = 0; 1071 foreach (ref elem; r) 1072 s0 += elem; 1073 s += s0; 1074 } 1075 else 1076 { 1077 foreach (ref elem; r) 1078 put(elem); 1079 } 1080 } 1081 1082 import mir.ndslice.slice; 1083 1084 /// ditto 1085 void put(Range: Slice!(Iterator, N, kind), Iterator, size_t N, SliceKind kind)(Range r) 1086 { 1087 static if (N > 1 && kind == Contiguous) 1088 { 1089 import mir.ndslice.topology: flattened; 1090 this.put(r.flattened); 1091 } 1092 else 1093 static if (isPointer!Iterator && kind == Contiguous) 1094 { 1095 this.put(r.field); 1096 } 1097 else 1098 static if (summation == Summation.fast && N == 1) 1099 { 1100 static if (isComplex!F) { 1101 static if (is(F : cfloat)) { 1102 F s0 = 0 + 0fi; 1103 } else { 1104 F s0 = F(0, 0f); 1105 } 1106 } else 1107 F s0 = 0; 1108 import mir.algorithm.iteration: reduce; 1109 s0 = s0.reduce!"a + b"(r); 1110 s += s0; 1111 } 1112 else 1113 { 1114 foreach(elem; r) 1115 this.put(elem); 1116 } 1117 } 1118 1119 /+ 1120 Adds `x` to the internal partial sums. 1121 This operation doesn't re-establish special 1122 value semantics across iterations (i.e. handling ±inf). 1123 Preconditions: `isFinite(x)`. 1124 +/ 1125 version(none) 1126 static if (summation == Summation.precise) 1127 package void unsafePut()(F x) 1128 in { 1129 assert(.isFinite(x)); 1130 } 1131 do { 1132 size_t i; 1133 foreach (y; partials.data[]) 1134 { 1135 F h = x + y; 1136 debug(numeric) assert(.isFinite(h)); 1137 F l; 1138 if (fabs(x) < fabs(y)) 1139 { 1140 F t = h - y; 1141 l = x - t; 1142 } 1143 else 1144 { 1145 F t = h - x; 1146 l = y - t; 1147 } 1148 debug(numeric) assert(.isFinite(l)); 1149 if (l) 1150 { 1151 partials.data[i++] = l; 1152 } 1153 x = h; 1154 } 1155 partials.length = i; 1156 if (x) 1157 { 1158 partials.put(x); 1159 } 1160 } 1161 1162 ///Returns the value of the sum. 1163 T sum()() scope const 1164 { 1165 /++ 1166 Returns the value of the sum, rounded to the nearest representable 1167 floating-point number using the round-half-to-even rule. 1168 The result can differ from the exact value on `X86`, `nextDown`proir) <= result && result <= nextUp(proir)). 1169 +/ 1170 static if (summation == Summation.precise) 1171 { 1172 debug(mir_sum) 1173 { 1174 foreach (y; partials.data[]) 1175 { 1176 assert(y); 1177 assert(y.isFinite); 1178 } 1179 //TODO: Add Non-Overlapping check to std.math 1180 import mir.ndslice.slice: sliced; 1181 import mir.ndslice.sorting: isSorted; 1182 import mir.ndslice.topology: map; 1183 assert(partials.data[].sliced.map!fabs.isSorted); 1184 } 1185 1186 if (s) 1187 return s; 1188 auto parts = partials.data[]; 1189 F y = 0.0; 1190 //pick last 1191 if (parts.length) 1192 { 1193 y = parts[$-1]; 1194 parts = parts[0..$-1]; 1195 } 1196 if (o) 1197 { 1198 immutable F of = o; 1199 if (y && (o == -1 || o == 1) && signbit(of * y)) 1200 { 1201 // problem case: decide whether result is representable 1202 y /= 2; 1203 F x = of * M; 1204 immutable F h = x + y; 1205 F t = h - x; 1206 F l = (y - t) * 2; 1207 y = h * 2; 1208 if (.isInfinity(cast(T)y)) 1209 { 1210 // overflow, except in edge case... 1211 x = h + l; 1212 t = x - h; 1213 y = parts.length && t == l && !signbit(l*parts[$-1]) ? 1214 x * 2 : 1215 F.infinity * of; 1216 parts = null; 1217 } 1218 else if (l) 1219 { 1220 bool _break; 1221 y = partialsReducePred(y, l, parts.length ? parts[$-1] : 0, _break); 1222 if (_break) 1223 parts = null; 1224 } 1225 } 1226 else 1227 { 1228 y = F.infinity * of; 1229 parts = null; 1230 } 1231 } 1232 return partialsReduce(y, parts); 1233 } 1234 else 1235 static if (summation == Summation.kb2) 1236 { 1237 return s + (cs + ccs); 1238 } 1239 else 1240 static if (summation == Summation.kbn) 1241 { 1242 return s + c; 1243 } 1244 else 1245 static if (summation == Summation.kahan) 1246 { 1247 return s; 1248 } 1249 else 1250 static if (summation == Summation.pairwise) 1251 { 1252 F s = summationInitValue!T; 1253 assert((counter == 0) == (index == 0)); 1254 foreach_reverse (ref e; partials[0 .. index]) 1255 { 1256 static if (is(F : __vector(W[N]), W, size_t N)) 1257 s += cast(Unqual!F) e; //DMD bug workaround 1258 else 1259 s += e; 1260 } 1261 return s; 1262 } 1263 else 1264 static if (summation == Summation.naive) 1265 { 1266 return s; 1267 } 1268 else 1269 static if (summation == Summation.fast) 1270 { 1271 return s; 1272 } 1273 else 1274 static if (summation == Summation.decimal) 1275 { 1276 return cast(T) s + ss; 1277 } 1278 else 1279 static assert(0); 1280 } 1281 1282 version(none) 1283 static if (summation == Summation.precise) 1284 F partialsSum()() const 1285 { 1286 debug(numeric) partialsDebug; 1287 auto parts = partials.data[]; 1288 F y = 0.0; 1289 //pick last 1290 if (parts.length) 1291 { 1292 y = parts[$-1]; 1293 parts = parts[0..$-1]; 1294 } 1295 return partialsReduce(y, parts); 1296 } 1297 1298 ///Returns `Summator` with extended internal partial sums. 1299 C opCast(C : Summator!(P, _summation), P, Summation _summation)() const 1300 if ( 1301 _summation == summation && 1302 isMutable!C && 1303 P.max_exp >= T.max_exp && 1304 P.mant_dig >= T.mant_dig 1305 ) 1306 { 1307 static if (is(P == T)) 1308 return this; 1309 else 1310 static if (summation == Summation.precise) 1311 { 1312 auto ret = typeof(return).init; 1313 ret.s = s; 1314 ret.o = o; 1315 foreach (p; partials.data[]) 1316 { 1317 ret.partials.put(p); 1318 } 1319 enum exp_diff = P.max_exp / T.max_exp; 1320 static if (exp_diff) 1321 { 1322 if (ret.o) 1323 { 1324 immutable f = ret.o / exp_diff; 1325 immutable t = cast(int)(ret.o % exp_diff); 1326 ret.o = f; 1327 ret.put((P(2) ^^ T.max_exp) * t); 1328 } 1329 } 1330 return ret; 1331 } 1332 else 1333 static if (summation == Summation.kb2) 1334 { 1335 auto ret = typeof(return).init; 1336 ret.s = s; 1337 ret.cs = cs; 1338 ret.ccs = ccs; 1339 return ret; 1340 } 1341 else 1342 static if (summation == Summation.kbn) 1343 { 1344 auto ret = typeof(return).init; 1345 ret.s = s; 1346 ret.c = c; 1347 return ret; 1348 } 1349 else 1350 static if (summation == Summation.kahan) 1351 { 1352 auto ret = typeof(return).init; 1353 ret.s = s; 1354 ret.c = c; 1355 return ret; 1356 } 1357 else 1358 static if (summation == Summation.pairwise) 1359 { 1360 auto ret = typeof(return).init; 1361 ret.counter = counter; 1362 ret.index = index; 1363 foreach (i; 0 .. index) 1364 ret.partials[i] = partials[i]; 1365 return ret; 1366 } 1367 else 1368 static if (summation == Summation.naive) 1369 { 1370 auto ret = typeof(return).init; 1371 ret.s = s; 1372 return ret; 1373 } 1374 else 1375 static if (summation == Summation.fast) 1376 { 1377 auto ret = typeof(return).init; 1378 ret.s = s; 1379 return ret; 1380 } 1381 else 1382 static assert(0); 1383 } 1384 1385 /++ 1386 `cast(C)` operator overloading. Returns `cast(C)sum()`. 1387 See also: `cast` 1388 +/ 1389 C opCast(C)() const if (is(Unqual!C == T)) 1390 { 1391 return cast(C)sum(); 1392 } 1393 1394 ///Operator overloading. 1395 // opAssign should initialize partials. 1396 void opAssign(T rhs) 1397 { 1398 static if (summation == Summation.precise) 1399 { 1400 partials.reset; 1401 s = 0.0; 1402 o = 0; 1403 if (rhs) put(rhs); 1404 } 1405 else 1406 static if (summation == Summation.kb2) 1407 { 1408 s = rhs; 1409 static if (isComplex!T) 1410 { 1411 static if (is(T : cfloat)) { 1412 cs = 0 + 0fi; 1413 ccs = 0 + 0fi; 1414 } else { 1415 cs = T(0, 0f); 1416 ccs = T(0.0, 0f); 1417 } 1418 } 1419 else 1420 { 1421 cs = 0.0; 1422 ccs = 0.0; 1423 } 1424 } 1425 else 1426 static if (summation == Summation.kbn) 1427 { 1428 s = rhs; 1429 static if (isComplex!T) { 1430 static if (is(T : cfloat)) { 1431 c = 0 + 0fi; 1432 } else { 1433 c = T(0, 0f); 1434 } 1435 } else 1436 c = 0.0; 1437 } 1438 else 1439 static if (summation == Summation.kahan) 1440 { 1441 s = rhs; 1442 static if (isComplex!T) { 1443 static if (is(T : cfloat)) { 1444 c = 0 + 0fi; 1445 } else { 1446 c = T(0, 0f); 1447 } 1448 } else 1449 c = 0.0; 1450 } 1451 else 1452 static if (summation == Summation.pairwise) 1453 { 1454 counter = 1; 1455 index = 1; 1456 partials[0] = rhs; 1457 } 1458 else 1459 static if (summation == Summation.naive) 1460 { 1461 s = rhs; 1462 } 1463 else 1464 static if (summation == Summation.fast) 1465 { 1466 s = rhs; 1467 } 1468 else 1469 static if (summation == summation.decimal) 1470 { 1471 __ctor(rhs); 1472 } 1473 else 1474 static assert(0); 1475 } 1476 1477 ///ditto 1478 void opOpAssign(string op : "+")(T rhs) 1479 { 1480 put(rhs); 1481 } 1482 1483 ///ditto 1484 void opOpAssign(string op : "+")(ref const Summator rhs) 1485 { 1486 static if (summation == Summation.precise) 1487 { 1488 s += rhs.s; 1489 o += rhs.o; 1490 foreach (f; rhs.partials.data[]) 1491 put(f); 1492 } 1493 else 1494 static if (summation == Summation.kb2) 1495 { 1496 put(rhs.ccs); 1497 put(rhs.cs); 1498 put(rhs.s); 1499 } 1500 else 1501 static if (summation == Summation.kbn) 1502 { 1503 put(rhs.c); 1504 put(rhs.s); 1505 } 1506 else 1507 static if (summation == Summation.kahan) 1508 { 1509 put(rhs.s); 1510 } 1511 else 1512 static if (summation == Summation.pairwise) 1513 { 1514 foreach_reverse (e; rhs.partials[0 .. rhs.index]) 1515 put(e); 1516 counter -= rhs.index; 1517 counter += rhs.counter; 1518 } 1519 else 1520 static if (summation == Summation.naive) 1521 { 1522 put(rhs.s); 1523 } 1524 else 1525 static if (summation == Summation.fast) 1526 { 1527 put(rhs.s); 1528 } 1529 else 1530 static assert(0); 1531 } 1532 1533 ///ditto 1534 void opOpAssign(string op : "-")(T rhs) 1535 { 1536 static if (summation == Summation.precise) 1537 { 1538 put(-rhs); 1539 } 1540 else 1541 static if (summation == Summation.kb2) 1542 { 1543 put(-rhs); 1544 } 1545 else 1546 static if (summation == Summation.kbn) 1547 { 1548 put(-rhs); 1549 } 1550 else 1551 static if (summation == Summation.kahan) 1552 { 1553 y = 0.0; 1554 y -= rhs; 1555 y -= c; 1556 t = s + y; 1557 c = t - s; 1558 c -= y; 1559 s = t; 1560 } 1561 else 1562 static if (summation == Summation.pairwise) 1563 { 1564 put(-rhs); 1565 } 1566 else 1567 static if (summation == Summation.naive) 1568 { 1569 s -= rhs; 1570 } 1571 else 1572 static if (summation == Summation.fast) 1573 { 1574 s -= rhs; 1575 } 1576 else 1577 static assert(0); 1578 } 1579 1580 ///ditto 1581 void opOpAssign(string op : "-")(ref const Summator rhs) 1582 { 1583 static if (summation == Summation.precise) 1584 { 1585 s -= rhs.s; 1586 o -= rhs.o; 1587 foreach (f; rhs.partials.data[]) 1588 put(-f); 1589 } 1590 else 1591 static if (summation == Summation.kb2) 1592 { 1593 put(-rhs.ccs); 1594 put(-rhs.cs); 1595 put(-rhs.s); 1596 } 1597 else 1598 static if (summation == Summation.kbn) 1599 { 1600 put(-rhs.c); 1601 put(-rhs.s); 1602 } 1603 else 1604 static if (summation == Summation.kahan) 1605 { 1606 this -= rhs.s; 1607 } 1608 else 1609 static if (summation == Summation.pairwise) 1610 { 1611 foreach_reverse (e; rhs.partials[0 .. rhs.index]) 1612 put(-e); 1613 counter -= rhs.index; 1614 counter += rhs.counter; 1615 } 1616 else 1617 static if (summation == Summation.naive) 1618 { 1619 s -= rhs.s; 1620 } 1621 else 1622 static if (summation == Summation.fast) 1623 { 1624 s -= rhs.s; 1625 } 1626 else 1627 static assert(0); 1628 } 1629 1630 /// 1631 1632 version(mir_test) 1633 @nogc nothrow unittest 1634 { 1635 import mir.math.common; 1636 import mir.ndslice.topology: iota, map; 1637 auto r1 = iota(500).map!(a => 1.7L.pow(a+1) - 1.7L.pow(a)); 1638 auto r2 = iota([500], 500).map!(a => 1.7L.pow(a+1) - 1.7L.pow(a)); 1639 Summator!(real, Summation.precise) s1 = 0, s2 = 0.0; 1640 foreach (e; r1) s1 += e; 1641 foreach (e; r2) s2 -= e; 1642 s1 -= s2; 1643 s1 -= 1.7L.pow(1000); 1644 assert(s1.sum == -1); 1645 } 1646 1647 1648 version(mir_test) 1649 @nogc nothrow unittest 1650 { 1651 with(Summation) 1652 foreach (summation; AliasSeq!(kahan, kbn, kb2, precise, pairwise)) 1653 foreach (T; AliasSeq!(float, double, real)) 1654 { 1655 Summator!(T, summation) sum = 1; 1656 sum += 3; 1657 assert(sum.sum == 4); 1658 sum -= 10; 1659 assert(sum.sum == -6); 1660 Summator!(T, summation) sum2 = 3; 1661 sum -= sum2; 1662 assert(sum.sum == -9); 1663 sum2 = 100; 1664 sum += 100; 1665 assert(sum.sum == 91); 1666 auto sum3 = cast(Summator!(real, summation))sum; 1667 assert(sum3.sum == 91); 1668 sum = sum2; 1669 } 1670 } 1671 1672 1673 version(mir_test) 1674 @nogc nothrow unittest 1675 { 1676 import mir.math.common: approxEqual; 1677 with(Summation) 1678 foreach (summation; AliasSeq!(naive, fast)) 1679 foreach (T; AliasSeq!(float, double, real)) 1680 { 1681 Summator!(T, summation) sum = 1; 1682 sum += 3.5; 1683 assert(sum.sum.approxEqual(4.5)); 1684 sum = 2; 1685 assert(sum.sum == 2); 1686 sum -= 4; 1687 assert(sum.sum.approxEqual(-2)); 1688 } 1689 } 1690 1691 static if (summation == Summation.precise) 1692 { 1693 ///Returns `true` if current sum is a NaN. 1694 bool isNaN()() const 1695 { 1696 return .isNaN(s); 1697 } 1698 1699 ///Returns `true` if current sum is finite (not infinite or NaN). 1700 bool isFinite()() const 1701 { 1702 if (s) 1703 return false; 1704 return !overflow; 1705 } 1706 1707 ///Returns `true` if current sum is ±∞. 1708 bool isInfinity()() const 1709 { 1710 return .isInfinity(s) || overflow(); 1711 } 1712 } 1713 else static if (isFloatingPoint!F) 1714 { 1715 ///Returns `true` if current sum is a NaN. 1716 bool isNaN()() const 1717 { 1718 return .isNaN(sum()); 1719 } 1720 1721 ///Returns `true` if current sum is finite (not infinite or NaN). 1722 bool isFinite()() const 1723 { 1724 return .isFinite(sum()); 1725 } 1726 1727 ///Returns `true` if current sum is ±∞. 1728 bool isInfinity()() const 1729 { 1730 return .isInfinity(sum()); 1731 } 1732 } 1733 else 1734 { 1735 //User defined types 1736 } 1737 } 1738 1739 version(mir_test) 1740 unittest 1741 { 1742 import mir.functional: RefTuple, refTuple; 1743 import mir.ndslice.topology: map, iota, retro; 1744 import mir.array.allocation: array; 1745 import std.math: isInfinity, isFinite, isNaN; 1746 1747 Summator!(double, Summation.precise) summator = 0.0; 1748 1749 enum double M = (cast(double)2) ^^ (double.max_exp - 1); 1750 RefTuple!(double[], double)[] tests = [ 1751 refTuple(new double[0], 0.0), 1752 refTuple([0.0], 0.0), 1753 refTuple([1e100, 1.0, -1e100, 1e-100, 1e50, -1, -1e50], 1e-100), 1754 refTuple([1e308, 1e308, -1e308], 1e308), 1755 refTuple([-1e308, 1e308, 1e308], 1e308), 1756 refTuple([1e308, -1e308, 1e308], 1e308), 1757 refTuple([M, M, -2.0^^1000], 1.7976930277114552e+308), 1758 refTuple([M, M, M, M, -M, -M, -M], 8.9884656743115795e+307), 1759 refTuple([2.0^^53, -0.5, -2.0^^-54], 2.0^^53-1.0), 1760 refTuple([2.0^^53, 1.0, 2.0^^-100], 2.0^^53+2.0), 1761 refTuple([2.0^^53+10.0, 1.0, 2.0^^-100], 2.0^^53+12.0), 1762 refTuple([2.0^^53-4.0, 0.5, 2.0^^-54], 2.0^^53-3.0), 1763 refTuple([M-2.0^^970, -1, M], 1.7976931348623157e+308), 1764 refTuple([double.max, double.max*2.^^-54], double.max), 1765 refTuple([double.max, double.max*2.^^-53], double.infinity), 1766 refTuple(iota([1000], 1).map!(a => 1.0/a).array , 7.4854708605503451), 1767 refTuple(iota([1000], 1).map!(a => (-1.0)^^a/a).array, -0.69264743055982025), //0.693147180559945309417232121458176568075500134360255254120680... 1768 refTuple(iota([1000], 1).map!(a => 1.0/a).retro.array , 7.4854708605503451), 1769 refTuple(iota([1000], 1).map!(a => (-1.0)^^a/a).retro.array, -0.69264743055982025), 1770 refTuple([double.infinity, -double.infinity, double.nan], double.nan), 1771 refTuple([double.nan, double.infinity, -double.infinity], double.nan), 1772 refTuple([double.infinity, double.nan, double.infinity], double.nan), 1773 refTuple([double.infinity, double.infinity], double.infinity), 1774 refTuple([double.infinity, -double.infinity], double.nan), 1775 refTuple([-double.infinity, 1e308, 1e308, -double.infinity], -double.infinity), 1776 refTuple([M-2.0^^970, 0.0, M], double.infinity), 1777 refTuple([M-2.0^^970, 1.0, M], double.infinity), 1778 refTuple([M, M], double.infinity), 1779 refTuple([M, M, -1], double.infinity), 1780 refTuple([M, M, M, M, -M, -M], double.infinity), 1781 refTuple([M, M, M, M, -M, M], double.infinity), 1782 refTuple([-M, -M, -M, -M], -double.infinity), 1783 refTuple([M, M, -2.^^971], double.max), 1784 refTuple([M, M, -2.^^970], double.infinity), 1785 refTuple([-2.^^970, M, M, -0X0.0000000000001P-0 * 2.^^-1022], double.max), 1786 refTuple([M, M, -2.^^970, 0X0.0000000000001P-0 * 2.^^-1022], double.infinity), 1787 refTuple([-M, 2.^^971, -M], -double.max), 1788 refTuple([-M, -M, 2.^^970], -double.infinity), 1789 refTuple([-M, -M, 2.^^970, 0X0.0000000000001P-0 * 2.^^-1022], -double.max), 1790 refTuple([-0X0.0000000000001P-0 * 2.^^-1022, -M, -M, 2.^^970], -double.infinity), 1791 refTuple([2.^^930, -2.^^980, M, M, M, -M], 1.7976931348622137e+308), 1792 refTuple([M, M, -1e307], 1.6976931348623159e+308), 1793 refTuple([1e16, 1., 1e-16], 10_000_000_000_000_002.0), 1794 ]; 1795 foreach (i, test; tests) 1796 { 1797 summator = 0.0; 1798 foreach (t; test.a) summator.put(t); 1799 auto r = test.b; 1800 auto s = summator.sum; 1801 assert(summator.isNaN() == r.isNaN()); 1802 assert(summator.isFinite() == r.isFinite()); 1803 assert(summator.isInfinity() == r.isInfinity()); 1804 assert(s == r || s.isNaN && r.isNaN); 1805 } 1806 } 1807 1808 /++ 1809 Sums elements of `r`, which must be a finite 1810 iterable. 1811 1812 A seed may be passed to `sum`. Not only will this seed be used as an initial 1813 value, but its type will be used if it is not specified. 1814 1815 Note that these specialized summing algorithms execute more primitive operations 1816 than vanilla summation. Therefore, if in certain cases maximum speed is required 1817 at expense of precision, one can use $(LREF Summation.fast). 1818 1819 Returns: 1820 The sum of all the elements in the range r. 1821 +/ 1822 template sum(F, Summation summation = Summation.appropriate) 1823 if (isMutable!F) 1824 { 1825 /// 1826 template sum(Range) 1827 if (isIterable!Range) 1828 { 1829 import core.lifetime: move; 1830 1831 /// 1832 F sum(Range r) 1833 { 1834 static if (isComplex!F && (summation == Summation.precise || summation == Summation.decimal)) 1835 { 1836 return sum(r, summationInitValue!F); 1837 } 1838 else 1839 { 1840 static if (summation == Summation.decimal) 1841 { 1842 Summator!(F, summation) sum = void; 1843 sum = 0; 1844 } 1845 else 1846 { 1847 Summator!(F, ResolveSummationType!(summation, Range, sumType!Range)) sum; 1848 } 1849 sum.put(r.move); 1850 return sum.sum; 1851 } 1852 } 1853 1854 /// 1855 F sum(Range r, F seed) 1856 { 1857 static if (isComplex!F && (summation == Summation.precise || summation == Summation.decimal)) 1858 { 1859 alias T = typeof(F.init.re); 1860 static if (summation == Summation.decimal) 1861 { 1862 Summator!(T, summation) sumRe = void; 1863 sumRe = seed.re; 1864 1865 Summator!(T, summation) sumIm = void; 1866 sumIm = seed.im; 1867 } 1868 else 1869 { 1870 auto sumRe = Summator!(T, Summation.precise)(seed.re); 1871 auto sumIm = Summator!(T, Summation.precise)(seed.im); 1872 } 1873 import mir.ndslice.slice: isSlice; 1874 static if (isSlice!Range) 1875 { 1876 import mir.algorithm.iteration: each; 1877 r.each!((auto ref elem) 1878 { 1879 sumRe.put(elem.re); 1880 sumIm.put(elem.im); 1881 }); 1882 } 1883 else 1884 { 1885 foreach (ref elem; r) 1886 { 1887 sumRe.put(elem.re); 1888 sumIm.put(elem.im); 1889 } 1890 } 1891 static if (is(F : cfloat)) { 1892 return sumRe.sum + sumIm.sum * 1fi; 1893 } else { 1894 return F(sumRe.sum, sumIm.sum); 1895 } 1896 } 1897 else 1898 { 1899 static if (summation == Summation.decimal) 1900 { 1901 Summator!(F, summation) sum = void; 1902 sum = seed; 1903 } 1904 else 1905 { 1906 auto sum = Summator!(F, ResolveSummationType!(summation, Range, F))(seed); 1907 } 1908 sum.put(r.move); 1909 return sum.sum; 1910 } 1911 } 1912 } 1913 1914 /// 1915 F sum(scope const F[] r...) 1916 { 1917 static if (isComplex!F && (summation == Summation.precise || summation == Summation.decimal)) 1918 { 1919 return sum(r, summationInitValue!F); 1920 } 1921 else 1922 { 1923 Summator!(F, ResolveSummationType!(summation, const(F)[], F)) sum; 1924 sum.put(r); 1925 return sum.sum; 1926 } 1927 } 1928 } 1929 1930 ///ditto 1931 template sum(Summation summation = Summation.appropriate) 1932 { 1933 /// 1934 sumType!Range sum(Range)(Range r) 1935 if (isIterable!Range) 1936 { 1937 import core.lifetime: move; 1938 alias F = typeof(return); 1939 return .sum!(F, ResolveSummationType!(summation, Range, F))(r.move); 1940 } 1941 1942 /// 1943 F sum(Range, F)(Range r, F seed) 1944 if (isIterable!Range) 1945 { 1946 import core.lifetime: move; 1947 return .sum!(F, ResolveSummationType!(summation, Range, F))(r.move, seed); 1948 } 1949 1950 /// 1951 sumType!T sum(T)(scope const T[] ar...) 1952 { 1953 alias F = typeof(return); 1954 return .sum!(F, ResolveSummationType!(summation, F[], F))(ar); 1955 } 1956 } 1957 1958 ///ditto 1959 template sum(F, string summation) 1960 if (isMutable!F) 1961 { 1962 mixin("alias sum = .sum!(F, Summation." ~ summation ~ ");"); 1963 } 1964 1965 ///ditto 1966 template sum(string summation) 1967 { 1968 mixin("alias sum = .sum!(Summation." ~ summation ~ ");"); 1969 } 1970 1971 1972 1973 version(mir_test) 1974 @safe pure nothrow unittest 1975 { 1976 static assert(is(typeof(sum([cast( byte)1])) == int)); 1977 static assert(is(typeof(sum([cast(ubyte)1])) == int)); 1978 static assert(is(typeof(sum([ 1, 2, 3, 4])) == int)); 1979 static assert(is(typeof(sum([ 1U, 2U, 3U, 4U])) == uint)); 1980 static assert(is(typeof(sum([ 1L, 2L, 3L, 4L])) == long)); 1981 static assert(is(typeof(sum([1UL, 2UL, 3UL, 4UL])) == ulong)); 1982 1983 int[] empty; 1984 assert(sum(empty) == 0); 1985 assert(sum([42]) == 42); 1986 assert(sum([42, 43]) == 42 + 43); 1987 assert(sum([42, 43, 44]) == 42 + 43 + 44); 1988 assert(sum([42, 43, 44, 45]) == 42 + 43 + 44 + 45); 1989 } 1990 1991 1992 version(mir_test) 1993 @safe pure nothrow unittest 1994 { 1995 static assert(is(typeof(sum([1.0, 2.0, 3.0, 4.0])) == double)); 1996 static assert(is(typeof(sum!double([ 1F, 2F, 3F, 4F])) == double)); 1997 const(float[]) a = [1F, 2F, 3F, 4F]; 1998 static assert(is(typeof(sum!double(a)) == double)); 1999 const(float)[] b = [1F, 2F, 3F, 4F]; 2000 static assert(is(typeof(sum!double(a)) == double)); 2001 2002 double[] empty; 2003 assert(sum(empty) == 0); 2004 assert(sum([42.]) == 42); 2005 assert(sum([42., 43.]) == 42 + 43); 2006 assert(sum([42., 43., 44.]) == 42 + 43 + 44); 2007 assert(sum([42., 43., 44., 45.5]) == 42 + 43 + 44 + 45.5); 2008 } 2009 2010 version(mir_test) 2011 @safe pure nothrow unittest 2012 { 2013 import mir.ndslice.topology: iota; 2014 assert(iota(2, 3).sum == 15); 2015 } 2016 2017 version(mir_test) 2018 @safe pure nothrow unittest 2019 { 2020 import std.container; 2021 static assert(is(typeof(sum!double(SList!float()[])) == double)); 2022 static assert(is(typeof(sum(SList!double()[])) == double)); 2023 static assert(is(typeof(sum(SList!real()[])) == real)); 2024 2025 assert(sum(SList!double()[]) == 0); 2026 assert(sum(SList!double(1)[]) == 1); 2027 assert(sum(SList!double(1, 2)[]) == 1 + 2); 2028 assert(sum(SList!double(1, 2, 3)[]) == 1 + 2 + 3); 2029 assert(sum(SList!double(1, 2, 3, 4)[]) == 10); 2030 } 2031 2032 2033 version(mir_test) 2034 pure nothrow unittest // 12434 2035 { 2036 import mir.ndslice.slice: sliced; 2037 import mir.ndslice.topology: map; 2038 immutable a = [10, 20]; 2039 auto s = a.sliced; 2040 auto s1 = sum(a); // Error 2041 auto s2 = s.map!(x => x).sum; // Error 2042 } 2043 2044 version(mir_test) 2045 unittest 2046 { 2047 import std.bigint; 2048 import mir.ndslice.topology: repeat; 2049 2050 auto a = BigInt("1_000_000_000_000_000_000").repeat(10); 2051 auto b = (ulong.max/2).repeat(10); 2052 auto sa = a.sum(); 2053 auto sb = b.sum(BigInt(0)); //reduce ulongs into bigint 2054 assert(sa == BigInt("10_000_000_000_000_000_000")); 2055 assert(sb == (BigInt(ulong.max/2) * 10)); 2056 } 2057 2058 version(mir_test) 2059 unittest 2060 { 2061 with(Summation) 2062 foreach (F; AliasSeq!(float, double, real)) 2063 { 2064 F[] ar = [1, 2, 3, 4]; 2065 F r = 10; 2066 assert(r == ar.sum!fast()); 2067 assert(r == ar.sum!pairwise()); 2068 assert(r == ar.sum!kahan()); 2069 assert(r == ar.sum!kbn()); 2070 assert(r == ar.sum!kb2()); 2071 } 2072 } 2073 2074 version(mir_test) 2075 unittest 2076 { 2077 assert(sum(1) == 1); 2078 assert(sum(1, 2, 3) == 6); 2079 assert(sum(1.0, 2.0, 3.0) == 6); 2080 } 2081 2082 version(mir_test) 2083 unittest 2084 { 2085 assert(sum!float(1) == 1f); 2086 assert(sum!float(1, 2, 3) == 6f); 2087 assert(sum!float(1.0, 2.0, 3.0) == 6f); 2088 } 2089 2090 version(mir_builtincomplex_test) 2091 unittest 2092 { 2093 assert(sum(1.0 + 1i, 2.0 + 2i, 3.0 + 3i) == (6 + 6i)); 2094 assert(sum!cfloat(1.0 + 1i, 2.0 + 2i, 3.0 + 3i) == (6f + 6i)); 2095 } 2096 2097 version(mir_test) 2098 unittest 2099 { 2100 import std.complex: Complex; 2101 2102 assert(sum(Complex!float(1.0, 1.0), Complex!float(2.0, 2.0), Complex!float(3.0, 3.0)) == Complex!float(6.0, 6.0)); 2103 assert(sum!(Complex!float)(Complex!float(1.0, 1.0), Complex!float(2.0, 2.0), Complex!float(3.0, 3.0)) == Complex!float(6.0, 6.0)); 2104 } 2105 2106 version(LDC) 2107 version(X86_Any) 2108 version(mir_test) 2109 unittest 2110 { 2111 import core.simd; 2112 static if (__traits(compiles, double2.init + double2.init)) 2113 { 2114 2115 alias S = Summation; 2116 alias sums = AliasSeq!(S.kahan, S.pairwise, S.naive, S.fast); 2117 2118 double2[] ar = [double2([1.0, 2]), double2([2, 3]), double2([3, 4]), double2([4, 6])]; 2119 double2 c = double2([10, 15]); 2120 2121 foreach (sumType; sums) 2122 { 2123 double2 s = ar.sum!(sumType); 2124 assert(s.array == c.array); 2125 } 2126 } 2127 } 2128 2129 version(LDC) 2130 version(X86_Any) 2131 version(mir_test) 2132 unittest 2133 { 2134 import core.simd; 2135 import mir.ndslice.topology: iota, as; 2136 2137 alias S = Summation; 2138 alias sums = AliasSeq!(S.kahan, S.pairwise, S.naive, S.fast, S.precise, 2139 S.kbn, S.kb2); 2140 2141 int[2] ns = [9, 101]; 2142 2143 foreach (n; ns) 2144 { 2145 foreach (sumType; sums) 2146 { 2147 auto ar = iota(n).as!double; 2148 double c = n * (n - 1) / 2; // gauss for n=100 2149 double s = ar.sum!(sumType); 2150 assert(s == c); 2151 } 2152 } 2153 } 2154 2155 package(mir) 2156 template ResolveSummationType(Summation summation, Range, F) 2157 { 2158 static if (summation == Summation.appropriate) 2159 { 2160 static if (isSummable!(Range, F)) 2161 enum ResolveSummationType = Summation.pairwise; 2162 else 2163 static if (is(F == class) || is(F == struct) || is(F == interface)) 2164 enum ResolveSummationType = Summation.naive; 2165 else 2166 enum ResolveSummationType = Summation.fast; 2167 } 2168 else 2169 { 2170 enum ResolveSummationType = summation; 2171 } 2172 } 2173 2174 private T summationInitValue(T)() 2175 { 2176 static if (__traits(compiles, {T a = 0.0;})) 2177 { 2178 T a = 0.0; 2179 return a; 2180 } 2181 else 2182 static if (__traits(compiles, {T a = 0;})) 2183 { 2184 T a = 0; 2185 return a; 2186 } 2187 else 2188 static if (__traits(compiles, {T a = 0 + 0fi;})) 2189 { 2190 T a = 0 + 0fi; 2191 return a; 2192 } 2193 else 2194 { 2195 return T.init; 2196 } 2197 } 2198 2199 package(mir) 2200 template elementType(T) 2201 { 2202 import mir.ndslice.slice: isSlice, DeepElementType; 2203 import std.traits: Unqual, ForeachType; 2204 2205 static if (isIterable!T) { 2206 static if (isSlice!T) 2207 alias elementType = Unqual!(DeepElementType!(T.This)); 2208 else 2209 alias elementType = Unqual!(ForeachType!T); 2210 } else { 2211 alias elementType = Unqual!T; 2212 } 2213 } 2214 2215 package(mir) 2216 template sumType(Range) 2217 { 2218 alias T = elementType!Range; 2219 2220 static if (__traits(compiles, { 2221 auto a = T.init + T.init; 2222 a += T.init; 2223 })) 2224 alias sumType = typeof(T.init + T.init); 2225 else 2226 static assert(0, "sumType: Can't sum elements of type " ~ T.stringof); 2227 } 2228 2229 /++ 2230 +/ 2231 template fillCollapseSums(Summation summation, alias combineParts, combineElements...) 2232 { 2233 import mir.ndslice.slice: Slice, SliceKind; 2234 /++ 2235 +/ 2236 auto ref fillCollapseSums(Iterator, SliceKind kind)(Slice!(Iterator, 1, kind) data) @property 2237 { 2238 import mir.algorithm.iteration; 2239 import mir.functional: naryFun; 2240 import mir.ndslice.topology: iota, triplets; 2241 foreach (triplet; data.length.iota.triplets) with(triplet) 2242 { 2243 auto ref ce(size_t i)() 2244 { 2245 static if (summation == Summation.fast) 2246 { 2247 return 2248 sum!summation(naryFun!(combineElements[i])(center, left )) + 2249 sum!summation(naryFun!(combineElements[i])(center, right)); 2250 } 2251 else 2252 { 2253 Summator!summation summator = 0; 2254 summator.put(naryFun!(combineElements[i])(center, left)); 2255 summator.put(naryFun!(combineElements[i])(center, right)); 2256 return summator.sum; 2257 } 2258 } 2259 alias sums = staticMap!(ce, Iota!(combineElements.length)); 2260 data[center] = naryFun!combineParts(center, sums); 2261 } 2262 } 2263 } 2264 2265 package: 2266 2267 template isSummable(F) 2268 { 2269 enum bool isSummable = 2270 __traits(compiles, 2271 { 2272 F a = 0.1, b, c; 2273 b = 2.3; 2274 c = a + b; 2275 c = a - b; 2276 a += b; 2277 a -= b; 2278 }); 2279 } 2280 2281 template isSummable(Range, F) 2282 { 2283 enum bool isSummable = 2284 isIterable!Range && 2285 isImplicitlyConvertible!(sumType!Range, F) && 2286 isSummable!F; 2287 } 2288 2289 version(mir_test) 2290 unittest 2291 { 2292 import mir.ndslice.topology: iota; 2293 static assert(isSummable!(typeof(iota([size_t.init])), double)); 2294 } 2295 2296 private enum bool isCompesatorAlgorithm(Summation summation) = 2297 summation == Summation.precise 2298 || summation == Summation.kb2 2299 || summation == Summation.kbn 2300 || summation == Summation.kahan; 2301 2302 2303 version(mir_test) 2304 unittest 2305 { 2306 import mir.ndslice; 2307 2308 auto p = iota([2, 3, 4, 5]); 2309 auto a = p.as!double; 2310 auto b = a.flattened; 2311 auto c = a.slice; 2312 auto d = c.flattened; 2313 auto s = p.flattened.sum; 2314 2315 assert(a.sum == s); 2316 assert(b.sum == s); 2317 assert(c.sum == s); 2318 assert(d.sum == s); 2319 2320 assert(a.canonical.sum == s); 2321 assert(b.canonical.sum == s); 2322 assert(c.canonical.sum == s); 2323 assert(d.canonical.sum == s); 2324 2325 assert(a.universal.transposed!3.sum == s); 2326 assert(b.universal.sum == s); 2327 assert(c.universal.transposed!3.sum == s); 2328 assert(d.universal.sum == s); 2329 2330 assert(a.sum!"fast" == s); 2331 assert(b.sum!"fast" == s); 2332 assert(c.sum!(float, "fast") == s); 2333 assert(d.sum!"fast" == s); 2334 2335 assert(a.canonical.sum!"fast" == s); 2336 assert(b.canonical.sum!"fast" == s); 2337 assert(c.canonical.sum!"fast" == s); 2338 assert(d.canonical.sum!"fast" == s); 2339 2340 assert(a.universal.transposed!3.sum!"fast" == s); 2341 assert(b.universal.sum!"fast" == s); 2342 assert(c.universal.transposed!3.sum!"fast" == s); 2343 assert(d.universal.sum!"fast" == s); 2344 2345 }