1 /++
2 This module contains summation algorithms.
3 
4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
5 
6 Authors: Ilya Yaroshenko
7 
8 Copyright: 2020 Ilya Yaroshenko, Kaleidic Associates Advisory Limited, Symmetry Investments
9 +/
10 module mir.math.sum;
11 
12 ///
13 version(mir_test)
14 unittest
15 {
16     import mir.ndslice.slice: sliced;
17     import mir.ndslice.topology: map;
18     auto ar = [1, 1e100, 1, -1e100].sliced.map!"a * 10_000";
19     const r = 20_000;
20     assert(r == ar.sum!"kbn");
21     assert(r == ar.sum!"kb2");
22     assert(r == ar.sum!"precise");
23     assert(r == ar.sum!"decimal");
24 }
25 
26 /// Decimal precise summation
27 version(mir_test)
28 unittest
29 {
30     auto ar = [777.7, -777];
31     assert(ar.sum!"decimal" == 0.7);
32     assert(sum!"decimal"(777.7, -777) == 0.7);
33 
34     // The exact binary reuslt is 0.7000000000000455
35     assert(ar[0] + ar[1] == 0.7000000000000455);
36     assert(ar.sum!"fast" == 0.7000000000000455);
37     assert(ar.sum!"kahan" == 0.7000000000000455);
38     assert(ar.sum!"kbn" == 0.7000000000000455);
39     assert(ar.sum!"kb2" == 0.7000000000000455);
40     assert(ar.sum!"precise" == 0.7000000000000455);
41 }
42 
43 ///
44 version(mir_test)
45 unittest
46 {
47     import mir.ndslice.slice: sliced, slicedField;
48     import mir.ndslice.topology: map, iota, retro;
49     import mir.ndslice.concatenation: concatenation;
50     import mir.math.common;
51     auto ar = 1000
52         .iota
53         .map!(n => 1.7L.pow(n+1) - 1.7L.pow(n))
54         ;
55     real d = 1.7L.pow(1000);
56     assert(sum!"precise"(concatenation(ar, [-d].sliced).slicedField) == -1);
57     assert(sum!"precise"(ar.retro, -d) == -1);
58 }
59 
60 /++
61 `Naive`, `Pairwise` and `Kahan` algorithms can be used for user defined types.
62 +/
63 version(mir_test)
64 unittest
65 {
66     import mir.internal.utility: isFloatingPoint;
67     static struct Quaternion(F)
68         if (isFloatingPoint!F)
69     {
70         F[4] rijk;
71 
72         /// + and - operator overloading
73         Quaternion opBinary(string op)(auto ref const Quaternion rhs) const
74             if (op == "+" || op == "-")
75         {
76             Quaternion ret ;
77             foreach (i, ref e; ret.rijk)
78                 mixin("e = rijk[i] "~op~" rhs.rijk[i];");
79             return ret;
80         }
81 
82         /// += and -= operator overloading
83         Quaternion opOpAssign(string op)(auto ref const Quaternion rhs)
84             if (op == "+" || op == "-")
85         {
86             foreach (i, ref e; rijk)
87                 mixin("e "~op~"= rhs.rijk[i];");
88             return this;
89         }
90 
91         ///constructor with single FP argument
92         this(F f)
93         {
94             rijk[] = f;
95         }
96 
97         ///assigment with single FP argument
98         void opAssign(F f)
99         {
100             rijk[] = f;
101         }
102     }
103 
104     Quaternion!double q, p, r;
105     q.rijk = [0, 1, 2, 4];
106     p.rijk = [3, 4, 5, 9];
107     r.rijk = [3, 5, 7, 13];
108 
109     assert(r == [p, q].sum!"naive");
110     assert(r == [p, q].sum!"pairwise");
111     assert(r == [p, q].sum!"kahan");
112 }
113 
114 /++
115 All summation algorithms available for complex numbers.
116 +/
117 version(mir_builtincomplex_test)
118 unittest
119 {
120     cdouble[] ar = [1.0 + 2i, 2 + 3i, 3 + 4i, 4 + 5i];
121     cdouble r = 10 + 14i;
122     assert(r == ar.sum!"fast");
123     assert(r == ar.sum!"naive");
124     assert(r == ar.sum!"pairwise");
125     assert(r == ar.sum!"kahan");
126     version(LDC) // DMD Internal error: backend/cgxmm.c 628
127     {
128         assert(r == ar.sum!"kbn");
129         assert(r == ar.sum!"kb2");
130     }
131     assert(r == ar.sum!"precise");
132     assert(r == ar.sum!"decimal");
133 }
134 
135 ///
136 version(mir_test)
137 unittest
138 {
139     import std.complex: Complex;
140 
141     auto ar = [Complex!double(1.0, 2), Complex!double(2.0, 3), Complex!double(3.0, 4), Complex!double(4.0, 5)];
142     Complex!double r = Complex!double(10.0, 14);
143     assert(r == ar.sum!"fast");
144     assert(r == ar.sum!"naive");
145     assert(r == ar.sum!"pairwise");
146     assert(r == ar.sum!"kahan");
147     version(LDC) // DMD Internal error: backend/cgxmm.c 628
148     {
149         assert(r == ar.sum!"kbn");
150         assert(r == ar.sum!"kb2");
151     }
152     assert(r == ar.sum!"precise");
153     assert(r == ar.sum!"decimal");
154 }
155 
156 ///
157 version(mir_test)
158 @safe pure nothrow unittest
159 {
160     import mir.ndslice.topology: repeat, iota;
161 
162     //simple integral summation
163     assert(sum([ 1, 2, 3, 4]) == 10);
164 
165     //with initial value
166     assert(sum([ 1, 2, 3, 4], 5) == 15);
167 
168     //with integral promotion
169     assert(sum([false, true, true, false, true]) == 3);
170     assert(sum(ubyte.max.repeat(100)) == 25_500);
171 
172     //The result may overflow
173     assert(uint.max.repeat(3).sum           ==  4_294_967_293U );
174     //But a seed can be used to change the summation primitive
175     assert(uint.max.repeat(3).sum(ulong.init) == 12_884_901_885UL);
176 
177     //Floating point summation
178     assert(sum([1.0, 2.0, 3.0, 4.0]) == 10);
179 
180     //Type overriding
181     static assert(is(typeof(sum!double([1F, 2F, 3F, 4F])) == double));
182     static assert(is(typeof(sum!double([1F, 2F, 3F, 4F], 5F)) == double));
183     assert(sum([1F, 2, 3, 4]) == 10);
184     assert(sum([1F, 2, 3, 4], 5F) == 15);
185 
186     //Force pair-wise floating point summation on large integers
187     import mir.math : approxEqual;
188     assert(iota!long([4096], uint.max / 2).sum(0.0)
189                .approxEqual((uint.max / 2) * 4096.0 + 4096.0 * 4096.0 / 2));
190 }
191 
192 /// Precise summation
193 version(mir_test)
194 nothrow @nogc unittest
195 {
196     import mir.ndslice.topology: iota, map;
197     import core.stdc.tgmath: pow;
198     assert(iota(1000).map!(n => 1.7L.pow(real(n)+1) - 1.7L.pow(real(n)))
199                      .sum!"precise" == -1 + 1.7L.pow(1000.0L));
200 }
201 
202 /// Precise summation with output range
203 version(mir_test)
204 nothrow @nogc unittest
205 {
206     import mir.ndslice.topology: iota, map;
207     import mir.math.common;
208     auto r = iota(1000).map!(n => 1.7L.pow(n+1) - 1.7L.pow(n));
209     Summator!(real, Summation.precise) s = 0.0;
210     s.put(r);
211     s -= 1.7L.pow(1000);
212     assert(s.sum == -1);
213 }
214 
215 /// Precise summation with output range
216 version(mir_test)
217 nothrow @nogc unittest
218 {
219     import mir.math.common;
220     float  M = 2.0f ^^ (float.max_exp-1);
221     double N = 2.0  ^^ (float.max_exp-1);
222     auto s = Summator!(float, Summation.precise)(0);
223     s += M;
224     s += M;
225     assert(float.infinity == s.sum); //infinity
226     auto e = cast(Summator!(double, Summation.precise)) s;
227     assert(e.sum < double.infinity);
228     assert(N+N == e.sum()); //finite number
229 }
230 
231 /// Moving mean
232 version(mir_test)
233 @safe pure nothrow @nogc
234 unittest
235 {
236     import mir.internal.utility: isFloatingPoint;
237     import mir.math.sum;
238     import mir.ndslice.topology: linspace;
239     import mir.rc.array: rcarray;
240 
241     struct MovingAverage(T)
242         if (isFloatingPoint!T)
243     {
244         import mir.math.stat: MeanAccumulator;
245 
246         MeanAccumulator!(T, Summation.precise) meanAccumulator;
247         double[] circularBuffer;
248         size_t frontIndex;
249 
250         @disable this(this);
251 
252         auto avg() @property const
253         {
254             return meanAccumulator.mean;
255         }
256 
257         this(double[] buffer)
258         {
259             assert(buffer.length);
260             circularBuffer = buffer;
261             meanAccumulator.put(buffer);
262         }
263 
264         ///operation without rounding
265         void put(T x)
266         {
267             import mir.utility: swap;
268             meanAccumulator.summator += x;
269             swap(circularBuffer[frontIndex++], x);
270             frontIndex = frontIndex == circularBuffer.length ? 0 : frontIndex;
271             meanAccumulator.summator -= x;
272         }
273     }
274 
275     /// ma always keeps precise average of last 1000 elements
276     auto x = linspace!double([1000], [0.0, 999]).rcarray;
277     auto ma = MovingAverage!double(x[]);
278     assert(ma.avg == (1000 * 999 / 2) / 1000.0);
279     /// move by 10 elements
280     foreach(e; linspace!double([10], [1000.0, 1009.0]))
281         ma.put(e);
282     assert(ma.avg == (1010 * 1009 / 2 - 10 * 9 / 2) / 1000.0);
283 }
284 
285 /// Arbitrary sum
286 version(mir_test)
287 @safe pure nothrow
288 unittest
289 {
290     assert(sum(1, 2, 3, 4) == 10);
291     assert(sum!float(1, 2, 3, 4) == 10f);
292     assert(sum(1f, 2, 3, 4) == 10f);
293     assert(sum(1.0 + 2i, 2 + 3i, 3 + 4i, 4 + 5i) == (10 + 14i));
294 }
295 
296 version(X86)
297     version = X86_Any;
298 version(X86_64)
299     version = X86_Any;
300 
301 /++
302 SIMD Vectors
303 Bugs: ICE 1662 (dmd only)
304 +/
305 version(LDC)
306 version(X86_Any)
307 version(mir_test)
308 unittest
309 {
310     import core.simd;
311     import std.meta : AliasSeq;
312     double2 a = 1, b = 2, c = 3, d = 6;
313     with(Summation)
314     {
315         foreach (algo; AliasSeq!(naive, fast, pairwise, kahan))
316         {
317             assert([a, b, c].sum!algo.array == d.array);
318             assert([a, b].sum!algo(c).array == d.array);
319         }
320     }
321 }
322 
323 import std.traits;
324 private alias AliasSeq(T...) = T;
325 import mir.internal.utility: Iota, isComplex;
326 import mir.math.common: fabs;
327 
328 private alias isNaN = x => x != x;
329 private alias isFinite = x => x.fabs < x.infinity;
330 private alias isInfinity = x => x.fabs == x.infinity;
331 
332 
333 private template chainSeq(size_t n)
334 {
335     static if (n)
336         alias chainSeq = AliasSeq!(n, chainSeq!(n / 2));
337     else
338         alias chainSeq = AliasSeq!();
339 }
340 
341 /++
342 Summation algorithms.
343 +/
344 enum Summation
345 {
346     /++
347     Performs `pairwise` summation for floating point based types and `fast` summation for integral based types.
348     +/
349     appropriate,
350 
351     /++
352     $(WEB en.wikipedia.org/wiki/Pairwise_summation, Pairwise summation) algorithm.
353     +/
354     pairwise,
355 
356     /++
357     Precise summation algorithm.
358     The value of the sum is rounded to the nearest representable
359     floating-point number using the $(LUCKY round-half-to-even rule).
360     The result can differ from the exact value on 32bit `x86`, `nextDown(proir) <= result &&  result <= nextUp(proir)`.
361     The current implementation re-establish special value semantics across iterations (i.e. handling ±inf).
362 
363     References: $(LINK2 http://www.cs.cmu.edu/afs/cs/project/quake/public/papers/robust-arithmetic.ps,
364         "Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric Predicates", Jonathan Richard Shewchuk),
365         $(LINK2 http://bugs.python.org/file10357/msum4.py, Mark Dickinson's post at bugs.python.org).
366     +/
367 
368     /+
369     Precise summation function as msum() by Raymond Hettinger in
370     <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393090>,
371     enhanced with the exact partials sum and roundoff from Mark
372     Dickinson's post at <http://bugs.python.org/file10357/msum4.py>.
373     See those links for more details, proofs and other references.
374     IEEE 754R floating point semantics are assumed.
375     +/
376     precise,
377 
378     /++
379     Precise decimal summation algorithm.
380 
381     The elements of the sum are converted to a shortest decimal representation that being converted back would result the same floating-point number.
382     The resulting decimal elements are summed without rounding.
383     The decimal sum is converted back to a binary floating point representation using round-half-to-even rule.
384 
385     See_also: The $(HTTPS github.com/ulfjack/ryu, Ryu algorithm)
386     +/
387     decimal,
388 
389     /++
390     $(WEB en.wikipedia.org/wiki/Kahan_summation, Kahan summation) algorithm.
391     +/
392     /+
393     ---------------------
394     s := x[1]
395     c := 0
396     FOR k := 2 TO n DO
397     y := x[k] - c
398     t := s + y
399     c := (t - s) - y
400     s := t
401     END DO
402     ---------------------
403     +/
404     kahan,
405 
406     /++
407     $(LUCKY Kahan-Babuška-Neumaier summation algorithm). `KBN` gives more accurate results then `Kahan`.
408     +/
409     /+
410     ---------------------
411     s := x[1]
412     c := 0
413     FOR i := 2 TO n DO
414     t := s + x[i]
415     IF ABS(s) >= ABS(x[i]) THEN
416         c := c + ((s-t)+x[i])
417     ELSE
418         c := c + ((x[i]-t)+s)
419     END IF
420     s := t
421     END DO
422     s := s + c
423     ---------------------
424     +/
425     kbn,
426 
427     /++
428     $(LUCKY Generalized Kahan-Babuška summation algorithm), order 2. `KB2` gives more accurate results then `Kahan` and `KBN`.
429     +/
430     /+
431     ---------------------
432     s := 0 ; cs := 0 ; ccs := 0
433     FOR j := 1 TO n DO
434         t := s + x[i]
435         IF ABS(s) >= ABS(x[i]) THEN
436             c := (s-t) + x[i]
437         ELSE
438             c := (x[i]-t) + s
439         END IF
440         s := t
441         t := cs + c
442         IF ABS(cs) >= ABS(c) THEN
443             cc := (cs-t) + c
444         ELSE
445             cc := (c-t) + cs
446         END IF
447         cs := t
448         ccs := ccs + cc
449     END FOR
450     RETURN s+cs+ccs
451     ---------------------
452     +/
453     kb2,
454 
455     /++
456     Naive algorithm (one by one).
457     +/
458     naive,
459 
460     /++
461     SIMD optimized summation algorithm.
462     +/
463     fast,
464 }
465 
466 /++
467 Output range for summation.
468 +/
469 struct Summator(T, Summation summation)
470     if (isMutable!T)
471 {
472     import mir.internal.utility: isComplex;
473     static if (is(T == class) || is(T == interface) || hasElaborateAssign!T && !isComplex!T)
474         static assert (summation == Summation.naive,
475             "Classes, interfaces, and structures with "
476             ~ "elaborate constructor support only naive summation.");
477 
478     static if (summation == Summation.fast)
479     {
480         version (LDC)
481         {
482             import ldc.attributes: fastmath;
483             alias attr = fastmath;
484         }
485         else
486         {
487             alias attr = AliasSeq!();
488         }
489     }
490     else
491     {
492         alias attr = AliasSeq!();
493     }
494 
495     @attr:
496 
497     static if (summation == Summation.pairwise) {
498         private enum bool fastPairwise =
499             is(F == float) ||
500             is(F == double) ||
501             is(F == cfloat) ||
502             is(F == cdouble) ||
503             (isComplex!F && F.sizeof <= 16) ||
504             is(F : __vector(W[N]), W, size_t N);
505             //false;
506     }
507 
508     alias F = T;
509 
510     static if (summation == Summation.precise)
511     {
512         import std.internal.scopebuffer;
513         import mir.appender;
514         import mir.math.ieee: signbit;
515     private:
516         enum F M = (cast(F)(2)) ^^ (T.max_exp - 1);
517         auto partials = scopedBuffer!(F, 8 * T.sizeof);
518         //sum for NaN and infinity.
519         F s = summationInitValue!F;
520         //Overflow Degree. Count of 2^^F.max_exp minus count of -(2^^F.max_exp)
521         sizediff_t o;
522 
523 
524         /++
525         Compute the sum of a list of nonoverlapping floats.
526         On input, partials is a list of nonzero, nonspecial,
527         nonoverlapping floats, strictly increasing in magnitude, but
528         possibly not all having the same sign.
529         On output, the sum of partials gives the error in the returned
530         result, which is correctly rounded (using the round-half-to-even
531         rule).
532         Two floating point values x and y are non-overlapping if the least significant nonzero
533         bit of x is more significant than the most significant nonzero bit of y, or vice-versa.
534         +/
535         static F partialsReduce(F s, in F[] partials)
536         in
537         {
538             debug(numeric) assert(!partials.length || .isFinite(s));
539         }
540         do
541         {
542             bool _break;
543             foreach_reverse (i, y; partials)
544             {
545                 s = partialsReducePred(s, y, i ? partials[i-1] : 0, _break);
546                 if (_break)
547                     break;
548                 debug(numeric) assert(.isFinite(s));
549             }
550             return s;
551         }
552 
553         static F partialsReducePred(F s, F y, F z, out bool _break)
554         out(result)
555         {
556             debug(numeric) assert(.isFinite(result));
557         }
558         do
559         {
560             F x = s;
561             s = x + y;
562             F d = s - x;
563             F l = y - d;
564             debug(numeric)
565             {
566                 assert(.isFinite(x));
567                 assert(.isFinite(y));
568                 assert(.isFinite(s));
569                 assert(fabs(y) < fabs(x));
570             }
571             if (l)
572             {
573             //Make half-even rounding work across multiple partials.
574             //Needed so that sum([1e-16, 1, 1e16]) will round-up the last
575             //digit to two instead of down to zero (the 1e-16 makes the 1
576             //slightly closer to two). Can guarantee commutativity.
577                 if (z && !signbit(l * z))
578                 {
579                     l *= 2;
580                     x = s + l;
581                     F t = x - s;
582                     if (l == t)
583                         s = x;
584                 }
585                 _break = true;
586             }
587             return s;
588         }
589 
590         //Returns corresponding infinity if is overflow and 0 otherwise.
591         F overflow()() const
592         {
593             if (o == 0)
594                 return 0;
595             if (partials.length && (o == -1 || o == 1)  && signbit(o * partials.data[$-1]))
596             {
597                 // problem case: decide whether result is representable
598                 F x = o * M;
599                 F y = partials.data[$-1] / 2;
600                 F h = x + y;
601                 F d = h - x;
602                 F l = (y - d) * 2;
603                 y = h * 2;
604                 d = h + l;
605                 F t = d - h;
606                 version(X86)
607                 {
608                     if (!.isInfinity(cast(T)y) || !.isInfinity(sum()))
609                         return 0;
610                 }
611                 else
612                 {
613                     if (!.isInfinity(cast(T)y) ||
614                         ((partials.length > 1 && !signbit(l * partials.data[$-2])) && t == l))
615                         return 0;
616                 }
617             }
618             return F.infinity * o;
619         }
620     }
621     else
622     static if (summation == Summation.kb2)
623     {
624         F s = summationInitValue!F;
625         F cs = summationInitValue!F;
626         F ccs = summationInitValue!F;
627     }
628     else
629     static if (summation == Summation.kbn)
630     {
631         F s = summationInitValue!F;
632         F c = summationInitValue!F;
633     }
634     else
635     static if (summation == Summation.kahan)
636     {
637         F s = summationInitValue!F;
638         F c = summationInitValue!F;
639         F y = summationInitValue!F; // do not declare in the loop/put (algo can be used for matrixes and etc)
640         F t = summationInitValue!F; // ditto
641     }
642     else
643     static if (summation == Summation.pairwise)
644     {
645         package size_t counter;
646         size_t index;
647         static if (fastPairwise)
648         {
649             enum registersCount= 16;
650             F[size_t.sizeof * 8] partials;
651         }
652         else
653         {
654             F[size_t.sizeof * 8] partials;
655         }
656     }
657     else
658     static if (summation == Summation.naive)
659     {
660         F s = summationInitValue!F;
661     }
662     else
663     static if (summation == Summation.fast)
664     {
665         F s = summationInitValue!F;
666     }
667     else
668     static if (summation == Summation.decimal)
669     {
670         import mir.bignum.decimal;
671         Decimal!256 s;
672         T ss = 0;
673     }
674     else
675     static assert(0, "Unsupported summation type for std.numeric.Summator.");
676 
677 
678 public:
679 
680     ///
681     this()(T n)
682     {
683         static if (summation == Summation.precise)
684         {
685             s = 0.0;
686             o = 0;
687             if (n) put(n);
688         }
689         else
690         static if (summation == Summation.kb2)
691         {
692             s = n;
693             static if (isComplex!T)
694             {
695                 static if (is(T : cfloat)) {
696                     cs = 0 + 0fi;
697                     ccs = 0 + 0fi;
698                 } else {
699                     cs = Complex!float(0, 0);
700                     ccs = Complex!float(0, 0);
701                 }
702             }
703             else
704             {
705                 cs = 0.0;
706                 ccs = 0.0;
707             }
708         }
709         else
710         static if (summation == Summation.kbn)
711         {
712             s = n;
713             static if (isComplex!T) {
714                 static if (is(T : cfloat)) {
715                     c = 0 + 0fi;
716                 } else {
717                     c = Complex!float(0, 0);
718                 }
719             } else
720                 c = 0.0;
721         }
722         else
723         static if (summation == Summation.kahan)
724         {
725             s = n;
726             static if (isComplex!T) {
727                 static if (is(T : cfloat)) {
728                     c = 0 + 0fi;
729                 } else {
730                     c = Complex!float(0, 0);
731                 }
732             } else
733                 c = 0.0;
734         }
735         else
736         static if (summation == Summation.pairwise)
737         {
738             counter = index = 1;
739             partials[0] = n;
740         }
741         else
742         static if (summation == Summation.naive)
743         {
744             s = n;
745         }
746         else
747         static if (summation == Summation.fast)
748         {
749             s = n;
750         }
751         else
752         static if (summation == Summation.decimal)
753         {
754             ss = 0;
755             if (!(-n.infinity < n && n < n.infinity))
756             {
757                 ss = n;
758                 n = 0;
759             }
760             s = n;
761         }
762         else
763         static assert(0);
764     }
765 
766     ///Adds `n` to the internal partial sums.
767     void put(N)(N n)
768         if (__traits(compiles, {T a = n; a = n; a += n;}))
769     {
770         static if (isCompesatorAlgorithm!summation)
771             F x = n;
772         static if (summation == Summation.precise)
773         {
774             if (.isFinite(x))
775             {
776                 size_t i;
777                 auto partials_data = partials.data;
778                 foreach (y; partials_data[])
779                 {
780                     F h = x + y;
781                     if (.isInfinity(cast(T)h))
782                     {
783                         if (fabs(x) < fabs(y))
784                         {
785                             F t = x; x = y; y = t;
786                         }
787                         //h == -F.infinity
788                         if (signbit(h))
789                         {
790                             x += M;
791                             x += M;
792                             o--;
793                         }
794                         //h == +F.infinity
795                         else
796                         {
797                             x -= M;
798                             x -= M;
799                             o++;
800                         }
801                         debug(numeric) assert(x.isFinite);
802                         h = x + y;
803                     }
804                     debug(numeric) assert(h.isFinite);
805                     F l;
806                     if (fabs(x) < fabs(y))
807                     {
808                         F t = h - y;
809                         l = x - t;
810                     }
811                     else
812                     {
813                         F t = h - x;
814                         l = y - t;
815                     }
816                     debug(numeric) assert(l.isFinite);
817                     if (l)
818                     {
819                         partials_data[i++] = l;
820                     }
821                     x = h;
822                 }
823                 partials.shrinkTo(i);
824                 if (x)
825                 {
826                     partials.put(x);
827                 }
828             }
829             else
830             {
831                 s += x;
832             }
833         }
834         else
835         static if (summation == Summation.kb2)
836         {
837             static if (isFloatingPoint!F)
838             {
839                 F t = s + x;
840                 F c = 0;
841                 if (fabs(s) >= fabs(x))
842                 {
843                     F d = s - t;
844                     c = d + x;
845                 }
846                 else
847                 {
848                     F d = x - t;
849                     c = d + s;
850                 }
851                 s = t;
852                 t = cs + c;
853                 if (fabs(cs) >= fabs(c))
854                 {
855                     F d = cs - t;
856                     d += c;
857                     ccs += d;
858                 }
859                 else
860                 {
861                     F d = c - t;
862                     d += cs;
863                     ccs += d;
864                 }
865                 cs = t;
866             }
867             else
868             {
869                 F t = s + x;
870                 if (fabs(s.re) < fabs(x.re))
871                 {
872                     auto s_re = s.re;
873                     auto x_re = x.re;
874                     static if (is(F : cfloat)) {
875                         s = x_re + s.im * 1fi;
876                         x = s_re + x.im * 1fi;
877                     } else {
878                         s = F(x_re, s.im);
879                         x = F(s_re, x.im);
880                     }
881                 }
882                 if (fabs(s.im) < fabs(x.im))
883                 {
884                     auto s_im = s.im;
885                     auto x_im = x.im;
886                     static if (is(F : cfloat)) {
887                         s = s.re + x_im * 1fi;
888                         x = x.re + s_im * 1fi;
889                     } else {
890                         s = F(s.re, x_im);
891                         x = F(x.re, s_im);
892                     }
893                 }
894                 F c = (s-t)+x;
895                 s = t;
896                 if (fabs(cs.re) < fabs(c.re))
897                 {
898                     auto c_re = c.re;
899                     auto cs_re = cs.re;
900                     static if (is(F : cfloat)) {
901                         c = cs_re + c.im * 1fi;
902                         cs = c_re + cs.im * 1fi;
903                     } else {
904                         c = F(cs_re, c.im);
905                         cs = F(c_re, cs.im);
906                     }
907                 }
908                 if (fabs(cs.im) < fabs(c.im))
909                 {
910                     auto c_im = c.im;
911                     auto cs_im = cs.im;
912                     static if (is(F : cfloat)) {
913                         c = c.re + cs_im * 1fi;
914                         cs = cs.re + c_im * 1fi;
915                     } else {
916                         c = F(c.re, cs_im);
917                         cs = F(cs.re, c_im);
918                     }
919                 }
920                 F d = cs - t;
921                 d += c;
922                 ccs += d;
923                 cs = t;
924             }
925         }
926         else
927         static if (summation == Summation.kbn)
928         {
929             static if (isFloatingPoint!F)
930             {
931                 F t = s + x;
932                 if (fabs(s) >= fabs(x))
933                 {
934                     F d = s - t;
935                     d += x;
936                     c += d;
937                 }
938                 else
939                 {
940                     F d = x - t;
941                     d += s;
942                     c += d;
943                 }
944                 s = t;
945             }
946             else
947             {
948                 F t = s + x;
949                 if (fabs(s.re) < fabs(x.re))
950                 {
951                     auto s_re = s.re;
952                     auto x_re = x.re;
953                     static if (is(F : cfloat)) {
954                         s = x_re + s.im * 1fi;
955                         x = s_re + x.im * 1fi;
956                     } else {
957                         s = F(x_re, s.im);
958                         x = F(s_re, x.im);
959                     }
960                 }
961                 if (fabs(s.im) < fabs(x.im))
962                 {
963                     auto s_im = s.im;
964                     auto x_im = x.im;
965                     static if (is(F : cfloat)) {
966                         s = s.re + x_im * 1fi;
967                         x = x.re + s_im * 1fi;
968                     } else {
969                         s = F(s.re, x_im);
970                         x = F(x.re, s_im);
971                     }
972                 }
973                 F d = s - t;
974                 d += x;
975                 c += d;
976                 s = t;
977             }
978         }
979         else
980         static if (summation == Summation.kahan)
981         {
982             y = x - c;
983             t = s + y;
984             c = t - s;
985             c -= y;
986             s = t;
987         }
988         else
989         static if (summation == Summation.pairwise)
990         {
991             import mir.bitop: cttz;
992             ++counter;
993             partials[index] = n;
994             foreach (_; 0 .. cttz(counter))
995             {
996                 immutable newIndex = index - 1;
997                 partials[newIndex] += partials[index];
998                 index = newIndex;
999             }
1000             ++index;
1001         }
1002         else
1003         static if (summation == Summation.naive)
1004         {
1005             s += n;
1006         }
1007         else
1008         static if (summation == Summation.fast)
1009         {
1010             s += n;
1011         }
1012         else
1013         static if (summation == summation.decimal)
1014         {
1015             import mir.bignum.internal.ryu.generic_128: genericBinaryToDecimal;
1016             if (-n.infinity < n && n < n.infinity)
1017             {
1018                 auto decimal = genericBinaryToDecimal(n);
1019                 s += decimal;
1020             }
1021             else
1022             {
1023                 ss += n;
1024             }
1025         }
1026         else
1027         static assert(0);
1028     }
1029 
1030     ///ditto
1031     void put(Range)(Range r)
1032         if (isIterable!Range && !is(Range : __vector(V[N]), V, size_t N))
1033     {
1034         static if (summation == Summation.pairwise && fastPairwise && isDynamicArray!Range)
1035         {
1036             F[registersCount] v;
1037             foreach (i, n; chainSeq!registersCount)
1038             {
1039                 if (r.length >= n * 2) do
1040                 {
1041                     foreach (j; Iota!n)
1042                         v[j] = cast(F) r[j];
1043                     foreach (j; Iota!n)
1044                         v[j] += cast(F) r[n + j];
1045                     foreach (m; chainSeq!(n / 2))
1046                         foreach (j; Iota!m)
1047                             v[j] += v[m + j];
1048                     put(v[0]);
1049                     r = r[n * 2 .. $];
1050                 }
1051                 while (!i && r.length >= n * 2);
1052             }
1053             if (r.length)
1054             {
1055                 put(cast(F) r[0]);
1056                 r = r[1 .. $];
1057             }
1058             assert(r.length == 0);
1059         }
1060         else
1061         static if (summation == Summation.fast)
1062         {
1063             static if (isComplex!F) {
1064                 static if (is(F : cfloat)) {
1065                     F s0 = 0 + 0fi;
1066                 } else {
1067                     F s0 = F(0, 0f);
1068                 }
1069             } else
1070                 F s0 = 0;
1071             foreach (ref elem; r)
1072                 s0 += elem;
1073             s += s0;
1074         }
1075         else
1076         {
1077             foreach (ref elem; r)
1078                 put(elem);
1079         }
1080     }
1081 
1082     import mir.ndslice.slice;
1083 
1084     /// ditto
1085     void put(Range: Slice!(Iterator, N, kind), Iterator, size_t N, SliceKind kind)(Range r)
1086     {
1087         static if (N > 1 && kind == Contiguous)
1088         {
1089             import mir.ndslice.topology: flattened;
1090             this.put(r.flattened);
1091         }
1092         else
1093         static if (isPointer!Iterator && kind == Contiguous)
1094         {
1095             this.put(r.field);
1096         }
1097         else
1098         static if (summation == Summation.fast && N == 1)
1099         {
1100             static if (isComplex!F) {
1101                 static if (is(F : cfloat)) {
1102                     F s0 = 0 + 0fi;
1103                 } else {
1104                     F s0 = F(0, 0f);
1105                 }
1106             } else
1107                 F s0 = 0;
1108             import mir.algorithm.iteration: reduce;
1109             s0 = s0.reduce!"a + b"(r);
1110             s += s0;
1111         }
1112         else
1113         {
1114             foreach(elem; r)
1115                 this.put(elem);
1116         }
1117     }
1118 
1119     /+
1120     Adds `x` to the internal partial sums.
1121     This operation doesn't re-establish special
1122     value semantics across iterations (i.e. handling ±inf).
1123     Preconditions: `isFinite(x)`.
1124     +/
1125     version(none)
1126     static if (summation == Summation.precise)
1127     package void unsafePut()(F x)
1128     in {
1129         assert(.isFinite(x));
1130     }
1131     do {
1132         size_t i;
1133         foreach (y; partials.data[])
1134         {
1135             F h = x + y;
1136             debug(numeric) assert(.isFinite(h));
1137             F l;
1138             if (fabs(x) < fabs(y))
1139             {
1140                 F t = h - y;
1141                 l = x - t;
1142             }
1143             else
1144             {
1145                 F t = h - x;
1146                 l = y - t;
1147             }
1148             debug(numeric) assert(.isFinite(l));
1149             if (l)
1150             {
1151                 partials.data[i++] = l;
1152             }
1153             x = h;
1154         }
1155         partials.length = i;
1156         if (x)
1157         {
1158             partials.put(x);
1159         }
1160     }
1161 
1162     ///Returns the value of the sum.
1163     T sum()() scope const
1164     {
1165         /++
1166         Returns the value of the sum, rounded to the nearest representable
1167         floating-point number using the round-half-to-even rule.
1168         The result can differ from the exact value on `X86`, `nextDown`proir) <= result &&  result <= nextUp(proir)).
1169         +/
1170         static if (summation == Summation.precise)
1171         {
1172             debug(mir_sum)
1173             {
1174                 foreach (y; partials.data[])
1175                 {
1176                     assert(y);
1177                     assert(y.isFinite);
1178                 }
1179                 //TODO: Add Non-Overlapping check to std.math
1180                 import mir.ndslice.slice: sliced;
1181                 import mir.ndslice.sorting: isSorted;
1182                 import mir.ndslice.topology: map;
1183                 assert(partials.data[].sliced.map!fabs.isSorted);
1184             }
1185 
1186             if (s)
1187                 return s;
1188             auto parts = partials.data[];
1189             F y = 0.0;
1190             //pick last
1191             if (parts.length)
1192             {
1193                 y = parts[$-1];
1194                 parts = parts[0..$-1];
1195             }
1196             if (o)
1197             {
1198                 immutable F of = o;
1199                 if (y && (o == -1 || o == 1)  && signbit(of * y))
1200                 {
1201                     // problem case: decide whether result is representable
1202                     y /= 2;
1203                     F x = of * M;
1204                     immutable F h = x + y;
1205                     F t = h - x;
1206                     F l = (y - t) * 2;
1207                     y = h * 2;
1208                     if (.isInfinity(cast(T)y))
1209                     {
1210                         // overflow, except in edge case...
1211                         x = h + l;
1212                         t = x - h;
1213                         y = parts.length && t == l && !signbit(l*parts[$-1]) ?
1214                             x * 2 :
1215                             F.infinity * of;
1216                         parts = null;
1217                     }
1218                     else if (l)
1219                     {
1220                         bool _break;
1221                         y = partialsReducePred(y, l, parts.length ? parts[$-1] : 0, _break);
1222                         if (_break)
1223                             parts = null;
1224                     }
1225                 }
1226                 else
1227                 {
1228                     y = F.infinity * of;
1229                     parts = null;
1230                 }
1231             }
1232             return partialsReduce(y, parts);
1233         }
1234         else
1235         static if (summation == Summation.kb2)
1236         {
1237             return s + (cs + ccs);
1238         }
1239         else
1240         static if (summation == Summation.kbn)
1241         {
1242             return s + c;
1243         }
1244         else
1245         static if (summation == Summation.kahan)
1246         {
1247             return s;
1248         }
1249         else
1250         static if (summation == Summation.pairwise)
1251         {
1252             F s = summationInitValue!T;
1253             assert((counter == 0) == (index == 0));
1254             foreach_reverse (ref e; partials[0 .. index])
1255             {
1256                 static if (is(F : __vector(W[N]), W, size_t N))
1257                     s += cast(Unqual!F) e; //DMD bug workaround
1258                 else
1259                     s += e;
1260             }
1261             return s;
1262         }
1263         else
1264         static if (summation == Summation.naive)
1265         {
1266             return s;
1267         }
1268         else
1269         static if (summation == Summation.fast)
1270         {
1271             return s;
1272         }
1273         else
1274         static if (summation == Summation.decimal)
1275         {
1276             return cast(T) s + ss;
1277         }
1278         else
1279         static assert(0);
1280     }
1281 
1282     version(none)
1283     static if (summation == Summation.precise)
1284     F partialsSum()() const
1285     {
1286         debug(numeric) partialsDebug;
1287         auto parts = partials.data[];
1288         F y = 0.0;
1289         //pick last
1290         if (parts.length)
1291         {
1292             y = parts[$-1];
1293             parts = parts[0..$-1];
1294         }
1295         return partialsReduce(y, parts);
1296     }
1297 
1298     ///Returns `Summator` with extended internal partial sums.
1299     C opCast(C : Summator!(P, _summation), P, Summation _summation)() const
1300         if (
1301             _summation == summation &&
1302             isMutable!C &&
1303             P.max_exp >= T.max_exp &&
1304             P.mant_dig >= T.mant_dig
1305         )
1306     {
1307         static if (is(P == T))
1308                 return this;
1309         else
1310         static if (summation == Summation.precise)
1311         {
1312             auto ret = typeof(return).init;
1313             ret.s = s;
1314             ret.o = o;
1315             foreach (p; partials.data[])
1316             {
1317                 ret.partials.put(p);
1318             }
1319             enum exp_diff = P.max_exp / T.max_exp;
1320             static if (exp_diff)
1321             {
1322                 if (ret.o)
1323                 {
1324                     immutable f = ret.o / exp_diff;
1325                     immutable t = cast(int)(ret.o % exp_diff);
1326                     ret.o = f;
1327                     ret.put((P(2) ^^ T.max_exp) * t);
1328                 }
1329             }
1330             return ret;
1331         }
1332         else
1333         static if (summation == Summation.kb2)
1334         {
1335             auto ret = typeof(return).init;
1336             ret.s = s;
1337             ret.cs = cs;
1338             ret.ccs = ccs;
1339             return ret;
1340         }
1341         else
1342         static if (summation == Summation.kbn)
1343         {
1344             auto ret = typeof(return).init;
1345             ret.s = s;
1346             ret.c = c;
1347             return ret;
1348         }
1349         else
1350         static if (summation == Summation.kahan)
1351         {
1352             auto ret = typeof(return).init;
1353             ret.s = s;
1354             ret.c = c;
1355             return ret;
1356         }
1357         else
1358         static if (summation == Summation.pairwise)
1359         {
1360             auto ret = typeof(return).init;
1361             ret.counter = counter;
1362             ret.index = index;
1363             foreach (i; 0 .. index)
1364                 ret.partials[i] = partials[i];
1365             return ret;
1366         }
1367         else
1368         static if (summation == Summation.naive)
1369         {
1370             auto ret = typeof(return).init;
1371             ret.s = s;
1372             return ret;
1373         }
1374         else
1375         static if (summation == Summation.fast)
1376         {
1377             auto ret = typeof(return).init;
1378             ret.s = s;
1379             return ret;
1380         }
1381         else
1382         static assert(0);
1383     }
1384 
1385     /++
1386     `cast(C)` operator overloading. Returns `cast(C)sum()`.
1387     See also: `cast`
1388     +/
1389     C opCast(C)() const if (is(Unqual!C == T))
1390     {
1391         return cast(C)sum();
1392     }
1393 
1394     ///Operator overloading.
1395     // opAssign should initialize partials.
1396     void opAssign(T rhs)
1397     {
1398         static if (summation == Summation.precise)
1399         {
1400             partials.reset;
1401             s = 0.0;
1402             o = 0;
1403             if (rhs) put(rhs);
1404         }
1405         else
1406         static if (summation == Summation.kb2)
1407         {
1408             s = rhs;
1409             static if (isComplex!T)
1410             {
1411                 static if (is(T : cfloat)) {
1412                     cs = 0 + 0fi;
1413                     ccs = 0 + 0fi;
1414                 } else {
1415                     cs = T(0, 0f);
1416                     ccs = T(0.0, 0f);
1417                 }
1418             }
1419             else
1420             {
1421                 cs = 0.0;
1422                 ccs = 0.0;
1423             }
1424         }
1425         else
1426         static if (summation == Summation.kbn)
1427         {
1428             s = rhs;
1429             static if (isComplex!T) {
1430                 static if (is(T : cfloat)) {
1431                     c = 0 + 0fi;
1432                 } else {
1433                     c = T(0, 0f);
1434                 }
1435             } else
1436                 c = 0.0;
1437         }
1438         else
1439         static if (summation == Summation.kahan)
1440         {
1441             s = rhs;
1442             static if (isComplex!T) {
1443                 static if (is(T : cfloat)) {
1444                     c = 0 + 0fi;
1445                 } else {
1446                     c = T(0, 0f);
1447                 }
1448             } else
1449                 c = 0.0;
1450         }
1451         else
1452         static if (summation == Summation.pairwise)
1453         {
1454             counter = 1;
1455             index = 1;
1456             partials[0] = rhs;
1457         }
1458         else
1459         static if (summation == Summation.naive)
1460         {
1461             s = rhs;
1462         }
1463         else
1464         static if (summation == Summation.fast)
1465         {
1466             s = rhs;
1467         }
1468         else
1469         static if (summation == summation.decimal)
1470         {
1471             __ctor(rhs);
1472         }
1473         else
1474         static assert(0);
1475     }
1476 
1477     ///ditto
1478     void opOpAssign(string op : "+")(T rhs)
1479     {
1480         put(rhs);
1481     }
1482 
1483     ///ditto
1484     void opOpAssign(string op : "+")(ref const Summator rhs)
1485     {
1486         static if (summation == Summation.precise)
1487         {
1488             s += rhs.s;
1489             o += rhs.o;
1490             foreach (f; rhs.partials.data[])
1491                 put(f);
1492         }
1493         else
1494         static if (summation == Summation.kb2)
1495         {
1496             put(rhs.ccs);
1497             put(rhs.cs);
1498             put(rhs.s);
1499         }
1500         else
1501         static if (summation == Summation.kbn)
1502         {
1503             put(rhs.c);
1504             put(rhs.s);
1505         }
1506         else
1507         static if (summation == Summation.kahan)
1508         {
1509             put(rhs.s);
1510         }
1511         else
1512         static if (summation == Summation.pairwise)
1513         {
1514             foreach_reverse (e; rhs.partials[0 .. rhs.index])
1515                 put(e);
1516             counter -= rhs.index;
1517             counter += rhs.counter;
1518         }
1519         else
1520         static if (summation == Summation.naive)
1521         {
1522             put(rhs.s);
1523         }
1524         else
1525         static if (summation == Summation.fast)
1526         {
1527             put(rhs.s);
1528         }
1529         else
1530         static assert(0);
1531     }
1532 
1533     ///ditto
1534     void opOpAssign(string op : "-")(T rhs)
1535     {
1536         static if (summation == Summation.precise)
1537         {
1538             put(-rhs);
1539         }
1540         else
1541         static if (summation == Summation.kb2)
1542         {
1543             put(-rhs);
1544         }
1545         else
1546         static if (summation == Summation.kbn)
1547         {
1548             put(-rhs);
1549         }
1550         else
1551         static if (summation == Summation.kahan)
1552         {
1553             y = 0.0;
1554             y -= rhs;
1555             y -= c;
1556             t = s + y;
1557             c = t - s;
1558             c -= y;
1559             s = t;
1560         }
1561         else
1562         static if (summation == Summation.pairwise)
1563         {
1564             put(-rhs);
1565         }
1566         else
1567         static if (summation == Summation.naive)
1568         {
1569             s -= rhs;
1570         }
1571         else
1572         static if (summation == Summation.fast)
1573         {
1574             s -= rhs;
1575         }
1576         else
1577         static assert(0);
1578     }
1579 
1580     ///ditto
1581     void opOpAssign(string op : "-")(ref const Summator rhs)
1582     {
1583         static if (summation == Summation.precise)
1584         {
1585             s -= rhs.s;
1586             o -= rhs.o;
1587             foreach (f; rhs.partials.data[])
1588                 put(-f);
1589         }
1590         else
1591         static if (summation == Summation.kb2)
1592         {
1593             put(-rhs.ccs);
1594             put(-rhs.cs);
1595             put(-rhs.s);
1596         }
1597         else
1598         static if (summation == Summation.kbn)
1599         {
1600             put(-rhs.c);
1601             put(-rhs.s);
1602         }
1603         else
1604         static if (summation == Summation.kahan)
1605         {
1606             this -= rhs.s;
1607         }
1608         else
1609         static if (summation == Summation.pairwise)
1610         {
1611             foreach_reverse (e; rhs.partials[0 .. rhs.index])
1612                 put(-e);
1613             counter -= rhs.index;
1614             counter += rhs.counter;
1615         }
1616         else
1617         static if (summation == Summation.naive)
1618         {
1619             s -= rhs.s;
1620         }
1621         else
1622         static if (summation == Summation.fast)
1623         {
1624             s -= rhs.s;
1625         }
1626         else
1627         static assert(0);
1628     }
1629 
1630     ///
1631 
1632     version(mir_test)
1633     @nogc nothrow unittest
1634     {
1635         import mir.math.common;
1636         import mir.ndslice.topology: iota, map;
1637         auto r1 = iota(500).map!(a => 1.7L.pow(a+1) - 1.7L.pow(a));
1638         auto r2 = iota([500], 500).map!(a => 1.7L.pow(a+1) - 1.7L.pow(a));
1639         Summator!(real, Summation.precise) s1 = 0, s2 = 0.0;
1640         foreach (e; r1) s1 += e;
1641         foreach (e; r2) s2 -= e;
1642         s1 -= s2;
1643         s1 -= 1.7L.pow(1000);
1644         assert(s1.sum == -1);
1645     }
1646 
1647 
1648     version(mir_test)
1649     @nogc nothrow unittest
1650     {
1651         with(Summation)
1652         foreach (summation; AliasSeq!(kahan, kbn, kb2, precise, pairwise))
1653         foreach (T; AliasSeq!(float, double, real))
1654         {
1655             Summator!(T, summation) sum = 1;
1656             sum += 3;
1657             assert(sum.sum == 4);
1658             sum -= 10;
1659             assert(sum.sum == -6);
1660             Summator!(T, summation) sum2 = 3;
1661             sum -= sum2;
1662             assert(sum.sum == -9);
1663             sum2 = 100;
1664             sum += 100;
1665             assert(sum.sum == 91);
1666             auto sum3 = cast(Summator!(real, summation))sum;
1667             assert(sum3.sum == 91);
1668             sum = sum2;
1669         }
1670     }
1671 
1672 
1673     version(mir_test)
1674     @nogc nothrow unittest
1675     {
1676         import mir.math.common: approxEqual;
1677         with(Summation)
1678         foreach (summation; AliasSeq!(naive, fast))
1679         foreach (T; AliasSeq!(float, double, real))
1680         {
1681             Summator!(T, summation) sum = 1;
1682             sum += 3.5;
1683             assert(sum.sum.approxEqual(4.5));
1684             sum = 2;
1685             assert(sum.sum == 2);
1686             sum -= 4;
1687             assert(sum.sum.approxEqual(-2));
1688         }
1689     }
1690 
1691     static if (summation == Summation.precise)
1692     {
1693         ///Returns `true` if current sum is a NaN.
1694         bool isNaN()() const
1695         {
1696             return .isNaN(s);
1697         }
1698 
1699         ///Returns `true` if current sum is finite (not infinite or NaN).
1700         bool isFinite()() const
1701         {
1702             if (s)
1703                 return false;
1704             return !overflow;
1705         }
1706 
1707         ///Returns `true` if current sum is ±∞.
1708         bool isInfinity()() const
1709         {
1710             return .isInfinity(s) || overflow();
1711         }
1712     }
1713     else static if (isFloatingPoint!F)
1714     {
1715         ///Returns `true` if current sum is a NaN.
1716         bool isNaN()() const
1717         {
1718             return .isNaN(sum());
1719         }
1720 
1721         ///Returns `true` if current sum is finite (not infinite or NaN).
1722         bool isFinite()() const
1723         {
1724             return .isFinite(sum());
1725         }
1726 
1727         ///Returns `true` if current sum is ±∞.
1728         bool isInfinity()() const
1729         {
1730             return .isInfinity(sum());
1731         }
1732     }
1733     else
1734     {
1735         //User defined types
1736     }
1737 }
1738 
1739 version(mir_test)
1740 unittest
1741 {
1742     import mir.functional: RefTuple, refTuple;
1743     import mir.ndslice.topology: map, iota, retro;
1744     import mir.array.allocation: array;
1745     import std.math: isInfinity, isFinite, isNaN;
1746 
1747     Summator!(double, Summation.precise) summator = 0.0;
1748 
1749     enum double M = (cast(double)2) ^^ (double.max_exp - 1);
1750     RefTuple!(double[], double)[] tests = [
1751         refTuple(new double[0], 0.0),
1752         refTuple([0.0], 0.0),
1753         refTuple([1e100, 1.0, -1e100, 1e-100, 1e50, -1, -1e50], 1e-100),
1754         refTuple([1e308, 1e308, -1e308], 1e308),
1755         refTuple([-1e308, 1e308, 1e308], 1e308),
1756         refTuple([1e308, -1e308, 1e308], 1e308),
1757         refTuple([M, M, -2.0^^1000], 1.7976930277114552e+308),
1758         refTuple([M, M, M, M, -M, -M, -M], 8.9884656743115795e+307),
1759         refTuple([2.0^^53, -0.5, -2.0^^-54], 2.0^^53-1.0),
1760         refTuple([2.0^^53, 1.0, 2.0^^-100], 2.0^^53+2.0),
1761         refTuple([2.0^^53+10.0, 1.0, 2.0^^-100], 2.0^^53+12.0),
1762         refTuple([2.0^^53-4.0, 0.5, 2.0^^-54], 2.0^^53-3.0),
1763         refTuple([M-2.0^^970, -1, M], 1.7976931348623157e+308),
1764         refTuple([double.max, double.max*2.^^-54], double.max),
1765         refTuple([double.max, double.max*2.^^-53], double.infinity),
1766         refTuple(iota([1000], 1).map!(a => 1.0/a).array , 7.4854708605503451),
1767         refTuple(iota([1000], 1).map!(a => (-1.0)^^a/a).array, -0.69264743055982025), //0.693147180559945309417232121458176568075500134360255254120680...
1768         refTuple(iota([1000], 1).map!(a => 1.0/a).retro.array , 7.4854708605503451),
1769         refTuple(iota([1000], 1).map!(a => (-1.0)^^a/a).retro.array, -0.69264743055982025),
1770         refTuple([double.infinity, -double.infinity, double.nan], double.nan),
1771         refTuple([double.nan, double.infinity, -double.infinity], double.nan),
1772         refTuple([double.infinity, double.nan, double.infinity], double.nan),
1773         refTuple([double.infinity, double.infinity], double.infinity),
1774         refTuple([double.infinity, -double.infinity], double.nan),
1775         refTuple([-double.infinity, 1e308, 1e308, -double.infinity], -double.infinity),
1776         refTuple([M-2.0^^970, 0.0, M], double.infinity),
1777         refTuple([M-2.0^^970, 1.0, M], double.infinity),
1778         refTuple([M, M], double.infinity),
1779         refTuple([M, M, -1], double.infinity),
1780         refTuple([M, M, M, M, -M, -M], double.infinity),
1781         refTuple([M, M, M, M, -M, M], double.infinity),
1782         refTuple([-M, -M, -M, -M], -double.infinity),
1783         refTuple([M, M, -2.^^971], double.max),
1784         refTuple([M, M, -2.^^970], double.infinity),
1785         refTuple([-2.^^970, M, M, -0X0.0000000000001P-0 * 2.^^-1022], double.max),
1786         refTuple([M, M, -2.^^970, 0X0.0000000000001P-0 * 2.^^-1022], double.infinity),
1787         refTuple([-M, 2.^^971, -M], -double.max),
1788         refTuple([-M, -M, 2.^^970], -double.infinity),
1789         refTuple([-M, -M, 2.^^970, 0X0.0000000000001P-0 * 2.^^-1022], -double.max),
1790         refTuple([-0X0.0000000000001P-0 * 2.^^-1022, -M, -M, 2.^^970], -double.infinity),
1791         refTuple([2.^^930, -2.^^980, M, M, M, -M], 1.7976931348622137e+308),
1792         refTuple([M, M, -1e307], 1.6976931348623159e+308),
1793         refTuple([1e16, 1., 1e-16], 10_000_000_000_000_002.0),
1794     ];
1795     foreach (i, test; tests)
1796     {
1797         summator = 0.0;
1798         foreach (t; test.a) summator.put(t);
1799         auto r = test.b;
1800         auto s = summator.sum;
1801         assert(summator.isNaN() == r.isNaN());
1802         assert(summator.isFinite() == r.isFinite());
1803         assert(summator.isInfinity() == r.isInfinity());
1804         assert(s == r || s.isNaN && r.isNaN);
1805     }
1806 }
1807 
1808 /++
1809 Sums elements of `r`, which must be a finite
1810 iterable.
1811 
1812 A seed may be passed to `sum`. Not only will this seed be used as an initial
1813 value, but its type will be used if it is not specified.
1814 
1815 Note that these specialized summing algorithms execute more primitive operations
1816 than vanilla summation. Therefore, if in certain cases maximum speed is required
1817 at expense of precision, one can use $(LREF Summation.fast).
1818 
1819 Returns:
1820     The sum of all the elements in the range r.
1821 +/
1822 template sum(F, Summation summation = Summation.appropriate)
1823     if (isMutable!F)
1824 {
1825     ///
1826     template sum(Range)
1827         if (isIterable!Range)
1828     {
1829         import core.lifetime: move;
1830 
1831         ///
1832         F sum(Range r)
1833         {
1834             static if (isComplex!F && (summation == Summation.precise || summation == Summation.decimal))
1835             {
1836                 return sum(r, summationInitValue!F);
1837             }
1838             else
1839             {
1840                 static if (summation == Summation.decimal)
1841                 {
1842                     Summator!(F, summation) sum = void;
1843                     sum = 0;
1844                 }
1845                 else
1846                 {
1847                     Summator!(F, ResolveSummationType!(summation, Range, sumType!Range)) sum;
1848                 }
1849                 sum.put(r.move);
1850                 return sum.sum;
1851             }
1852         }
1853 
1854         ///
1855         F sum(Range r, F seed)
1856         {
1857             static if (isComplex!F && (summation == Summation.precise || summation == Summation.decimal))
1858             {
1859                 alias T = typeof(F.init.re);
1860                 static if (summation == Summation.decimal)
1861                 {
1862                     Summator!(T, summation) sumRe = void;
1863                     sumRe = seed.re;
1864 
1865                     Summator!(T, summation) sumIm = void;
1866                     sumIm = seed.im;
1867                 }
1868                 else
1869                 {
1870                     auto sumRe = Summator!(T, Summation.precise)(seed.re);
1871                     auto sumIm = Summator!(T, Summation.precise)(seed.im);
1872                 }
1873                 import mir.ndslice.slice: isSlice;
1874                 static if (isSlice!Range)
1875                 {
1876                     import mir.algorithm.iteration: each;
1877                     r.each!((auto ref elem)
1878                     {
1879                         sumRe.put(elem.re);
1880                         sumIm.put(elem.im);
1881                     });
1882                 }
1883                 else
1884                 {
1885                     foreach (ref elem; r)
1886                     {
1887                         sumRe.put(elem.re);
1888                         sumIm.put(elem.im);
1889                     }
1890                 }
1891                 static if (is(F : cfloat)) {
1892                     return sumRe.sum + sumIm.sum * 1fi;
1893                 } else {
1894                     return F(sumRe.sum, sumIm.sum);
1895                 }
1896             }
1897             else
1898             {
1899                 static if (summation == Summation.decimal)
1900                 {
1901                     Summator!(F, summation) sum = void;
1902                     sum = seed;
1903                 }
1904                 else
1905                 {
1906                     auto sum = Summator!(F, ResolveSummationType!(summation, Range, F))(seed);
1907                 }
1908                 sum.put(r.move);
1909                 return sum.sum;
1910             }
1911         }
1912     }
1913 
1914     ///
1915     F sum(scope const F[] r...)
1916     {
1917         static if (isComplex!F && (summation == Summation.precise || summation == Summation.decimal))
1918         {
1919             return sum(r, summationInitValue!F);
1920         }
1921         else
1922         {
1923             Summator!(F, ResolveSummationType!(summation, const(F)[], F)) sum;
1924             sum.put(r);
1925             return sum.sum;
1926         }
1927     }
1928 }
1929 
1930 ///ditto
1931 template sum(Summation summation = Summation.appropriate)
1932 {
1933     ///
1934     sumType!Range sum(Range)(Range r)
1935         if (isIterable!Range)
1936     {
1937         import core.lifetime: move;
1938         alias F = typeof(return);
1939         return .sum!(F, ResolveSummationType!(summation, Range, F))(r.move);
1940     }
1941 
1942     ///
1943     F sum(Range, F)(Range r, F seed)
1944         if (isIterable!Range)
1945     {
1946         import core.lifetime: move;
1947         return .sum!(F, ResolveSummationType!(summation, Range, F))(r.move, seed);
1948     }
1949 
1950     ///
1951     sumType!T sum(T)(scope const T[] ar...)
1952     {
1953         alias F = typeof(return);
1954         return .sum!(F, ResolveSummationType!(summation, F[], F))(ar);
1955     }
1956 }
1957 
1958 ///ditto
1959 template sum(F, string summation)
1960     if (isMutable!F)
1961 {
1962     mixin("alias sum = .sum!(F, Summation." ~ summation ~ ");");
1963 }
1964 
1965 ///ditto
1966 template sum(string summation)
1967 {
1968     mixin("alias sum = .sum!(Summation." ~ summation ~ ");");
1969 }
1970 
1971 
1972 
1973 version(mir_test)
1974 @safe pure nothrow unittest
1975 {
1976     static assert(is(typeof(sum([cast( byte)1])) ==  int));
1977     static assert(is(typeof(sum([cast(ubyte)1])) ==  int));
1978     static assert(is(typeof(sum([  1,   2,   3,   4])) ==  int));
1979     static assert(is(typeof(sum([ 1U,  2U,  3U,  4U])) == uint));
1980     static assert(is(typeof(sum([ 1L,  2L,  3L,  4L])) ==  long));
1981     static assert(is(typeof(sum([1UL, 2UL, 3UL, 4UL])) == ulong));
1982 
1983     int[] empty;
1984     assert(sum(empty) == 0);
1985     assert(sum([42]) == 42);
1986     assert(sum([42, 43]) == 42 + 43);
1987     assert(sum([42, 43, 44]) == 42 + 43 + 44);
1988     assert(sum([42, 43, 44, 45]) == 42 + 43 + 44 + 45);
1989 }
1990 
1991 
1992 version(mir_test)
1993 @safe pure nothrow unittest
1994 {
1995     static assert(is(typeof(sum([1.0, 2.0, 3.0, 4.0])) == double));
1996     static assert(is(typeof(sum!double([ 1F,  2F,  3F,  4F])) == double));
1997     const(float[]) a = [1F, 2F, 3F, 4F];
1998     static assert(is(typeof(sum!double(a)) == double));
1999     const(float)[] b = [1F, 2F, 3F, 4F];
2000     static assert(is(typeof(sum!double(a)) == double));
2001 
2002     double[] empty;
2003     assert(sum(empty) == 0);
2004     assert(sum([42.]) == 42);
2005     assert(sum([42., 43.]) == 42 + 43);
2006     assert(sum([42., 43., 44.]) == 42 + 43 + 44);
2007     assert(sum([42., 43., 44., 45.5]) == 42 + 43 + 44 + 45.5);
2008 }
2009 
2010 version(mir_test)
2011 @safe pure nothrow unittest
2012 {
2013     import mir.ndslice.topology: iota;
2014     assert(iota(2, 3).sum == 15);
2015 }
2016 
2017 version(mir_test)
2018 @safe pure nothrow unittest
2019 {
2020     import std.container;
2021     static assert(is(typeof(sum!double(SList!float()[])) == double));
2022     static assert(is(typeof(sum(SList!double()[])) == double));
2023     static assert(is(typeof(sum(SList!real()[])) == real));
2024 
2025     assert(sum(SList!double()[]) == 0);
2026     assert(sum(SList!double(1)[]) == 1);
2027     assert(sum(SList!double(1, 2)[]) == 1 + 2);
2028     assert(sum(SList!double(1, 2, 3)[]) == 1 + 2 + 3);
2029     assert(sum(SList!double(1, 2, 3, 4)[]) == 10);
2030 }
2031 
2032 
2033 version(mir_test)
2034 pure nothrow unittest // 12434
2035 {
2036     import mir.ndslice.slice: sliced;
2037     import mir.ndslice.topology: map;
2038     immutable a = [10, 20];
2039     auto s = a.sliced;
2040     auto s1 = sum(a);             // Error
2041     auto s2 = s.map!(x => x).sum; // Error
2042 }
2043 
2044 version(mir_test)
2045 unittest
2046 {
2047     import std.bigint;
2048     import mir.ndslice.topology: repeat;
2049 
2050     auto a = BigInt("1_000_000_000_000_000_000").repeat(10);
2051     auto b = (ulong.max/2).repeat(10);
2052     auto sa = a.sum();
2053     auto sb = b.sum(BigInt(0)); //reduce ulongs into bigint
2054     assert(sa == BigInt("10_000_000_000_000_000_000"));
2055     assert(sb == (BigInt(ulong.max/2) * 10));
2056 }
2057 
2058 version(mir_test)
2059 unittest
2060 {
2061     with(Summation)
2062     foreach (F; AliasSeq!(float, double, real))
2063     {
2064         F[] ar = [1, 2, 3, 4];
2065         F r = 10;
2066         assert(r == ar.sum!fast());
2067         assert(r == ar.sum!pairwise());
2068         assert(r == ar.sum!kahan());
2069         assert(r == ar.sum!kbn());
2070         assert(r == ar.sum!kb2());
2071     }
2072 }
2073 
2074 version(mir_test)
2075 unittest
2076 {
2077     assert(sum(1) == 1);
2078     assert(sum(1, 2, 3) == 6);
2079     assert(sum(1.0, 2.0, 3.0) == 6);
2080 }
2081 
2082 version(mir_test)
2083 unittest
2084 {
2085     assert(sum!float(1) == 1f);
2086     assert(sum!float(1, 2, 3) == 6f);
2087     assert(sum!float(1.0, 2.0, 3.0) == 6f);
2088 }
2089 
2090 version(mir_builtincomplex_test)
2091 unittest
2092 {
2093     assert(sum(1.0 + 1i, 2.0 + 2i, 3.0 + 3i) == (6 + 6i));
2094     assert(sum!cfloat(1.0 + 1i, 2.0 + 2i, 3.0 + 3i) == (6f + 6i));
2095 }
2096 
2097 version(mir_test)
2098 unittest
2099 {
2100     import std.complex: Complex;
2101 
2102     assert(sum(Complex!float(1.0, 1.0), Complex!float(2.0, 2.0), Complex!float(3.0, 3.0)) == Complex!float(6.0, 6.0));
2103     assert(sum!(Complex!float)(Complex!float(1.0, 1.0), Complex!float(2.0, 2.0), Complex!float(3.0, 3.0)) == Complex!float(6.0, 6.0));
2104 }
2105 
2106 version(LDC)
2107 version(X86_Any)
2108 version(mir_test)
2109 unittest
2110 {
2111     import core.simd;
2112     static if (__traits(compiles, double2.init + double2.init))
2113     {
2114 
2115         alias S = Summation;
2116         alias sums = AliasSeq!(S.kahan, S.pairwise, S.naive, S.fast);
2117 
2118         double2[] ar = [double2([1.0, 2]), double2([2, 3]), double2([3, 4]), double2([4, 6])];
2119         double2 c = double2([10, 15]);
2120 
2121         foreach (sumType; sums)
2122         {
2123             double2 s = ar.sum!(sumType);
2124             assert(s.array == c.array);
2125         }
2126     }
2127 }
2128 
2129 version(LDC)
2130 version(X86_Any)
2131 version(mir_test)
2132 unittest
2133 {
2134     import core.simd;
2135     import mir.ndslice.topology: iota, as;
2136 
2137     alias S = Summation;
2138     alias sums = AliasSeq!(S.kahan, S.pairwise, S.naive, S.fast, S.precise,
2139                            S.kbn, S.kb2);
2140 
2141     int[2] ns = [9, 101];
2142 
2143     foreach (n; ns)
2144     {
2145         foreach (sumType; sums)
2146         {
2147             auto ar = iota(n).as!double;
2148             double c = n * (n - 1) / 2; // gauss for n=100
2149             double s = ar.sum!(sumType);
2150             assert(s == c);
2151         }
2152     }
2153 }
2154 
2155 package(mir)
2156 template ResolveSummationType(Summation summation, Range, F)
2157 {
2158     static if (summation == Summation.appropriate)
2159     {
2160         static if (isSummable!(Range, F))
2161             enum ResolveSummationType = Summation.pairwise;
2162         else
2163         static if (is(F == class) || is(F == struct) || is(F == interface))
2164             enum ResolveSummationType = Summation.naive;
2165         else
2166             enum ResolveSummationType = Summation.fast;
2167     }
2168     else
2169     {
2170         enum ResolveSummationType = summation;
2171     }
2172 }
2173 
2174 private T summationInitValue(T)()
2175 {
2176     static if (__traits(compiles, {T a = 0.0;}))
2177     {
2178         T a = 0.0;
2179         return a;
2180     }
2181     else
2182     static if (__traits(compiles, {T a = 0;}))
2183     {
2184         T a = 0;
2185         return a;
2186     }
2187     else
2188     static if (__traits(compiles, {T a = 0 + 0fi;}))
2189     {
2190         T a = 0 + 0fi;
2191         return a;
2192     }
2193     else
2194     {
2195         return T.init;
2196     }
2197 }
2198 
2199 package(mir)
2200 template elementType(T)
2201 {
2202     import mir.ndslice.slice: isSlice, DeepElementType;
2203     import std.traits: Unqual, ForeachType;
2204 
2205     static if (isIterable!T) {
2206         static if (isSlice!T)
2207             alias elementType = Unqual!(DeepElementType!(T.This));
2208         else
2209             alias elementType = Unqual!(ForeachType!T);
2210     } else {
2211         alias elementType = Unqual!T;
2212     }
2213 }
2214 
2215 package(mir)
2216 template sumType(Range)
2217 {
2218     alias T = elementType!Range;
2219 
2220     static if (__traits(compiles, {
2221         auto a = T.init + T.init;
2222         a += T.init;
2223     }))
2224         alias sumType = typeof(T.init + T.init);
2225     else
2226         static assert(0, "sumType: Can't sum elements of type " ~ T.stringof);
2227 }
2228 
2229 /++
2230 +/
2231 template fillCollapseSums(Summation summation, alias combineParts, combineElements...)
2232 {
2233     import mir.ndslice.slice: Slice, SliceKind;
2234     /++
2235     +/
2236     auto ref fillCollapseSums(Iterator, SliceKind kind)(Slice!(Iterator, 1, kind) data) @property
2237     {
2238         import mir.algorithm.iteration;
2239         import mir.functional: naryFun;
2240         import mir.ndslice.topology: iota, triplets;
2241         foreach (triplet; data.length.iota.triplets) with(triplet)
2242         {
2243             auto ref ce(size_t i)()
2244             {
2245                 static if (summation == Summation.fast)
2246                 {
2247                     return
2248                         sum!summation(naryFun!(combineElements[i])(center, left )) +
2249                         sum!summation(naryFun!(combineElements[i])(center, right));
2250                 }
2251                 else
2252                 {
2253                     Summator!summation summator = 0;
2254                     summator.put(naryFun!(combineElements[i])(center, left));
2255                     summator.put(naryFun!(combineElements[i])(center, right));
2256                     return summator.sum;
2257                 }
2258             }
2259             alias sums = staticMap!(ce, Iota!(combineElements.length));
2260             data[center] = naryFun!combineParts(center, sums);
2261         }
2262     }
2263 }
2264 
2265 package:
2266 
2267 template isSummable(F)
2268 {
2269     enum bool isSummable =
2270         __traits(compiles,
2271         {
2272             F a = 0.1, b, c;
2273             b = 2.3;
2274             c = a + b;
2275             c = a - b;
2276             a += b;
2277             a -= b;
2278         });
2279 }
2280 
2281 template isSummable(Range, F)
2282 {
2283     enum bool isSummable =
2284         isIterable!Range &&
2285         isImplicitlyConvertible!(sumType!Range, F) &&
2286         isSummable!F;
2287 }
2288 
2289 version(mir_test)
2290 unittest
2291 {
2292     import mir.ndslice.topology: iota;
2293     static assert(isSummable!(typeof(iota([size_t.init])), double));
2294 }
2295 
2296 private enum bool isCompesatorAlgorithm(Summation summation) =
2297     summation == Summation.precise
2298  || summation == Summation.kb2
2299  || summation == Summation.kbn
2300  || summation == Summation.kahan;
2301 
2302 
2303 version(mir_test)
2304 unittest
2305 {
2306     import mir.ndslice;
2307 
2308     auto p = iota([2, 3, 4, 5]);
2309     auto a = p.as!double;
2310     auto b = a.flattened;
2311     auto c = a.slice;
2312     auto d = c.flattened;
2313     auto s = p.flattened.sum;
2314 
2315     assert(a.sum == s);
2316     assert(b.sum == s);
2317     assert(c.sum == s);
2318     assert(d.sum == s);
2319 
2320     assert(a.canonical.sum == s);
2321     assert(b.canonical.sum == s);
2322     assert(c.canonical.sum == s);
2323     assert(d.canonical.sum == s);
2324 
2325     assert(a.universal.transposed!3.sum == s);
2326     assert(b.universal.sum == s);
2327     assert(c.universal.transposed!3.sum == s);
2328     assert(d.universal.sum == s);
2329 
2330     assert(a.sum!"fast" == s);
2331     assert(b.sum!"fast" == s);
2332     assert(c.sum!(float, "fast") == s);
2333     assert(d.sum!"fast" == s);
2334 
2335     assert(a.canonical.sum!"fast" == s);
2336     assert(b.canonical.sum!"fast" == s);
2337     assert(c.canonical.sum!"fast" == s);
2338     assert(d.canonical.sum!"fast" == s);
2339 
2340     assert(a.universal.transposed!3.sum!"fast" == s);
2341     assert(b.universal.sum!"fast" == s);
2342     assert(c.universal.transposed!3.sum!"fast" == s);
2343     assert(d.universal.sum!"fast" == s);
2344 
2345 }