1 // Written in the D programming language.
2 /**
3 This is a submodule of $(MREF mir, algorithm). It contains `nothrow` `@nogc` BetterC alternative to `MultiwayMerge` from `std.algorithm.setops`.
4 
5 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
6 Copyright: 2020 Ilya Yaroshenko, Kaleidic Associates Advisory Limited, Symmetry Investments
7 
8 Authors: $(HTTP erdani.com, Andrei Alexandrescu) (original Phobos code), Ilya Yaroshenko (Mir & BetterC rework, optimization).
9  */
10 module mir.algorithm.setops;
11 
12 import core.lifetime: move;
13 import mir.functional: naryFun;
14 import mir.primitives;
15 import mir.qualifier;
16 import std.range.primitives: isRandomAccessRange;
17 
18 /**
19 Merges multiple sets. The input sets are passed as a
20 range of ranges and each is assumed to be sorted by $(D
21 less). Computation is done lazily, one union element at a time. The
22 complexity of one $(D popFront) operation is $(BIGOH
23 log(ror.length)). However, the length of $(D ror) decreases as ranges
24 in it are exhausted, so the complexity of a full pass through $(D
25 MultiwayMerge) is dependent on the distribution of the lengths of ranges
26 contained within $(D ror). If all ranges have the same length $(D n)
27 (worst case scenario), the complexity of a full pass through $(D
28 MultiwayMerge) is $(BIGOH n * ror.length * log(ror.length)), i.e., $(D
29 log(ror.length)) times worse than just spanning all ranges in
30 turn. The output comes sorted (unstably) by $(D less).
31 The length of the resulting range is the sum of all lengths of
32 the ranges passed as input. This means that all elements (duplicates
33 included) are transferred to the resulting range.
34 For backward compatibility, `multiwayMerge` is available under
35 the name `nWayUnion` and `MultiwayMerge` under the name of `NWayUnion` .
36 Future code should use `multiwayMerge` and `MultiwayMerge` as `nWayUnion`
37 and `NWayUnion` will be deprecated.
38 Params:
39     less = Predicate the given ranges are sorted by.
40     ror = A range of ranges sorted by `less` to compute the union for.
41 Returns:
42     A range of the union of the ranges in `ror`.
43 Warning: Because $(D MultiwayMerge) does not allocate extra memory, it
44 will leave $(D ror) modified. Namely, $(D MultiwayMerge) assumes ownership
45 of $(D ror) and discretionarily swaps and advances elements of it. If
46 you want $(D ror) to preserve its contents after the call, you may
47 want to pass a duplicate to $(D MultiwayMerge) (and perhaps cache the
48 duplicate in between calls).
49  */
50 struct MultiwayMerge(alias less, RangeOfRanges)
51     if (isRandomAccessRange!RangeOfRanges)
52 {
53     import mir.primitives;
54     import mir.container.binaryheap;
55 
56     ///
57     @disable this();
58     ///
59     @disable this(this);
60 
61     ///
62     static bool compFront(ElementType!RangeOfRanges a, ElementType!RangeOfRanges b)
63     {
64         // revert comparison order so we get the smallest elements first
65         return less(b.front, a.front);
66     }
67 
68     /// Heap
69     BinaryHeap!(compFront, RangeOfRanges) _heap;
70 
71     ///
72     this(RangeOfRanges ror)
73     {
74         // Preemptively get rid of all empty ranges in the input
75         // No need for stability either
76         auto temp = ror.lightScope;
77         for (;!temp.empty;)
78         {
79             if (!temp.front.empty)
80             {
81                 temp.popFront;
82                 continue;
83             }
84             import mir.utility: swap;
85             swap(temp.back, temp.front);
86             temp.popBack;
87             ror.popBack;
88         }
89         //Build the heap across the range
90         _heap = typeof(_heap)(ror.move);
91     }
92 
93     ///
94     @property bool empty() scope const { return _heap.empty; }
95 
96     ///
97     @property auto ref front()
98     {
99         assert(!empty);
100         return _heap.front.front;
101     }
102 
103     ///
104     void popFront() scope @safe
105     {
106         _heap._store.front.popFront;
107         if (!_heap._store.front.empty)
108             _heap.siftDown(_heap._store[], 0, _heap._length);
109         else
110             _heap.removeFront;
111     }
112 }
113 
114 /// Ditto
115 MultiwayMerge!(naryFun!less, RangeOfRanges) multiwayMerge
116 (alias less = "a < b", RangeOfRanges)
117 (RangeOfRanges ror)
118 {
119     return typeof(return)(move(ror));
120 }
121 
122 ///
123 @safe nothrow @nogc version(mir_test) unittest
124 {
125     import mir.algorithm.iteration: equal;
126 
127     static a =
128     [
129         [ 1, 4, 7, 8 ],
130         [ 1, 7 ],
131         [ 1, 7, 8],
132         [ 4 ],
133         [ 7 ],
134     ];
135     static witness = [
136         1, 1, 1, 4, 4, 7, 7, 7, 7, 8, 8
137     ];
138     assert(a.multiwayMerge.equal(witness));
139 
140     static b =
141     [
142         // range with duplicates
143         [ 1, 1, 4, 7, 8 ],
144         [ 7 ],
145         [ 1, 7, 8],
146         [ 4 ],
147         [ 7 ],
148     ];
149     // duplicates are propagated to the resulting range
150     assert(b.multiwayMerge.equal(witness));
151 }
152 
153 /**
154 Computes the union of multiple ranges. The input ranges are passed
155 as a range of ranges and each is assumed to be sorted by $(D
156 less). Computation is done lazily, one union element at a time.
157 `multiwayUnion(ror)` is functionally equivalent to `multiwayMerge(ror).uniq`.
158 "The output of multiwayUnion has no duplicates even when its inputs contain duplicates."
159 Params:
160     less = Predicate the given ranges are sorted by.
161     ror = A range of ranges sorted by `less` to compute the intersection for.
162 Returns:
163     A range of the union of the ranges in `ror`.
164 See also: $(LREF multiwayMerge)
165  */
166 auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror)
167 {
168     import mir.functional: not;
169     import mir.algorithm.iteration : Uniq;
170 
171     return Uniq!(not!less, typeof(multiwayMerge!less(ror)))(multiwayMerge!less(move(ror)));
172 }
173 
174 ///
175 @safe version(mir_test) unittest
176 {
177     import mir.algorithm.iteration: equal;
178 
179     // sets
180     double[][] a =
181     [
182         [ 1, 4, 7, 8 ],
183         [ 1, 7 ],
184         [ 1, 7, 8],
185         [ 4 ],
186         [ 7 ],
187     ];
188 
189     auto witness = [1, 4, 7, 8];
190     assert(a.multiwayUnion.equal(witness));
191 
192     // multisets
193     double[][] b =
194     [
195         [ 1, 1, 1, 4, 7, 8 ],
196         [ 1, 7 ],
197         [ 1, 7, 7, 8],
198         [ 4 ],
199         [ 7 ],
200     ];
201     assert(b.multiwayUnion.equal(witness));
202 }
203 
204 /++
205 Computes the length of union of multiple ranges. The input ranges are passed
206 as a range of ranges and each is assumed to be sorted by `less`.
207 
208 Params:
209     less = Predicate the given ranges are sorted by.
210     ror = A range of ranges sorted by `less` to compute the intersection for.
211 Returns:
212     A length of the union of the ranges in `ror`.
213 +/
214 pragma(inline, false)
215 size_t unionLength(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror)
216 {
217     size_t length;
218     auto u = move(ror).multiwayUnion!less;
219     if (!u.empty) do {
220         length++;
221         u.popFront;
222     } while(!u.empty);
223     return length;
224 }