1 // Written in the D programming language. 2 /** 3 This is a submodule of $(MREF mir, algorithm). It contains `nothrow` `@nogc` BetterC alternative to `MultiwayMerge` from `std.algorithm.setops`. 4 5 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 6 Copyright: 2020 Ilya Yaroshenko, Kaleidic Associates Advisory Limited, Symmetry Investments 7 8 Authors: $(HTTP erdani.com, Andrei Alexandrescu) (original Phobos code), Ilya Yaroshenko (Mir & BetterC rework, optimization). 9 */ 10 module mir.algorithm.setops; 11 12 import core.lifetime: move; 13 import mir.functional: naryFun; 14 import mir.primitives; 15 import mir.qualifier; 16 import std.range.primitives: isRandomAccessRange; 17 18 /** 19 Merges multiple sets. The input sets are passed as a 20 range of ranges and each is assumed to be sorted by $(D 21 less). Computation is done lazily, one union element at a time. The 22 complexity of one $(D popFront) operation is $(BIGOH 23 log(ror.length)). However, the length of $(D ror) decreases as ranges 24 in it are exhausted, so the complexity of a full pass through $(D 25 MultiwayMerge) is dependent on the distribution of the lengths of ranges 26 contained within $(D ror). If all ranges have the same length $(D n) 27 (worst case scenario), the complexity of a full pass through $(D 28 MultiwayMerge) is $(BIGOH n * ror.length * log(ror.length)), i.e., $(D 29 log(ror.length)) times worse than just spanning all ranges in 30 turn. The output comes sorted (unstably) by $(D less). 31 The length of the resulting range is the sum of all lengths of 32 the ranges passed as input. This means that all elements (duplicates 33 included) are transferred to the resulting range. 34 For backward compatibility, `multiwayMerge` is available under 35 the name `nWayUnion` and `MultiwayMerge` under the name of `NWayUnion` . 36 Future code should use `multiwayMerge` and `MultiwayMerge` as `nWayUnion` 37 and `NWayUnion` will be deprecated. 38 Params: 39 less = Predicate the given ranges are sorted by. 40 ror = A range of ranges sorted by `less` to compute the union for. 41 Returns: 42 A range of the union of the ranges in `ror`. 43 Warning: Because $(D MultiwayMerge) does not allocate extra memory, it 44 will leave $(D ror) modified. Namely, $(D MultiwayMerge) assumes ownership 45 of $(D ror) and discretionarily swaps and advances elements of it. If 46 you want $(D ror) to preserve its contents after the call, you may 47 want to pass a duplicate to $(D MultiwayMerge) (and perhaps cache the 48 duplicate in between calls). 49 */ 50 struct MultiwayMerge(alias less, RangeOfRanges) 51 if (isRandomAccessRange!RangeOfRanges) 52 { 53 import mir.primitives; 54 import mir.container.binaryheap; 55 56 /// 57 @disable this(); 58 /// 59 @disable this(this); 60 61 /// 62 static bool compFront(ElementType!RangeOfRanges a, ElementType!RangeOfRanges b) 63 { 64 // revert comparison order so we get the smallest elements first 65 return less(b.front, a.front); 66 } 67 68 /// Heap 69 BinaryHeap!(compFront, RangeOfRanges) _heap; 70 71 /// 72 this(RangeOfRanges ror) 73 { 74 // Preemptively get rid of all empty ranges in the input 75 // No need for stability either 76 auto temp = ror.lightScope; 77 for (;!temp.empty;) 78 { 79 if (!temp.front.empty) 80 { 81 temp.popFront; 82 continue; 83 } 84 import mir.utility: swap; 85 swap(temp.back, temp.front); 86 temp.popBack; 87 ror.popBack; 88 } 89 //Build the heap across the range 90 _heap = typeof(_heap)(ror.move); 91 } 92 93 /// 94 @property bool empty() scope const { return _heap.empty; } 95 96 /// 97 @property auto ref front() 98 { 99 assert(!empty); 100 return _heap.front.front; 101 } 102 103 /// 104 void popFront() scope @safe 105 { 106 _heap._store.front.popFront; 107 if (!_heap._store.front.empty) 108 _heap.siftDown(_heap._store[], 0, _heap._length); 109 else 110 _heap.removeFront; 111 } 112 } 113 114 /// Ditto 115 MultiwayMerge!(naryFun!less, RangeOfRanges) multiwayMerge 116 (alias less = "a < b", RangeOfRanges) 117 (RangeOfRanges ror) 118 { 119 return typeof(return)(move(ror)); 120 } 121 122 /// 123 @safe nothrow @nogc version(mir_test) unittest 124 { 125 import mir.algorithm.iteration: equal; 126 127 static a = 128 [ 129 [ 1, 4, 7, 8 ], 130 [ 1, 7 ], 131 [ 1, 7, 8], 132 [ 4 ], 133 [ 7 ], 134 ]; 135 static witness = [ 136 1, 1, 1, 4, 4, 7, 7, 7, 7, 8, 8 137 ]; 138 assert(a.multiwayMerge.equal(witness)); 139 140 static b = 141 [ 142 // range with duplicates 143 [ 1, 1, 4, 7, 8 ], 144 [ 7 ], 145 [ 1, 7, 8], 146 [ 4 ], 147 [ 7 ], 148 ]; 149 // duplicates are propagated to the resulting range 150 assert(b.multiwayMerge.equal(witness)); 151 } 152 153 /** 154 Computes the union of multiple ranges. The input ranges are passed 155 as a range of ranges and each is assumed to be sorted by $(D 156 less). Computation is done lazily, one union element at a time. 157 `multiwayUnion(ror)` is functionally equivalent to `multiwayMerge(ror).uniq`. 158 "The output of multiwayUnion has no duplicates even when its inputs contain duplicates." 159 Params: 160 less = Predicate the given ranges are sorted by. 161 ror = A range of ranges sorted by `less` to compute the intersection for. 162 Returns: 163 A range of the union of the ranges in `ror`. 164 See also: $(LREF multiwayMerge) 165 */ 166 auto multiwayUnion(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror) 167 { 168 import mir.functional: not; 169 import mir.algorithm.iteration : Uniq; 170 171 return Uniq!(not!less, typeof(multiwayMerge!less(ror)))(multiwayMerge!less(move(ror))); 172 } 173 174 /// 175 @safe version(mir_test) unittest 176 { 177 import mir.algorithm.iteration: equal; 178 179 // sets 180 double[][] a = 181 [ 182 [ 1, 4, 7, 8 ], 183 [ 1, 7 ], 184 [ 1, 7, 8], 185 [ 4 ], 186 [ 7 ], 187 ]; 188 189 auto witness = [1, 4, 7, 8]; 190 assert(a.multiwayUnion.equal(witness)); 191 192 // multisets 193 double[][] b = 194 [ 195 [ 1, 1, 1, 4, 7, 8 ], 196 [ 1, 7 ], 197 [ 1, 7, 7, 8], 198 [ 4 ], 199 [ 7 ], 200 ]; 201 assert(b.multiwayUnion.equal(witness)); 202 } 203 204 /++ 205 Computes the length of union of multiple ranges. The input ranges are passed 206 as a range of ranges and each is assumed to be sorted by `less`. 207 208 Params: 209 less = Predicate the given ranges are sorted by. 210 ror = A range of ranges sorted by `less` to compute the intersection for. 211 Returns: 212 A length of the union of the ranges in `ror`. 213 +/ 214 pragma(inline, false) 215 size_t unionLength(alias less = "a < b", RangeOfRanges)(RangeOfRanges ror) 216 { 217 size_t length; 218 auto u = move(ror).multiwayUnion!less; 219 if (!u.empty) do { 220 length++; 221 u.popFront; 222 } while(!u.empty); 223 return length; 224 }