1 /++
2 $(H1 @nogc and nothrow Parsing Utilities)
3 
4 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
5 Authors: Ilya Yaroshenko
6 Copyright: 2020 Ilya Yaroshenko, Kaleidic Associates Advisory Limited, Symmetry Investments
7 +/
8 module mir.parse;
9 
/// `mir.conv: to` extension.
version(mir_test)
@safe pure @nogc
unittest
{
    import mir.conv: to;
    import mir.small_string;

    // Built-in string literals as the conversion source.
    assert(to!double("123.0") == 123);
    assert(to!int("123") == 123);
    assert(to!byte("123") == 123);

    // `SmallString` values as the conversion source, including a qualified target type.
    alias Str = SmallString!32;
    assert(to!double(Str("123.0")) == 123);
    assert(to!double(Str("123.")) == 123.);
    assert(to!double(Str(".123")) == .123);
    assert(to!(immutable int)(Str("123")) == 123);
}
28 
29 import mir.primitives;
30 import std.traits: isMutable, isFloatingPoint, isSomeChar;
31 
/++
Converts the whole of `str` to a value of the native type `T` without using the GC.

Floating point targets are handled by the `bool`-returning `fromString` overload.
All other targets are handled by `parse!T`, after which the input must be fully consumed.

Params:
    str = text to convert
Returns:
    parsed value
Throws:
    A statically allocated (`@nogc`) `Exception` in case of parse error or non-empty remaining input.
    When `D_Exceptions` is not defined the failure paths are `assert(0)` instead.

Floating_point:
    Mir parsing supports up-to quadruple precision.
    The conversion error is 0 ULP for normal numbers.
    Subnormal numbers with an exponent greater than or equal to -512 have upper error bound equal to 1 ULP.
+/
T fromString(T, C)(scope const(C)[] str)
    if (isMutable!T)
{
    import mir.utility: _expect;
    static immutable excfp = new Exception("fromString failed to parse " ~ T.stringof);

    T value;

    static if (isFloatingPoint!T)
    {
        // The floating point overload reports every kind of failure through a single flag.
        if (!_expect(fromString(str, value), true))
        {
            version (D_Exceptions)
                throw excfp;
            else
                assert(0);
        }
        return value;
    }
    else
    {
        static immutable excne = new Exception("fromString: remaining input is not empty after parsing " ~ T.stringof);

        // Step 1: the front of the input has to be a valid `T`.
        if (!_expect(parse!T(str, value), true))
        {
            version (D_Exceptions)
                throw excfp;
            else
                assert(0);
        }

        // Step 2: nothing may be left over once the value has been read.
        if (!_expect(str.empty, true))
        {
            version (D_Exceptions)
                throw excne;
            else
                assert(0);
        }

        return value;
    }
}
83 
///
version(mir_bignum_test)
@safe pure @nogc unittest
{
    // Integral target, checked at run time and during CTFE.
    assert(fromString!int("123") == 123);
    static assert(fromString!int("-123") == -123);

    // Floating point targets of each native width.
    assert(fromString!float(".5") == .5);
    assert(fromString!double("12.3") == 12.3);
    assert(fromString!float("12.3") == 12.3f);
    assert(fromString!real("12.3") == 12.3L);
    assert(fromString!double("-12.3e-30") == -12.3e-30);
    assert(fromString!double("2.9802322387695312E-8") == 2.9802322387695312E-8);

    // Underscores are supported by default.
    assert(fromString!double("123_456.789_012") == 123_456.789_012);
    assert(fromString!double("12_34_56_78_90_12e-6") == 123_456.789_012);

    // Leading zeros are supported by default.
    assert(fromString!double("010") == 10.0);
    assert(fromString!double("000010") == 10.0);
    assert(fromString!double("0000.10") == 0.1);
    assert(fromString!double("0000e10") == 0);

    // CTFE support: results are compared against exact hexadecimal literals.
    static assert(fromString!double("-12.3e-30") == -0x1.f2f280b2414d5p-97);
    static assert(fromString!double("+12.3e+30") == 0x1.367ee3119d2bap+103);

    static assert(fromString!double("1.448997445238699") == 0x1.72f17f1f49aadp0);
    static if (real.mant_dig >= 64)
        static assert(fromString!real("1.448997445238699") == 1.448997445238699L);

    static assert(fromString!float("3.518437208883201171875") == 0x1.c25c26p+1);
    static assert(fromString!double("3.518437208883201171875") == 0x1.c25c268497684p+1);
    static if (real.mant_dig >= 64)
        static assert(fromString!real("3.518437208883201171875") == 0xe.12e13424bb4232fp-2L);

    // Related DMD issues:
    //   https://issues.dlang.org/show_bug.cgi?id=20951
    //   https://issues.dlang.org/show_bug.cgi?id=20952
    //   https://issues.dlang.org/show_bug.cgi?id=20953
    //   https://issues.dlang.org/show_bug.cgi?id=20967
}
127 
version(mir_bignum_test)
@safe pure unittest
{
    import std.exception: assertThrown;

    // In every input below at least one underscore does not sit between two digits.
    foreach (string text; [
        "1_",
        "1__2",
        "_1",
        "123_.456",
        "123_e0",
        "123._456",
        "12__34.56",
        "123.456_",
        "-_123.456",
        "_123.456",
    ])
    {
        assertThrown(text.fromString!float);
    }
}
143 
/++
Performs `nothrow` and `@nogc` string to native type conversion.

Floating point targets are read through `Decimal!256.fromStringImpl`; the NaN and
infinity exponent keys are mapped to `T.nan` / `T.infinity` carrying the parsed sign,
any other key converts the decimal with a cast. For this path `value` is written only
on success.

All other targets are read with `parse!T`, and the input must be fully consumed.

Params:
    str = text to convert
    value = receives the parsed result
Returns: true if success and false otherwise.
+/
bool fromString(T, C)(scope const(C)[] str, ref T value)
    if (isSomeChar!C)
{
    static if (!isFloatingPoint!T)
    {
        // Success requires both a valid value and no trailing characters.
        return parse!T(str, value) && str.empty;
    }
    else
    {
        import mir.bignum.decimal: Decimal, DecimalExponentKey;
        import mir.utility: _expect;

        Decimal!256 decimal = void;
        DecimalExponentKey key;
        auto parsedOk = decimal.fromStringImpl(str, key);

        if (_expect(parsedOk, true))
        {
            if (key == DecimalExponentKey.nan)
                value = decimal.coefficient.sign ? -T.nan : T.nan;
            else if (key == DecimalExponentKey.infinity)
                value = decimal.coefficient.sign ? -T.infinity : T.infinity;
            else
                value =  cast(T) decimal;
        }
        return parsedOk;
    }
}
176 
///
version(mir_test)
@safe pure nothrow @nogc unittest
{
    int parsed;
    immutable ok = fromString("123", parsed);
    assert(ok && parsed == 123);
}
184 
/++
Single character parsing utilities.

Takes the front element of `r`, stores it in `value` and advances the range by one.
An empty range is left as it is and `value` is not written.

Params:
    r = input range to read from
    value = receives the character that was read
Returns: true if success and false otherwise.
+/
bool parse(T, Range)(scope ref Range r, scope ref T value)
    if (isInputRange!Range && isSomeChar!T)
{
    immutable available = !r.empty;
    if (available)
    {
        value = r.front;
        r.popFront;
    }
    return available;
}
199 
///
version(mir_test) @safe pure nothrow @nogc
unittest
{
    string input = "str";
    char first;

    immutable consumed = parse(input, first);
    assert(consumed);
    assert(first == 's');
    // Exactly one character has been removed from the front.
    assert(input == "tr");
}
210 
/++
Integer parsing utilities.

Reads a decimal integer from the front of `r` and advances the range past the
characters that were consumed; input that follows the number stays in the range.

`byte` and `short` are parsed at `int` width first and then narrowed. The call
fails when the narrowed value differs from the wide one. `value` is assigned the
narrowed number in either case.

Params:
    r = input range to read from
    value = receives the parsed integer
Returns: true if success and false otherwise.
+/
bool parse(T, Range)(scope ref Range r, scope ref T value)
    if ((is(T == byte) || is(T == short)) && isInputRange!Range && !__traits(isUnsigned, T))
{
    int wide;
    immutable parsed = parse!(int, Range)(r, wide);
    immutable narrow = cast(T) wide;
    value = narrow;
    // Narrowing must not have changed the number.
    return parsed ? narrow == wide : false;
}
224 
/// ditto
bool parse(T, Range)(scope ref Range r, scope ref T value)
    if (is(T == int) && isInputRange!Range && !__traits(isUnsigned, T))
{
    // Inlining is requested only when compiling with LDC.
    version(LDC)
        pragma(inline, true);

    // The constraint pins `T` to `int`, so forwarding `T` picks the `int` instantiation.
    return parseSignedImpl!(T, Range)(r, value);
}
232 
/// ditto
bool parse(T, Range)(scope ref Range r, scope ref T value)
    if (is(T == long) && isInputRange!Range && !__traits(isUnsigned, T))
{
    // Inlining is requested only when compiling with LDC.
    version(LDC)
        pragma(inline, true);

    // The constraint pins `T` to `long`, so forwarding `T` picks the `long` instantiation.
    return parseSignedImpl!(T, Range)(r, value);
}
240 
/// ditto
bool parse(T, Range)(scope ref Range r, scope ref T value)
    if ((is(T == ubyte) || is(T == ushort)) && isInputRange!Range && __traits(isUnsigned, T))
{
    // Parse at `uint` width, then narrow; `value` receives the narrowed number either way.
    uint wide;
    immutable parsed = parse!(uint, Range)(r, wide);
    immutable narrow = cast(T) wide;
    value = narrow;
    // Fail when the number does not fit into `T`.
    return parsed ? narrow == wide : false;
}
250 
/// ditto
bool parse(T, Range)(scope ref Range r, scope ref T value)
    if (is(T == uint) && isInputRange!Range && __traits(isUnsigned, T))
{
    // Inlining is requested only when compiling with LDC.
    version(LDC)
        pragma(inline, true);

    // The constraint pins `T` to `uint`, so forwarding `T` picks the `uint` instantiation.
    return parseUnsignedImpl!(T, Range)(r, value);
}
258 
/// ditto
bool parse(T, Range)(scope ref Range r, scope ref T value)
    if (is(T == ulong) && isInputRange!Range && __traits(isUnsigned, T))
{
    // Inlining is requested only when compiling with LDC.
    version(LDC)
        pragma(inline, true);

    // The constraint pins `T` to `ulong`, so forwarding `T` picks the `ulong` instantiation.
    return parseUnsignedImpl!(T, Range)(r, value);
}
266 
267 
///
version (mir_test) unittest
{
    import std.meta: AliasSeq;
    foreach (T; AliasSeq!(byte, ubyte, short, ushort, int, uint, long, ulong))
    {
        T val;
        string str;

        // Digit-only inputs are accepted and produce their decimal value.
        str = "123";
        assert(parse(str, val) && val == 123);
        str = "0";
        assert(parse(str, val) && val == 0);
        str = "9";
        assert(parse(str, val) && val == 9);

        // Empty and non-numeric inputs are rejected; the target ends up zero.
        str = "";
        assert(!parse(str, val) && val == 0);
        str = "text";
        assert(!parse(str, val) && val == 0);
    }
}
292 
///
version (mir_test) unittest
{
    import std.meta: AliasSeq;
    foreach (T; AliasSeq!(byte, short, int, long))
    {
        T val;
        string str;

        // A leading minus sign negates the parsed number.
        str = "-123";
        assert(parse(str, val) && val == -123);
        str = "-0";
        assert(parse(str, val) && val == 0);

        // Parsing stops at the first non-digit and leaves the rest in the range.
        str = "-9text";
        assert(parse(str, val) && val == -9);
        assert(str == "text");

        // The most negative value of each type round-trips through its text form.
        enum lowest = T.min + 0;
        str = lowest.stringof;
        assert(parse(str, val) && val == T.min);
    }
}
316 
/+
Reads an unsigned decimal integer from the front of `r`.

Accepted form: at most one leading '+', followed by one or more decimal digits.
Reading stops at the first character that is not a digit; that character stays in the range.

Outcomes:
    - success: `value` holds the number and `true` is returned;
    - no digit where one is required (empty input, lone '+', other character):
      `value` is set to 0 and `false` is returned;
    - the number does not fit into `T`: `false` is returned, `value` is not written,
      and the digit that caused the overflow is still at the front of `r`.
+/
private bool parseUnsignedImpl(T, Range)(scope ref Range r, scope ref T value)
    if(__traits(isUnsigned, T))
{
    version(LDC) pragma(inline, true);
    import mir.checkedint: addu, mulu;

    if (r.empty)
    {
        value = 0;
        return false;
    }

    // `+ 0u` widens the character to an unsigned integer before any arithmetic.
    auto head = r.front + 0u;
    if (head == '+')
    {
        // Only one plus sign is skipped; a second one fails the digit test below.
        r.popFront;
        if (r.empty)
        {
            value = 0;
            return false;
        }
        head = r.front + 0u;
    }

    // Unsigned subtraction maps every non-digit to a number >= 10.
    uint digit = head - '0';
    if (digit >= 10)
    {
        value = 0;
        return false;
    }

    T total = digit;
    r.popFront;
    while (!r.empty)
    {
        digit = r.front - '0';
        if (digit >= 10)
            break;

        // total = total * 10 + digit, giving up as soon as either step overflows.
        bool overflow;
        T scaled = mulu(total, cast(uint)10, overflow);
        if (overflow)
            return false;
        T summed = addu(scaled, cast(uint)digit, overflow);
        if (overflow)
            return false;

        total = summed;
        r.popFront;
    }

    value = total;
    return true;
}
362 
/+
Reads a signed decimal integer from the front of `r`.

At most one leading '-' is consumed here. The magnitude is then read by the
unsigned `parse` overload, which writes straight into the storage of `value`
reinterpreted as `Unsigned!T`.

That unsigned overload accepts a leading '+' of its own, so without an explicit
check an input such as "-+5" would be taken as -5. A '+' directly after the '-'
is therefore rejected.

Outcomes:
    - success: `value` holds the number and `true` is returned;
    - empty input, a lone '-', or "-+...": `value` is set to 0 and `false` is returned;
    - the unsigned parse fails: `false` is returned with whatever that parse left in `value`;
    - the magnitude does not fit into `T`: `false` is returned and `value` holds the
      reinterpreted (negative) bit pattern written by the unsigned parse.
+/
private bool parseSignedImpl(T, Range)(scope ref Range r, scope ref T value)
    if(!__traits(isUnsigned, T))
{
    version(LDC) pragma(inline, true);
    import std.traits: Unsigned;

    bool sign;
B:
    if (!r.empty)
    {
        // `+ 0u` widens the character to an unsigned integer before comparing.
        auto f = r.front + 0u;
        if (!sign && f == '-')
        {
            r.popFront;
            sign = true;
            goto B;
        }
        // Two sign characters in a row ("-+") are not a number.
        if (sign && f == '+')
            goto F;
        // Parse the magnitude in place; the cast only reinterprets the bits of `value`.
        auto retu = (()@trusted=>parse(r, *cast(Unsigned!T*) &value))();
        if (!retu)
            goto R;
        if (!sign)
        {
            // A set top bit means the magnitude exceeds T.max.
            if (value < 0)
                goto R;
        }
        else
        {
            // The only magnitude with the top bit set that is still valid is |T.min|.
            if (value < 0 && value != T.min)
                goto R;
            // Negating T.min wraps around to T.min, which is the wanted result.
            value = -value;
        }
        return true;
    }
F:  value = 0;
R:  return false;
}