// Package jlexer contains a JSON lexer implementation.
//
// It is expected that it is mostly used with generated parser code, so the interface is tuned
// for a parser that knows what kind of data is expected.
package jlexer

import (
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strconv"
	"unicode"
	"unicode/utf16"
	"unicode/utf8"
)

// tokenKind determines the type of a token.
type tokenKind byte

const (
	tokenUndef  tokenKind = iota // No token.
	tokenDelim                   // Delimiter: one of '{', '}', '[' or ']'.
	tokenString                  // A string literal, e.g. "abc\u1234".
	tokenNumber                  // A number literal, e.g. 1.5e5.
	tokenBool                    // A boolean literal: true or false.
	tokenNull                    // The null keyword.
)

// token describes a single token: its type, position in the input and value.
type token struct {
	kind tokenKind // Type of the token.

	boolValue  bool   // Value if a boolean literal token.
	byteValue  []byte // Raw value of the token.
	delimValue byte   // Value if a delimiter token.
}

// Lexer is a JSON lexer: it iterates over JSON tokens in a byte slice.
type Lexer struct {
	Data []byte // Input data given to the lexer.

	start int   // Start of the current token.
	pos   int   // Current unscanned position in the input stream.
	token token // Last scanned token, if token.kind != tokenUndef.

	firstElement bool // Whether the current element is the first in an array or object.
	wantSep      byte // A comma or colon character that needs to occur before the next token.

	UseMultipleErrors bool          // If true, non-fatal errors are accumulated instead of aborting at the first one.
	fatalError        error         // Fatal error occurred during lexing. It is usually a syntax error.
	multipleErrors    []*LexerError // Semantic errors occurred during lexing. Parsing will continue after these errors are found.
}
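
// A minimal usage sketch, mirroring the pattern of generated parser code
// (illustrative only; the input document and key handling below are made up):
//
//	l := Lexer{Data: []byte(`{"name":"bob","age":7}`)}
//	l.Delim('{')
//	for !l.IsDelim('}') {
//		key := l.UnsafeString()
//		l.WantColon()
//		switch key {
//		case "age":
//			_ = l.Int()
//		default:
//			l.SkipRecursive()
//		}
//		l.WantComma()
//	}
//	l.Delim('}')
//	l.Consumed()
//	if err := l.Error(); err != nil {
//		// handle the syntax error
//	}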

// FetchToken scans the input for the next token.
func (r *Lexer) FetchToken() {
	r.token.kind = tokenUndef
	r.start = r.pos

	// Check that r.Data still has an r.pos element.
	// If it doesn't, it means the input data is corrupted.
	if len(r.Data) < r.pos {
		r.errParse("Unexpected end of data")
		return
	}
	// Determine the type of a token by skipping whitespace and reading the
	// first character. Note that r.pos is advanced in lockstep with the range
	// index, so r.Data[r.pos] always refers to the current character c.
	for _, c := range r.Data[r.pos:] {
		switch c {
		case ':', ',':
			if r.wantSep == c {
				r.pos++
				r.start++
				r.wantSep = 0
			} else {
				r.errSyntax()
			}

		case ' ', '\t', '\r', '\n':
			r.pos++
			r.start++

		case '"':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenString
			r.fetchString()
			return

		case '{', '[':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			r.firstElement = true
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '}', ']':
			if !r.firstElement && (r.wantSep != ',') {
				r.errSyntax()
			}
			r.wantSep = 0
			r.token.kind = tokenDelim
			r.token.delimValue = r.Data[r.pos]
			r.pos++
			return

		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
			if r.wantSep != 0 {
				r.errSyntax()
			}
			r.token.kind = tokenNumber
			r.fetchNumber()
			return

		case 'n':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenNull
			r.fetchNull()
			return

		case 't':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = true
			r.fetchTrue()
			return

		case 'f':
			if r.wantSep != 0 {
				r.errSyntax()
			}

			r.token.kind = tokenBool
			r.token.boolValue = false
			r.fetchFalse()
			return

		default:
			r.errSyntax()
			return
		}
	}
	r.fatalError = io.EOF
	return
}

// isTokenEnd returns true if the char can follow a non-delimiter token.
func isTokenEnd(c byte) bool {
	return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '[' || c == ']' || c == '{' || c == '}' || c == ',' || c == ':'
}

// fetchNull fetches and checks remaining bytes of null keyword.
func (r *Lexer) fetchNull() {
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'u' ||
		r.Data[r.pos-2] != 'l' ||
		r.Data[r.pos-1] != 'l' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 4
		r.errSyntax()
	}
}

// fetchTrue fetches and checks remaining bytes of true keyword.
func (r *Lexer) fetchTrue() {
	r.pos += 4
	if r.pos > len(r.Data) ||
		r.Data[r.pos-3] != 'r' ||
		r.Data[r.pos-2] != 'u' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 4
		r.errSyntax()
	}
}

// fetchFalse fetches and checks remaining bytes of false keyword.
func (r *Lexer) fetchFalse() {
	r.pos += 5
	if r.pos > len(r.Data) ||
		r.Data[r.pos-4] != 'a' ||
		r.Data[r.pos-3] != 'l' ||
		r.Data[r.pos-2] != 's' ||
		r.Data[r.pos-1] != 'e' ||
		(r.pos != len(r.Data) && !isTokenEnd(r.Data[r.pos])) {

		r.pos -= 5
		r.errSyntax()
	}
}

// fetchNumber scans a number literal token.
func (r *Lexer) fetchNumber() {
	hasE := false
	afterE := false
	hasDot := false

	r.pos++
	for i, c := range r.Data[r.pos:] {
		switch {
		case c >= '0' && c <= '9':
			afterE = false
		case c == '.' && !hasDot:
			hasDot = true
		case (c == 'e' || c == 'E') && !hasE:
			hasE = true
			hasDot = true
			afterE = true
		case (c == '+' || c == '-') && afterE:
			afterE = false
		default:
			r.pos += i
			if !isTokenEnd(c) {
				r.errSyntax()
			} else {
				r.token.byteValue = r.Data[r.start:r.pos]
			}
			return
		}
	}

	r.pos = len(r.Data)
	r.token.byteValue = r.Data[r.start:]
}
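
// Note that fetchNumber only checks the rough shape of a number: a malformed
// literal such as "1e" or "-" still becomes a tokenNumber here and is only
// rejected by strconv when the value is actually parsed (see Int64, Float64,
// etc. below).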

// findStringLen tries to scan into the string literal for its ending quote
// char to determine the required buffer size. The size is exact if no escapes
// are present and may be inexact if there are escaped chars.
func findStringLen(data []byte) (hasEscapes bool, length int) {
	delta := 0

	for i := 0; i < len(data); i++ {
		switch data[i] {
		case '\\':
			i++
			delta++
			if i < len(data) && data[i] == 'u' {
				delta++
			}
		case '"':
			return (delta > 0), (i - delta)
		}
	}

	return false, len(data)
}
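
// For example, given the tail `a\u00e9b"...` findStringLen reports
// hasEscapes=true and length=6: the closing quote sits at index 8, and the
// escape's backslash and 'u' marker subtract 2, budgeting 4 bytes for the
// decoded rune, the maximum size of a UTF-8 encoding.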

// getu4 decodes \uXXXX from the beginning of s, returning the hex value,
// or -1 if s does not begin with a valid \uXXXX escape.
func getu4(s []byte) rune {
	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
		return -1
	}
	var val rune
	for i := 2; i < len(s) && i < 6; i++ {
		var v byte
		c := s[i]
		switch c {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			v = c - '0'
		case 'a', 'b', 'c', 'd', 'e', 'f':
			v = c - 'a' + 10
		case 'A', 'B', 'C', 'D', 'E', 'F':
			v = c - 'A' + 10
		default:
			return -1
		}

		val <<= 4
		val |= rune(v)
	}
	return val
}
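
// For example, getu4([]byte(`\u00e9`)) returns 0xE9 ('é'), while
// getu4([]byte(`\uDE00`)) returns the raw surrogate 0xDE00, which the caller
// must pair with its other half via utf16.DecodeRune (see processEscape).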

// processEscape processes a single escape sequence and returns number of bytes processed.
func (r *Lexer) processEscape(data []byte) (int, error) {
	if len(data) < 2 {
		return 0, fmt.Errorf("syntax error at %v", string(data))
	}

	c := data[1]
	switch c {
	case '"', '/', '\\':
		r.token.byteValue = append(r.token.byteValue, c)
		return 2, nil
	case 'b':
		r.token.byteValue = append(r.token.byteValue, '\b')
		return 2, nil
	case 'f':
		r.token.byteValue = append(r.token.byteValue, '\f')
		return 2, nil
	case 'n':
		r.token.byteValue = append(r.token.byteValue, '\n')
		return 2, nil
	case 'r':
		r.token.byteValue = append(r.token.byteValue, '\r')
		return 2, nil
	case 't':
		r.token.byteValue = append(r.token.byteValue, '\t')
		return 2, nil
	case 'u':
		rr := getu4(data)
		if rr < 0 {
			return 0, errors.New("syntax error")
		}

		read := 6
		if utf16.IsSurrogate(rr) {
			rr1 := getu4(data[read:])
			if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
				read += 6
				rr = dec
			} else {
				rr = unicode.ReplacementChar
			}
		}
		var d [4]byte
		s := utf8.EncodeRune(d[:], rr)
		r.token.byteValue = append(r.token.byteValue, d[:s]...)
		return read, nil
	}

	return 0, errors.New("syntax error")
}
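
// For example, the surrogate pair `\ud83d\ude00` is handled in one call:
// both halves are read (12 bytes), utf16.DecodeRune combines them into
// U+1F600, and its 4-byte UTF-8 encoding is appended to the token value.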

// fetchString scans a string literal token.
func (r *Lexer) fetchString() {
	r.pos++
	data := r.Data[r.pos:]

	hasEscapes, length := findStringLen(data)
	if !hasEscapes {
		r.token.byteValue = data[:length]
		r.pos += length + 1
		return
	}

	// Slow path: the literal contains escapes, so decode into a fresh buffer.
	r.token.byteValue = make([]byte, 0, length)
	p := 0
	for i := 0; i < len(data); {
		switch data[i] {
		case '"':
			r.pos += i + 1
			r.token.byteValue = append(r.token.byteValue, data[p:i]...)
			return

		case '\\':
			r.token.byteValue = append(r.token.byteValue, data[p:i]...)
			off, err := r.processEscape(data[i:])
			if err != nil {
				r.errParse(err.Error())
				return
			}
			i += off
			p = i

		default:
			i++
		}
	}
	r.errParse("unterminated string literal")
}
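
// fetchString thus has two paths: escape-free literals are aliased directly
// into r.Data with zero copying (which is what makes UnsafeString cheap),
// while literals with escapes are decoded into a newly allocated buffer.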

// scanToken scans the next token if no token is currently available in the lexer.
func (r *Lexer) scanToken() {
	if r.token.kind != tokenUndef || r.fatalError != nil {
		return
	}

	r.FetchToken()
}

// consume resets the current token to allow scanning the next one.
func (r *Lexer) consume() {
	r.token.kind = tokenUndef
	r.token.delimValue = 0
}

// Ok returns true if no error (including io.EOF) was encountered during scanning.
func (r *Lexer) Ok() bool {
	return r.fatalError == nil
}

const maxErrorContextLen = 13

func (r *Lexer) errParse(what string) {
	if r.fatalError == nil {
		var str string
		if len(r.Data)-r.pos <= maxErrorContextLen {
			str = string(r.Data)
		} else {
			str = string(r.Data[r.pos:r.pos+maxErrorContextLen-3]) + "..."
		}
		r.fatalError = &LexerError{
			Reason: what,
			Offset: r.pos,
			Data:   str,
		}
	}
}

func (r *Lexer) errSyntax() {
	r.errParse("syntax error")
}

func (r *Lexer) errInvalidToken(expected string) {
	if r.fatalError != nil {
		return
	}
	if r.UseMultipleErrors {
		r.pos = r.start
		r.consume()
		r.SkipRecursive()
		switch expected {
		case "[":
			r.token.delimValue = ']'
			r.token.kind = tokenDelim
		case "{":
			r.token.delimValue = '}'
			r.token.kind = tokenDelim
		}
		r.addNonfatalError(&LexerError{
			Reason: fmt.Sprintf("expected %s", expected),
			Offset: r.start,
			Data:   string(r.Data[r.start:r.pos]),
		})
		return
	}

	var str string
	if len(r.token.byteValue) <= maxErrorContextLen {
		str = string(r.token.byteValue)
	} else {
		str = string(r.token.byteValue[:maxErrorContextLen-3]) + "..."
	}
	r.fatalError = &LexerError{
		Reason: fmt.Sprintf("expected %s", expected),
		Offset: r.pos,
		Data:   str,
	}
}

// GetPos returns the current parsing position in the input.
func (r *Lexer) GetPos() int {
	return r.pos
}

// Delim consumes a token and verifies that it is the given delimiter.
func (r *Lexer) Delim(c byte) {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}

	if !r.Ok() || r.token.delimValue != c {
		r.consume() // errInvalidToken can change token if UseMultipleErrors is enabled.
		r.errInvalidToken(string([]byte{c}))
	} else {
		r.consume()
	}
}

// IsDelim returns true if the next token is the given delimiter, or if a
// scanning error has occurred (so that loops over arrays and objects
// terminate once the lexer is in an error state).
func (r *Lexer) IsDelim(c byte) bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	return !r.Ok() || r.token.delimValue == c
}

// Null verifies that the next token is null and consumes it.
func (r *Lexer) Null() {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenNull {
		r.errInvalidToken("null")
	}
	r.consume()
}

// IsNull returns true if the next token is a null keyword.
func (r *Lexer) IsNull() bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	return r.Ok() && r.token.kind == tokenNull
}

// Skip skips a single token.
func (r *Lexer) Skip() {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	r.consume()
}

// SkipRecursive skips next array or object completely, or just skips a single token if not
// an array/object.
//
// Note: no syntax validation is performed on the skipped data.
func (r *Lexer) SkipRecursive() {
	r.scanToken()
	var start, end byte

	if r.token.delimValue == '{' {
		start, end = '{', '}'
	} else if r.token.delimValue == '[' {
		start, end = '[', ']'
	} else {
		r.consume()
		return
	}

	r.consume()

	level := 1
	inQuotes := false
	wasEscape := false

	for i, c := range r.Data[r.pos:] {
		switch {
		case c == start && !inQuotes:
			level++
		case c == end && !inQuotes:
			level--
			if level == 0 {
				r.pos += i + 1
				return
			}
		case c == '\\' && inQuotes:
			wasEscape = !wasEscape
			continue
		case c == '"' && inQuotes:
			inQuotes = wasEscape
		case c == '"':
			inQuotes = true
		}
		wasEscape = false
	}
	r.pos = len(r.Data)
	r.fatalError = &LexerError{
		Reason: "EOF reached while skipping array/object or token",
		Offset: r.pos,
		Data:   string(r.Data[r.pos:]),
	}
}
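
// For example, skipping the value `[1,"]"]` consumes everything up to and
// including its closing bracket: the ']' inside the string literal does not
// decrement the nesting level because inQuotes is set while it is scanned.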

// Raw fetches the next item recursively as a data slice.
func (r *Lexer) Raw() []byte {
	r.SkipRecursive()
	if !r.Ok() {
		return nil
	}
	return r.Data[r.start:r.pos]
}
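
// Raw is handy for deferring decoding; a sketch (json.RawMessage is from
// encoding/json, imported above):
//
//	var msg json.RawMessage
//	if raw := l.Raw(); l.Ok() {
//		msg = json.RawMessage(raw)
//	}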

// IsStart returns whether the lexer is positioned at the start
// of an input string.
func (r *Lexer) IsStart() bool {
	return r.pos == 0
}

// Consumed reads all remaining bytes from the input, publishing an error if
// there is anything but whitespace remaining.
func (r *Lexer) Consumed() {
	if r.pos > len(r.Data) || !r.Ok() {
		return
	}

	for _, c := range r.Data[r.pos:] {
		if c != ' ' && c != '\t' && c != '\r' && c != '\n' {
			r.AddError(&LexerError{
				Reason: "invalid character '" + string(c) + "' after top-level value",
				Offset: r.pos,
				Data:   string(r.Data[r.pos:]),
			})
			return
		}

		r.pos++
		r.start++
	}
}

func (r *Lexer) unsafeString() (string, []byte) {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return "", nil
	}
	bytes := r.token.byteValue
	ret := bytesToStr(r.token.byteValue)
	r.consume()
	return ret, bytes
}

// UnsafeString returns the string value if the token is a string literal.
//
// Warning: returned string may point to the input buffer, so the string should not outlive
// the input buffer. Intended pattern of usage is as an argument to a switch statement.
func (r *Lexer) UnsafeString() string {
	ret, _ := r.unsafeString()
	return ret
}

// UnsafeBytes returns the byte slice if the token is a string literal.
// The same aliasing warning as for UnsafeString applies.
func (r *Lexer) UnsafeBytes() []byte {
	_, ret := r.unsafeString()
	return ret
}

// String reads a string literal.
func (r *Lexer) String() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return ""
	}
	ret := string(r.token.byteValue)
	r.consume()
	return ret
}

// Bytes reads a string literal and base64 decodes it into a byte slice.
func (r *Lexer) Bytes() []byte {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenString {
		r.errInvalidToken("string")
		return nil
	}
	ret := make([]byte, base64.StdEncoding.DecodedLen(len(r.token.byteValue)))
	n, err := base64.StdEncoding.Decode(ret, r.token.byteValue)
	if err != nil {
		r.fatalError = &LexerError{
			Reason: err.Error(),
		}
		return nil
	}

	r.consume()
	return ret[:n]
}
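
// For example, the JSON document `"aGVsbG8="` decodes to []byte("hello"):
// the string token's raw bytes are fed straight to the standard (padded)
// base64 alphabet, so URL-safe or unpadded input is rejected.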

// Bool reads a true or false boolean keyword.
func (r *Lexer) Bool() bool {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenBool {
		r.errInvalidToken("bool")
		return false
	}
	ret := r.token.boolValue
	r.consume()
	return ret
}

func (r *Lexer) number() string {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() || r.token.kind != tokenNumber {
		r.errInvalidToken("number")
		return ""
	}
	ret := bytesToStr(r.token.byteValue)
	r.consume()
	return ret
}

// Uint8 reads a number literal and converts it to a uint8.
func (r *Lexer) Uint8() uint8 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint8(n)
}

// Uint16 reads a number literal and converts it to a uint16.
func (r *Lexer) Uint16() uint16 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint16(n)
}

// Uint32 reads a number literal and converts it to a uint32.
func (r *Lexer) Uint32() uint32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return uint32(n)
}

// Uint64 reads a number literal and converts it to a uint64.
func (r *Lexer) Uint64() uint64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

// Uint reads a number literal and converts it to a uint.
func (r *Lexer) Uint() uint {
	return uint(r.Uint64())
}

// Int8 reads a number literal and converts it to an int8.
func (r *Lexer) Int8() int8 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int8(n)
}

// Int16 reads a number literal and converts it to an int16.
func (r *Lexer) Int16() int16 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int16(n)
}

// Int32 reads a number literal and converts it to an int32.
func (r *Lexer) Int32() int32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return int32(n)
}

// Int64 reads a number literal and converts it to an int64.
func (r *Lexer) Int64() int64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

// Int reads a number literal and converts it to an int.
func (r *Lexer) Int() int {
	return int(r.Int64())
}
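
// The *Str readers below accept numbers encoded as JSON string literals,
// e.g. the value in `{"n":"42"}`: the digits are parsed out of the string
// token instead of a number token.
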
// Uint8Str reads a string literal containing a number and converts it to a uint8.
func (r *Lexer) Uint8Str() uint8 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint8(n)
}

// Uint16Str reads a string literal containing a number and converts it to a uint16.
func (r *Lexer) Uint16Str() uint16 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint16(n)
}

// Uint32Str reads a string literal containing a number and converts it to a uint32.
func (r *Lexer) Uint32Str() uint32 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return uint32(n)
}

// Uint64Str reads a string literal containing a number and converts it to a uint64.
func (r *Lexer) Uint64Str() uint64 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseUint(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

// UintStr reads a string literal containing a number and converts it to a uint.
func (r *Lexer) UintStr() uint {
	return uint(r.Uint64Str())
}

// UintptrStr reads a string literal containing a number and converts it to a uintptr.
func (r *Lexer) UintptrStr() uintptr {
	return uintptr(r.Uint64Str())
}

// Int8Str reads a string literal containing a number and converts it to an int8.
func (r *Lexer) Int8Str() int8 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 8)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int8(n)
}

// Int16Str reads a string literal containing a number and converts it to an int16.
func (r *Lexer) Int16Str() int16 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 16)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int16(n)
}

// Int32Str reads a string literal containing a number and converts it to an int32.
func (r *Lexer) Int32Str() int32 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return int32(n)
}

// Int64Str reads a string literal containing a number and converts it to an int64.
func (r *Lexer) Int64Str() int64 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseInt(s, 10, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

// IntStr reads a string literal containing a number and converts it to an int.
func (r *Lexer) IntStr() int {
	return int(r.Int64Str())
}

// Float32 reads a number literal and converts it to a float32.
func (r *Lexer) Float32() float32 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseFloat(s, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return float32(n)
}

// Float32Str reads a string literal containing a number and converts it to a float32.
func (r *Lexer) Float32Str() float32 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}
	n, err := strconv.ParseFloat(s, 32)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return float32(n)
}

// Float64 reads a number literal and converts it to a float64.
func (r *Lexer) Float64() float64 {
	s := r.number()
	if !r.Ok() {
		return 0
	}

	n, err := strconv.ParseFloat(s, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   s,
		})
	}
	return n
}

// Float64Str reads a string literal containing a number and converts it to a float64.
func (r *Lexer) Float64Str() float64 {
	s, b := r.unsafeString()
	if !r.Ok() {
		return 0
	}
	n, err := strconv.ParseFloat(s, 64)
	if err != nil {
		r.addNonfatalError(&LexerError{
			Offset: r.start,
			Reason: err.Error(),
			Data:   string(b),
		})
	}
	return n
}

// Error returns the fatal error, if any, encountered during parsing.
func (r *Lexer) Error() error {
	return r.fatalError
}

// AddError sets e as the fatal error unless one is already recorded.
func (r *Lexer) AddError(e error) {
	if r.fatalError == nil {
		r.fatalError = e
	}
}

// AddNonFatalError records e as a non-fatal error at the current token,
// or promotes it to a fatal error if UseMultipleErrors is disabled.
func (r *Lexer) AddNonFatalError(e error) {
	r.addNonfatalError(&LexerError{
		Offset: r.start,
		Data:   string(r.Data[r.start:r.pos]),
		Reason: e.Error(),
	})
}

func (r *Lexer) addNonfatalError(err *LexerError) {
	if r.UseMultipleErrors {
		// We don't want to add errors with the same offset.
		if len(r.multipleErrors) != 0 && r.multipleErrors[len(r.multipleErrors)-1].Offset == err.Offset {
			return
		}
		r.multipleErrors = append(r.multipleErrors, err)
		return
	}
	r.fatalError = err
}

// GetNonFatalErrors returns the errors accumulated while UseMultipleErrors
// was enabled.
func (r *Lexer) GetNonFatalErrors() []*LexerError {
	return r.multipleErrors
}

// JsonNumber fetches a json.Number from the 'encoding/json' package.
// Int and float literals, as well as strings containing them, are valid values.
func (r *Lexer) JsonNumber() json.Number {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}
	if !r.Ok() {
		r.errInvalidToken("json.Number")
		return json.Number("")
	}

	switch r.token.kind {
	case tokenString:
		return json.Number(r.String())
	case tokenNumber:
		return json.Number(r.Raw())
	case tokenNull:
		r.Null()
		return json.Number("")
	default:
		r.errSyntax()
		return json.Number("")
	}
}
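
// For example, the inputs `5`, `5.5` and `"5.5"` all yield a json.Number,
// from which the caller picks Int64() or Float64(); `null` yields the empty
// json.Number "".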

// Interface fetches an interface{} analogous to the 'encoding/json' package.
func (r *Lexer) Interface() interface{} {
	if r.token.kind == tokenUndef && r.Ok() {
		r.FetchToken()
	}

	if !r.Ok() {
		return nil
	}
	switch r.token.kind {
	case tokenString:
		return r.String()
	case tokenNumber:
		return r.Float64()
	case tokenBool:
		return r.Bool()
	case tokenNull:
		r.Null()
		return nil
	}

	if r.token.delimValue == '{' {
		r.consume()

		ret := map[string]interface{}{}
		for !r.IsDelim('}') {
			key := r.String()
			r.WantColon()
			ret[key] = r.Interface()
			r.WantComma()
		}
		r.Delim('}')

		if r.Ok() {
			return ret
		} else {
			return nil
		}
	} else if r.token.delimValue == '[' {
		r.consume()

		var ret []interface{}
		for !r.IsDelim(']') {
			ret = append(ret, r.Interface())
			r.WantComma()
		}
		r.Delim(']')

		if r.Ok() {
			return ret
		} else {
			return nil
		}
	}
	r.errSyntax()
	return nil
}
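
// As in encoding/json, the concrete types produced are string, float64, bool,
// nil, map[string]interface{} and []interface{}; in particular every number,
// integer or not, comes back as a float64.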

// WantComma requires a comma to be present before fetching next token.
func (r *Lexer) WantComma() {
	r.wantSep = ','
	r.firstElement = false
}

// WantColon requires a colon to be present before fetching next token.
func (r *Lexer) WantColon() {
	r.wantSep = ':'
	r.firstElement = false
}