Creating a parser combinator library to parse JSON
| Prev: JSON objects | Contents |
Here’s the complete parser combinator library:
(def return (new-parse-position return-value)
(list new-parse-position return-value))
(def match (f)
(only (fn (p)
(let x car.p
(if (f x)
(return cdr.p x))))))
(def alt parsers
(fn (p)
(some [_ p] parsers)))
(def seq parsers
(fn (p)
((afn (p parsers a)
(if parsers
(iflet (p2 r) (car.parsers p)
(self p2 cdr.parsers (cons r a)))
(return p rev.a)))
p parsers nil)))
(def many (parser)
(fn (p)
((afn (p a)
(iflet (s2 r) (parser p)
(self s2 (cons r a))
(return p rev.a)))
p nil)))
(def on-result (f parser)
(fn (p)
(iflet (p2 r) (parser p)
(return p2 (f r)))))
(mac with-result (vars parser . body)
`(on-result (fn (,vars) ,@body)
,parser))
(mac with-seq (vars-parsers . body)
(withs (ps (pair vars-parsers)
vars (map car ps)
parsers (map cadr ps))
`(on-result (fn (,vars) ,@body) (seq ,@parsers))))
(def cons-seq (a b)
(with-seq (r a
rs b)
(cons r rs)))
(def many1 (parser)
(cons-seq parser
(many parser)))
(def must (errmsg parser)
(fn (p)
(or (parser p)
(err errmsg))))
(def seqi (i parsers)
(with-result results (apply seq parsers)
(results i)))
(def seq1 p (seqi 0 p))
(def seq2 p (seqi 1 p))
(def optional (parser)
(alt parser
(fn (p)
(return p nil))))
(mac forward (parser)
(w/uniq p
`(fn (,p) (,parser ,p))))
(def match-is (x)
(match [is x _]))
(def match-literal (pat val)
(with (patlist (coerce pat 'cons)
patlen len.pat)
(fn (p)
(if (begins p patlist)
(return (nthcdr patlen p) val)))))
(def parse-intersperse (separator parser must-message)
(optional (cons-seq parser
(many (seq2 separator
(must must-message parser))))))
(def skipw (p)
(mem nonwhite p))
(def skipwhite (parser)
(fn (p)
(parser (skipw p))))
(def comma-separated (parser must-message)
(parse-intersperse (skipwhite:match-is #\,) parser must-message))
And the JSON parser:
(= json-true (match-literal "true" t))
(= json-false (match-literal "false" nil))
(= json-null (match-literal "null" nil))
(= json-number-char
(match [find _ ".-+eE1234567890"]))
(= json-number
(with-result cs (many1 json-number-char)
(coerce (string cs) 'num)))
(def hexdigit (c)
(and (isa c 'char)
(or (<= #\a c #\f) (<= #\A c #\F) (<= #\0 c #\9))))
(= fourhex
(must "four hex digits required after \\u"
(with-seq (h1 (match hexdigit)
h2 (match hexdigit)
h3 (match hexdigit)
h4 (match hexdigit))
(coerce (int (coerce (list h1 h2 h3 h4) 'string) 16) 'char))))
(def json-backslash-char (c)
(case c
#\" #\"
#\\ #\\
#\/ #\/
#\b #\backspace
#\f #\page
#\n #\newline
#\r #\return
#\t #\tab
(err "invalid backslash char" c)))
(= json-backslash-escape
(seq2 (match-is #\\)
(alt (seq2 (match-is #\u)
fourhex)
(fn (p)
(return cdr.p (json-backslash-char car.p))))))
(= json-string
(on-result string
(seq2 (match-is #\")
(must "missing closing quote in JSON string"
(seq1 (many (alt json-backslash-escape
(match [isnt _ #\"])))
(match-is #\"))))))
(= json-array
(seq2 (match-is #\[)
(optional (cons-seq forward.json-value
(many (seq2 (skipwhite:match-is #\,)
(must "a comma must be followed by a value"
forward.json-value)))))
(must "a JSON array must be terminated with a closing ]"
(skipwhite:match-is #\]))))
(= json-object-kv
(with-seq (key skipwhite.json-string
colon (must "a JSON object key string must be followed by a :"
(skipwhite:match-is #\:))
value (must "a colon in a JSON object must be followed by a value"
forward.json-value))
(list key value)))
(= json-object
(on-result listtab
(seq2 (match-is #\{)
(comma-separated json-object-kv "comma must be followed by a key")
(must "a JSON object must be terminated with a closing }"
(skipwhite:match-is #\})))))
(= json-value
(skipwhite:alt json-true
json-false
json-null
json-number
json-string
json-array
json-object))
(def fromjson (s)
(iflet (p r) (json-value (coerce s 'cons))
(do (if p (err "Unexpected characters after JSON value" (coerce p 'string)))
r)
(err:string "not a JSON value: " s)))
To see the difference Arc and the combinator technique makes, compare this implementation with one of the other JSON parsers available at json.org.
| Prev: JSON objects | Contents |
Questions? Comments? Email me andrew.wilcox [at] gmail.com