Creating a parser combinator library to parse JSON
Prev: JSON objects | Contents |
Here’s the complete parser combinator library:
(def return (new-parse-position return-value) (list new-parse-position return-value)) (def match (f) (only (fn (p) (let x car.p (if (f x) (return cdr.p x)))))) (def alt parsers (fn (p) (some [_ p] parsers))) (def seq parsers (fn (p) ((afn (p parsers a) (if parsers (iflet (p2 r) (car.parsers p) (self p2 cdr.parsers (cons r a))) (return p rev.a))) p parsers nil))) (def many (parser) (fn (p) ((afn (p a) (iflet (s2 r) (parser p) (self s2 (cons r a)) (return p rev.a))) p nil))) (def on-result (f parser) (fn (p) (iflet (p2 r) (parser p) (return p2 (f r))))) (mac with-result (vars parser . body) `(on-result (fn (,vars) ,@body) ,parser)) (mac with-seq (vars-parsers . body) (withs (ps (pair vars-parsers) vars (map car ps) parsers (map cadr ps)) `(on-result (fn (,vars) ,@body) (seq ,@parsers)))) (def cons-seq (a b) (with-seq (r a rs b) (cons r rs))) (def many1 (parser) (cons-seq parser (many parser))) (def must (errmsg parser) (fn (p) (or (parser p) (err errmsg)))) (def seqi (i parsers) (with-result results (apply seq parsers) (results i))) (def seq1 p (seqi 0 p)) (def seq2 p (seqi 1 p)) (def optional (parser) (alt parser (fn (p) (return p nil)))) (mac forward (parser) (w/uniq p `(fn (,p) (,parser ,p)))) (def match-is (x) (match [is x _])) (def match-literal (pat val) (with (patlist (coerce pat 'cons) patlen len.pat) (fn (p) (if (begins p patlist) (return (nthcdr patlen p) val))))) (def parse-intersperse (separator parser must-message) (optional (cons-seq parser (many (seq2 separator (must must-message parser)))))) (def skipw (p) (mem nonwhite p)) (def skipwhite (parser) (fn (p) (parser (skipw p)))) (def comma-separated (parser must-message) (parse-intersperse (skipwhite:match-is #\,) parser must-message))
And the JSON parser:
(= json-true (match-literal "true" t)) (= json-false (match-literal "false" nil)) (= json-null (match-literal "null" nil)) (= json-number-char (match [find _ ".-+eE1234567890"])) (= json-number (with-result cs (many1 json-number-char) (coerce (string cs) 'num))) (def hexdigit (c) (and (isa c 'char) (or (<= #\a c #\f) (<= #\A c #\F) (<= #\0 c #\9)))) (= fourhex (must "four hex digits required after \\u" (with-seq (h1 (match hexdigit) h2 (match hexdigit) h3 (match hexdigit) h4 (match hexdigit)) (coerce (int (coerce (list h1 h2 h3 h4) 'string) 16) 'char)))) (def json-backslash-char (c) (case c #\" #\" #\\ #\\ #\/ #\/ #\b #\backspace #\f #\page #\n #\newline #\r #\return #\t #\tab (err "invalid backslash char" c))) (= json-backslash-escape (seq2 (match-is #\\) (alt (seq2 (match-is #\u) fourhex) (fn (p) (return cdr.p (json-backslash-char car.p)))))) (= json-string (on-result string (seq2 (match-is #\") (must "missing closing quote in JSON string" (seq1 (many (alt json-backslash-escape (match [isnt _ #\"]))) (match-is #\")))))) (= json-array (seq2 (match-is #\[) (optional (cons-seq forward.json-value (many (seq2 (skipwhite:match-is #\,) (must "a comma must be followed by a value" forward.json-value))))) (must "a JSON array must be terminated with a closing ]" (skipwhite:match-is #\])))) (= json-object-kv (with-seq (key skipwhite.json-string colon (must "a JSON object key string must be followed by a :" (skipwhite:match-is #\:)) value (must "a colon in a JSON object must be followed by a value" forward.json-value)) (list key value))) (= json-object (on-result listtab (seq2 (match-is #\{) (comma-separated json-object-kv "comma must be followed by a key") (must "a JSON object must be terminated with a closing }" (skipwhite:match-is #\}))))) (= json-value (skipwhite:alt json-true json-false json-null json-number json-string json-array json-object)) (def fromjson (s) (iflet (p r) (json-value (coerce s 'cons)) (do (if p (err "Unexpected characters after JSON value" (coerce p 'string))) r) (err:string "not a JSON value: " s)))
To see the difference Arc and the combinator technique makes, compare this implementation with one of the other JSON parsers available at json.org.
Prev: JSON objects | Contents |
Questions? Comments? Email me andrew.wilcox [at] gmail.com