awwx.ws

Creating a parser combinator library to parse JSON

Prev: JSON objectsContents

The finished result

Here’s the complete parser combinator library:

(def return (new-parse-position return-value)
  (list new-parse-position return-value))

(def match (f)
  (only (fn (p)
          (let x car.p
            (if (f x)
                 (return cdr.p x))))))

(def alt parsers
  (fn (p)
    (some [_ p] parsers)))

(def seq parsers
  (fn (p)
    ((afn (p parsers a)
       (if parsers
            (iflet (p2 r) (car.parsers p)
              (self p2 cdr.parsers (cons r a)))
            (return p rev.a)))
     p parsers nil)))

(def many (parser)
  (fn (p)
    ((afn (p a)
       (iflet (s2 r) (parser p)
         (self s2 (cons r a))
         (return p rev.a)))
     p nil)))

(def on-result (f parser)
  (fn (p)
    (iflet (p2 r) (parser p)
      (return p2 (f r)))))

(mac with-result (vars parser . body)
  `(on-result (fn (,vars) ,@body)
              ,parser))

(mac with-seq (vars-parsers . body)
  (withs (ps (pair vars-parsers)
          vars (map car ps)
          parsers (map cadr ps))
    `(on-result (fn (,vars) ,@body) (seq ,@parsers))))

(def cons-seq (a b)
  (with-seq (r  a
             rs b)
    (cons r rs)))

(def many1 (parser)
  (cons-seq parser
            (many parser)))

(def must (errmsg parser)
  (fn (p)
    (or (parser p)
        (err errmsg))))

(def seqi (i parsers)
  (with-result results (apply seq parsers)
    (results i)))

(def seq1 p (seqi 0 p))

(def seq2 p (seqi 1 p))

(def optional (parser)
  (alt parser
       (fn (p)
         (return p nil))))

(mac forward (parser)
  (w/uniq p
    `(fn (,p) (,parser ,p))))

(def match-is (x)
  (match [is x _]))

(def match-literal (pat val)
  (with (patlist (coerce pat 'cons)
         patlen  len.pat)
    (fn (p)
      (if (begins p patlist)
           (return (nthcdr patlen p) val)))))

(def parse-intersperse (separator parser must-message)
  (optional (cons-seq parser
                      (many (seq2 separator
                                  (must must-message parser))))))

(def skipw (p)
  (mem nonwhite p))

(def skipwhite (parser)
  (fn (p)
    (parser (skipw p))))

(def comma-separated (parser must-message)
  (parse-intersperse (skipwhite:match-is #\,) parser must-message))

And the JSON parser:

(= json-true  (match-literal "true"  t))
(= json-false (match-literal "false" nil))
(= json-null  (match-literal "null"  nil))

(= json-number-char
  (match [find _ ".-+eE1234567890"]))

(= json-number
  (with-result cs (many1 json-number-char)
    (coerce (string cs) 'num)))

(def hexdigit (c)
  (and (isa c 'char)
       (or (<= #\a c #\f) (<= #\A c #\F) (<= #\0 c #\9))))

(= fourhex
  (must "four hex digits required after \\u"  
    (with-seq (h1 (match hexdigit)
               h2 (match hexdigit)
               h3 (match hexdigit)
               h4 (match hexdigit))
      (coerce (int (coerce (list h1 h2 h3 h4) 'string) 16) 'char))))

(def json-backslash-char (c)
  (case c
    #\" #\"
    #\\ #\\
    #\/ #\/
    #\b #\backspace
    #\f #\page
    #\n #\newline
    #\r #\return
    #\t #\tab
    (err "invalid backslash char" c)))

(= json-backslash-escape
  (seq2 (match-is #\\)
        (alt (seq2 (match-is #\u)
                   fourhex)
             (fn (p)
               (return cdr.p (json-backslash-char car.p))))))

(= json-string
  (on-result string
    (seq2 (match-is #\")
          (must "missing closing quote in JSON string"
                (seq1 (many (alt json-backslash-escape
                                 (match [isnt _ #\"])))
                      (match-is #\"))))))

(= json-array
  (seq2 (match-is #\[)
        (optional (cons-seq forward.json-value
                            (many (seq2 (skipwhite:match-is #\,)
                                        (must "a comma must be followed by a value"
                                              forward.json-value)))))
        (must "a JSON array must be terminated with a closing ]"
              (skipwhite:match-is #\]))))

(= json-object-kv
  (with-seq (key   skipwhite.json-string
             colon (must "a JSON object key string must be followed by a :"
                         (skipwhite:match-is #\:))
             value (must "a colon in a JSON object must be followed by a value"
                         forward.json-value))
    (list key value)))

(= json-object
   (on-result listtab
     (seq2 (match-is #\{)
           (comma-separated json-object-kv "comma must be followed by a key")
           (must "a JSON object must be terminated with a closing }"
                 (skipwhite:match-is #\})))))

(= json-value
  (skipwhite:alt json-true
                 json-false
                 json-null
                 json-number
                 json-string
                 json-array
                 json-object))

(def fromjson (s)
  (iflet (p r) (json-value (coerce s 'cons))
    (do (if p (err "Unexpected characters after JSON value" (coerce p 'string)))
        r)
    (err:string "not a JSON value: " s)))

To see the difference Arc and the combinator technique makes, compare this implementation with one of the other JSON parsers available at json.org.


Prev: JSON objectsContents


Questions? Comments? Email me andrew.wilcox [at] gmail.com