; The goal of this parser is to be a correct subset of the MzScheme ; reader. (http://docs.plt-scheme.org/reference/reader.html) ; ; That is, not everything in the MzScheme reader is implemented, but ; for those things that are, they should parse the same as MzScheme's ; reader does. ; ; A reader for Arc would add Arc extensions such as [...], and could ; remove MzScheme-isms such as #t if desired. ; ; not implemented ; incremental reading ; hash tables #hash((a . 1) (b . 2)) ; here-strings #<< ; byte strings #"abc" ; [] {} as synonyms for () ; capitalized syntax #T, #\NUL, etc. ; vectors #() etc. ; structures #s() etc. ; #% symbols ; keywords #: ; boxes #& ; unix script comments #! and #!/ ; syntax quotes #' etc. ; compiled code #~ ; regular expressions #rx etc. ; case sensitivity #ci, #cs, etc. ; honu #hx ; reader extensions #reader, #lang ; graph structures #1234= etc. (def line-comment () (match #\;) (many (match [isnt _ #\newline])) (optional (match #\newline))) (def block-comment () (mliteral "#|") (many (alt (block-comment) (do (not (mliteral "|#")) (next)))) (must "no closing |#" (mliteral "|#"))) (def s-expression-comment () (mliteral "#;") (value)) (def comment () (alt (line-comment) (block-comment) (s-expression-comment))) (def skip-whitespace () (many (alt (match whitec) (comment)))) ; Parsing numbers is interesting. ; ; "123(" parsers as the number 123 followed by "(" as a delimiter, ; while "123abc(" parsers as the symbol '123abc followed by "(". So ; we can't just use alt to match either a number or a symbol, because ; then "123abc(" would parse as the number 123 followed by "abc(". ; ; And we can't first parse a symbol and then see if the parsed symbol ; looks like a number, because "\3" is the symbol '|3| not the ; number 3. ; ; So the strategy is to first match the input like a symbol ; (msym-like), and then see if that matched input can be reparsed as a ; number. ; ; However, numbers can also start with #i, #e, #b, #o, or #x, which ; symbols can't. So if we see one of these prefixes, we need to parse ; the following input as a number unconditionally. (def delimiter (c) (in c #\( #\) #\[ #\] #\[ #\] #\" #\, #\' #\` #\;)) (def terminator (c) (or (delimiter c) (whitec c))) (def backslash-sym-char () (match #\\) (must "backslash must be followed by a character" (next))) (mac do2 (a b . cs) `(do ,a (do1 ,b ,@cs))) (def bar-quote () (do2 (match #\|) (many (match [isnt _ #\|])) (must "missing closing |" (match #\|)))) (def begins-sym (a) (alt (a (backslash-sym-char)) (map a (bar-quote)) (a (match [and (no (terminator _)) (isnt _ #\#)])))) (def in-sym (a) (alt (a (backslash-sym-char)) (map a (bar-quote)) (a (match [no (terminator _)])))) (def charssym (cs) (sym (coerce cs 'string))) (def msym-like () (let r (charssym (accum a (begins-sym a) (many (in-sym a)))) (if (is r '|.|) (parsefail)) r)) ; duplicate with j1.arc (def hexdigit (c) (or (<= #\a c #\f) (<= #\A c #\F) (<= #\0 c #\9))) (mac match-in args `(match [in _ ,@args])) (def mdigit (n) (case n 2 (match #\0 #\1) 8 (match [in _ #\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7]) 10 (match digit) 16 (match hexdigit) (err "bad mdigit n" n))) (def mdigits (n) (many1 (mdigit n))) (def mdigits# (n) (+ (many1 (mdigit n)) (many (match #\#)))) (def msign () (match #\+ #\-)) (def exact-integer (n) (consif (optional (msign)) (mdigits n))) (def mexact-rational (n) (alt (+ (exact-integer n) (list (match #\/)) (mdigits n)) (exact-integer n))) (def mexact-complex (n) (+ (mexact-rational n) (list (msign)) (mexact-rational n) (list (match #\i)))) (def mexact (n) (alt (mexact-complex n) (mexact-rational n))) (mac loptional (parser) `(aif (optional ,parser) (list it))) (def minexact-simple (n) (alt (+ (mdigits# n) (list (match #\/)) (mdigits# n)) (+ (mdigits n) (list (match #\.)) (mdigits# n)) (+ (mdigits# n) (loptional (match #\.)) (many (match #\#))))) (def exp-mark (n) (if (is n 16) (match #\s #\S #\d #\D #\l #\L) (match #\s #\S #\d #\D #\l #\L #\e #\E #\f #\F))) (def minexact-normal (n) (+ (minexact-simple n) (optional (+ (list (exp-mark n)) (loptional (msign)) (mdigits# n))))) (def mlist (s) (map match (ascons s))) (def minexact-special () (alt (mlist "inf.0") (mlist "nan.0"))) (def minexact-real (n) (alt (minexact-normal n) (cons (msign) (minexact-special)))) (def minexact-unsigned (n) (alt (minexact-normal n) (minexact-special n))) (def minexact-complex (n) (alt (+ (optional (minexact-real n)) (list (msign)) (minexact-unsigned n) (list (match #\i #\I))) (+ (minexact-real n) (list (match #\@)) (minexact-real n)))) (def minexact (n) (alt (minexact-complex n) (minexact-real n))) ; Called "number" in the grammer, but I prefer to use "number" ; for the final parser that will match any number. (def mbasenumber (n) (alt (minexact n) (mexact n))) (def mgeneral-number (n) (+ (optional (alt (mlist "#e") (mlist "#i"))) (mbasenumber n))) (def base-specifier () (alt (mliteral "#b" 2) (mliteral "#o" 8) (mliteral "#d" 10) (mliteral "#x" 16))) (def exactness-specifier () (mliteral-in "#e" "#i")) (def mprefixed-number () (alt (let n (base-specifier) (optional (exactness-specifier)) (mbasenumber n)) (do (exactness-specifier) (let n (alt (base-specifier) 10) (mbasenumber n))))) ; TODO #o37a should be an error, but it's currently parsed as an octal ; 37 followed by the symbol 'a. (def symbol-matcher () (let parsed-sym nil (let inp (matched-input (= parsed-sym (msym-like))) (list inp parsed-sym)))) ; Our parser figures out whether the input looks like a number, but we ; let MzScheme do the hard work of converting the matched input string ; to an actual number value. (def asnum (x) (coerce (string x) 'num)) (def match-sym-or-number () (alt (asnum (matched-input (mprefixed-number))) (let (inp parsed-sym) (symbol-matcher) (or (and (parse inp (matched-input (entire (mbasenumber 10)))) (asnum inp)) parsed-sym)))) (def value () (skip-whitespace) (match-value)) (def report-parse-failed () (err (+ "unable to parse as an Arc value: " (firstn 30 parsepos*)))) (def values () (do1 (many (do (skipwhite) (value))) (skip-whitespace) (unless (at-end) (report-parse-failed)))) (def parse-arc ((o s (stdin))) (parse s (values))) (def scheme-boolean () (mliteral "#t" #t "#f" #f)) (def string-backslash-char () (case (next) #\a #\u0007 #\b #\backspace #\t #\tab #\n #\newline #\v #\vtab #\f #\u000C #\r #\return #\e #\u001B #\" #\" #\' #\' #\\ #\\ (parsefail))) (def aschar (x) (coerce x 'char)) (def intchar (b s) (aschar (int (string s) b))) (def backslash-octal () (intchar 8 (from1upto 3 (mdigit 8)))) (def backslash-hex2 () (match #\x) (must "a \\x must be followed by one or two hex digits" (intchar 16 (from1upto 2 (mdigit 16))))) (def backslash-hex4 () (match #\u) (must "a \\u must be followed by one to four hex digits" (intchar 16 (from1upto 4 (mdigit 16))))) (def backslash-hex8 () (match #\U) (must "a \\U must be followed by one to eight hex digits" (intchar 16 (from1upto 8 (mdigit 16))))) (def string-backslash-newline () (alt (match #\newline) (do (match #\return) (match #\newline)) (match #\return))) (def string-backslash-sequence () (match #\\) (when (at-end) (err "a backslash in a string must be followed by a character")) (must "invalid backslash sequence in string" (alt (string-backslash-char) (backslash-octal) (backslash-hex2) (backslash-hex4) (backslash-hex8)))) (def simple-string () (string (accum a (match #\") (many (alt (a (string-backslash-sequence)) (string-backslash-newline) (a (match [isnt _ #\"])))) (must "missing closing quote in string" (match #\"))))) (def named-char () (mliteral "null" #\nul "nul" #\nul "backspace" #\backspace "tab" #\tab "newline" #\newline "linefeed" #\newline "vtab" #\vtab "page" #\page "return" #\return "space" #\space "rubout" #\rubout)) (def char-constant () (match #\#) (match #\\) (must "invalid character constant" (alt (do1 (named-char) (not (match letter))) (backslash-octal) (backslash-hex2) (backslash-hex4) (backslash-hex8) (do1 (next) (not (match letter))) ))) (def close-list () (skip-whitespace) (match #\))) (def dotted-list-ending () (skip-whitespace) (match #\.) (at (match terminator)) (must "a dotted list period must be followed by a single value and then the closing parenthesis" (do1 (value) (close-list)))) ; can have (a b c d . e) or (a . b), but not (. a) (def nonempty-list-values () (let xs (many1 (value)) (alt (dot xs (dotted-list-ending)) (must "missing closing parenthesis" (do (close-list) xs))))) (def match-list () (match #\() (skip-whitespace) (alt (do (match #\)) nil) (nonempty-list-values))) ; 'foo -> (quote foo) (mac match-prefix (parser prefix-name expansion) `(do ,parser (skip-whitespace) (must ,(string "a " prefix-name " must be followed by an Arc value") `(,,expansion ,(value))))) (def match-quotes () (alt (match-prefix (match #\') "'" 'quote) (match-prefix (match #\`) "`" 'quasiquote) (match-prefix (mliteral ",@") ",@" 'unquote-splicing) (match-prefix (match #\,) "," 'unquote))) ; extension point (def match-value () (alt (match-sym-or-number) (char-constant) (scheme-boolean) (match-list) (match-quotes)))