Lex a lexeme.
(lex-lexeme pstate) → (mv erp lexeme? span new-pstate)
This is the top-level lexing function.
It returns the next lexeme found in the parser state,
or
First we get the next character, propagating errors.
If there is no next character, we return
Function:
(defun lex-lexeme (pstate) (declare (xargs :guard (parstatep pstate))) (let ((__function__ 'lex-lexeme)) (declare (ignorable __function__)) (b* (((reterr) nil (irr-span) (irr-parstate)) ((erp char first-pos pstate) (read-char pstate)) ((unless char) (retok nil (make-span :start first-pos :end first-pos) pstate))) (cond ((or (= char 32) (and (<= 9 char) (<= char 12))) (retok (lexeme-whitespace) (make-span :start first-pos :end first-pos) pstate)) ((= char (char-code #\u)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-ident (ident "u"))) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\')) (lex-cconst (cprefix-locase-u) first-pos pstate)) ((= char2 (char-code #\")) (lex-stringlit (eprefix-locase-u) first-pos pstate)) ((= char2 (char-code #\8)) (b* (((erp char3 & pstate) (read-char pstate))) (cond ((not char3) (retok (lexeme-token (token-ident (ident "u8"))) (make-span :start first-pos :end pos2) pstate)) ((= char3 (char-code #\")) (lex-stringlit (eprefix-locase-u8) first-pos pstate)) (t (b* ((pstate (unread-char pstate)) (pstate (unread-char pstate))) (lex-identifier/keyword char first-pos pstate)))))) (t (b* ((pstate (unread-char pstate))) (lex-identifier/keyword char first-pos pstate)))))) ((= char (char-code #\U)) (b* (((erp char2 & pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-ident (ident "U"))) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\')) (lex-cconst (cprefix-upcase-u) first-pos pstate)) ((= char2 (char-code #\")) (lex-stringlit (eprefix-upcase-u) first-pos pstate)) (t (b* ((pstate (unread-char pstate))) (lex-identifier/keyword char first-pos pstate)))))) ((= char (char-code #\L)) (b* (((erp char2 & pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-ident (ident "L"))) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\')) (lex-cconst (cprefix-upcase-l) first-pos pstate)) ((= char2 (char-code #\")) (lex-stringlit (eprefix-upcase-l) first-pos pstate)) (t (b* ((pstate (unread-char pstate))) (lex-identifier/keyword char first-pos pstate)))))) ((or (and (<= (char-code #\A) char) (<= char (char-code #\Z))) (and (<= (char-code #\a) char) (<= char (char-code #\z))) (= char (char-code #\_))) (lex-identifier/keyword char first-pos pstate)) ((and (<= (char-code #\0) char) (<= char (char-code #\9))) (b* (((erp const last-pos pstate) (lex-iconst/fconst (code-char char) first-pos pstate))) (retok (lexeme-token (token-const const)) (make-span :start first-pos :end last-pos) pstate))) ((= char (char-code #\.)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator ".")) (make-span :start first-pos :end first-pos) pstate)) ((and (<= (char-code #\0) char2) (<= char2 (char-code #\9))) (b* (((erp const last-pos pstate) (lex-dec-fconst (code-char char2) pos2 pstate))) (retok (lexeme-token (token-const const)) (make-span :start first-pos :end last-pos) pstate))) ((= char2 (char-code #\.)) (b* (((erp char3 pos3 pstate) (read-char pstate))) (cond ((not char3) (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator ".")) (make-span :start first-pos :end first-pos) pstate))) ((= char3 (char-code #\.)) (retok (lexeme-token (token-punctuator "...")) (make-span :start first-pos :end pos3) pstate)) (t (b* ((pstate (unread-char pstate)) (pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator ".")) (make-span :start first-pos :end first-pos) pstate)))))) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator ".")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\')) (lex-cconst nil first-pos pstate)) ((= char (char-code #\")) (lex-stringlit nil first-pos pstate)) ((= char (char-code #\/)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "/")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\*)) (lex-block-comment first-pos pstate)) ((= char2 (char-code #\/)) (lex-line-comment first-pos pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "/=")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "/")) (make-span :start first-pos :end first-pos) pstate)))))) ((or (= char (char-code #\[)) (= char (char-code #\])) (= char (char-code #\()) (= char (char-code #\))) (= char (char-code #\{)) (= char (char-code #\})) (= char (char-code #\~)) (= char (char-code #\?)) (= char (char-code #\,)) (= char (char-code #\;))) (retok (lexeme-token (token-punctuator (acl2::implode (list (code-char char))))) (make-span :start first-pos :end first-pos) pstate)) ((= char (char-code #\*)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "*")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "*=")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "*")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\^)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "^")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "^=")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "^")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\!)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "!")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "!=")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "!")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\=)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "=")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "==")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "=")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\:)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator ":")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\>)) (retok (lexeme-token (token-punctuator ":>")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator ":")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\#)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "#")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\#)) (retok (lexeme-token (token-punctuator "##")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "#")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\&)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "&")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\&)) (retok (lexeme-token (token-punctuator "&&")) (make-span :start first-pos :end pos2) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "&=")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "&")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\|)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "|")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\|)) (retok (lexeme-token (token-punctuator "||")) (make-span :start first-pos :end pos2) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "|=")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "|")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\+)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "+")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\+)) (retok (lexeme-token (token-punctuator "++")) (make-span :start first-pos :end pos2) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "+=")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "+")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\-)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "-")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\>)) (retok (lexeme-token (token-punctuator "->")) (make-span :start first-pos :end pos2) pstate)) ((= char2 (char-code #\-)) (retok (lexeme-token (token-punctuator "--")) (make-span :start first-pos :end pos2) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "-=")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "-")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\>)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator ">")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\>)) (b* (((erp char3 pos3 pstate) (read-char pstate))) (cond ((not char3) (retok (lexeme-token (token-punctuator ">>")) (make-span :start first-pos :end pos2) pstate)) ((= char3 (char-code #\=)) (retok (lexeme-token (token-punctuator ">>=")) (make-span :start first-pos :end pos3) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator ">>")) (make-span :start first-pos :end pos2) pstate)))))) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator ">=")) (make-span :start first-pos :end first-pos) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator ">")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\%)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "%")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "%=")) (make-span :start first-pos :end pos2) pstate)) ((= char2 (char-code #\:)) (b* (((erp char3 & pstate) (read-char pstate))) (cond ((not char3) (retok (lexeme-token (token-punctuator "%:")) (make-span :start first-pos :end pos2) pstate)) ((= char3 (char-code #\%)) (b* (((erp char4 pos4 pstate) (read-char pstate))) (cond ((not char4) (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "%:")) (make-span :start first-pos :end pos2) pstate))) ((= char4 (char-code #\:)) (retok (lexeme-token (token-punctuator "%:%:")) (make-span :start first-pos :end pos4) pstate)) (t (b* ((pstate (unread-char pstate)) (pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "%:")) (make-span :start first-pos :end pos2) pstate)))))) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "%:")) (make-span :start first-pos :end pos2) pstate)))))) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "%")) (make-span :start first-pos :end first-pos) pstate)))))) ((= char (char-code #\<)) (b* (((erp char2 pos2 pstate) (read-char pstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "<")) (make-span :start first-pos :end first-pos) pstate)) ((= char2 (char-code #\<)) (b* (((erp char3 pos3 pstate) (read-char pstate))) (cond ((not char3) (retok (lexeme-token (token-punctuator "<<")) (make-span :start first-pos :end pos2) pstate)) ((= char3 (char-code #\=)) (retok (lexeme-token (token-punctuator "<<=")) (make-span :start first-pos :end pos3) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "<<")) (make-span :start first-pos :end pos2) pstate)))))) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "<=")) (make-span :start first-pos :end pos2) pstate)) ((= char2 (char-code #\:)) (retok (lexeme-token (token-punctuator "<:")) (make-span :start first-pos :end pos2) pstate)) ((= char2 (char-code #\%)) (retok (lexeme-token (token-punctuator "<%")) (make-span :start first-pos :end pos2) pstate)) (t (b* ((pstate (unread-char pstate))) (retok (lexeme-token (token-punctuator "<")) (make-span :start first-pos :end first-pos) pstate)))))) (t (reterr-msg :where (position-to-msg first-pos) :expected "a white-space character ~ (space, ~ new-line, ~ horizontal tab, ~ vertical tab, ~ form feed) ~ or a letter ~ or a digit ~ or an underscore ~ or a round parenthesis ~ or a square bracket ~ or a curly brace ~ or an angle bracket ~ or a dot ~ or a comma ~ or a colon ~ or a semicolon ~ or a plus ~ or a minus ~ or a star ~ or a slash ~ or a percent ~ or a tilde ~ or an equal sign ~ or an exclamation mark ~ or a question mark ~ or a vertical bar ~ or a caret ~ or hash" :found (char-to-msg char)))))))
Theorem:
(defthm lexeme-optionp-of-lex-lexeme.lexeme? (b* (((mv acl2::?erp ?lexeme? ?span ?new-pstate) (lex-lexeme pstate))) (lexeme-optionp lexeme?)) :rule-classes :rewrite)
Theorem:
(defthm spanp-of-lex-lexeme.span (b* (((mv acl2::?erp ?lexeme? ?span ?new-pstate) (lex-lexeme pstate))) (spanp span)) :rule-classes :rewrite)
Theorem:
(defthm parstatep-of-lex-lexeme.new-pstate (b* (((mv acl2::?erp ?lexeme? ?span ?new-pstate) (lex-lexeme pstate))) (parstatep new-pstate)) :rule-classes :rewrite)
Theorem:
(defthm parsize-of-lex-lexeme-uncond (b* (((mv acl2::?erp ?lexeme? ?span ?new-pstate) (lex-lexeme pstate))) (<= (parsize new-pstate) (parsize pstate))) :rule-classes :linear)
Theorem:
(defthm parsize-of-lex-lexeme-cond (b* (((mv acl2::?erp ?lexeme? ?span ?new-pstate) (lex-lexeme pstate))) (implies (and (not erp) lexeme?) (<= (parsize new-pstate) (1- (parsize pstate))))) :rule-classes :linear)