Lex a lexeme.
(lex-lexeme parstate) → (mv erp lexeme? span new-parstate)
This is the top-level lexing function.
It returns the next lexeme found in the parser state,
or
First we get the next character, propagating errors.
If there is no next character, we return
Function:
(defun lex-lexeme (parstate) (declare (xargs :stobjs (parstate))) (declare (xargs :guard (parstatep parstate))) (let ((__function__ 'lex-lexeme)) (declare (ignorable __function__)) (b* (((reterr) nil (irr-span) parstate) ((erp char first-pos parstate) (read-char parstate)) ((unless char) (retok nil (make-span :start first-pos :end first-pos) parstate))) (cond ((or (= char 32) (and (<= 9 char) (<= char 12))) (retok (lexeme-whitespace) (make-span :start first-pos :end first-pos) parstate)) ((= char (char-code #\u)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-ident (ident "u"))) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\')) (lex-character-constant (cprefix-locase-u) first-pos parstate)) ((= char2 (char-code #\")) (lex-stringlit (eprefix-locase-u) first-pos parstate)) ((= char2 (char-code #\8)) (b* (((erp char3 & parstate) (read-char parstate))) (cond ((not char3) (retok (lexeme-token (token-ident (ident "u8"))) (make-span :start first-pos :end pos2) parstate)) ((= char3 (char-code #\")) (lex-stringlit (eprefix-locase-u8) first-pos parstate)) (t (b* ((parstate (unread-char parstate)) (parstate (unread-char parstate))) (lex-identifier/keyword char first-pos parstate)))))) (t (b* ((parstate (unread-char parstate))) (lex-identifier/keyword char first-pos parstate)))))) ((= char (char-code #\U)) (b* (((erp char2 & parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-ident (ident "U"))) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\')) (lex-character-constant (cprefix-upcase-u) first-pos parstate)) ((= char2 (char-code #\")) (lex-stringlit (eprefix-upcase-u) first-pos parstate)) (t (b* ((parstate (unread-char parstate))) (lex-identifier/keyword char first-pos parstate)))))) ((= char (char-code #\L)) (b* (((erp char2 & parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-ident (ident "L"))) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\')) (lex-character-constant (cprefix-upcase-l) first-pos parstate)) ((= char2 (char-code #\")) (lex-stringlit (eprefix-upcase-l) first-pos parstate)) (t (b* ((parstate (unread-char parstate))) (lex-identifier/keyword char first-pos parstate)))))) ((or (and (<= (char-code #\A) char) (<= char (char-code #\Z))) (and (<= (char-code #\a) char) (<= char (char-code #\z))) (= char (char-code #\_))) (lex-identifier/keyword char first-pos parstate)) ((and (<= (char-code #\0) char) (<= char (char-code #\9))) (b* (((erp const last-pos parstate) (lex-iconst/fconst (code-char char) first-pos parstate))) (retok (lexeme-token (token-const const)) (make-span :start first-pos :end last-pos) parstate))) ((= char (char-code #\.)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator ".")) (make-span :start first-pos :end first-pos) parstate)) ((and (<= (char-code #\0) char2) (<= char2 (char-code #\9))) (b* (((erp const last-pos parstate) (lex-dec-fconst (code-char char2) pos2 parstate))) (retok (lexeme-token (token-const const)) (make-span :start first-pos :end last-pos) parstate))) ((= char2 (char-code #\.)) (b* (((erp char3 pos3 parstate) (read-char parstate))) (cond ((not char3) (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator ".")) (make-span :start first-pos :end first-pos) parstate))) ((= char3 (char-code #\.)) (retok (lexeme-token (token-punctuator "...")) (make-span :start first-pos :end pos3) parstate)) (t (b* ((parstate (unread-char parstate)) (parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator ".")) (make-span :start first-pos :end first-pos) parstate)))))) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator ".")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\')) (lex-character-constant nil first-pos parstate)) ((= char (char-code #\")) (lex-stringlit nil first-pos parstate)) ((= char (char-code #\/)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "/")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\*)) (lex-block-comment first-pos parstate)) ((= char2 (char-code #\/)) (lex-line-comment first-pos parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "/=")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "/")) (make-span :start first-pos :end first-pos) parstate)))))) ((or (= char (char-code #\[)) (= char (char-code #\])) (= char (char-code #\()) (= char (char-code #\))) (= char (char-code #\{)) (= char (char-code #\})) (= char (char-code #\~)) (= char (char-code #\?)) (= char (char-code #\,)) (= char (char-code #\;))) (retok (lexeme-token (token-punctuator (acl2::implode (list (code-char char))))) (make-span :start first-pos :end first-pos) parstate)) ((= char (char-code #\*)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "*")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "*=")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "*")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\^)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "^")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "^=")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "^")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\!)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "!")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "!=")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "!")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\=)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "=")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "==")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "=")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\:)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator ":")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\>)) (retok (lexeme-token (token-punctuator ":>")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator ":")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\#)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "#")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\#)) (retok (lexeme-token (token-punctuator "##")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "#")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\&)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "&")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\&)) (retok (lexeme-token (token-punctuator "&&")) (make-span :start first-pos :end pos2) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "&=")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "&")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\|)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "|")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\|)) (retok (lexeme-token (token-punctuator "||")) (make-span :start first-pos :end pos2) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "|=")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "|")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\+)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "+")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\+)) (retok (lexeme-token (token-punctuator "++")) (make-span :start first-pos :end pos2) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "+=")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "+")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\-)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "-")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\>)) (retok (lexeme-token (token-punctuator "->")) (make-span :start first-pos :end pos2) parstate)) ((= char2 (char-code #\-)) (retok (lexeme-token (token-punctuator "--")) (make-span :start first-pos :end pos2) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "-=")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "-")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\>)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator ">")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\>)) (b* (((erp char3 pos3 parstate) (read-char parstate))) (cond ((not char3) (retok (lexeme-token (token-punctuator ">>")) (make-span :start first-pos :end pos2) parstate)) ((= char3 (char-code #\=)) (retok (lexeme-token (token-punctuator ">>=")) (make-span :start first-pos :end pos3) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator ">>")) (make-span :start first-pos :end pos2) parstate)))))) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator ">=")) (make-span :start first-pos :end first-pos) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator ">")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\%)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "%")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "%=")) (make-span :start first-pos :end pos2) parstate)) ((= char2 (char-code #\:)) (b* (((erp char3 & parstate) (read-char parstate))) (cond ((not char3) (retok (lexeme-token (token-punctuator "%:")) (make-span :start first-pos :end pos2) parstate)) ((= char3 (char-code #\%)) (b* (((erp char4 pos4 parstate) (read-char parstate))) (cond ((not char4) (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "%:")) (make-span :start first-pos :end pos2) parstate))) ((= char4 (char-code #\:)) (retok (lexeme-token (token-punctuator "%:%:")) (make-span :start first-pos :end pos4) parstate)) (t (b* ((parstate (unread-char parstate)) (parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "%:")) (make-span :start first-pos :end pos2) parstate)))))) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "%:")) (make-span :start first-pos :end pos2) parstate)))))) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "%")) (make-span :start first-pos :end first-pos) parstate)))))) ((= char (char-code #\<)) (b* (((erp char2 pos2 parstate) (read-char parstate))) (cond ((not char2) (retok (lexeme-token (token-punctuator "<")) (make-span :start first-pos :end first-pos) parstate)) ((= char2 (char-code #\<)) (b* (((erp char3 pos3 parstate) (read-char parstate))) (cond ((not char3) (retok (lexeme-token (token-punctuator "<<")) (make-span :start first-pos :end pos2) parstate)) ((= char3 (char-code #\=)) (retok (lexeme-token (token-punctuator "<<=")) (make-span :start first-pos :end pos3) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "<<")) (make-span :start first-pos :end pos2) parstate)))))) ((= char2 (char-code #\=)) (retok (lexeme-token (token-punctuator "<=")) (make-span :start first-pos :end pos2) parstate)) ((= char2 (char-code #\:)) (retok (lexeme-token (token-punctuator "<:")) (make-span :start first-pos :end pos2) parstate)) ((= char2 (char-code #\%)) (retok (lexeme-token (token-punctuator "<%")) (make-span :start first-pos :end pos2) parstate)) (t (b* ((parstate (unread-char parstate))) (retok (lexeme-token (token-punctuator "<")) (make-span :start first-pos :end first-pos) parstate)))))) (t (reterr-msg :where (position-to-msg first-pos) :expected "a white-space character ~ (space, ~ new-line, ~ horizontal tab, ~ vertical tab, ~ form feed) ~ or a letter ~ or a digit ~ or an underscore ~ or a round parenthesis ~ or a square bracket ~ or a curly brace ~ or an angle bracket ~ or a dot ~ or a comma ~ or a colon ~ or a semicolon ~ or a plus ~ or a minus ~ or a star ~ or a slash ~ or a percent ~ or a tilde ~ or an equal sign ~ or an exclamation mark ~ or a question mark ~ or a vertical bar ~ or a caret ~ or hash" :found (char-to-msg char)))))))
Theorem:
(defthm lexeme-optionp-of-lex-lexeme.lexeme? (b* (((mv acl2::?erp ?lexeme? ?span ?new-parstate) (lex-lexeme parstate))) (lexeme-optionp lexeme?)) :rule-classes :rewrite)
Theorem:
(defthm spanp-of-lex-lexeme.span (b* (((mv acl2::?erp ?lexeme? ?span ?new-parstate) (lex-lexeme parstate))) (spanp span)) :rule-classes :rewrite)
Theorem:
(defthm parstatep-of-lex-lexeme.new-parstate (implies (parstatep parstate) (b* (((mv acl2::?erp ?lexeme? ?span ?new-parstate) (lex-lexeme parstate))) (parstatep new-parstate))) :rule-classes :rewrite)
Theorem:
(defthm parsize-of-lex-lexeme-uncond (b* (((mv acl2::?erp ?lexeme? ?span ?new-parstate) (lex-lexeme parstate))) (<= (parsize new-parstate) (parsize parstate))) :rule-classes :linear)
Theorem:
(defthm parsize-of-lex-lexeme-cond (b* (((mv acl2::?erp ?lexeme? ?span ?new-parstate) (lex-lexeme parstate))) (implies (and (not erp) lexeme?) (<= (parsize new-parstate) (1- (parsize parstate))))) :rule-classes :linear)