Lex an identifier or keyword.
(lex-identifier/keyword first-char first-pos pstate) → (mv erp lexeme span new-pstate)
This is called after the first character of the identifier or keyword has been already read; that character is passed to this function. The position of that character is also passed as input.
Since grammatically keywords are identifiers,
we just lex grammatical identifiers,
but return a keyword lexeme if the grammatical identifier
matches a keyword.
If GCC extensions are supported,
we check the grammatical identifier
against some additional keywords;
see the ABNF grammar rule for
Given that the first character (a letter or underscore)
has already been read,
it remains to read zero or more
letters, digits, and underscores.
These are read in a loop,
which stops when no letter, digit, or underscore is read.
The stopping happens if there is no next character (i.e. end of file),
or the next character is something else;
in the latter case, the character is unread,
because it could be part of the next lexeme.
If successful, the loop returns a list of characters (natural numbers),
which the caller combines with the first character to form a string.
This is an ASCII string by construction,
so the characters all satisfy
Function:
(defun lex-identifier/keyword-loop (pos-so-far pstate) (declare (xargs :guard (and (positionp pos-so-far) (parstatep pstate)))) (let ((__function__ 'lex-identifier/keyword-loop)) (declare (ignorable __function__)) (b* (((reterr) nil (irr-position) (irr-parstate)) ((erp char pos pstate) (read-char pstate)) ((when (not char)) (retok nil (position-fix pos-so-far) pstate)) ((unless (or (and (<= (char-code #\A) char) (<= char (char-code #\Z))) (and (<= (char-code #\a) char) (<= char (char-code #\z))) (and (<= (char-code #\0) char) (<= char (char-code #\9))) (= char (char-code #\_)))) (b* ((pstate (unread-char pstate))) (retok nil (position-fix pos-so-far) pstate))) ((erp chars last-pos pstate) (lex-identifier/keyword-loop pos pstate))) (retok (cons char chars) last-pos pstate))))
Theorem:
(defthm return-type-of-lex-identifier/keyword-loop.chars (b* (((mv acl2::?erp ?chars ?last-pos ?new-pstate) (lex-identifier/keyword-loop pos-so-far pstate))) (unsigned-byte-listp 8 chars)) :rule-classes :rewrite)
Theorem:
(defthm positionp-of-lex-identifier/keyword-loop.last-pos (b* (((mv acl2::?erp ?chars ?last-pos ?new-pstate) (lex-identifier/keyword-loop pos-so-far pstate))) (positionp last-pos)) :rule-classes :rewrite)
Theorem:
(defthm parstatep-of-lex-identifier/keyword-loop.new-pstate (b* (((mv acl2::?erp ?chars ?last-pos ?new-pstate) (lex-identifier/keyword-loop pos-so-far pstate))) (parstatep new-pstate)) :rule-classes :rewrite)
Theorem:
(defthm parsize-of-lex-identifier/keyword-loop-<= (b* (((mv acl2::?erp ?chars ?last-pos ?new-pstate) (lex-identifier/keyword-loop pos-so-far pstate))) (<= (parsize new-pstate) (parsize pstate))) :rule-classes :linear)
Function:
(defun lex-identifier/keyword (first-char first-pos pstate) (declare (xargs :guard (and (unsigned-byte-p 8 first-char) (positionp first-pos) (parstatep pstate)))) (declare (xargs :guard (or (and (<= (char-code #\A) first-char) (<= first-char (char-code #\Z))) (and (<= (char-code #\a) first-char) (<= first-char (char-code #\z))) (= first-char (char-code #\_))))) (let ((__function__ 'lex-identifier/keyword)) (declare (ignorable __function__)) (b* (((reterr) (irr-lexeme) (irr-span) (irr-parstate)) ((erp rest-chars last-pos pstate) (lex-identifier/keyword-loop first-pos pstate)) (span (make-span :start first-pos :end last-pos)) (chars (cons first-char rest-chars)) (string (nats=>string chars))) (if (or (member-equal string '("auto" "break" "case" "char" "const" "continue" "default" "do" "double" "else" "enum" "extern" "float" "for" "goto" "if" "inline" "int" "long" "register" "restrict" "return" "short" "signed" "sizeof" "static" "struct" "switch" "typedef" "union" "unsigned" "void" "volatile" "while" "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex" "_Generic" "_Imaginary" "_Noreturn" "_Static_assert" "_Thread_local")) (and (parstate->gcc pstate) (member-equal string '("__alignof__" "asm" "__asm__" "__attribute__" "__extension__" "__inline" "__inline__" "__restrict" "__restrict__")))) (retok (lexeme-token (token-keyword string)) span pstate) (retok (lexeme-token (token-ident (ident string))) span pstate)))))
Theorem:
(defthm lexemep-of-lex-identifier/keyword.lexeme (b* (((mv acl2::?erp ?lexeme ?span ?new-pstate) (lex-identifier/keyword first-char first-pos pstate))) (lexemep lexeme)) :rule-classes :rewrite)
Theorem:
(defthm spanp-of-lex-identifier/keyword.span (b* (((mv acl2::?erp ?lexeme ?span ?new-pstate) (lex-identifier/keyword first-char first-pos pstate))) (spanp span)) :rule-classes :rewrite)
Theorem:
(defthm parstatep-of-lex-identifier/keyword.new-pstate (b* (((mv acl2::?erp ?lexeme ?span ?new-pstate) (lex-identifier/keyword first-char first-pos pstate))) (parstatep new-pstate)) :rule-classes :rewrite)
Theorem:
(defthm parsize-of-lex-identifier/keyword-uncond (b* (((mv acl2::?erp ?lexeme ?span ?new-pstate) (lex-identifier/keyword first-char first-pos pstate))) (<= (parsize new-pstate) (parsize pstate))) :rule-classes :linear)