Lex an identifier or keyword.
(lex-identifier/keyword first-char first-pos parstate) → (mv erp lexeme span new-parstate)
This is called after the first character of the identifier or keyword has been already read; that character is passed to this function. The position of that character is also passed as input.
Since grammatically keywords are identifiers,
we just lex grammatical identifiers,
but return a keyword lexeme if the grammatical identifier
matches a keyword.
If GCC extensions are supported,
we check the grammatical identifier
against some additional keywords;
see the ABNF grammar rule for
Given that the first character (a letter or underscore)
has already been read,
it remains to read zero or more
letters, digits, and underscores.
These are read in a loop,
which stops when no letter, digit, or underscore is read.
The stopping happens if there is no next character (i.e. end of file),
or the next character is something else;
in the latter case, the character is unread,
because it could be part of the next lexeme.
If successful, the loop returns a list of characters (natural numbers),
which the caller combines with the first character to form a string.
This is an ASCII string by construction,
so the characters all satisfy
Function:
(defun lex-identifier/keyword-loop (pos-so-far parstate) (declare (xargs :stobjs (parstate))) (declare (xargs :guard (and (positionp pos-so-far) (parstatep parstate)))) (let ((__function__ 'lex-identifier/keyword-loop)) (declare (ignorable __function__)) (b* (((reterr) nil (irr-position) parstate) ((erp char pos parstate) (read-char parstate)) ((when (not char)) (retok nil (position-fix pos-so-far) parstate)) ((unless (or (and (<= (char-code #\A) char) (<= char (char-code #\Z))) (and (<= (char-code #\a) char) (<= char (char-code #\z))) (and (<= (char-code #\0) char) (<= char (char-code #\9))) (= char (char-code #\_)))) (b* ((parstate (unread-char parstate))) (retok nil (position-fix pos-so-far) parstate))) ((erp chars last-pos parstate) (lex-identifier/keyword-loop pos parstate))) (retok (cons char chars) last-pos parstate))))
Theorem:
(defthm return-type-of-lex-identifier/keyword-loop.chars (b* (((mv acl2::?erp ?chars ?last-pos ?new-parstate) (lex-identifier/keyword-loop pos-so-far parstate))) (unsigned-byte-listp 8 chars)) :rule-classes :rewrite)
Theorem:
(defthm positionp-of-lex-identifier/keyword-loop.last-pos (b* (((mv acl2::?erp ?chars ?last-pos ?new-parstate) (lex-identifier/keyword-loop pos-so-far parstate))) (positionp last-pos)) :rule-classes :rewrite)
Theorem:
(defthm parstatep-of-lex-identifier/keyword-loop.new-parstate (implies (parstatep parstate) (b* (((mv acl2::?erp ?chars ?last-pos ?new-parstate) (lex-identifier/keyword-loop pos-so-far parstate))) (parstatep new-parstate))) :rule-classes :rewrite)
Theorem:
(defthm parsize-of-lex-identifier/keyword-loop-<= (b* (((mv acl2::?erp ?chars ?last-pos ?new-parstate) (lex-identifier/keyword-loop pos-so-far parstate))) (<= (parsize new-parstate) (parsize parstate))) :rule-classes :linear)
Function:
(defun lex-identifier/keyword (first-char first-pos parstate) (declare (xargs :stobjs (parstate))) (declare (xargs :guard (and (unsigned-byte-p 8 first-char) (positionp first-pos) (parstatep parstate)))) (declare (xargs :guard (or (and (<= (char-code #\A) first-char) (<= first-char (char-code #\Z))) (and (<= (char-code #\a) first-char) (<= first-char (char-code #\z))) (= first-char (char-code #\_))))) (let ((__function__ 'lex-identifier/keyword)) (declare (ignorable __function__)) (b* (((reterr) (irr-lexeme) (irr-span) parstate) ((erp rest-chars last-pos parstate) (lex-identifier/keyword-loop first-pos parstate)) (span (make-span :start first-pos :end last-pos)) (chars (cons first-char rest-chars)) (string (nats=>string chars))) (if (or (member-equal string '("auto" "break" "case" "char" "const" "continue" "default" "do" "double" "else" "enum" "extern" "float" "for" "goto" "if" "inline" "int" "long" "register" "restrict" "return" "short" "signed" "sizeof" "static" "struct" "switch" "typedef" "union" "unsigned" "void" "volatile" "while" "_Alignas" "_Alignof" "_Atomic" "_Bool" "_Complex" "_Generic" "_Imaginary" "_Noreturn" "_Static_assert" "_Thread_local")) (and (parstate->gcc parstate) (member-equal string '("__alignof" "__alignof__" "asm" "__asm" "__asm__" "__attribute" "__attribute__" "__builtin_offsetof" "__builtin_types_compatible_p" "__builtin_va_list" "__extension__" "_Float128" "__inline" "__inline__" "__int128" "__restrict" "__restrict__" "__signed" "__signed__" "typeof" "__typeof" "__typeof__" "__volatile" "__volatile__")))) (retok (lexeme-token (token-keyword string)) span parstate) (retok (lexeme-token (token-ident (ident string))) span parstate)))))
Theorem:
(defthm lexemep-of-lex-identifier/keyword.lexeme (b* (((mv acl2::?erp ?lexeme ?span ?new-parstate) (lex-identifier/keyword first-char first-pos parstate))) (lexemep lexeme)) :rule-classes :rewrite)
Theorem:
(defthm spanp-of-lex-identifier/keyword.span (b* (((mv acl2::?erp ?lexeme ?span ?new-parstate) (lex-identifier/keyword first-char first-pos parstate))) (spanp span)) :rule-classes :rewrite)
Theorem:
(defthm parstatep-of-lex-identifier/keyword.new-parstate (implies (parstatep parstate) (b* (((mv acl2::?erp ?lexeme ?span ?new-parstate) (lex-identifier/keyword first-char first-pos parstate))) (parstatep new-parstate))) :rule-classes :rewrite)
Theorem:
(defthm parsize-of-lex-identifier/keyword-uncond (b* (((mv acl2::?erp ?lexeme ?span ?new-parstate) (lex-identifier/keyword first-char first-pos parstate))) (<= (parsize new-parstate) (parsize parstate))) :rule-classes :linear)