Skip to content

Instantly share code, notes, and snippets.

@McMartin
Last active May 15, 2018 21:26
Show Gist options
  • Save McMartin/98ef33846467d6a155b18713da8c9e9c to your computer and use it in GitHub Desktop.
Save McMartin/98ef33846467d6a155b18713da8c9e9c to your computer and use it in GitHub Desktop.
A CMake tokenizer written in CMake
cmake_policy(VERSION ${CMAKE_VERSION})
function(tokenize filename out_namespace)
set(tokens_count 0)
set(line 1)
set(column 1)
macro(emit_token_and_advance type)
#[[ For development
if(NOT DEFINED text)
message(FATAL_ERROR "(${line},${column}) Missing text variable.")
endif()
string(LENGTH "${text}" text_length)
if(text_length LESS 0)
message(FATAL_ERROR "No text?")
endif()
print_token("${type}" "${text}" "${line}" "${column}")
#]]
math(EXPR tokens_count "${tokens_count} + 1")
set(${out_namespace}_${tokens_count}_text ${text} PARENT_SCOPE)
set(${out_namespace}_${tokens_count}_type ${type} PARENT_SCOPE)
set(${out_namespace}_${tokens_count}_line ${line} PARENT_SCOPE)
set(${out_namespace}_${tokens_count}_column ${column} PARENT_SCOPE)
string(LENGTH "${text}" length)
string(FIND "${text}" "\n" last_newline_index REVERSE)
if(last_newline_index EQUAL -1)
math(EXPR column "${column} + ${length}")
else()
math(EXPR column "${length} - ${last_newline_index}")
set(newline_count 0)
set(text_remainder "${text}")
while(NOT last_newline_index EQUAL -1)
math(EXPR newline_count "${newline_count} + 1")
string(SUBSTRING "${text_remainder}" 0 ${last_newline_index} text_remainder)
string(FIND "${text_remainder}" "\n" last_newline_index REVERSE)
endwhile()
math(EXPR line "${line} + ${newline_count}")
endif()
string(SUBSTRING "${content}" ${length} -1 content)
#[[ For development
unset(text)
#]]
endmacro()
macro(consume_space)
set(text "${CMAKE_MATCH_0}")
emit_token_and_advance(Token_Space)
endmacro()
macro(consume_identifier)
# If "${CMAKE_MATCH_0}" expands to "PARENT_SCOPE", set(text "PARENT_SCOPE") would
# unset "text" in the parent scope, instead of setting "text" in this scope.
string(CONCAT text "${CMAKE_MATCH_0}")
emit_token_and_advance(Token_Identifier)
endmacro()
macro(consume_lparen)
set(text "(")
emit_token_and_advance(Token_LeftParen)
endmacro()
macro(consume_rparen)
if(NOT content MATCHES "^\\)")
message(FATAL_ERROR "(${line},${column}) Expected ')'.")
endif()
set(text ")")
emit_token_and_advance(Token_RightParen)
endmacro()
macro(consume_bracket)
set(bracket_open "${CMAKE_MATCH_0}")
string(LENGTH "${bracket_open}" bracket_open_len)
string(SUBSTRING "${content}" ${bracket_open_len} -1 bracket_remainder)
string(REGEX REPLACE "[^=]" "" equal_signs "${bracket_open}")
set(bracket_close "]${equal_signs}]")
string(FIND "${bracket_remainder}" "${bracket_close}" bracket_close_pos)
if(bracket_close_pos EQUAL -1)
message(FATAL_ERROR "(${line},${column}) Unterminated bracket.")
endif()
string(SUBSTRING "${bracket_remainder}" 0 ${bracket_close_pos} bracket_content)
set(text "${bracket_open}${bracket_content}${bracket_close}")
if(text MATCHES "^#")
emit_token_and_advance(Token_BracketComment)
else()
emit_token_and_advance(Token_BracketArgument)
endif()
endmacro()
macro(consume_quoted_argument)
if(NOT content MATCHES "^\"([\\].|[^\"\\])*\"")
message(FATAL_ERROR "(${line},${column}) Expected quoted argument.")
endif()
set(text "${CMAKE_MATCH_0}")
emit_token_and_advance(Token_QuotedArgument)
endmacro()
macro(consume_unquoted_argument)
# If "${CMAKE_MATCH_0}" expands to "PARENT_SCOPE", set(text "PARENT_SCOPE") would
# unset "text" in the parent scope, instead of setting "text" in this scope.
string(CONCAT text "${CMAKE_MATCH_0}")
emit_token_and_advance(Token_UnquotedArgument)
endmacro()
macro(consume_line_comment)
set(text "${CMAKE_MATCH_0}")
emit_token_and_advance(Token_LineComment)
endmacro()
macro(consume_newline)
set(text "\n")
emit_token_and_advance(Token_Newline)
endmacro()
macro(consume_arguments)
while(1)
if(content MATCHES "^\\(")
consume_lparen()
consume_arguments()
consume_rparen()
elseif(content MATCHES "^[ \t]+")
consume_space()
elseif(content MATCHES "^\n")
consume_newline()
elseif(content MATCHES "^#")
if(content MATCHES "^#\\[=*\\[")
consume_bracket()
else()
if(NOT content MATCHES "^#[^\n]*")
message(FATAL_ERROR "(${line},${column}) Expected line comment.")
endif()
consume_line_comment()
endif()
elseif(content MATCHES "^\\[=*\\[")
consume_bracket()
elseif(content MATCHES "^\"")
consume_quoted_argument()
elseif(content MATCHES "^(\\.|[^\n \t()#\"\\])+")
consume_unquoted_argument()
else()
break()
endif()
endwhile()
endmacro()
file(READ "${filename}" content)
while(NOT content STREQUAL "")
if(content MATCHES "^[ \t]+")
consume_space()
endif()
if(content MATCHES "^[A-Za-z_][A-Za-z0-9_]*")
consume_identifier()
if(content MATCHES "^[ \t]+")
consume_space()
endif()
if(NOT content MATCHES "^\\(")
message(FATAL_ERROR "(${line},${column}) Expected '('.")
endif()
consume_lparen()
consume_arguments()
consume_rparen()
endif()
while(1)
if(content MATCHES "^#\\[=*\\[")
consume_bracket()
elseif(content MATCHES "^[ \t]+")
consume_space()
else()
break()
endif()
endwhile()
if(content MATCHES "^#[^\n]*")
consume_line_comment()
endif()
if(NOT content STREQUAL "")
if(NOT content MATCHES "^\n")
message(FATAL_ERROR "(${line},${column}) Expected newline.")
endif()
consume_newline()
endif()
endwhile()
set(${out_namespace}_tokens_count ${tokens_count} PARENT_SCOPE)
endfunction()
function(print_token type text line column)
if(column LESS 10)
set(column "0${column}")
endif()
string(REGEX REPLACE "\n" "\\\\n" text "${text}")
string(LENGTH "${type}" type_len)
set(padded_type "${type}")
foreach(i RANGE ${type_len} 24)
string(APPEND padded_type " ")
endforeach()
message(STATUS "${line},${column}: ${padded_type}'${text}'")
endfunction()
function(print_tokens namespace)
foreach(i RANGE 1 ${${namespace}_tokens_count})
print_token(
"${${namespace}_${i}_type}"
"${${namespace}_${i}_text}"
"${${namespace}_${i}_line}"
"${${namespace}_${i}_column}"
)
endforeach()
endfunction()
function(main)
if(NOT DEFINED CMAKE_ARGV3)
message(FATAL_ERROR "usage: cmake -P tokenize.cmake filename")
endif()
set(filename "${CMAKE_ARGV3}")
tokenize("${filename}" my_tokens)
print_tokens(my_tokens)
endfunction()
if(CMAKE_SCRIPT_MODE_FILE STREQUAL CMAKE_CURRENT_LIST_FILE)
main()
endif()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment