headius · December 20, 2022 04:50
diff --git a/outline_escape_munging.diff b/outline_escape_munging.diff
 diff --git a/lib/parser/lexer.rl b/lib/parser/lexer.rl
 index e477b06..88efafc 100644
 --- a/lib/parser/lexer.rl
 +++ b/lib/parser/lexer.rl
 @@ -445,6 +445,60 @@ class Parser::Lexer
     # Ruby >= 32, regexp, exceptional case
     !literal.regexp?
   end
 +  
 +  # Appends the escape sequence currently being lexed to current_literal.
 +  # The character following the backslash is read from @source_buffer at
 +  # @escape_s; it selects whether tok, that bare character or @escape is
 +  # handed to current_literal.extend_string together with @ts and @te.
 +  def current_literal_munge_escape(current_literal)
 +    escaped_char = @source_buffer.slice(@escape_s).chr
 +  
 +    if current_literal.munge_escape? escaped_char
 +      # This literal uses the character as an opening or closing delimiter,
 +      # so it only escapes that character: write it without the backslash.
 +      if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
 +        # Regular expressions keep escaped delimiters in their escaped form
 +        # (the whole token, backslash included), except when the escaped
 +        # character is a closing delimiter but not a regexp metacharacter.
 +        #
 +        # The backslash itself cannot be a closing delimiter and an escape
 +        # symbol at the same time, but it is always munged, so for the
 +        # backslash this branch also runs in the non-closing-delimiter case.
 +        current_literal.extend_string(tok, @ts, @te)
 +      else
 +        current_literal.extend_string(escaped_char, @ts, @te)
 +      end
 +    else
 +      # munge_escape? returned false, so this is an actual escape sequence.
 +      if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
 +        # Squiggly heredocs like
 +        #   <<~-HERE
 +        #     1\
 +        #     2
 +        #   HERE
 +        # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
 +        # The sequence (\\\n) is emitted as is, unescaped; Lexer::Dedenter handles it manually later.
 +        current_literal.extend_string(tok, @ts, @te)
 +      elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
 +        # Heredocs, regexps and a few other types of literals support line
 +        # continuation via the \\\n sequence. Code like
 +        #   "a\
 +        #   b"
 +        # must be parsed as "ab", so the backslash-newline pair is dropped.
 +        current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
 +      elsif current_literal.regexp? && @version >= 31 && %w[c C m M].include?(escaped_char)
 +        # For @version >= 31 (Ruby >= 3.1) a regexp takes \c, \C, \m and \M escapes from @escape,
 +        # not from the raw token; noted as the only escapes regexps handle this way so far.
 +        current_literal.extend_string(@escape, @ts, @te)
 +      elsif current_literal.regexp?
 +        # Regular expressions keep escape sequences in their escaped form (tok),
 +        # but escaped newlines are removed (in cases like "\\C-\\\n\\M-x").
 +        current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
 +      else
 +        current_literal.extend_string(@escape || tok, @ts, @te)
 +      end
 +    end
 +  end
 
   # Mapping of strings to parser tokens.
 
 @@ -911,57 +965,7 @@ class Parser::Lexer
   action extend_string_escaped {
     current_literal = literal
 -    # Get the first character after the backslash.
 -    escaped_char = @source_buffer.slice(@escape_s).chr
 -
 -    if current_literal.munge_escape? escaped_char
 -      # If this particular literal uses this character as an opening
 -      # or closing delimiter, it is an escape sequence for that
 -      # particular character. Write it without the backslash.
 -
 -      if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
 -        # Regular expressions should include escaped delimiters in their
 -        # escaped form, except when the escaped character is
 -        # a closing delimiter but not a regexp metacharacter.
 -        #
 -        # The backslash itself cannot be used as a closing delimiter
 -        # at the same time as an escape symbol, but it is always munged,
 -        # so this branch also executes for the non-closing-delimiter case
 -        # for the backslash.
 -        current_literal.extend_string(tok, @ts, @te)
 -      else
 -        current_literal.extend_string(escaped_char, @ts, @te)
 -      end
 -    else
 -      # It does not. So this is an actual escape sequence, yay!
 -      if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
 -        # Squiggly heredocs like
 -        #   <<~-HERE
 -        #     1\
 -        #     2
 -        #   HERE
 -        # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
 -        # This information is emitted as is, without escaping,
 -        # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
 -        current_literal.extend_string(tok, @ts, @te)
 -      elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
 -        # Heredocs, regexp and a few other types of literals support line
 -        # continuation via \\\n sequence. The code like
 -        #   "a\
 -        #   b"
 -        # must be parsed as "ab"
 -        current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
 -      elsif current_literal.regexp? && @version >= 31 && %w[c C m M].include?(escaped_char)
 -        # Ruby >= 3.1 escapes \c- and \m chars, that's the only escape sequence
 -        # supported by regexes so far, so it needs a separate branch.
 -        current_literal.extend_string(@escape, @ts, @te)
 -      elsif current_literal.regexp?
 -        # Regular expressions should include escape sequences in their
 -        # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
 -        current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
 -      else
 -        current_literal.extend_string(@escape || tok, @ts, @te)
 -      end
 -    end
 +    # Escape handling lives in the current_literal_munge_escape method; this action only delegates to it.
 +    current_literal_munge_escape(current_literal)
   }
 
   # Extend a string with a newline or a EOF character.
	diff --git a/lib/parser/lexer.rl b/lib/parser/lexer.rl
	index e477b06..88efafc 100644
	--- a/lib/parser/lexer.rl
	+++ b/lib/parser/lexer.rl
	@@ -445,6 +445,60 @@ class Parser::Lexer
	# Ruby >= 32, regexp, exceptional case
	!literal.regexp?
	end
	+
	+ def current_literal_munge_escape(current_literal)
	+ escaped_char = @source_buffer.slice(@escape_s).chr
	+
	+ if current_literal.munge_escape? escaped_char
	+ # If this particular literal uses this character as an opening
	+ # or closing delimiter, it is an escape sequence for that
	+ # particular character. Write it without the backslash.
	+
	+ if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
	+ # Regular expressions should include escaped delimiters in their
	+ # escaped form, except when the escaped character is
	+ # a closing delimiter but not a regexp metacharacter.
	+ #
	+ # The backslash itself cannot be used as a closing delimiter
	+ # at the same time as an escape symbol, but it is always munged,
	+ # so this branch also executes for the non-closing-delimiter case
	+ # for the backslash.
	+ current_literal.extend_string(tok, @ts, @te)
	+ else
	+ current_literal.extend_string(escaped_char, @ts, @te)
	+ end
	+ else
	+ # It does not. So this is an actual escape sequence, yay!
	+ if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
	+ # Squiggly heredocs like
	+ # <<~-HERE
	+ # 1\
	+ # 2
	+ # HERE
	+ # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
	+ # This information is emitted as is, without escaping,
	+ # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
	+ current_literal.extend_string(tok, @ts, @te)
	+ elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
	+ # Heredocs, regexp and a few other types of literals support line
	+ # continuation via \\\n sequence. The code like
	+ # "a\
	+ # b"
	+ # must be parsed as "ab"
	+ current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
	+ elsif current_literal.regexp? && @version >= 31 && %w[c C m M].include?(escaped_char)
	+ # Ruby >= 3.1 escapes \c- and \m chars, that's the only escape sequence
	+ # supported by regexes so far, so it needs a separate branch.
	+ current_literal.extend_string(@escape, @ts, @te)
	+ elsif current_literal.regexp?
	+ # Regular expressions should include escape sequences in their
	+ # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
	+ current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
	+ else
	+ current_literal.extend_string(@escape \|\| tok, @ts, @te)
	+ end
	+ end
	+ end

	# Mapping of strings to parser tokens.

	@@ -911,57 +965,7 @@ class Parser::Lexer
	action extend_string_escaped {
	current_literal = literal
	- # Get the first character after the backslash.
	- escaped_char = @source_buffer.slice(@escape_s).chr
	-
	- if current_literal.munge_escape? escaped_char
	- # If this particular literal uses this character as an opening
	- # or closing delimiter, it is an escape sequence for that
	- # particular character. Write it without the backslash.
	-
	- if current_literal.regexp? && REGEXP_META_CHARACTERS.match(escaped_char)
	- # Regular expressions should include escaped delimiters in their
	- # escaped form, except when the escaped character is
	- # a closing delimiter but not a regexp metacharacter.
	- #
	- # The backslash itself cannot be used as a closing delimiter
	- # at the same time as an escape symbol, but it is always munged,
	- # so this branch also executes for the non-closing-delimiter case
	- # for the backslash.
	- current_literal.extend_string(tok, @ts, @te)
	- else
	- current_literal.extend_string(escaped_char, @ts, @te)
	- end
	- else
	- # It does not. So this is an actual escape sequence, yay!
	- if current_literal.squiggly_heredoc? && escaped_char == "\n".freeze
	- # Squiggly heredocs like
	- # <<~-HERE
	- # 1\
	- # 2
	- # HERE
	- # treat '\' as a line continuation, but still dedent the body, so the heredoc above becomes "12\n".
	- # This information is emitted as is, without escaping,
	- # later this escape sequence (\\\n) gets handled manually in the Lexer::Dedenter
	- current_literal.extend_string(tok, @ts, @te)
	- elsif current_literal.supports_line_continuation_via_slash? && escaped_char == "\n".freeze
	- # Heredocs, regexp and a few other types of literals support line
	- # continuation via \\\n sequence. The code like
	- # "a\
	- # b"
	- # must be parsed as "ab"
	- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
	- elsif current_literal.regexp? && @version >= 31 && %w[c C m M].include?(escaped_char)
	- # Ruby >= 3.1 escapes \c- and \m chars, that's the only escape sequence
	- # supported by regexes so far, so it needs a separate branch.
	- current_literal.extend_string(@escape, @ts, @te)
	- elsif current_literal.regexp?
	- # Regular expressions should include escape sequences in their
	- # escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
	- current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
	- else
	- current_literal.extend_string(@escape || tok, @ts, @te)
	- end
	- end
	+ # Escape handling lives in the current_literal_munge_escape method; this action only delegates to it.
	+ current_literal_munge_escape(current_literal)
	}

	# Extend a string with a newline or a EOF character.