Created
September 6, 2021 15:40
-
-
Save makotom/ed1c2fb4c0aef6268ab2665d9aa5cd89 to your computer and use it in GitHub Desktop.
Demo: Cygwin "eats up" backslashes unexpectedly under certain circumstances
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <ctype.h> | |
#include <limits.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <wchar.h> | |
/* Constants and types mirrored from Cygwin's glob.cc so that the demo can
   reuse the upstream code with as few changes as possible. */

// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L112
/* Size in bytes of the buffer that receives the processed pattern. */
#define MAXPATHLEN 8192
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L112
/* String terminator and the escape ("quote") character recognised by glob. */
#define EOS '\0'
#define QUOTE '\\'
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L135
/* Flag OR-ed onto a character that was preceded by QUOTE.
   NOTE: the value is wider than the `Char` type defined below, so in this
   demo the flag is lost as soon as the result is stored in a `Char`. */
#define M_PROTECT 0x4000000000ULL
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L148
/* Element type of the processed pattern buffer (plain `char` in this demo). */
typedef char Char;
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/dcrt0.cc#L147-L152
/* Returns 1 when `c` is a double or single quotation mark, 0 otherwise. */
static inline int isquote(char c)
{
    return c == '"' || c == '\'';
}
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L187-L254 | |
// The code is hugely modified based on the calling signature of https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/dcrt0.cc#L265 | |
char *glob_partial(char *pattern) | |
{ | |
const char *patnext; | |
Char *bufnext, *bufend, *patbuf = malloc(MAXPATHLEN), prot; | |
mbstate_t mbs; | |
wchar_t wc; | |
size_t clen; | |
patnext = pattern; | |
bufnext = patbuf; | |
bufend = bufnext + MAXPATHLEN - 1; | |
memset(&mbs, 0, sizeof(mbs)); | |
while (bufend - bufnext >= MB_CUR_MAX) | |
{ | |
if (*patnext == QUOTE) | |
{ | |
if (*++patnext == EOS) | |
{ | |
*bufnext++ = QUOTE | M_PROTECT; | |
continue; | |
} | |
prot = M_PROTECT; | |
} | |
else | |
prot = 0; | |
clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs); | |
if (clen == (size_t)-1 || clen == (size_t)-2) | |
return malloc(0); | |
else if (clen == 0) | |
break; | |
*bufnext++ = wc | prot; | |
patnext += clen; | |
} | |
*bufnext = EOS; | |
return patbuf; | |
} | |
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/dcrt0.cc#L205-L288
// The code is heavily modified to highlight the behaviour this test is interested in
/*
 * Reproduces the rewriting that one command-line word goes through and prints
 * each stage to stdout:
 *   1. the original word,
 *   2. the intermediate pattern, in which quotation marks are removed and
 *      every character that was inside quotes is prefixed with a backslash,
 *   3. the result of passing that pattern to glob_partial().
 *
 * `word` is only read.  A NULL `word` is ignored.  If the working buffer
 * cannot be allocated, an error is reported on stderr and nothing else is
 * printed after the original input.
 */
void globify_test(char *word)
{
    /* Always 0 in this demo; kept so the code stays comparable to upstream. */
    const int dos_spec = 0;

    if (word == NULL)
        return;

    printf("original input: %s\n", word);

    /* We'll need more space if there are quoting characters in
       word.  If that is the case, doubling the size of the
       string should provide more than enough space. */
    const size_t len = strlen(word);
    const size_t extra = strpbrk(word, "'\"") != NULL ? len : 0;
    char *pattern = malloc(len + (size_t)(dos_spec + 1) * extra + 1);
    if (pattern == NULL)
    {
        perror("malloc");
        return;
    }

    const char *s = word;
    char *p = pattern;
    while (*s != '\0')
    {
        if (!isquote(*s))
        {
            /* Outside quotes: copy the byte unchanged. */
            if (dos_spec && *s == '\\')
                *p++ = '\\';
            *p++ = *s++;
            continue;
        }

        /* Inside quotes: emit every character with a leading backslash. */
        const char quote = *s;
        while (*++s != '\0' && *s != quote)
        {
            if (!dos_spec && *s == '\\' && (s[1] == quote || s[1] == '\\'))
            {
                // From makotom:
                // This is where the problem happens.
                // If there are two consecutive `\`s, then the code omits the first one.
                // In this way `\\` is effectively reduced into `\`.
                // This is not an expected behaviour, especially when the word is not a file path.
                s++;
            }
            *p++ = '\\';

            /* Copy one whole character.  Bytes above 0x7f may start a
               multibyte sequence; if mbtowc() cannot decode it, fall back to
               copying a single byte. */
            size_t cnt = 1;
            if ((unsigned char)*s > 0x7f)
            {
                const int mb_len = mbtowc(NULL, s, MB_CUR_MAX);
                if (mb_len > 1)
                    cnt = (size_t)mb_len;
            }
            memcpy(p, s, cnt);
            p += cnt;
            s += cnt - 1; /* the loop's ++s steps past the final byte */
        }

        if (*s == '\0')
            break; /* unterminated quote: stop at the end of the word */
        s++;       /* skip the closing quotation mark */
    }
    *p = '\0';

    printf("intermediate pattern: %s\n", pattern);

    char *globbed = glob_partial(pattern);
    printf("globbed string: %s\n", globbed != NULL ? globbed : "(null)");

    free(globbed);
    free(pattern);
}
/* Runs the demo on one fixed argument and prints each processing stage. */
int main(void)
{
    // Assume that we are calling `bash -c "echo ' aaa\\\\bbb '"` against Cygwin-based Bash
    // - it is expected to yield a text reading ` aaa\\bbb `, but in fact it does not.
    // We examine only argv[2] here to illustrate the cause of this unexpected behaviour.
    /* A writable array is used because globify_test() takes a non-const
       `char *`; a string literal must never be written through. */
    char word[] = "\"echo ' aaa\\\\bbb '\"";
    globify_test(word);
    return 0;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment