Skip to content

Instantly share code, notes, and snippets.

@makotom
Created September 6, 2021 15:40
Show Gist options
  • Save makotom/ed1c2fb4c0aef6268ab2665d9aa5cd89 to your computer and use it in GitHub Desktop.
Save makotom/ed1c2fb4c0aef6268ab2665d9aa5cd89 to your computer and use it in GitHub Desktop.
Demo: Cygwin "eats up" backslashes unexpectedly under certain circumstances
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L112
// Size in bytes of the output buffer that glob_partial() allocates.
#define MAXPATHLEN 8192
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L112
// NOTE(review): this link is identical to the one given for MAXPATHLEN above; confirm the intended line anchor.
// EOS is the string terminator; QUOTE is the backslash that escapes the following pattern character.
#define EOS '\0'
#define QUOTE '\\'
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L135
// Flag bit OR-ed into a stored pattern character that was preceded by QUOTE.
// NOTE(review): this value needs more than 8 bits while Char below is plain char,
// so the flag presumably does not survive the store into the buffer - confirm this is intended for the demo.
#define M_PROTECT 0x4000000000ULL
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L148
// Element type of the buffer that glob_partial() builds and returns.
typedef char Char;
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/dcrt0.cc#L147-L152
/* Report whether `c` is a quoting character: returns 1 for a double quote
   or a single quote, and 0 for every other character. */
static inline int isquote(char c)
{
    switch (c)
    {
    case '"':
    case '\'':
        return 1;
    default:
        return 0;
    }
}
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/glob.cc#L187-L254
// The code is heavily modified; it follows the calling signature used at https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/dcrt0.cc#L265
char *glob_partial(char *pattern)
{
const char *patnext;
Char *bufnext, *bufend, *patbuf = malloc(MAXPATHLEN), prot;
mbstate_t mbs;
wchar_t wc;
size_t clen;
patnext = pattern;
bufnext = patbuf;
bufend = bufnext + MAXPATHLEN - 1;
memset(&mbs, 0, sizeof(mbs));
while (bufend - bufnext >= MB_CUR_MAX)
{
if (*patnext == QUOTE)
{
if (*++patnext == EOS)
{
*bufnext++ = QUOTE | M_PROTECT;
continue;
}
prot = M_PROTECT;
}
else
prot = 0;
clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
if (clen == (size_t)-1 || clen == (size_t)-2)
return malloc(0);
else if (clen == 0)
break;
*bufnext++ = wc | prot;
patnext += clen;
}
*bufnext = EOS;
return patbuf;
}
// https://github.com/cygwin/cygwin/blob/eeeb5650cf706f4dde72ce8b8598aef41f88718a/winsup/cygwin/dcrt0.cc#L205-L288
// The code is heavily modified to isolate the behaviour that this test program demonstrates
/**
 * Rewrite `word` into an intermediate glob pattern, pass that pattern to
 * glob_partial(), and print the original input, the intermediate pattern
 * and the globbed result on stdout.
 *
 * Outside quotes, characters are copied unchanged. Inside a single- or
 * double-quoted section the quote characters themselves are dropped and
 * every remaining character is emitted with a backslash in front of it.
 *
 * @param word  NUL-terminated input string; it is only read.
 */
void globify_test(char *word)
{
    size_t word_len = strlen(word);
    size_t n = 0;
    char *p, *s;
    int dos_spec = 0; /* always 0 here, so the dos_spec branches below are never taken */
    char *globbed;

    /* We'll need more space if there are quoting characters in
       word. If that is the case, doubling the size of the
       string should provide more than enough space. */
    if (strpbrk(word, "'\""))
        n = word_len;
    char pattern[word_len + ((dos_spec + 1) * n) + 1];

    printf("original input: %s\n", word);
    for (p = pattern, s = word; *s != '\000'; s++, p++)
    {
        if (!isquote(*s))
        {
            if (dos_spec && *s == '\\')
                *p++ = '\\';
            *p = *s;
        }
        else
        {
            char quote = *s;
            while (*++s && *s != quote)
            {
                if (dos_spec || *s != '\\')
                    /* nothing */;
                else if (s[1] == quote || s[1] == '\\')
                    // From makotom:
                    // This is where the problem happens.
                    // If there are two consecutive `\`s, then the code omits the first one.
                    // In this way `\\` is effectively reduced into `\`.
                    // This is not the expected behaviour, especially when the word is not a file path.
                    s++;
                /* Emitted for every character inside the quotes, whether or
                   not one was skipped above. */
                *p++ = '\\';
                size_t cnt = isascii(*s) ? 1 : mbtowc(NULL, s, MB_CUR_MAX);
                if (cnt <= 1 || cnt == (size_t)-1)
                    *p++ = *s;
                else
                {
                    /* Copy every byte of the multibyte character. */
                    --s;
                    while (cnt-- > 0)
                        *p++ = *++s;
                }
            }
            /* The closing quote writes nothing, so cancel the p++ of the
               enclosing for loop. */
            if (*s == quote)
                p--;
            /* Unterminated quote: stop before the for loop steps past the NUL. */
            if (*s == '\0')
                break;
        }
    }
    *p = '\0';
    printf("intermediate pattern: %s\n", pattern);

    /* glob_partial() returns a heap buffer: guard against NULL before
       printing and release the buffer afterwards. */
    globbed = glob_partial(pattern);
    printf("globbed string: %s\n", globbed != NULL ? globbed : "");
    free(globbed);
}
/* Entry point: feeds one fixed sample argument through globify_test(). */
int main(void)
{
    // Scenario: `bash -c "echo ' aaa\\\\bbb '"` is run against a Cygwin-based Bash.
    // The expected output is the text ` aaa\\bbb `, but that is not what is produced.
    // Only argv[2] is examined here, which is enough to show the cause of the unexpected behaviour.
    char *const sample_argv2 = "\"echo ' aaa\\\\bbb '\"";

    globify_test(sample_argv2);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment