Last active
July 17, 2020 01:02
-
-
Save fire-eggs/f9668bc1315f7a002d90032efec2f489 to your computer and use it in GitHub Desktop.
Unit test program: variations on fl_filename_isdir implementations for Windows
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 1.  Modify the const BASE to the drive letter you want to test on.
 1a. To build, the additional option "/utf-8" must be added to the compiler
     options. See project settings, compiler, "command line".
 2.  Execute the following commands in a console window at the root of said
     drive:
       chcp 65001
       mkdir isdirtest
       cd isdirtest
       mkdir testǼ
       mkdir test♥
       echo. 2> file♥.txt
       echo. 2> file.txt
       cd test♥
       echo. 2> file♥.txt
       echo. 2> file.txt
 3.  Run the program.
*/
#define _CRT_SECURE_NO_WARNINGS
#include <windows.h>
#include <fileapi.h>   // GetFileAttributes
#include <sys/stat.h>  // _stat, _wstat
#include <cctype>      // isalpha
#include <cstdio>      // printf
#include <cstdlib>     // realloc
#include <cstring>     // strlen, strcpy, memcpy
#include <algorithm>   // replace
#include <string>
#include <utility>     // pair
// Drive (letter plus colon, no trailing separator) that holds the
// "isdirtest" tree described in the instructions at the top of this file.
const std::string BASE = "E:";

// Size in bytes of the scratch path buffer used by isdirORIG().
#define FL_PATH_MAX 2048
/*
  Decode one UTF-8 sequence starting at p.

  p    - first byte of the sequence; must be readable.
  end  - one past the last readable byte, or NULL when the text is
         NUL-terminated and no explicit bound is wanted.
  len  - optional out-parameter; receives the number of bytes consumed.

  Returns the decoded code point. On a malformed, overlong, truncated or
  out-of-range (> U+10FFFF) sequence exactly one byte is consumed and the
  result is U+FFFD, or the raw byte when ERRORS_TO_ISO8859_1 is set.

  Optional build-time switches (all off unless defined to non-zero):
    ERRORS_TO_CP1252    - map stray bytes 0x80..0x9f through Windows-1252.
    ERRORS_TO_ISO8859_1 - return the offending byte instead of U+FFFD.
    STRICT_RFC3629      - also reject surrogates and xxFFFE / xxFFFF.
*/
unsigned fl_utf8decode(const char* p, const char* end, int* len)
{
  const unsigned char* s = reinterpret_cast<const unsigned char*>(p);
  const unsigned c = s[0];

  // Report `n` consumed bytes and hand back the decoded value.
  auto done = [len](int n, unsigned ucs) -> unsigned {
    if (len) *len = n;
    return ucs;
  };

  // Common error exit: consume a single byte.
  auto fail = [len, c]() -> unsigned {
    if (len) *len = 1;
#if ERRORS_TO_ISO8859_1
    return c;
#else
    (void)c;
    return 0xfffd; /* Unicode REPLACEMENT CHARACTER */
#endif
  };

  // True when byte i lies inside [p, end) and is a continuation byte (10xxxxxx).
  auto isCont = [s, p, end](int i) -> bool {
    if (end && (end - p) <= i) return false;
    return (s[i] & 0xc0) == 0x80;
  };

  if (c < 0x80) return done(1, c); // plain ASCII

#if ERRORS_TO_CP1252
  if (c < 0xa0) {
    // Windows-1252 meaning of the C1 range, kept local so that enabling the
    // switch does not depend on a table defined elsewhere.
    static const unsigned short cp1252[32] = {
      0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
      0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
      0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
      0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
    };
    return done(1, cp1252[c - 0x80]);
  }
#endif

  // 0x80..0xc1 are stray continuation bytes or overlong 2-byte leads;
  // 0xf5..0xff can never start a valid sequence.
  if (c < 0xc2 || c > 0xf4) return fail();

  // Every remaining lead byte needs at least one continuation byte.
  if (!isCont(1)) return fail();
  const unsigned b1 = s[1];

  if (c < 0xe0) // two-byte sequence
    return done(2, ((c & 0x1f) << 6) + (b1 & 0x3f));

  if (c < 0xf0) { // three-byte sequence
    if (c == 0xe0 && b1 < 0xa0) return fail(); // overlong encoding
#if STRICT_RFC3629
    /* RFC 3629 says surrogate chars are illegal. */
    if (c == 0xed && b1 >= 0xa0) return fail();
#endif
    if (!isCont(2)) return fail();
#if STRICT_RFC3629
    /* 0xfffe and 0xffff are also illegal characters. Checked only after the
       third byte is known to be readable. */
    if (c == 0xef && b1 == 0xbf && s[2] >= 0xbe) return fail();
#endif
    return done(3, ((c & 0x0f) << 12) + ((b1 & 0x3f) << 6) + (s[2] & 0x3f));
  }

  // four-byte sequence (lead byte 0xf0..0xf4)
  if (c == 0xf0 && b1 < 0x90) return fail(); // overlong encoding
  if (c == 0xf4 && b1 > 0x8f) return fail(); /* after 0x10ffff */
  if (!isCont(2) || !isCont(3)) return fail();
#if STRICT_RFC3629
  /* RFC 3629 says all codes ending in fffe or ffff are illegal: */
  if ((b1 & 0xf) == 0xf && s[2] == 0xbf && s[3] >= 0xbe) return fail();
#endif
  return done(4, ((c & 0x07) << 18) + ((b1 & 0x3f) << 12) +
                 ((s[2] & 0x3f) << 6) + (s[3] & 0x3f));
}
unsigned fl_utf8toUtf16(const char* src, unsigned srclen, | |
unsigned short* dst, unsigned dstlen) | |
{ | |
const char* p = src; | |
const char* e = src + srclen; | |
unsigned count = 0; | |
if (dstlen) for (;;) { | |
if (p >= e) { dst[count] = 0; return count; } | |
if (!(*p & 0x80)) { /* ascii */ | |
dst[count] = *p++; | |
} | |
else { | |
int len; unsigned ucs = fl_utf8decode(p, e, &len); | |
p += len; | |
if (ucs < 0x10000) { | |
dst[count] = ucs; | |
} | |
else { | |
/* make a surrogate pair: */ | |
if (count + 2 >= dstlen) { dst[count] = 0; count += 2; break; } | |
dst[count] = (((ucs - 0x10000u) >> 10) & 0x3ff) | 0xd800; | |
dst[++count] = (ucs & 0x3ff) | 0xdc00; | |
} | |
} | |
if (++count == dstlen) { dst[count - 1] = 0; break; } | |
} | |
/* we filled dst, measure the rest: */ | |
while (p < e) { | |
if (!(*p & 0x80)) p++; | |
else { | |
int len; unsigned ucs = fl_utf8decode(p, e, &len); | |
p += len; | |
if (ucs >= 0x10000) ++count; | |
} | |
++count; | |
} | |
return count; | |
} | |
static wchar_t* wbuf = NULL; | |
static wchar_t* utf8_to_wchar(const char* utf8, wchar_t*& wbuf, int lg = -1) { | |
unsigned len = (lg >= 0) ? (unsigned)lg : (unsigned)strlen(utf8); | |
unsigned wn = fl_utf8toUtf16(utf8, len, NULL, 0) + 1; // Query length | |
wbuf = (wchar_t*)realloc(wbuf, sizeof(wchar_t) * wn); | |
wn = fl_utf8toUtf16(utf8, len, (unsigned short*)wbuf, wn); // Convert string | |
wbuf[wn] = 0; | |
return wbuf; | |
} | |
int isdirGFAW(const char* n) | |
{ | |
utf8_to_wchar(n, wbuf); | |
DWORD res = GetFileAttributesW(wbuf); | |
return res != INVALID_FILE_ATTRIBUTES && (res & FILE_ATTRIBUTE_DIRECTORY); | |
} | |
// Returns 1 when c is a path separator ('/' or '\\'), otherwise 0.
inline int isdirsep(char c) { return c == '/' || c == '\\'; }
int isdirORIG(const char* n) | |
{ | |
struct _stat s; | |
char fn[FL_PATH_MAX]; | |
int length; | |
length = (int)strlen(n); | |
// This workaround brought to you by the fine folks at Microsoft! | |
// (read lots of sarcasm in that...) | |
if (length < (int)(sizeof(fn) - 1)) { | |
if (length < 4 && isalpha(n[0]) && n[1] == ':' && | |
(isdirsep(n[2]) || !n[2])) { | |
// Always use D:/ for drive letters | |
fn[0] = n[0]; | |
strcpy(fn + 1, ":/"); | |
n = fn; | |
} | |
else if (length > 0 && isdirsep(n[length - 1])) { | |
// Strip trailing slash from name... | |
length--; | |
memcpy(fn, n, length); | |
fn[length] = '\0'; | |
n = fn; | |
} | |
} | |
return !_stat(n, &s) && (s.st_mode & _S_IFDIR); | |
} | |
int isdirWStat(const char* n) | |
{ | |
char fn[4]; // used for drive letter only: "X:/" | |
int length = (int)strlen(n); | |
// Strip trailing slash from name... | |
if (length > 0 && isdirsep(n[length - 1])) | |
length--; | |
if (length < 1) | |
return 0; | |
// This workaround brought to you by the fine folks at Microsoft! | |
// (read lots of sarcasm in that...) | |
if (length == 2 && isalpha(n[0]) && n[1] == ':') { | |
fn[0] = n[0]; | |
strcpy(fn + 1, ":/"); | |
n = fn; | |
length = 3; | |
} | |
// convert filename to wide chars using *length* | |
utf8_to_wchar(n, wbuf, length); | |
struct _stat s; | |
return (!_wstat(wbuf, &s) && (s.st_mode & _S_IFDIR)); | |
} | |
// NOTE: these paths should only use the '/' separator. The test will | |
// repeat test each path using the backslash separator. | |
std::pair<std::string, int> testVals [] = | |
{ | |
{BASE, 1}, | |
{BASE + "/", 1}, | |
{"/", 1}, | |
{".", 1}, | |
{"..", 1}, | |
// variations on an ASCII folder | |
{BASE + "/isdirtest", 1}, | |
{BASE + "/isdirtest/", 1}, | |
{BASE + "/isdirtest/.", 1}, | |
{BASE + "/isdirtest/..", 1}, | |
{BASE + "/isdirtest/./", 1}, | |
{BASE + "/isdirtest/../", 1}, | |
// variations on a UTF-8 folder | |
{BASE + "/isdirtest/testǼ", 1}, | |
{BASE + "/isdirtest/testǼ/", 1}, | |
{BASE + "/isdirtest/testǼ/.", 1}, | |
{BASE + "/isdirtest/testǼ/..", 1}, | |
{BASE + "/isdirtest/testǼ/./", 1}, | |
{BASE + "/isdirtest/testǼ/../", 1}, | |
// variations on a different UTF-8 folder [utf-8 value larger than 255] | |
{BASE + "/isdirtest/test♥", 1}, | |
{BASE + "/isdirtest/test♥/", 1}, | |
{BASE + "/isdirtest/test♥/.", 1}, | |
{BASE + "/isdirtest/test♥/..", 1}, | |
{BASE + "/isdirtest/test♥/./", 1}, | |
{BASE + "/isdirtest/test♥/../", 1}, | |
// files within an ASCII folder | |
{BASE + "/isdirtest/file.txt", 0}, | |
{BASE + "/isdirtest/file♥.txt", 0}, | |
// files within a UTF-8 folder. | |
{BASE + "/isdirtest/test♥/file.txt", 0}, | |
{BASE + "/isdirtest/test♥/file♥.txt", 0}, | |
}; | |
void testOne(const char* name, int expect) | |
{ | |
if (isdirORIG(name) != expect) | |
printf("isdirORIG fail: %s\n", name); | |
if (isdirGFAW(name) != expect) | |
printf("isdirGFAW fail: %s\n", name); | |
if (isdirWStat(name) != expect) | |
printf("isdirWStat fail: %s\n", name); | |
} | |
void testBothSlashes(std::string name, int expect) | |
{ | |
testOne(name.c_str(), expect); | |
const char* name2 = name.c_str(); | |
if (name.find('/', 0) != std::string::npos) | |
{ | |
std::replace(name.begin(), name.end(), '/', '\\'); | |
testOne(name.c_str(), expect); | |
} | |
} | |
int main() | |
{ | |
SetConsoleOutputCP(65001); // make sure we can display unicode | |
SetCurrentDirectory((BASE + "/").c_str()); | |
char buf[2048]; | |
GetCurrentDirectory(2048, buf); | |
printf("cwd = %s\n", buf); | |
int count = sizeof(testVals) / sizeof(testVals[0]); | |
for (int i = 0; i < count; i++) | |
{ | |
auto name = testVals[i].first; | |
int expect = testVals[i].second; | |
testBothSlashes(name, expect); | |
} | |
} |
Thanks for this update, I was distracted by PR stuff. I'll check it and decide later. Sorry.
No apologies necessary! I'm sure you are juggling plenty of plates, please don't feel you need to devote any special attention this way!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Another test case is network paths. Kind of difficult to perform in a generic manner (i.e. the user must have a shared network drive).
For example: \\machine\drive\folder\file. I'm fairly sure forward slashes are not valid in the \\machine\drive portion.