Created
April 4, 2013 06:50
-
-
Save nvartolomei/5308372 to your computer and use it in GitHub Desktop.
fast url checking
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Report whether `ch` may appear in a URL scheme.
 *
 * first_flag: non-zero when `ch` is the first character of the scheme,
 *             in which case '+', '-' and '.' are not allowed.
 * ch:         the character to test, passed as an int; any int value is
 *             safe to pass.
 *
 * Returns 1 if the character is acceptable, 0 otherwise.
 */
int is_urlschemechar(int first_flag, int ch)
{
	/*
	 * The set of valid URL schemes, as per STD66 (RFC3986) is
	 * '[A-Za-z][A-Za-z0-9+.-]*'. But use a slightly looser check
	 * of '[A-Za-z0-9][A-Za-z0-9+.-]*' because an earlier version
	 * of the check used '[A-Za-z0-9]+', so as not to break any
	 * remote helpers.
	 *
	 * The ranges are tested explicitly instead of via isalnum():
	 * isalnum() depends on the current locale and is undefined for
	 * values outside the unsigned char range, whereas scheme
	 * characters are ASCII only. The letter ranges assume an
	 * ASCII-compatible execution character set.
	 */
	int alphanumeric, special;

	alphanumeric = (ch >= '0' && ch <= '9') ||
		       (ch >= 'A' && ch <= 'Z') ||
		       (ch >= 'a' && ch <= 'z');
	special = ch == '+' || ch == '-' || ch == '.';
	return alphanumeric || (!first_flag && special);
}
/*
 * Report whether `url` looks like "<scheme>://...".
 *
 * The scheme must be non-empty and every one of its characters must be
 * accepted by is_urlschemechar() (the first with first_flag set).
 *
 * Returns 1 when a valid scheme is immediately followed by "://",
 * and 0 otherwise, including when `url` is a null pointer.
 */
int is_url(const char *url)
{
	const char *cursor = url;

	if (!cursor)
		return 0;

	/* The leading scheme character is held to the stricter rule. */
	if (!is_urlschemechar(1, *cursor))
		return 0;

	/* Consume the rest of the scheme, up to ':' or end of string. */
	for (cursor++; *cursor && *cursor != ':'; cursor++) {
		if (!is_urlschemechar(0, *cursor))
			return 0;
	}

	/*
	 * The scheme must be followed by colon-slash-slash. Each index is
	 * only examined once the previous character matched, so the
	 * terminating NUL is never read past.
	 */
	if (cursor[0] != ':')
		return 0;
	return (cursor[1] == '/' && cursor[2] == '/');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment