Skip to content

Instantly share code, notes, and snippets.

@sleroux
Created June 15, 2015 19:12
Show Gist options
  • Save sleroux/f5f47b8d17da0d4f87b2 to your computer and use it in GitHub Desktop.
Save sleroux/f5f47b8d17da0d4f87b2 to your computer and use it in GitHub Desktop.
TLD Parsing code from Gecko
// Computes the base domain of aHostname: its effective TLD (eTLD) plus
// aAdditionalParts further labels to the left of it, and stores the result
// in aBaseDomain.
//
// aHostname        - host to analyse. A single trailing '.' is stripped from
//                    this string in place (and not restored), then re-appended
//                    to aBaseDomain on success.
// aAdditionalParts - number of labels to keep in front of the eTLD (>= 0).
//                    The special value -1 instead drops the leftmost label of
//                    the host, and fails if what remains would be only the
//                    eTLD.
// aBaseDomain      - receives the result on success.
//
// Returns:
//   NS_OK                               on success;
//   NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS if the host is empty or has fewer
//                                       labels than requested;
//   NS_ERROR_INVALID_ARG                if the host is ".", ends in "..",
//                                       starts with '.', or contains "..";
//   NS_ERROR_HOST_IS_IP_ADDRESS         if PR_StringToNetAddr accepts the
//                                       host as an address literal.
//
// NOTE(review): no normalization of aHostname happens in this function;
// presumably callers hand in an already-normalized (UTF-8) host -- confirm.
nsresult
nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
                                             int32_t aAdditionalParts,
                                             nsACString &aBaseDomain)
{
  if (aHostname.IsEmpty()) {
    return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
  }

  // Drop one trailing dot now; it is put back on the result at the end.
  const bool hadTrailingDot = (aHostname.Last() == '.');
  if (hadTrailingDot) {
    aHostname.Truncate(aHostname.Length() - 1);
  }

  // A host of just "." or one ending in ".." would slip through the
  // per-label checks in the walk below, so reject those here.
  if (aHostname.IsEmpty() || aHostname.Last() == '.') {
    return NS_ERROR_INVALID_ARG;
  }

  // IPv4/IPv6 literals have no base domain; report them as such.
  PRNetAddr netAddr;
  if (PR_StringToNetAddr(aHostname.get(), &netAddr) == PR_SUCCESS) {
    return NS_ERROR_HOST_IS_IP_ADDRESS;
  }

  const char *const hostBegin = aHostname.get();
  const char *const hostEnd = hostBegin + aHostname.Length();

  // Walk the suffixes of the host from most specific to least specific,
  // looking each one up in the rule table. A single entry may carry several
  // attributes at once (e.g. both IsWild() and IsNormal()).
  const char *previousSuffix = nullptr;
  const char *currentSuffix = hostBegin;
  const char *dot = strchr(currentSuffix, '.');
  const char *eTLD = currentSuffix;
  for (;;) {
    // A suffix that starts with '.' means the host began with '.' or
    // contained an embedded "..".
    if (*currentSuffix == '.') {
      return NS_ERROR_INVALID_ARG;
    }

    nsDomainEntry *rule = mHash.GetEntry(currentSuffix);
    if (rule) {
      if (rule->IsWild() && previousSuffix) {
        // Wildcard rule: the eTLD is one label longer than the match.
        eTLD = previousSuffix;
        break;
      }
      if (rule->IsNormal() || !dot) {
        // Exact rule, or there is nothing left above this label.
        eTLD = currentSuffix;
        break;
      }
      if (rule->IsException()) {
        // Exception rule: the eTLD is one label shorter than the match.
        eTLD = dot + 1;
        break;
      }
    }

    if (!dot) {
      // Reached the top-level label without a decisive rule; use it.
      eTLD = currentSuffix;
      break;
    }

    previousSuffix = currentSuffix;
    currentSuffix = dot + 1;
    dot = strchr(currentSuffix, '.');
  }

  // Locate where the returned substring starts.
  const char *resultStart;
  if (aAdditionalParts < 0) {
    NS_ASSERTION(aAdditionalParts == -1,
                 "aAdditionalParts can't be negative and different from -1");

    // Skip the leftmost label, stopping early if we run into the eTLD.
    resultStart = hostBegin;
    while (resultStart != eTLD && *resultStart != '.') {
      ++resultStart;
    }
    // Step over the separating dot.
    if (resultStart != eTLD) {
      ++resultStart;
    }
    // Only succeed if something more than the bare eTLD is left; otherwise
    // aAdditionalParts stays negative and the check below reports failure.
    if (resultStart != eTLD) {
      aAdditionalParts = 0;
    }
  } else {
    // Move left from the eTLD, consuming one requested label per dot. Once
    // the count is used up, the next dot marks the start of the result.
    resultStart = eTLD;
    while (resultStart != hostBegin) {
      --resultStart;
      if (*resultStart == '.') {
        if (aAdditionalParts == 0) {
          ++resultStart;
          break;
        }
        --aAdditionalParts;
      }
    }
  }

  // Any leftover count means the host did not have enough labels.
  if (aAdditionalParts != 0) {
    return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
  }

  aBaseDomain = Substring(resultStart, hostEnd);

  // Restore the trailing dot that was stripped on entry.
  if (hadTrailingDot) {
    aBaseDomain.Append('.');
  }

  return NS_OK;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment