Last active
January 2, 2020 05:31
-
-
Save marirs/3e519e59d4c97986c40cfd3d84397014 to your computer and use it in GitHub Desktop.
Extract the domain name from a URL and report whether the host is an IP address or a domain name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from collections import namedtuple | |
from itertools import groupby | |
from fastnumbers import isint | |
from ipaddress import ip_address | |
def is_ip(test_string):
    """Check whether a string is an IP address, optionally followed by ``:port``.

    Surrounding whitespace and square brackets (``[::1]``, ``[::1]:443``) are
    ignored, and the returned ``ip`` never contains them.

    :param test_string: The string to test for IP
    :return: an ``Ip(ip, port, version)`` namedtuple when the string holds a
        valid IPv4/IPv6 address, otherwise ``False``. ``port`` is the decimal
        port as a string, or ``""`` when no valid port (0-65535) follows the
        address. ``version`` is ``4`` or ``6``.
        eg: Ip(ip='10.0.0.1', port='8080', version=4) or False
    """
    if not isinstance(test_string, str):
        return False
    IP = namedtuple('Ip', ['ip', 'port', 'version'])

    text = test_string.strip()

    # First attempt: the whole string is an address (IPv4, or IPv6 with or
    # without brackets) and carries no port.
    bare = text.strip('[').strip(']').strip()
    try:
        parsed = ip_address(bare)
    except ValueError:
        pass
    else:
        # Report the cleaned address, not the raw input, so callers get the
        # same shape of value as from the "address:port" path below.
        return IP(ip=bare, port="", version=parsed.version)

    # Second attempt: split on the LAST ':' so that an IPv6 address (which
    # itself contains ':') followed by a port is still split correctly.
    host, sep, tail = text.rpartition(':')
    if not sep:
        # No ':' at all, and the whole string already failed to parse.
        return False
    host = host.strip('[').strip(']').strip()  # "[::1]" -> "::1"
    try:
        parsed = ip_address(host)
    except ValueError:
        # ain't an ip
        return False

    # A port is plain ASCII digits within 0-65535; anything else after the
    # ':' (text, signed numbers, out-of-range values) is reported as no port.
    has_port = (
        0 < len(tail) <= 5
        and not tail.strip('0123456789')
        and int(tail) <= 65535
    )
    return IP(ip=host, port=tail if has_port else "", version=parsed.version)
def extract_domain(url):
    """Extract the host (domain name or IP address) from the given URL.

    Only a leading ``scheme://`` (or protocol-relative ``//``) is removed, so
    a ``//`` that appears later in the path or query does not affect which
    host is returned. The host ends at the first ``/``, ``?`` or ``#``.
    A leading ``www.`` label and wrapping ``[]``, ``<>``, ``()`` are dropped.

    :param url: the url to extract the domain from - string
    :return: a ``Result(url_type, result)`` namedtuple, or ``None`` when
        ``url`` is not a string or contains no host at all
        eg: (url_type='domain', result='domain.tld') or
            (url_type='ip', result='ip:port') or None
    """
    if not isinstance(url, str):
        return None
    R = namedtuple('Result', ['url_type', 'result'])

    text = url.strip()

    # Drop the scheme separator only when the "//" really is the scheme
    # separator: nothing before it, or a "scheme:" right before it.
    # Splitting on the last "//" would pick up URLs embedded in the query.
    head, sep, tail = text.partition('//')
    if sep and (not head or head.endswith(':')):
        text = tail

    # The host is the first run of characters that are not path, query or
    # fragment delimiters; an empty default avoids IndexError on "" / "http://".
    dom = next(
        (''.join(g) for k, g in groupby(text, '/?#'.__contains__) if not k),
        '',
    )

    dom = dom.strip('[').strip('<').strip('(').strip()  # just in case it starts with any of these
    # Require the dot so hosts that merely begin with "www" (e.g.
    # "wwwfoo.com") keep their first label.
    if dom.startswith('www.'):
        dom = dom.split('.', 1)[-1].strip()
    dom = dom.strip(']').strip('>').strip(')').strip()  # just in case it ends with any of these

    if not dom:
        # nothing left that could be a host
        return None

    is_ip_dom = is_ip(dom)
    if not is_ip_dom:
        # assuming web-address
        return R(url_type='domain', result=dom)

    # it's an ip-address; IPv6 needs brackets when a port is attached
    if is_ip_dom.port:
        if is_ip_dom.version == 6:
            ip = f'[{is_ip_dom.ip}]:{is_ip_dom.port}'
        else:
            ip = f'{is_ip_dom.ip}:{is_ip_dom.port}'
    else:
        ip = f'{is_ip_dom.ip}'
    return R(url_type='ip', result=ip)
Author
marirs
commented
Dec 21, 2019
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment