Created
January 25, 2019 16:45
-
-
Save ronaldgreeff/286032ceddac8622f86a79f9eea91f42 to your computer and use it in GitHub Desktop.
Regex URL split
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import re

# URL-splitting pattern. Written as raw strings so that sequences such as
# ``\/``, ``\s`` and ``\w`` reach the regex engine untouched instead of being
# treated as (invalid) string escapes.
#
# The "rest" group is lazy (``.*?``): a greedy ``.*`` would always consume a
# trailing ``#fragment`` and leave the final group permanently unmatched.
# The set of strings that match is unchanged; only the split between the last
# two groups differs.
PATTERN = re.compile(
    r'^(http[s]?|ftp):\/?\/?'  # group 1: scheme, then ':' and up to two '/'
    r'([^:\/\s]+)'             # group 2: host (no ':', '/' or whitespace)
    r'((\/\w+)*\/)'            # group 3: directory path; group 4: its last segment
    r'([\w\-\.]+[^#?\s]+)'     # group 5: file name, stops at '#', '?' or whitespace
    r'(.*?)'                   # group 6: remainder before the fragment (e.g. query)
    r'(#[\w\-]+)?$'            # group 7: optional '#fragment' at end of string
)
class Search(object):
    """Keep a list of URLs and split any one of them with ``PATTERN``.

    The list is filled once, at construction time, from ``get_links()`` and
    stored on ``self.list_of_links``.
    """

    def __init__(self):
        links = self.get_links()
        self.list_of_links = links

    def get_links(self):
        """Return the list of URLs to work on (a single hard-coded sample)."""
        sample = 'http://www.test.com/dir/penis/filename.jpg?var1=foo#bar'
        return [sample]

    def split_url(self, i):
        """Return the regex groups of the ``i``-th link, keyed by position.

        The result maps 0-based group position to the captured text (``None``
        for groups that did not participate in the match).

        Raises ``IndexError`` if ``i`` is out of range, and ``AttributeError``
        if the link does not match ``PATTERN`` (the match result is ``None``).
        """
        url = self.list_of_links[i]
        matched = PATTERN.match(url)
        return dict(enumerate(matched.groups()))
def main():
    """Create a ``Search`` and print the split groups of its first link."""
    search_obj = Search()
    # Call form of print: with a single argument it behaves the same on
    # Python 2 and Python 3, whereas the statement form is a SyntaxError on 3.
    print(search_obj.split_url(0))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment