Skip to content

Instantly share code, notes, and snippets.

@pirkla
Last active July 30, 2019 21:56
Show Gist options
  • Save pirkla/be537e957e48ef5f9cd4747ceb039516 to your computer and use it in GitHub Desktop.
Save pirkla/be537e957e48ef5f9cd4747ceb039516 to your computer and use it in GitHub Desktop.
A sample script that expands a URL containing [start-end:step] ranges and {a,b} lists into a list of URLs
#!/usr/bin/env python3
import re
# Sample template for the demo below: a stepped range, a plain range, and a brace list.
someURL = 'https://someurl.com/[1-10:2]/id/[11-20]/more/{a,b}'
def parseURLList(url):
    """Expand a URL template into the list of concrete URLs it describes.

    Two pattern forms are recognised; the leftmost one is expanded first:

    * ``[start-end]`` or ``[start-end:step]`` -- an integer range. The end
      bound is inclusive (as in curl's URL globbing), so ``[1-3]`` yields
      ``1``, ``2`` and ``3``. A negative step counts down to ``end``.
    * ``{a,b,c}`` -- a comma-separated list of literal alternatives.

    Any remaining patterns are expanded recursively, so the result is the
    product of all patterns with the rightmost pattern varying fastest.

    Args:
        url: The URL template to expand.

    Returns:
        A list of expanded URLs; ``[url]`` when no pattern is present.

    Raises:
        ValueError: If a range bound or step is not an integer, or the
            step is zero.
        IndexError: If a range has no ``-`` separator.
    """
    match = re.search(r"\[(.*?)\]|\{(.*?)\}", url)
    if match is None:
        return [url]

    token = match.group(0)
    # Splice around the matched span instead of calling re.sub with the
    # value as the replacement: re.sub would interpret backslashes in the
    # value as escapes / group references.
    prefix = url[:match.start()]
    suffix = url[match.end():]

    if token.startswith("["):
        step_parts = token.strip("[]").split(":")
        step = int(step_parts[1]) if len(step_parts) > 1 else 1
        bounds = step_parts[0].split("-")
        start = int(bounds[0])
        end = int(bounds[1])
        # range() excludes its stop value; nudge it one past ``end`` in the
        # direction of travel so the end bound itself is produced.
        stop = end - 1 if step < 0 else end + 1
        values = [str(n) for n in range(start, stop, step)]
    else:
        values = token.strip("{}").split(",")

    expanded = []
    for value in values:
        expanded += parseURLList(prefix + value + suffix)
    return expanded
# Demo: expand the sample template and print the resulting list of URLs.
myList = parseURLList(someURL)
print(myList)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment