Created
February 6, 2014 12:47
-
-
Save sprintingdev/8843526 to your computer and use it in GitHub Desktop.
A Python script that replaces relative paths in the src/href attributes of HTML with absolute paths. Created with help from http://stackoverflow.com/questions/3836644/c-sharp-convert-relative-to-absolute-links-in-html-string and http://docs.python.org/2/howto/regex.html
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import re | |
import os | |
def srcrepl(match):
    """Return the matched tag with its relative src/href path made absolute.

    Meant to be used as the replacement callback of ``re.sub``. The match is
    expected to expose four groups:

    1. the tag text that precedes the attribute name,
    2. the attribute name (``src`` or ``href``),
    3. the relative path found between the attribute's quotes,
    4. the remainder of the tag after the closing quote.

    The path is appended verbatim to the configured prefix. The rebuilt tag
    is also printed so every rewrite is visible while the script runs.
    """
    absolutePath = "http://www.example.com/"  # update the URL to be prefixed here.
    # Close the attribute's quote immediately after the path (group 3).
    # Group 4 holds the rest of the tag (other attributes, a trailing "/"),
    # so it must stay outside the quotes or the emitted HTML is malformed.
    replacement = (
        "<" + match.group(1) + match.group(2) + "=" + "\""
        + absolutePath + match.group(3) + "\""
        + match.group(4) + ">"
    )
    # Single parenthesised argument: valid on both Python 2 and Python 3.
    print(replacement)
    return replacement
# Rewrite relative src/href attributes in the data file using srcrepl.
# Matches a tag containing a src/href attribute whose quoted value does not
# start with "http"; the four groups are consumed by srcrepl.
p = re.compile(r"<(.*?)(src|href)=\"(?!http)(.*?)\"(.*?)>")

# "r+" opens the existing file for reading and writing; the with-block
# guarantees the handle is closed even if the substitution raises.
with open("/path/to/data/file", "r+") as f:  # update the file name here.
    fileContents = f.read()
    updatedFileContents = p.sub(srcrepl, fileContents)
    # read() leaves the position at end-of-file, so writing straight away
    # would append the new HTML after the old. Rewind to overwrite instead.
    f.seek(0)
    f.write(updatedFileContents)
    # Drop any leftover bytes from the previous contents.
    f.truncate()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment