Skip to content

Instantly share code, notes, and snippets.

Created April 23, 2016 13:34
Show Gist options
  • Save nsantorello/957af6cb6800f246691cae41aa975bb1 to your computer and use it in GitHub Desktop.
Save nsantorello/957af6cb6800f246691cae41aa975bb1 to your computer and use it in GitHub Desktop.
Embed dynamic lists from a published Webflow site into your downloaded code
import sys
import os
import urllib
import glob
import re
import urlparse
from shutil import copyfile
from lxml import html
from lxml import etree
# The remote site URL is the only (required) command-line argument.
if len(sys.argv) < 2:
    print('Pass in the URL to your remote site and try again! For example:')
    print(' python ' + sys.argv[0] + ' <remote-site-url>')
    # Stop here: without the argument, sys.argv[1] below raises IndexError.
    sys.exit(1)
remoteSiteUrl = sys.argv[1]

# Webflow constants
dynamicListClass = 'w-dyn-list'
localImagesDirectory = 'images/'
# Raw string so the regex escapes reach `re` unchanged. The lazy group stops at
# the first closing quote, so a style holding several url('...') values yields
# one match per value instead of a single over-long capture.
backgroundImagePattern = r"background-image:\s*url\('(.*?)'\)"

# Ensure images directory exists
if not os.path.exists(localImagesDirectory):
    os.makedirs(localImagesDirectory)
def downloadImage(remoteUrl, localUrl):
    """Save the image at remoteUrl to the path localUrl.

    Nothing is downloaded when remoteUrl does not start with 'http' or when a
    file already exists at localUrl.
    """
    isRemote = remoteUrl.startswith('http')
    if not isRemote or os.path.isfile(localUrl):
        return
    print(' - Downloading image: ' + remoteUrl)
    urllib.urlretrieve(remoteUrl, localUrl)
def processImageTag(node):
    """Point an <img> node at a local copy of its image and download that copy.

    The new ``src`` is localImagesDirectory joined with the URL-decoded last
    path segment of the current ``src``. Nodes whose ``src`` is missing or
    empty are left untouched.
    """
    src = node.attrib.get('src')
    if not src:
        # Nothing to localise. Indexing attrib directly would raise KeyError
        # for an <img> without src, and an empty src would be rewritten to the
        # bare images directory.
        return
    # Need to change this image's source to point to the local file we will download
    newImgSrc = urlparse.urljoin(localImagesDirectory, urllib.unquote(src.split('/')[-1]))
    node.attrib['src'] = newImgSrc
    # Download from the original URL, captured before the attribute was rewritten.
    downloadImage(src, newImgSrc)
def processImageBackground(node):
    """Localise inline ``background-image: url('...')`` values on a node.

    Every match of backgroundImagePattern in the node's ``style`` attribute is
    rewritten to point into localImagesDirectory, and the referenced image is
    passed to downloadImage. Nodes without a ``style`` attribute, or whose
    style contains no match, are left untouched.
    """
    style = node.attrib.get('style')
    if not style:
        return
    # search, not match: the declaration may follow other properties in the
    # style string, and match only looks at the very start.
    if re.search(backgroundImagePattern, style) is None:
        return

    def localizeMatch(match):
        # Rewrite one matched declaration and fetch the image it referenced.
        src = match.group(1)
        newImgSrc = urlparse.urljoin(localImagesDirectory, urllib.unquote(src.split('/')[-1]))
        downloadImage(src, newImgSrc)
        return "background-image: url('" + newImgSrc + "')"

    # A callable replacement keeps backslashes in the file name literal (a
    # string replacement would treat them as escapes) and lets each match keep
    # its own image instead of all of them receiving the first one.
    node.attrib['style'] = re.sub(backgroundImagePattern, localizeMatch, style)
def replaceDynamicList(dynLists):
    """Swap a local dynamic list element for its remote counterpart.

    dynLists is a pair: index 0 is the local element, index 1 the remote one.
    Images referenced inside the remote element (<img> tags and inline
    background images on its descendants) are localised first, then the remote
    element replaces the local one inside the local element's parent.
    """
    localDynamicList, remoteDynamicList = dynLists[0], dynLists[1]
    # Download images inside of the remote dynamic list. Explicit loops are
    # used because the calls matter only for their side effects; a lazy map()
    # would never run them.
    for imgNode in remoteDynamicList.findall('.//img'):
        processImageTag(imgNode)
    for descendant in remoteDynamicList.xpath('.//*'):
        processImageBackground(descendant)
    localDynamicList.getparent().replace(localDynamicList, remoteDynamicList)
# Replace each dynamic list found in one local HTML file with the matching
# dynamic list fetched from the published site.
#
# htmlFile: file name of the local HTML file (the caller passes the results of
# glob.glob('*.html')).
#
# NOTE(review): this copy of the function has lost its indentation and appears
# to be missing code: the guard `if` statements are followed only by their
# message (no visible early exit), the html.fromstring( call is left unclosed,
# and the keyword arguments near the end have no opening statement. Restore
# these from the original script before relying on this block - TODO confirm.
def replaceDynamicListsInFile(htmlFile):
print htmlFile
# Ignore detail files
if htmlFile.startswith('detail_'):
print ' - skipping (dynamic list detail file)'
# Read local file to see if there is any dynamic content
# ('r+' opens the file for both reading and writing.)
with open(htmlFile, 'r+') as localFile:
localHtml = html.fromstring(
# Local elements carrying the Webflow dynamic-list class.
localDynamicLists = localHtml.find_class(dynamicListClass)
if len(localDynamicLists) == 0:
print ' - no dynamic lists were found'
# The remote page path is the file name minus its last five characters (the
# '.html' extension matched by the caller's glob); index.html maps to the
# site URL itself.
remoteRelativeUrl = htmlFile[0:-5] if htmlFile != 'index.html' else ''
remotePageUrl = urlparse.urljoin(remoteSiteUrl, remoteRelativeUrl)
remoteHtml = html.fromstring(urllib.urlopen(remotePageUrl).read())
remoteDynamicLists = remoteHtml.find_class(dynamicListClass)
# Lists are paired by position below, so the counts must agree.
if len(remoteDynamicLists) != len(localDynamicLists):
print ' - error: number of dynamic lists does not match up with the remote version at: ' + remotePageUrl
# Pair local and remote lists in document order and swap each pair.
map(replaceDynamicList, zip(localDynamicLists, remoteDynamicLists))
# Save file with dynamic content modifications
# NOTE(review): presumably the tail of a serialisation call (e.g.
# etree.tostring) whose result is written back to localFile - the opening of
# the statement is not present here; confirm against the original.
encoding="utf-8", method="html", xml_declaration=None,
pretty_print=True, with_tail=True, standalone=None,
doctype='<!DOCTYPE html>'))
print ' - ' + str(len(remoteDynamicLists)) + ' dynamic list(s) processed'
# Process every HTML file in the current directory. A plain loop is used
# because the calls matter only for their side effects; a lazy map() would
# never run them.
for htmlPath in glob.glob('*.html'):
    replaceDynamicListsInFile(htmlPath)
print('Done processing dynamic data!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment