Last active
October 28, 2015 10:55
-
-
Save narate/9a656c66e9ba6fdcc539 to your computer and use it in GitHub Desktop.
Generate urls for wrk lua script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#-*-coding: utf-8 -*-
import json
import re
import sys
import urllib2
from urlparse import urljoin
from urlparse import urlparse

from BeautifulSoup import BeautifulSoup
# Lua request function emitted after the `urls` table; it cycles through
# `urls` one entry per request and wraps around at the end.
LUA_REQUEST_SCRIPT = '''
local count = 1
request = function()
local url = urls[count]
count = count + 1
if count > #urls then
count = 1
end
return wrk.format(
'GET',
url
)
end
'''

# <link rel="..."> values whose href is collected as a page asset.
ASSET_LINK_RELS = ('stylesheet', 'shortcut icon')


def site_relative_path(raw, page_url):
    """Return the server path of *raw* if it lives on the site of *page_url*.

    *raw* is an href/src attribute value as found in the page (it may be
    absolute, protocol-relative, root-relative or document-relative).  It is
    resolved against *page_url*; the result is the path (plus params and
    query string, without fragment) when scheme and host equal those of
    *page_url*, otherwise None.  None is also returned for a missing/empty
    attribute and for values that cannot be parsed as a URL.
    """
    if not raw:
        return None
    try:
        page = urlparse(page_url)
        target = urlparse(urljoin(page_url, raw.strip()))
    except ValueError:
        # Malformed attribute value (e.g. broken IPv6 literal): skip it.
        return None
    same_site = (target.scheme == page.scheme
                 and target.netloc.lower() == page.netloc.lower())
    if not same_site:
        # Covers other hosts as well as data:, javascript:, mailto: URIs,
        # which resolve to a different scheme and an empty netloc.
        return None
    path = target.path or '/'
    if target.params:
        path += ';' + target.params
    if target.query:
        path += '?' + target.query
    return path


def lua_table(links):
    """Render *links* as a Lua table constructor of quoted strings.

    Each entry is quoted individually, so brackets inside a URL are kept
    intact instead of being rewritten to braces.
    """
    if not links:
        return '{}'
    rows = ['    ' + json.dumps(link, ensure_ascii=False) for link in links]
    return '{\n' + ',\n'.join(rows) + '\n}'


def collect_links(soup, page_url):
    """Return the same-site request paths referenced by the parsed page.

    The page's own path is included only when *page_url* has a query string.
    After that come, in order, stylesheet/icon <link> targets, <img> sources
    and <script> sources that resolve to the site of *page_url*.
    Duplicates are kept.
    """
    links = []
    page = urlparse(page_url)
    if page.query:
        links.append((page.path or '/') + '?' + page.query)

    candidates = []
    # css / icons
    for tag in soup.findAll('link'):
        if tag.get('rel') in ASSET_LINK_RELS:
            candidates.append(tag.get('href'))
    # img, script
    for tag_name in ('img', 'script'):
        for tag in soup.findAll(tag_name):
            candidates.append(tag.get('src'))

    for raw in candidates:
        path = site_relative_path(raw, page_url)
        if path is not None:
            links.append(path)
    return links


def main(argv):
    """Fetch argv[1], print a wrk Lua script on stdout, return an exit code.

    Diagnostics go to stderr so that redirecting stdout into a .lua file
    never mixes error text into the generated script.
    """
    if len(argv) < 2:
        sys.stderr.write('Usage : python %s URL\n' % argv[0])
        return -1

    page_url = argv[1]
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'โปรแกรมเมอร์เกรด B')]
    try:
        page = opener.open(page_url)
        try:
            html = page.read()
        finally:
            page.close()
    except ValueError:
        sys.stderr.write('Invalid URL\n')
        return 1
    except urllib2.URLError as err:
        # Includes HTTPError; report it instead of dumping a traceback.
        sys.stderr.write('Cannot fetch %s: %s\n' % (page_url, err))
        return 1

    links = collect_links(BeautifulSoup(html), page_url)
    print('local urls = ' + lua_table(links))
    print(LUA_REQUEST_SCRIPT)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Author
narate
commented
Oct 28, 2015
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment