Created
July 20, 2014 23:02
-
-
Save pingjiang/7d97afeb53ff56def2a2 to your computer and use it in GitHub Desktop.
抓取SAE支持CDN库的列表 (Scrape the list of libraries available on the SAE CDN)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#--*-- coding: UTF-8 --*--
import urllib
import re
import sys

# Library names used by print_markdown() to build per-library CDN URLs.
sae_cdn_libs = [
    'angular.js', 'backbone', 'bootstrap', 'dojo', 'ext-core', 'highcharts',
    'highstock', 'jq.mobi', 'jquery', 'jquery-mobile', 'jquery-ui',
    'jquery.cookie', 'jquery.migrate', 'jquerytools', 'json2', 'lesscss',
    'mootools', 'prototype', 'qunit', 'scriptaculous', 'swfobject',
    'underscore', 'webfont', 'wlige', 'yui', 'zepto',
]

# Maps a file name to a full URL. Not referenced by any code visible in
# this file.
lib_fix = {
    'jquery.cookie.js': 'http://lib.sinaapp.com/js/jquery.cookie/jquery.cookie.js',
}

# Captures the value of every double-quoted href attribute in a page.
RE_HREF = re.compile(r'href="([^"]*)"')

# Site root; crawled links are site-relative and get appended to this.
BASE_URL = 'http://lib.sinaapp.com'

# Links starting with this prefix are followed by website_walker(); other
# links are never followed.
URL_PREFIX = '/?path='

# Page URL -> list of href values found on that page, so a page is only
# downloaded once.
cache = {}

# Every .js/.css link path collected by website_walker(), in discovery order.
results = []

# File that main() writes the collected link paths to, one per line.
result_file = 'results.txt'
def website_walker(f, url, pattern, max_depth=10):
    """Recursively crawl directory-listing pages and record .js/.css links.

    Starting at ``url``, every href on the page is inspected:

    * links ending in ``/docs``, ``themes`` or ``i18n`` are skipped;
    * links that do not start with ``URL_PREFIX`` are never followed; those
      that start with ``/js/`` and end in ``.js`` or ``.css`` are appended
      to the module-level ``results`` list and written to ``f``;
    * links that start with ``URL_PREFIX`` (other than the bare prefix
      itself) are followed by recursing on ``BASE_URL + link``.

    Args:
        f: Writable file-like object; each recorded link is written to it
            followed by a newline.
        url: Absolute URL of the page to process.
        pattern: Not used by this function. Recursive calls pass the link
            that is being followed.
        max_depth: Remaining recursion budget; the call returns immediately
            once it reaches zero or below.

    Returns:
        None. Output goes to ``results``, ``cache`` and ``f``.
    """
    if max_depth <= 0:
        return

    if url in cache:
        links = cache[url]
    else:
        try:
            # urllib.urlopen is the Python 2 API this script is written for.
            response = urllib.urlopen(url)
            try:
                html = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except IOError as e:
            print('error fetching %s: %s' % (url, e))
            return
        links = RE_HREF.findall(html)
        cache[url] = links

    # NOTE(review): the cache avoids re-downloading a page but a cached page
    # is still walked again, so the same link can be recorded more than
    # once. Confirm whether duplicates in the output are acceptable.
    for link in links:
        if link.endswith(('/docs', 'themes', 'i18n')):
            continue

        if not link.startswith(URL_PREFIX):
            # Not a directory listing: record it if it is a hosted asset,
            # and never follow it.
            if link.startswith('/js/') and link.endswith(('.js', '.css')):
                results.append(link)
                f.write(link + '\n')
            continue

        # The bare prefix points back at the root listing.
        if link == URL_PREFIX:
            continue

        next_url = BASE_URL + link
        website_walker(f, next_url, link, max_depth - 1)
def print_markdown():
    """Print a Markdown section for each name in ``sae_cdn_libs``.

    Each section is a ``### <name>`` heading followed by four URLs built
    from fixed ``latest/css`` and ``latest/js`` path templates: the
    ``.css``, ``.js``, ``.min.css`` and ``.min.js`` variants. The URLs are
    constructed from the name only; nothing checks that they exist.
    """
    for lib_name in sae_cdn_libs:
        lib_css_url = 'http://lib.sinaapp.com/js/%s/latest/css/%s' % (lib_name, lib_name)
        lib_js_url = 'http://lib.sinaapp.com/js/%s/latest/js/%s' % (lib_name, lib_name)
        # NOTE(review): any leading whitespace inside this template was lost
        # in the extracted source; confirm against the original file.
        # A single parenthesised argument keeps this valid on Python 2 too.
        print('''### %s
%s.css
%s.js
%s.min.css
%s.min.js
''' % (lib_name, lib_css_url, lib_js_url, lib_css_url, lib_js_url))
def main():
    """Crawl the CDN site, save the found links, and print them per library.

    Runs ``website_walker`` from ``BASE_URL`` with ``result_file`` open for
    writing, then groups the collected ``results`` by the third
    ``/``-separated component of each path (for ``/js/<name>/...`` that is
    ``<name>``) and prints one ``### <name>`` heading per group followed by
    the tab-indented absolute URLs.
    """
    with open(result_file, 'w') as f:
        website_walker(f, BASE_URL, URL_PREFIX)

    lib_map = {}
    for result in results:
        parts = result.split('/')
        # Paths with fewer than three components have no group name.
        if len(parts) > 2:
            lib_map.setdefault(parts[2], []).append(result)

    for key in lib_map:
        print('### ' + key)
        # Prefix every path with a tab and the site root, one per line.
        print('\t' + BASE_URL + ('\n\t' + BASE_URL).join(lib_map[key]))
# Run the crawl only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment