Created
March 31, 2010 03:19
-
-
Save atr000/349895 to your computer and use it in GitHub Desktop.
user-script-analyze.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import glob | |
import operator | |
import re | |
import string | |
import sys | |
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
# Greasemonkey API identifiers whose usage is tallied across all scripts.
# NOTE: the resource-URL API is spelled GM_getResourceURL (upper-case "URL");
# the lookup is a case-sensitive substring test, so the spelling must be exact.
apis = ('GM_addStyle', 'GM_deleteValue', 'GM_getResourceText',
        'GM_getResourceURL', 'GM_getValue', 'GM_listValues', 'GM_log',
        'GM_openInTab', 'GM_registerMenuCommand', 'GM_setValue',
        'GM_xmlhttpRequest', 'unsafeWindow')

# Per-API script counts, plus three bookkeeping rows:
#   'all'  - every script examined
#   'none' - scripts that use none of the APIs above
#   'eval' - scripts that mention eval
api_counts = {'all': 0, 'none': 0, 'eval': 0}
api_counts.update(dict.fromkeys(apis, 0))

# Metadata-block imperatives whose presence is tallied across all scripts.
metas = ('@require', '@resource', '@include', '@exclude', '@unwrap',
         '@version', '@name', '@namespace', '@description', '@author',
         '@homepage', '@date', '@license', '@match')
meta_counts = dict.fromkeys(metas, 0)

# Histograms: key is a script's distinct-domain count (an int, or the strings
# 'infinity' / 'unknown'); value is how many scripts fall into that bucket.
xhr_hosts = {}
set_hosts = {}
get_hosts = {}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
def hostToDomain(host):
    """Reduce a host name to a rough "site domain" using a length heuristic.

    The pattern keeps the text following a dot, anchored at the end of the
    host, provided that text is at least seven characters long.  Because the
    leading ``.*`` is greedy, the right-most dot satisfying that length rule
    wins, e.g. ``www.google.com`` -> ``google.com`` and
    ``www.bbc.co.uk`` -> ``bbc.co.uk``.

    Returns the host unchanged when no dot leaves a long enough suffix.
    """
    found = re.search(r'.*\.(.+......)$', host)
    return found.group(1) if found else host
def domainsInMetadata(metadata):
    """Return the set of domains a script's @include rules cover.

    metadata is the text of the script's metadata block.

    Returns:
      * an empty set when metadata is empty or None;
      * the string 'infinity' when there is no @include rule at all (which
        means "@include *"), or when any rule's host part is a bare '*';
      * otherwise a set of domains, one per distinct host after each host
        has been passed through hostToDomain().
    """
    if not metadata:
        return set([])

    patterns = re.findall(r'@include\s+(.*)', metadata)
    if not patterns:
        # No @include means "@include *" !
        return 'infinity'

    def urlToHost(url):
        # First try a reasonable "scheme://host/..." URL, then fall back to a
        # stranger pattern like "*.amazon.*/*" (everything up to the first
        # slash).  If neither matches, the rule is used as-is.
        for regex in (r'^\w+.*?://([^/]+)', r'^([^/]+)'):
            found = re.search(regex, url)
            if found:
                return found.group(1)
        return url

    hosts = set(urlToHost(pattern.strip()) for pattern in patterns)
    if '*' in hosts:
        return 'infinity'
    return set(map(hostToDomain, hosts))
def numDomainsInMetadata(metadata):
    """Return how many distinct domains a script's metadata block covers.

    Returns the string 'infinity' when domainsInMetadata() reports that the
    script applies everywhere; otherwise the size of the domain set it
    returned.
    """
    # Parse the metadata once and reuse the result for both the sentinel
    # check and the count.
    domains = domainsInMetadata(metadata)
    if 'infinity' == domains:
        return 'infinity'
    return len(domains)
def domainsInXhr(source):
    """Return the set of domains found in GM_xmlhttpRequest URLs in source.

    Scans for "GM_xmlhttpRequest", followed (without crossing a closing
    brace) by "url" and then an http:// or https:// address, and captures
    the host portion up to the next quote or slash.  Each captured host is
    reduced with hostToDomain().
    """
    url_host = re.compile(
        r"""GM_xmlhttpRequest[^}]+url[^}]+https?://([^'"/]+)""", re.S)
    return set(map(hostToDomain, url_host.findall(source)))
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # | |
# Matches a /* ... */ comment, including ones that span several lines.
# The DOTALL flag has to be bound to the pattern itself: the fourth positional
# parameter of re.sub() is `count`, not `flags`.
_BLOCK_COMMENT = re.compile(r'/\*.*?\*/', re.S)

# Each line on standard input names one user script file to analyze.
for filename in sys.stdin:
    filename = filename.strip()
    if not filename:
        # Ignore blank lines instead of trying to open an empty path.
        continue

    api_counts['all'] += 1

    # Close the file promptly; a run may cover a very large number of scripts.
    with open(filename) as script:
        source = script.read()

    # Build a copy of the source with string literals and comments removed so
    # that API names mentioned only in text are not counted as usage.
    # Strip strings: drop escaped quotes first, then whole quoted runs.
    source_clean = re.sub("""\\\\['"]""", '', source)
    source_clean = re.sub("""(['"]).*?\\1""", '', source_clean)
    # Strip comments: block comments, then line comments.
    source_clean = _BLOCK_COMMENT.sub('', source_clean)
    source_clean = re.sub('//.*\r?\n?', '', source_clean)

    # Record which APIs this script uses and bump the global tallies.
    this_apis = {}
    for api in apis:
        used = api in source_clean
        this_apis[api] = used
        if used:
            api_counts[api] += 1
    if re.search(r'\beval\b', source_clean):
        api_counts['eval'] += 1
    if not any(this_apis.values()):
        api_counts['none'] += 1

    # The metadata block lives in comments, so read it from the raw source.
    header = re.search(r'// ==UserScript==(.*?)// ==/UserScript==',
                       source, re.S)
    if not (header and header.group(1)):
        continue
    metadata = header.group(1)

    for meta in metas:
        # Require a word boundary after the imperative so that, for example,
        # "@namespace" is not also counted as "@name".
        if re.search(re.escape(meta) + r'\b', metadata):
            meta_counts[meta] += 1

    # Bucket scripts that store or read values by how many domains they run on.
    numDomains = numDomainsInMetadata(metadata)
    if this_apis['GM_setValue']:
        set_hosts[numDomains] = set_hosts.get(numDomains, 0) + 1
    if this_apis['GM_getValue']:
        get_hosts[numDomains] = get_hosts.get(numDomains, 0) + 1

    # For XHR users, combine the domains the script runs on with the domains
    # it requests from.
    if this_apis['GM_xmlhttpRequest']:
        metaDomains = domainsInMetadata(metadata)
        if 'infinity' == metaDomains:
            numDomains = 'infinity'
        else:
            # Request URLs live inside string literals, so this scan needs the
            # raw source rather than the stripped copy.
            xhrDomains = domainsInXhr(source)
            if xhrDomains:
                numDomains = len(metaDomains | xhrDomains)
            else:
                numDomains = 'unknown'
        xhr_hosts[numDomains] = xhr_hosts.get(numDomains, 0) + 1
def _by_count(counts):
    """Return the (key, count) pairs of counts, largest count first."""
    return sorted(counts.items(), key=operator.itemgetter(1), reverse=True)


# Report.  Every print call takes one parenthesised argument, which produces
# the same output under the Python 2 print statement and the Python 3 print
# function.

# How many scripts use each API.
print("")
print("%10s %s" % ('Number', 'API'))
for api, count in _by_count(api_counts):
    print("%10d %s" % (count, api))

# How many scripts carry each metadata imperative.
print("")
print("%10s %s" % ('Number', 'Imperative'))
for meta, count in _by_count(meta_counts):
    print("%10d %s" % (count, meta))

# Distinct-domain histogram for scripts using GM_xmlhttpRequest.
print("")
print("%10s %s" % ('Number', 'Distinct hosts (XHR)'))
print("%10d %s" % (api_counts['GM_xmlhttpRequest'], 'all scripts'))
for hosts, count in _by_count(xhr_hosts):
    print("%10d %s" % (count, hosts))

# Distinct-domain histogram for scripts using GM_setValue.
print("%10s %s" % ('Number', 'Distinct hosts (set)'))
print("%10d %s" % (api_counts['GM_setValue'], 'all scripts'))
for hosts, count in _by_count(set_hosts):
    print("%10d %s" % (count, hosts))

# Distinct-domain histogram for scripts using GM_getValue.  The total shown
# here is the GM_getValue count, matching the section heading.
print("%10s %s" % ('Number', 'Distinct hosts (get)'))
print("%10d %s" % (api_counts['GM_getValue'], 'all scripts'))
for hosts, count in _by_count(get_hosts):
    print("%10d %s" % (count, hosts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment