mrdaemon · January 13, 2011 08:58
diff --git a/cygwinapi_check.py b/cygwinapi_check.py
 from contextlib import closing
 #from collections import defaultdict
 import re
 import shelve

 from urllib2 import Request, urlopen, URLError

 from BeautifulSoup import BeautifulSoup

 # Maps a short name for each Cygwin API reference set to the URL of the
 # HTML page that documents it.
 apiurls = { 'std-unix': 'http://cygwin.com/cygwin-api/compatibility.html',
            'std-bsd': 'http://cygwin.com/cygwin-api/std-bsd.html',
            'std-gnu': 'http://cygwin.com/cygwin-api/std-gnu.html',
            'std-sunos': 'http://cygwin.com/cygwin-api/std-solaris.html',
            'std-deprec': 'http://cygwin.com/cygwin-api/std-deprec.html',
            # The 'std-notimpl' key must always be present. If the page is
            # unavailable or gone, set its value to None so that the
            # download routine skips it.
            'std-notimpl': 'http://cygwin.com/cygwin-api/std-notimpl.html',
          }

 # Shelve-backed persistent store, opened as a side effect of importing
 # this module.
 # NOTE(review): nothing in the code visible here reads, writes or closes
 # this shelf -- confirm where it is meant to be used.
 cache = shelve.open('.cygwinapicheck.cache')

 def webapipages(apiurls=None):
     """Download the Cygwin API reference pages and yield their contents.

     Generator. For every ``(name, url)`` pair in *apiurls* the page at
     *url* is fetched and a ``(name, html)`` tuple is yielded, where
     *html* is the raw body returned by ``read()``.

     Entries whose URL is ``None`` are skipped. Entries whose download
     fails with ``URLError`` are reported on stdout and skipped as well;
     the remaining entries are still processed.

     apiurls -- mapping of reference-set name to URL (or ``None``).
                Omitting it, or passing an empty mapping, yields nothing.
     """
     if not apiurls:
         return

     for api, url in apiurls.items():
         # None marks a reference set that is unavailable. Only this entry
         # is skipped; a ``break`` here would silently drop every entry
         # that happens to come later in the iteration order.
         if url is None:
             continue

         print("Downloading Cygwin API Reference set: %s" % api)
         try:
             # urlopen() is the call that raises URLError, so it must be
             # inside the try block, not just the read() that follows it.
             with closing(urlopen(Request(url))) as apipage:
                 html = apipage.read()
         except URLError as e:
             # Format with %s rather than str.join(): join() takes a
             # single iterable, and neither attribute is guaranteed to
             # be a string (``code`` is an integer HTTP status).
             if hasattr(e, 'reason'):
                 detail = "Reason: %s" % (e.reason,)
             elif hasattr(e, 'code'):
                 detail = "Errcode: %s" % (e.code,)
             else:
                 detail = "unknown error"

             print("Unable to download %s from: %s! (%s)" %
                   (api, url, detail))
             print("%s has been skipped." % api)
             continue

         # Yield only after the connection has been closed, so the
         # consumer never holds it open while processing the page.
         yield (api, html)

 def syscalls(html):
     """Extract the entries listed on one Cygwin API reference page.

     The page is parsed with BeautifulSoup. The text of the first
     ``<pre class="screen">`` element is split into lines, and every
     non-blank line is matched against a pattern that captures its first
     whitespace-delimited token and, when present, its second one.

     html -- markup of a single API reference page.

     Returns a generator of 2-tuples ``(first_token, second_token)``,
     where ``second_token`` is None for single-token lines. Returns None,
     after printing a notice, when the page holds no such ``<pre>``
     element or the element is empty.
     """
     # First token, then optionally whitespace and a second token. The
     # separator has to be real whitespace (\s+): the previous r'\\t'
     # matched a literal backslash followed by "t", which never occurs,
     # so the second group could never be captured.
     sc_pattern = re.compile(r'(\S+)(?:\s+(\S+))?')

     dtree = BeautifulSoup(html)

     # find() returns None when nothing matches, so look the nodes up
     # first and only then read ``.text``; otherwise a page without the
     # expected markup raises AttributeError instead of being reported.
     data_node = dtree.find('pre', attrs={'class': 'screen'})
     title_node = dtree.find('h2', attrs={'class': 'title',
                                          'style': 'clear: both'})

     apidata = data_node.text if data_node is not None else None
     apititle = title_node.text if title_node is not None else None
     if not apititle:
         apititle = u'(no title)'

     if not apidata:
         print("No API data found!")
         return None

     print("API: %s" % apititle)

     # Everything below is lazy: nothing is matched until the caller
     # iterates over the returned generator.
     stripped = (line.strip() for line in apidata.split('\n'))
     matches = (sc_pattern.match(line) for line in stripped)
     return (m.groups() for m in matches if m)

 if __name__ == '__main__':
    for api, html in
	from contextlib import closing
	#from collections import defaultdict
	import re
	import shelve

	from urllib2 import Request, urlopen, URLError

	from BeautifulSoup import BeautifulSoup

	# Maps a short name for each Cygwin API reference set to the URL of the
	# HTML page that documents it.
	apiurls = { 'std-unix': 'http://cygwin.com/cygwin-api/compatibility.html',
	'std-bsd': 'http://cygwin.com/cygwin-api/std-bsd.html',
	'std-gnu': 'http://cygwin.com/cygwin-api/std-gnu.html',
	'std-sunos': 'http://cygwin.com/cygwin-api/std-solaris.html',
	'std-deprec': 'http://cygwin.com/cygwin-api/std-deprec.html',
	# The 'std-notimpl' key must always be present. If the page is
	# unavailable or gone, set its value to None so that the
	# download routine skips it.
	'std-notimpl': 'http://cygwin.com/cygwin-api/std-notimpl.html',
	}

	# Shelve-backed persistent store, opened as a side effect of importing
	# this module.
	# NOTE(review): nothing in the code visible here reads, writes or closes
	# this shelf -- confirm where it is meant to be used.
	cache = shelve.open('.cygwinapicheck.cache')

	def webapipages(apiurls=None):
	    """Download the Cygwin API reference pages and yield their contents.

	    Generator. For every ``(name, url)`` pair in *apiurls* the page at
	    *url* is fetched and a ``(name, html)`` tuple is yielded, where
	    *html* is the raw body returned by ``read()``.

	    Entries whose URL is ``None`` are skipped. Entries whose download
	    fails with ``URLError`` are reported on stdout and skipped as well;
	    the remaining entries are still processed.

	    apiurls -- mapping of reference-set name to URL (or ``None``).
	               Omitting it, or passing an empty mapping, yields nothing.
	    """
	    if not apiurls:
	        return

	    for api, url in apiurls.items():
	        # None marks a reference set that is unavailable. Only this entry
	        # is skipped; a ``break`` here would silently drop every entry
	        # that happens to come later in the iteration order.
	        if url is None:
	            continue

	        print("Downloading Cygwin API Reference set: %s" % api)
	        try:
	            # urlopen() is the call that raises URLError, so it must be
	            # inside the try block, not just the read() that follows it.
	            with closing(urlopen(Request(url))) as apipage:
	                html = apipage.read()
	        except URLError as e:
	            # Format with %s rather than str.join(): join() takes a
	            # single iterable, and neither attribute is guaranteed to
	            # be a string (``code`` is an integer HTTP status).
	            if hasattr(e, 'reason'):
	                detail = "Reason: %s" % (e.reason,)
	            elif hasattr(e, 'code'):
	                detail = "Errcode: %s" % (e.code,)
	            else:
	                detail = "unknown error"

	            print("Unable to download %s from: %s! (%s)" %
	                  (api, url, detail))
	            print("%s has been skipped." % api)
	            continue

	        # Yield only after the connection has been closed, so the
	        # consumer never holds it open while processing the page.
	        yield (api, html)

	def syscalls(html):
	    """Extract the entries listed on one Cygwin API reference page.

	    The page is parsed with BeautifulSoup. The text of the first
	    ``<pre class="screen">`` element is split into lines, and every
	    non-blank line is matched against a pattern that captures its first
	    whitespace-delimited token and, when present, its second one.

	    html -- markup of a single API reference page.

	    Returns a generator of 2-tuples ``(first_token, second_token)``,
	    where ``second_token`` is None for single-token lines. Returns None,
	    after printing a notice, when the page holds no such ``<pre>``
	    element or the element is empty.
	    """
	    # First token, then optionally whitespace and a second token. The
	    # separator has to be real whitespace (\s+): the previous r'\\t'
	    # matched a literal backslash followed by "t", which never occurs,
	    # so the second group could never be captured.
	    sc_pattern = re.compile(r'(\S+)(?:\s+(\S+))?')

	    dtree = BeautifulSoup(html)

	    # find() returns None when nothing matches, so look the nodes up
	    # first and only then read ``.text``; otherwise a page without the
	    # expected markup raises AttributeError instead of being reported.
	    data_node = dtree.find('pre', attrs={'class': 'screen'})
	    title_node = dtree.find('h2', attrs={'class': 'title',
	                                         'style': 'clear: both'})

	    apidata = data_node.text if data_node is not None else None
	    apititle = title_node.text if title_node is not None else None
	    if not apititle:
	        apititle = u'(no title)'

	    if not apidata:
	        print("No API data found!")
	        return None

	    print("API: %s" % apititle)

	    # Everything below is lazy: nothing is matched until the caller
	    # iterates over the returned generator.
	    stripped = (line.strip() for line in apidata.split('\n'))
	    matches = (sc_pattern.match(line) for line in stripped)
	    return (m.groups() for m in matches if m)

	if __name__ == '__main__':
	for api, html in