nfaggian · July 6, 2013 09:32
diff --git a/XML crawl b/XML crawl
 from xml.etree import cElementTree
 from urlparse import urljoin

 import requests

 # XML namespace URIs, interpolated into "{uri}name" qualified names for the
 # ElementTree tag and attribute lookups performed by crawl().
 # thredds: namespace of the catalog elements looked up (catalogRef, dataset)
 thredds = "http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0"
 # xlink: namespace of the href attribute read from each catalogRef element
 xlink = "http://www.w3.org/1999/xlink"

 def crawl(catalog, timeout=None):
     """Recursively yield the dataset elements reachable from a catalog URL.

     The XML document at *catalog* is downloaded and parsed. Every
     ``catalogRef`` element found in it is followed first (depth first),
     then the ``dataset`` elements of the document itself are yielded.

     Args:
         catalog: URL of the catalog XML document to start from.
         timeout: Optional timeout (seconds) handed to ``requests.get`` for
             every download. ``None`` (the default) sets no timeout, which
             matches the previous behaviour.

     Yields:
         The ElementTree element of each ``dataset`` tag that carries a
         ``urlPath`` attribute; datasets of referenced catalogs come before
         those of the referencing catalog.

     Raises:
         requests.exceptions.HTTPError: if a catalog URL answers with a
             4xx/5xx status code.
     """
     response = requests.get(catalog, timeout=timeout)
     # Fail loudly on HTTP errors instead of feeding an error page to the
     # XML parser, which would raise an unrelated parse error.
     response.raise_for_status()
     root = cElementTree.fromstring(response.content)

     # depth first traversal: exhaust every referenced catalog before
     # yielding the datasets declared in this one
     for ref in root.iterfind(".//{%s}catalogRef" % thredds):
         href = ref.attrib["{%s}href" % xlink]
         # urljoin resolves a relative href against the current catalog URL
         for dataset in crawl(urljoin(catalog, href), timeout=timeout):
             yield dataset

     for dataset in root.iterfind(".//{%s}dataset[@urlPath]" % thredds):
         yield dataset
	from xml.etree import cElementTree
	from urlparse import urljoin

	import requests

	# XML namespace URIs, interpolated into "{uri}name" qualified names for the
	# ElementTree tag and attribute lookups performed by crawl().
	# thredds: namespace of the catalog elements looked up (catalogRef, dataset)
	thredds = "http://www.unidata.ucar.edu/namespaces/thredds/InvCatalog/v1.0"
	# xlink: namespace of the href attribute read from each catalogRef element
	xlink = "http://www.w3.org/1999/xlink"

	def crawl(catalog, timeout=None):
	    """Recursively yield the dataset elements reachable from a catalog URL.

	    The XML document at *catalog* is downloaded and parsed. Every
	    ``catalogRef`` element found in it is followed first (depth first),
	    then the ``dataset`` elements of the document itself are yielded.

	    Args:
	        catalog: URL of the catalog XML document to start from.
	        timeout: Optional timeout (seconds) handed to ``requests.get`` for
	            every download. ``None`` (the default) sets no timeout, which
	            matches the previous behaviour.

	    Yields:
	        The ElementTree element of each ``dataset`` tag that carries a
	        ``urlPath`` attribute; datasets of referenced catalogs come before
	        those of the referencing catalog.

	    Raises:
	        requests.exceptions.HTTPError: if a catalog URL answers with a
	            4xx/5xx status code.
	    """
	    response = requests.get(catalog, timeout=timeout)
	    # Fail loudly on HTTP errors instead of feeding an error page to the
	    # XML parser, which would raise an unrelated parse error.
	    response.raise_for_status()
	    root = cElementTree.fromstring(response.content)

	    # depth first traversal: exhaust every referenced catalog before
	    # yielding the datasets declared in this one
	    for ref in root.iterfind(".//{%s}catalogRef" % thredds):
	        href = ref.attrib["{%s}href" % xlink]
	        # urljoin resolves a relative href against the current catalog URL
	        for dataset in crawl(urljoin(catalog, href), timeout=timeout):
	            yield dataset

	    for dataset in root.iterfind(".//{%s}dataset[@urlPath]" % thredds):
	        yield dataset
No results found