Skip to content

Instantly share code, notes, and snippets.

@abutcher
Created September 28, 2013 20:51
Show Gist options
  • Select an option

  • Save abutcher/6746399 to your computer and use it in GitHub Desktop.

Select an option

Save abutcher/6746399 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import argparse
import gzip
import io
import urllib2
import uuid

from BeautifulSoup import BeautifulStoneSoup
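"""
Look for duplicate package listings in yum metadata and print them.
Packages are grouped by their location href; any file listed more than
once is printed together with every checksum recorded for it, since the
duplicate listings may carry different checksums.

Usage:
    ./validate-metadata http://some.site.com/repos/repo/repodata/primary.xml.gz
"""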
def _fetch_xml(url):
    """Download the gzip-compressed document at *url* and return it decompressed.

    The payload is decompressed in memory, so no temporary file is left
    behind in /tmp. Both the HTTP response and the gzip reader are closed
    even if reading fails.
    """
    response = urllib2.urlopen(url)
    try:
        compressed = response.read()
    finally:
        response.close()

    # Wrap the downloaded bytes in a file-like object so gzip can read them
    # without going through the filesystem.
    gz_file = gzip.GzipFile(fileobj=io.BytesIO(compressed), mode='rb')
    try:
        return gz_file.read()
    finally:
        gz_file.close()


def _checksums_by_filename(xml):
    """Return a dict mapping each package's location href to its checksums.

    Every <package> element contributes one checksum to the list stored
    under its href, so an href listed more than once ends up with more
    than one entry.
    """
    soup = BeautifulStoneSoup(xml)
    grouped = {}
    for package in soup.findAll('package'):
        # NOTE(review): checksum and location are looked up underneath
        # <version>; presumably this relies on how BeautifulStoneSoup nests
        # these elements -- confirm before changing the lookup path.
        checksum = package.version.checksum.string
        filename = package.version.location['href']
        # setdefault is a single hashed lookup; the previous
        # "not in a.keys()" test scanned a list for every package.
        grouped.setdefault(filename, []).append(checksum)
    return grouped


parser = argparse.ArgumentParser(description='Validate remote meta-data')
parser.add_argument(dest='url', action='store', metavar='URL',
                    help='Link to remote meta-data')
args = parser.parse_args()

checksums_by_file = _checksums_by_filename(_fetch_xml(args.url))
for filename, checksums in checksums_by_file.items():
    if len(checksums) > 1:
        # Single-argument parenthesised form: prints "<href> [<checksums>]"
        # exactly as the former two-item print statement did.
        print("%s %s" % (filename, [str(x) for x in checksums]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment