Last active
April 22, 2019 09:15
-
-
Save orimanabu/f17fd43ce0c81f6cfb829d440fcd5261 to your computer and use it in GitHub Desktop.
Obtain Errata Advisory and Bugzilla URLs for RHOSP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import csv | |
import sys | |
import requests | |
import subprocess | |
import urllib.parse | |
from pprint import pprint | |
from lxml import html | |
from argparse import ArgumentParser | |
# Location of the Chrome binary used for headless rendering (macOS path).
CHROME_PATH = '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'

# Product name sent as the ``portal_product`` filter of the errata search.
PRODUCT = 'Red Hat OpenStack Platform'

# Errata search URL template. The positional placeholders are, in order:
# page number, rows per page, product, product version, architecture.
URL = (
    'https://access.redhat.com/errata/#/'
    '?q='
    '&p={}'
    '&sort=portal_publication_date%20desc'
    '&rows={}'
    '&portal_product={}'
    '&portal_product_version={}'
    '&portal_architecture={}'
)
def fetch_page_browser(url):
    """Render *url* in headless Chrome and return the dumped DOM.

    The binary at ``CHROME_PATH`` is started with ``--dump-dom`` and its
    standard output is captured.

    Args:
        url: Address of the page to render.

    Returns:
        The bytes Chrome wrote to standard output.

    Raises:
        SystemExit: With status 1 when Chrome exits with a non-zero code.
    """
    command = [CHROME_PATH, '--headless', '--disable-gpu', '--dump-dom', url]
    print(" *** {}".format(command))
    chrome = subprocess.Popen(command, stdout=subprocess.PIPE)
    # stderr is not piped, so the second element of the pair is always None.
    dom, _ = chrome.communicate()
    status = chrome.returncode
    if status == 0:
        return dom
    print('headless chrome returns {}, exit...'.format(status))
    sys.exit(1)
def fetch_page_python(url, timeout=30):
    """Download *url* with ``requests`` and return the raw response body.

    Args:
        url: Address to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole run.

    Returns:
        The response body as bytes (``Response.content``). The HTTP status
        code is not checked, so error pages are returned as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    res = requests.get(url, timeout=timeout)
    return res.content
def parse_errata_detail(url):
    """Fetch one advisory page and collect the entries listed under "Fixes".

    Each ``<li>`` of the first ``<ul>`` following the "Fixes" heading is
    processed on its own, so the link text, link target and description of
    one entry can never be paired with those of another entry (the previous
    implementation indexed three independently queried lists and could
    mispair them or raise ``IndexError``).

    Args:
        url: Address of the advisory detail page.

    Returns:
        A dict keyed by the link text of each entry (presumably the Bugzilla
        id -- verify against the page markup). Every value is a dict with
        ``'url'`` (the link's ``href``) and ``'title'`` (link text, a space,
        then the text of the list item). Empty when the page marks the
        section as ``(none)`` or no linked entries are found.
    """
    print(' ** {}'.format(url))
    doc = html.fromstring(fetch_page_python(url))

    fixes_heading = "//h2[contains(text(), 'Fixes')]"

    # Advisories without fixes show a "(none)" span instead of a list.
    none_bz = doc.xpath(fixes_heading + "/following-sibling::span/text()")
    if none_bz and none_bz[0] == '(none)':
        return {}

    result = {}
    for item in doc.xpath(fixes_heading + "//following-sibling::ul[1]/li"):
        ids = item.xpath("a/text()")
        hrefs = item.xpath("a/@href")
        if not ids or not hrefs:
            # A list item without a usable link carries no bug reference.
            continue
        bz = ids[0]
        _title = bz + ' ' + ''.join(item.xpath("text()"))
        print(' ** {}'.format(_title))
        result[bz] = {'url': hrefs[0], 'title': _title}
    return result
def get_errata_list(args):
    """Collect the errata from every page of the search results.

    Pages are requested one after another, starting at page 1, until
    ``get_errata_list_pagenated`` reports the last page.

    Args:
        args: Parsed command-line options; ``rows``, ``version`` and ``arch``
            are read to build each page URL.

    Returns:
        The dict filled in by ``get_errata_list_pagenated``.
    """
    errata = {}
    page = 0
    finished = False
    while not finished:
        page += 1
        print(' * page={}'.format(page))
        url = URL.format(page, args.rows, urllib.parse.quote(PRODUCT), args.version, args.arch)
        finished = get_errata_list_pagenated(url, errata)
    return errata
def get_errata_list_pagenated(url, errata):
    """Scrape one page of the errata search results into *errata*.

    Args:
        url: Search-result page to render with headless Chrome.
        errata: Dict updated in place. Each advisory's link text becomes a
            key whose value is a dict with ``'url'``, ``'synopsis'``,
            ``'products'`` and ``'date'``.

    Returns:
        True when no further page should be requested: the page has no
        pagination direction links, has fewer than two of them, or the
        second one carries the ``disabled`` class. False otherwise.
    """
    print(' ** {}'.format(url))
    doc = html.fromstring(fetch_page_browser(url))

    def column(label, tail):
        # Each field is read from a <span> that shares its parent with a
        # <b> label inside a table row; only the label and the trailing
        # path differ between fields.
        return doc.xpath(
            "//tbody/tr//b[contains(@class, 'ng-binding') and "
            "contains(text(), '{}')]/../span/{}".format(label, tail))

    # Kept separate from the ``url`` parameter instead of shadowing it.
    columns = (
        column('Advisory', 'a/text()'),
        column('Advisory', 'a/@ng-href'),
        column('Synopsis', 'text()'),
        column('Products', 'text()'),
        column('Publish Date', 'time/text()'),
    )
    sizes = [len(col) for col in columns]
    if len(set(sizes)) > 1:
        # Values are paired by position; report a mismatch instead of
        # aborting the whole scrape with an IndexError.
        print(' *** warning: column sizes differ {}, unmatched entries are skipped'.format(sizes))
    for advisory, link, synopsis, products, published in zip(*columns):
        errata[advisory] = {'url': link,
                            'synopsis': synopsis,
                            'products': products,
                            'date': published}

    # Evaluate the pagination query once and reuse the result.
    direction_links = doc.xpath("//div[contains(@class, 'flex-item')]//li[contains(@ng-if, 'directionLinks')]/@class")
    if not direction_links:
        return True
    print(' *** {}'.format(direction_links))
    if len(direction_links) < 2:
        # Without a second direction link there is nothing to inspect; stop
        # rather than fail on the index or keep requesting pages forever.
        return True
    return 'disabled' in direction_links[1]
def build_row_erratum_only(errata, erratum):
    """Build the output row for an erratum without Bugzilla columns.

    Args:
        errata: Dict of all errata, keyed by advisory name.
        erratum: Key of the entry to turn into a row.

    Returns:
        ``[erratum, url, synopsis, date]``. The ``'products'`` field of the
        entry is not included in the row.
    """
    entry = errata[erratum]
    return [erratum, entry['url'], entry['synopsis'], entry['date']]
def parse_errata_list(errata, args):
    """Turn the collected errata into a list of output rows.

    Errata are processed in sorted key order. With ``args.errata_only`` set,
    each erratum yields one row from ``build_row_erratum_only``. Otherwise
    the advisory page is parsed with ``parse_errata_detail`` and one row is
    emitted per entry found (sorted by key), with that entry's title and URL
    appended; an erratum with no entries still yields its erratum-only row.

    Args:
        errata: Dict of errata as filled in by ``get_errata_list``.
        args: Parsed command-line options; only ``errata_only`` is read.

    Returns:
        A list of rows, each row being a list of strings.
    """
    rows = []
    for name in sorted(errata):
        link = errata[name]['url']
        if args.errata_only:
            print(' ** {}'.format(link))
            rows.append(build_row_erratum_only(errata, name))
            continue

        bugs = parse_errata_detail(link)
        if not bugs:
            rows.append(build_row_erratum_only(errata, name))
            continue

        for bug_id in sorted(bugs):
            bug = bugs[bug_id]
            # Same leading columns as the erratum-only row, plus bug details.
            rows.append(build_row_erratum_only(errata, name) + [bug['title'], bug['url']])
    return rows
def main():
    """Parse the command line, scrape the errata and optionally write a CSV.

    The effective options are printed first, then the errata list is
    collected and expanded into rows. The rows are written to
    ``args.csvfile`` only when ``--csv`` is given; when ``--csvfile`` is
    omitted the name is derived from the version and the ``--errata-only``
    flag.
    """
    usage = 'python {} --version VERSION --arch ARCH --rows ROWS'.format(__file__)
    argparser = ArgumentParser(usage=usage)
    argparser.add_argument('--debug', help='debug', action='store_true')
    argparser.add_argument('--version', help='version', type=int, dest='version', default=13)
    argparser.add_argument('--arch', help='arch', choices=['x86_64', 'ppc64le'], default='x86_64')
    argparser.add_argument('--rows', help='rows', type=int, dest='rows', default=100)
    argparser.add_argument('--errata-only', help='errata only, not bugzilla', dest='errata_only', action='store_true')
    argparser.add_argument('--csv', help='csv', action='store_true')
    argparser.add_argument('--csvfile', help='csv file name', type=str, dest='csvfile')
    args = argparser.parse_args()

    if not args.csvfile:
        suffix = 'errata_only' if args.errata_only else 'errata_bugzilla'
        args.csvfile = 'RHOSP{}_{}.csv'.format(args.version, suffix)

    for option in ('debug', 'version', 'arch', 'rows', 'errata_only', 'csv', 'csvfile'):
        print('** args.{}: {}'.format(option, getattr(args, option)))

    errata = get_errata_list(args)
    results = parse_errata_list(errata, args)
    print('** # of erratum: {}'.format(len(errata)))
    print('** # of results: {}'.format(len(results)))

    if args.csv:
        # The csv module requires newline='' so that the writer's own line
        # terminator is not translated again by the text layer and newlines
        # embedded in quoted fields are preserved.
        with open(args.csvfile, 'w', newline='') as f:
            writer = csv.writer(f, lineterminator='\n', quoting=csv.QUOTE_ALL)
            writer.writerows(results)
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment