Last active
October 31, 2019 18:56
-
-
Save ryantuck/9723b96ad2e33a2a0fe3e7a2767b5256 to your computer and use it in GitHub Desktop.
Script to extract content from Looker Content Validator HTML table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# this is probably less elegant than just using the API to do this
# https://docs.looker.com/reference/api-and-integration/api-reference/v3.1/content#validate_content
# largely copied from https://stackoverflow.com/a/44275458
import csv
from itertools import zip_longest

from bs4 import BeautifulSoup
# find the table where all the data lives via 'inspect element' or whatever
# and save it into a file that we read here.
# should look like a <table class="table table-striped">...</table>
# read as UTF-8 explicitly so the result does not depend on the platform's
# default encoding
with open('content_validator_table.html', encoding='utf-8') as f:
    html = f.read()

soup = BeautifulSoup(html, 'html.parser')
table = soup.find('table')
if table is None:
    # fail with a readable message instead of an AttributeError on None
    raise SystemExit('no <table> found in content_validator_table.html')

# the first <tr> holds the column headings; every later <tr> is a data row
table_rows = table.find_all('tr')
headings = (
    [th.get_text().strip() for th in table_rows[0].find_all('th')]
    if table_rows
    else []
)

# pre-parse the table into a list of dicts (heading -> <td> element)
results = [
    dict(zip(headings, row.find_all('td')))
    for row in table_rows[1:]
]

# these headings contain lists of items, so we define them here so we can break
# them out later
headings_with_lists = ['Content', 'Folder', 'Model', 'Explore']


def _cell_items(cell, heading):
    """Return the <li> entries of one table cell as a list of values.

    For the 'Content' column the value is the href of the item's link; for
    every other column it is the item's text. A missing cell or a cell
    without a <ul> yields an empty list, and a 'Content' item without a
    link falls back to its text.
    """
    if cell is None:
        return []
    item_list = cell.find('ul')
    if item_list is None:
        return []
    values = []
    for li in item_list.find_all('li'):
        link = li.find('a') if heading == 'Content' else None
        values.append(link.get('href') if link is not None else li.get_text())
    return values


# transpose the various column lists into their corresponding rows, for one row
# per piece of content
final_results = []
for result in results:
    error_cell = result.get('Error')
    if error_cell is None:
        # row without an 'Error' cell (e.g. an empty or spacer <tr>): nothing
        # to report for it
        continue
    error = error_cell.get_text()
    lists = [
        _cell_items(result.get(heading), heading)
        for heading in headings_with_lists
    ]
    # zip_longest pads shorter columns with '' so that a column with fewer
    # items neither raises nor causes items in longer columns to be dropped
    for t in zip_longest(*lists, fillvalue=''):
        final_results.append([error, *t])

print(f'{len(final_results)} content validator issues exist')

# write em out to a csv
# newline='' is required by the csv module so the writer controls line
# endings itself (otherwise Windows output gets blank rows between records)
with open(
    'looker_content_validator_results.csv', 'w', newline='', encoding='utf-8'
) as f:
    writer = csv.writer(f)
    csv_headers = ['Error'] + headings_with_lists
    writer.writerow(csv_headers)
    writer.writerows(final_results)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment