|
javascript: |
|
async function main() { |
|
/**
 * Owns the results table for one run of the data-quality checks: injects the
 * table and its stylesheet into the page, initialises the page handler, and
 * creates one RunningCheck row per configured check.
 */
class DQSChecks {
  /**
   * @param {Object} opts
   * @param {Array<{name: string, query: string}>} opts.checks
   * @param {AbstractPageHandler} opts.pageHandler
   */
  constructor({ checks, pageHandler }) {
    this.checks = checks;
    this.pageHandler = pageHandler;
    this.table = document.createElement('table');
    this.table.classList.add('dqs-results');

    this.style = document.createElement('style');
    this.style.textContent = `
      .dqs-running-check td { padding: 0 8px; }
      .dqs-running-check:nth-child(even) td { background: #eee; }
      /** Override the weird stuff we do on mobile */
      .dqs-results td { display: table-cell !important; }
      .dqs-results tr { display: table-row !important; }
      button.dqs-run-again { border: 0; background: none; cursor: pointer;}
    `;

    // A constructor cannot await init(); attach a handler so a failure is
    // logged instead of surfacing as an unhandled promise rejection.
    this.init().catch((err) => console.error(err));
  }

  /**
   * Mounts the UI, waits for the page handler to initialise, then starts
   * every check. On failure an explanation is written into the table and the
   * original error is rethrown.
   */
  async init() {
    this.mount();
    this.table.textContent = 'Loading...';
    try {
      await this.pageHandler.init();
    } catch (e) {
      if (e instanceof ListSeedsError) {
        const url = new URL(e.url, location.origin);
        url.searchParams.set('debug', 'true');
        this.table.innerHTML = `
          Error: Unable to fetch list seeds:
          Possibly related to <a href="https://github.com/internetarchive/openlibrary/issues/5415">issue #5415</a> for more information; see if you
          get the same error when visiting <a href="${url}">/seeds.json</a>. If so, give that issue a thumbs up.
        `;
      } else {
        // textContent (not innerHTML): the error text is not trusted markup.
        this.table.textContent = `Error: ${e}`;
      }
      throw e;
    }

    this.table.textContent = '';
    this.runningChecks = this.checks.map((check) => new RunningCheck(check, this.pageHandler));
    for (const rc of this.runningChecks) {
      this.table.appendChild(rc.view);
    }
  }

  /** Adds the stylesheet to <head> and the table to the handler's insertion point. */
  mount() {
    document.head.appendChild(this.style);
    this.pageHandler.insertionPoint.prepend(this.table);
  }

  /** Removes the stylesheet and the table from the document. */
  unmount() {
    this.style.remove();
    this.table.remove();
  }
}
|
|
|
/**
 * The data-quality checks to run. `name` is the label shown in the results
 * table; `query` is the search clause appended to the page's own query. The
 * number of results for that combined query is displayed as the number of
 * records "failing" the check.
 */
const CHECKS = [
  { name: 'At least 1 subject', query: 'NOT subject:*' },
  { name: 'At least 1 author', query: 'NOT author_key:*' },
  { name: 'At least 1 edition', query: 'edition_count:0' },
  { name: 'Has work (orphaned)', query: 'key:*M' },
  { name: 'Has publication year', query: 'NOT publish_year:*' },
  { name: 'Has cover', query: 'NOT cover_i:*' },
  { name: 'Has language', query: 'NOT language:*' },
  { name: 'Has publisher', query: 'NOT publisher:*' },
  { name: 'At least 2 editions', query: 'edition_count:[0 TO 1]' },
  { name: 'Has dewey decimal', query: 'NOT ddc:*' },
  { name: 'Has LoC classification', query: 'NOT lcc:*' },
  { name: 'Has number of pages', query: 'NOT number_of_pages_median:*' },
];
|
|
|
/**
 * One row of the results table: runs a single check's query through the page
 * handler and renders its loading, error, or result state into `this.view`.
 */
class RunningCheck {
  /**
   * @param {{name: string, query: string}} check
   * @param {AbstractPageHandler} pageHandler
   */
  constructor(check, pageHandler) {
    this.check = check;
    this.pageHandler = pageHandler;
    this.view = document.createElement('tr');
    this.view.classList.add('dqs-running-check');
    this.results = null;
    this.done = false;
    this.error = null;

    this.updateView();
    this.run();
  }

  /**
   * Fetches the result count for this check (with `rows=0`) and re-renders
   * the row. Network failures, non-2xx responses, unparsable bodies and
   * bodies without a numeric `numFound` all end up in `this.error`.
   */
  async run() {
    this.error = null;
    const url = new URL(this.pageHandler.getSearchUrl(this.check.query, true), location.origin);
    url.searchParams.set('rows', '0');

    let textResp = null;
    try {
      const res = await fetch(url);
      textResp = await res.text();
      if (!res.ok) {
        throw new Error(`HTTP ${res.status}`);
      }
      const parsed = JSON.parse(textResp);
      // An error payload is valid JSON but has no count; without this guard
      // the row would render "undefined failing".
      if (typeof parsed?.numFound !== 'number') {
        throw new Error('Response has no numFound');
      }
      this.results = parsed;
    } catch (e) {
      // Prefer the raw response body (when there is one) as the tooltip text.
      this.error = textResp || e;
    }

    this.done = true;
    this.updateView();
  }

  /** Re-renders the row for the current state and wires up the "run again" button. */
  updateView() {
    const nameCell = `<td><b>${this.check.name}</b></td>`;
    const rerunCell = '<td><button class="dqs-run-again" title="Run again"><img src="https://upload.wikimedia.org/wikipedia/commons/2/2a/Gnome-view-refresh.svg" width="20px"></button></td>';

    if (!this.done) {
      this.view.innerHTML = `
        ${nameCell}
        <td colspan="3">Loading...</td>
      `;
    } else if (this.error) {
      const href = this.pageHandler.getSearchUrl(this.check.query, false);
      this.view.innerHTML = `
        ${nameCell}
        <td colspan="2"><a href="${href}">Error</a></td>
        ${rerunCell}
      `;
      this.view.querySelector('td:nth-child(2)').title = String(this.error);
    } else {
      const totalCount = this.pageHandler.totalCount;
      const failing = this.results.numFound;
      const passing = totalCount - failing;
      // A zero or unparsable total would otherwise render "NaN%".
      const hasTotal = Number.isFinite(totalCount) && totalCount > 0;
      const percent = hasTotal ? Math.floor(passing / totalCount * 100) : 0;
      const percentLabel = hasTotal ? `${percent}%` : 'n/a';
      const href = this.pageHandler.getSearchUrl(this.check.query, false);
      this.view.innerHTML = `
        ${nameCell}
        <td>
          <meter
            value="${percent}"
            min="0"
            max="100"
            title="${passing} of ${totalCount}"
          ></meter> ${percentLabel}
        </td>
        <td style="text-align:right"><a href="${href}">${failing} failing</a></td>
        ${rerunCell}
      `;
    }

    // The button only exists in the two finished states. innerHTML replaces
    // the button on every render, so the listener is attached afresh.
    this.view.querySelector('.dqs-run-again')?.addEventListener('click', (ev) => {
      ev.preventDefault();
      this.done = false;
      this.updateView();
      this.run();
    }, { once: true });
  }
}
|
|
|
/**
 * Base class for page handlers. A handler says whether it applies to the
 * current page (`test`), how many records the page covers (`totalCount`),
 * and how to build a search URL restricted to those records.
 */
class AbstractPageHandler {
  /** @returns {boolean} whether this handler applies to the current page */
  test() { throw new Error('Not implemented'); }

  /** @returns {number} number of records the current page covers */
  get totalCount() { throw new Error('Not implemented'); }

  /** @returns {string} search query selecting the current page's records */
  get curQuery() { throw new Error('Not implemented'); }

  /** @returns {Element} element the results table is prepended to */
  get insertionPoint() { return document.getElementById('contentBody'); }

  /** Async setup hook; the base implementation does nothing. */
  async init() {}

  /**
   * Builds a relative search URL for the current query plus `extraQuery`.
   *
   * @param {string} [extraQuery] extra clause appended to `curQuery`
   * @param {boolean} [json] target `/search.json` instead of `/search`
   * @returns {string}
   */
  getSearchUrl(extraQuery = '', json = true) {
    // Drop empty parts so the query never carries a dangling space.
    const q = [this.curQuery, extraQuery].filter(Boolean).join(' ');
    return `/search${json ? '.json' : ''}?${new URLSearchParams({ q })}`;
  }
}
|
|
|
/** Page handler for the `/search` results page. */
class SearchPageHandler extends AbstractPageHandler {
  /** @returns {boolean} true when the current path is exactly `/search` */
  test() {
    return location.pathname === '/search';
  }

  /**
   * Result count parsed from the page header, with thousands separators
   * removed. NaN when the header element is not present.
   *
   * @returns {number}
   */
  get totalCount() {
    const countEl = document.querySelector('#contentHead .darkgreen');
    return parseFloat(countEl?.textContent.trim().replaceAll(',', '') ?? '');
  }

  get curQuery() {
    /**
     * Because the default search can also have stuff in url parameters, we don't try to convert
     * those back into a lucene query. This only supports `getSearchUrl`.
     */
    throw new Error('Not supported');
  }

  /**
   * Copies the current page URL (keeping all of its parameters) and appends
   * `extraQuery` to its `q` parameter.
   *
   * @param {string} [extraQuery] extra clause appended to the current `q`
   * @param {boolean} [json] target `/search.json` instead of `/search`
   * @returns {URL}
   */
  getSearchUrl(extraQuery = '', json = false) {
    const url = new URL(location.toString());
    if (json) {
      // Set the path directly rather than string-replacing over the whole URL.
      url.pathname = url.pathname.replace('/search', '/search.json');
    }
    // `q` is absent when the search is driven purely by other URL parameters.
    const queryPart = (url.searchParams.get('q') ?? '')
      /** This is an OL bug :/ */
      .replace(/author:/g, 'author_name:');
    url.searchParams.set('q', [queryPart, extraQuery].filter(Boolean).join(' '));
    return url;
  }
}
|
|
|
/** Page handler for a single author's page (`/authors/<key>/...`). */
class AuthorPageHandler extends AbstractPageHandler {
  /**
   * @returns {boolean} true only when the path carries an author key; a bare
   *   `/authors` path would otherwise produce the query `author_key:undefined`.
   */
  test() {
    return /^\/authors\/[^/]+/.test(location.pathname);
  }

  /**
   * Work count parsed from the leading number of the `#works h2` heading,
   * with thousands separators removed. NaN when the heading is not present.
   *
   * @returns {number}
   */
  get totalCount() {
    const heading = document.querySelector('#works h2');
    return parseFloat(heading?.textContent.trim().replaceAll(',', '') ?? '');
  }

  /** @returns {string} query matching the author key taken from the URL path */
  get curQuery() {
    const authorKey = location.pathname.split('/')[2];
    return `author_key:${authorKey}`;
  }
}
|
|
|
/** Raised when a list's `seeds.json` cannot be fetched; `url` is the address that failed. */
class ListSeedsError extends Error {
  /**
   * @param {string} url address of the seeds.json request that failed
   * @param {{cause?: unknown}} [options] forwarded to the Error constructor
   */
  constructor(url, options) {
    super(`Unable to fetch list seeds from ${url}`, options);
    this.name = 'ListSeedsError';
    this.url = url;
  }
}
|
|
|
/** Page handler for list pages; the records are the list's seeds. */
class ListPageHandler extends AbstractPageHandler {
  /** @returns {boolean} true when the path contains `/lists/` */
  test() {
    return location.pathname.includes('/lists/');
  }

  /**
   * Fetches the list's `seeds.json` into `this.seeds`.
   *
   * @throws {ListSeedsError} when the request fails, returns a non-2xx
   *   status, or the body is not valid JSON; the underlying error is
   *   attached as `cause`.
   */
  async init() {
    // Anchor on the list key so `/lists/OL1L`, `/lists/OL1L/` and
    // `/lists/OL1L/some-title` all resolve to `/lists/OL1L/seeds.json`.
    const listRoot = location.pathname.match(/^(.*?\/lists\/OL\d+L)(?:\/|$)/);
    const url = listRoot
      ? `${listRoot[1]}/seeds.json`
      : location.pathname.replace(/\/[^/]+$/, '/seeds.json');
    try {
      const res = await fetch(url);
      if (!res.ok) {
        throw new Error(`HTTP ${res.status}`);
      }
      this.seeds = await res.json();
    } catch (e) {
      const err = new ListSeedsError(url);
      err.cause = e;
      throw err;
    }
  }

  /** @returns {number} the `size` field of the fetched seeds document */
  get totalCount() {
    return this.seeds.size;
  }

  /**
   * Builds a query selecting the list's edition and work seeds, grouped per
   * search field. Seeds of any other type are ignored.
   *
   * @returns {string} e.g. `(edition_key:(OL1M OR OL3M) key:(/works/OL2W))`
   */
  get curQuery() {
    const groups = new Map();
    for (const entry of this.seeds.entries) {
      let field;
      let value;
      if (entry.type === 'edition') {
        field = 'edition_key';
        value = entry.url.split('/')[2];
      } else if (entry.type === 'work') {
        field = 'key';
        value = entry.url;
      } else {
        continue;
      }
      if (!groups.has(field)) {
        groups.set(field, []);
      }
      groups.get(field).push(value);
    }
    const clauses = [...groups].map(([field, values]) => `${field}:(${values.join(' OR ')})`);
    return `(${clauses.join(' ')})`;
  }

  get insertionPoint() {
    /* When on my own lists, want it in .details-content. Public lists should use the default. */
    return document.querySelector('.details-content') || super.insertionPoint;
  }
}
|
|
|
|
|
/** Thrown when no page handler accepts the current page. */
class UnsupportedPageError extends Error {
  /**
   * @param {string} [message]
   * @param {{cause?: unknown}} [options] forwarded to the Error constructor
   */
  constructor(message = 'Data Quality Score does not support this page', options) {
    super(message, options);
    this.name = 'UnsupportedPageError';
  }
}
|
|
|
/**
 * Asks openlibrary.org how many search results match `query`, without
 * fetching any rows.
 *
 * @param {string} query search query string
 * @returns {Promise<number>} the `numFound` field of the response
 * @throws {Error} when the response status is not 2xx
 */
async function getResultsCount(query) {
  const params = new URLSearchParams({ q: query, rows: '0' });
  const res = await fetch(`https://openlibrary.org/search.json?${params}`);
  if (!res.ok) {
    throw new Error(`Search request failed: HTTP ${res.status}`);
  }
  const { numFound } = await res.json();
  return numFound;
}
|
|
|
/**
 * Candidate page handlers. Order matters: the first handler whose `test()`
 * returns true is the one used.
 */
const handlers = [
  new SearchPageHandler(),
  new AuthorPageHandler(),
  new ListPageHandler(),
];
|
|
|
/**
 * Picks the first handler that accepts the current page, removes the UI left
 * by a previous run (if any), and starts a new DQSChecks instance stored on
 * `window.DQS_GLOBAL`.
 *
 * @throws {UnsupportedPageError} when no handler accepts the current page
 */
async function start() {
  const handler = handlers.find((h) => h.test());
  if (!handler) {
    throw new UnsupportedPageError();
  }

  // Running the bookmarklet twice must not leave two tables on the page.
  window.DQS_GLOBAL?.unmount();

  window.DQS_GLOBAL = new DQSChecks({
    checks: CHECKS,
    pageHandler: handler,
  });
}
|
|
|
|
|
// Entry point: run the checks and tell the user about any failure. Search,
// author and list pages all have a registered handler, so the
// unsupported-page message names all three.
try {
  await start();
} catch (e) {
  console.error(e);
  if (e instanceof UnsupportedPageError) {
    alert('Data Quality Score only works on search, author, or list pages.');
  } else {
    alert(e);
  }
}
|
} |
|
|
|
main(); |