Skip to content

Instantly share code, notes, and snippets.

@andrewlinfoot
Last active October 21, 2020 14:09
Show Gist options
  • Save andrewlinfoot/89d74295d263826a921ae6353cae7d5f to your computer and use it in GitHub Desktop.
Save andrewlinfoot/89d74295d263826a921ae6353cae7d5f to your computer and use it in GitHub Desktop.
Puppeteer Xpath Utils
/**
 * Normalizes a value to an array: an array is returned as-is (same reference),
 * any other value is wrapped in a new single-element array.
 */
const toArray = a => {
  if (Array.isArray(a)) {
    return a;
  }
  return [a];
};
/**
 * Waits until an XPath expression matches at least one node in the page.
 *
 * The check is handed to `page.waitForFunction`, which re-runs it until it
 * returns `true`.
 *
 * @param page - Object exposing `waitForFunction` (presumably a Puppeteer
 *   `Page` — confirm at call sites).
 * @param selector - XPath expression, evaluated with `document` as context node.
 * @param options - Forwarded unchanged as the options argument of
 *   `page.waitForFunction`.
 * @returns Whatever `page.waitForFunction` resolves to.
 */
export async function waitForXpath(page, selector, options = {}) {
  // This predicate is shipped to the page, so it may only rely on browser
  // globals and on the argument it receives.
  const hasMatch = expression => {
    const firstMatch = document.evaluate(
      expression,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null,
    ).singleNodeValue;
    return firstMatch !== null;
  };

  return page.waitForFunction(hasMatch, options, selector);
}
/**
 * Evaluates an XPath expression in the page as a number (e.g. `count(//a)`).
 *
 * @param page - Object exposing `evaluate` (presumably a Puppeteer `Page` —
 *   confirm at call sites).
 * @param selector - XPath expression evaluated with `XPathResult.NUMBER_TYPE`.
 * @param contextNode - Optional context node for the expression; when it is
 *   `undefined` the whole `document` is used.
 * @returns The `numberValue` of the XPath result.
 */
export async function xpathNumber(page, selector, contextNode?) {
  // Shipped to the page: may only use browser globals and its own arguments.
  const evaluateAsNumber = (expression, node) => {
    // Fall back to the document when the caller supplied no context node
    const root = node === undefined ? document : node;
    const outcome = document.evaluate(
      expression,
      root,
      null,
      XPathResult.NUMBER_TYPE,
      null,
    );
    return outcome.numberValue;
  };

  const value = await page.evaluate(evaluateAsNumber, selector, contextNode);
  return value;
}
/**
 * Evaluates an XPath expression in the page and returns one handle per
 * matching element, in the order reported by the snapshot.
 *
 * @param page - Object exposing `evaluateHandle` (presumably a Puppeteer
 *   `Page` — confirm at call sites).
 * @param selector - XPath expression to evaluate.
 * @param contextNode - Optional context node for the expression; when it is
 *   `undefined` the whole `document` is used.
 * @returns The handles for which `asElement()` is truthy. Every other
 *   property handle of the intermediate array is disposed.
 */
export async function xpath(page, selector, contextNode?) {
  const resultsHandle = await page.evaluateHandle(
    (_selector, _contextNode) => {
      const results: unknown[] = [];
      const query = document.evaluate(
        _selector,
        // Default to document if no context node is defined
        _contextNode === undefined ? document : _contextNode,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null,
      );
      for (let i = 0; i < query.snapshotLength; i += 1) {
        results.push(query.snapshotItem(i));
      }
      return results;
    },
    selector,
    contextNode,
  );

  // Handles come from an untyped `page`, so their type is not known here.
  const result: any[] = [];
  try {
    const properties = await resultsHandle.getProperties();
    const releasePromises: Promise<unknown>[] = [];
    for (const property of properties.values()) {
      const element = property.asElement();
      if (element) {
        result.push(element);
      } else {
        // Non-element entries are of no use to the caller
        releasePromises.push(property.dispose());
      }
    }
    await Promise.all(releasePromises);
  } finally {
    // The intermediate array handle is not returned, so release it here —
    // also when reading its properties fails — instead of leaking it.
    await resultsHandle.dispose();
  }
  return result;
}
/** Boolean XPath operator used to join sub-expressions inside a predicate. */
type xpathJoinCondition = 'or' | 'and';
/**
 * Builds a bracketed XPath predicate from a list of items.
 *
 * @param items - Raw items to turn into sub-expressions.
 * @param transform - Mapper handed directly to `Array.prototype.map`, so it
 *   receives `(item, index, array)` and returns one sub-expression.
 * @param condition - Operator placed (space-padded) between sub-expressions.
 * @returns `[expr1 <condition> expr2 ...]`; an empty `items` yields `[]`.
 */
export function getXpathPredicateForItems(
  items: string[],
  transform,
  condition: xpathJoinCondition,
) {
  const separator = ` ${condition} `;
  const expressions = items.map(transform);
  const joined = expressions.join(separator);
  return '[' + joined + ']';
}
/**
 * Replaces the last occurrence of `character` in `string` with `replacement`.
 *
 * @param string - Text to search.
 * @param character - Substring to look for (searched from the end).
 * @param replacement - Text inserted in place of the last occurrence.
 * @returns The updated text, or `string` unchanged when `character` does not
 *   occur in it.
 */
function replaceLastCharacter(
  string: string,
  character: string,
  replacement: string,
): string {
  const position = string.lastIndexOf(character);
  if (position === -1) {
    // Nothing to replace. Without this guard the -1 index would make the
    // replacement get prepended to the untouched input.
    return string;
  }
  return (
    string.slice(0, position) +
    replacement +
    string.slice(position + character.length)
  );
}
/**
 * Merges several bracketed XPath predicates into a single predicate.
 *
 * Each input such as `[a or b]` is turned into a parenthesized group
 * `(a or b)` by replacing its first `[` and its last `]`; the groups are then
 * joined with `condition` and wrapped in one pair of brackets.
 *
 * @param predicates - Bracketed predicates, e.g. `['[a]', '[b or c]']`.
 * @param condition - Operator placed (space-padded) between the groups.
 * @returns A single predicate, e.g. `[(a) and (b or c)]`.
 * @throws TypeError when `predicates` is empty, since there is nothing to
 *   combine and `[]` is not a valid XPath predicate.
 */
export function combinePredicates(
  predicates: string[],
  condition: xpathJoinCondition,
) {
  if (predicates.length === 0) {
    throw new TypeError('combinePredicates requires at least one predicate');
  }
  const groups = predicates.map(predicate =>
    // Swap only the outermost brackets: the last `]` and the first `[`
    replaceLastCharacter(predicate, ']', ')').replace('[', '('),
  );
  return `[${groups.join(` ${condition} `)}]`;
}
/**
 * Builds an XPath selecting the parent of every text node that matches any of
 * the given strings according to `caseInsensitiveMatch`.
 *
 * @param textItems - One string or a list of strings; alternatives are
 *   combined with `or`.
 * @returns An expression of the form `//text()[...]/..`.
 */
export function getNodesWithTextSelector(textItems: string | string[]) {
  const exactMatchPredicate = getXpathPredicateForItems(
    toArray(textItems),
    caseInsensitiveMatch,
    'or',
  );
  return '//text()' + exactMatchPredicate + '/..';
}
/**
 * Builds an XPath selecting the parent of every text node that contains any
 * of the given strings according to `caseInsensitiveContains`.
 *
 * @param textItems - One string or a list of strings; alternatives are
 *   combined with `or`.
 * @returns An expression of the form `//text()[...]/..`.
 */
export function getNodesContainingTextSelector(textItems: string | string[]) {
  const containsPredicate = getXpathPredicateForItems(
    toArray(textItems),
    caseInsensitiveContains,
    'or',
  );
  return '//text()' + containsPredicate + '/..';
}
/**
 * Renders `value` as an XPath 1.0 string literal.
 *
 * XPath 1.0 literals have no escape sequences, so a value holding both quote
 * kinds is assembled with `concat()` from double-quoted pieces and `'"'`.
 * Values without a double quote keep the plain `"..."` form.
 */
function toXpathStringLiteral(value: string): string {
  if (!value.includes('"')) {
    return `"${value}"`;
  }
  if (!value.includes("'")) {
    return `'${value}'`;
  }
  const pieces = value.split('"').map(piece => `"${piece}"`);
  return `concat(${pieces.join(`, '"', `)})`;
}
/**
 * Builds an XPath boolean expression that is true when the context node's
 * text, after `normalizeText` and `translateToLowerCase`, equals `text`
 * lowercased.
 *
 * NOTE(review): the search text is lowercased with JavaScript's
 * `toLowerCase()`, whereas `translateToLowerCase` only maps `A`-`Z`;
 * uppercase non-ASCII letters in the page will therefore not match.
 *
 * @param text - Text to look for; it may contain single and/or double quotes.
 * @returns The XPath expression (without surrounding brackets).
 */
export function caseInsensitiveMatch(text: string): string {
  const literal = toXpathStringLiteral(text.toLowerCase());
  return `${translateToLowerCase(normalizeText('.'))}=${literal}`;
}
/**
 * Builds an XPath `contains(...)` expression that is true when the context
 * node's text, after `replaceNonBreakingSpace` and `translateToLowerCase`,
 * contains `text` lowercased.
 *
 * @param text - Text to look for; it may contain single and/or double quotes.
 * @returns The XPath expression (without surrounding brackets).
 */
export function caseInsensitiveContains(text: string): string {
  const literal = toXpathStringLiteral(text.toLowerCase());
  return `contains(${translateToLowerCase(
    replaceNonBreakingSpace('.'),
  )}, ${literal})`;
}
/**
 * Wraps an XPath expression in a `translate()` call that maps the letters
 * `A`-`Z` to `a`-`z`. Other characters are left as they are.
 *
 * @param text - XPath expression whose string value should be lowercased.
 * @returns The wrapping `translate(...)` expression.
 */
export function translateToLowerCase(text: string) {
  const upperCaseLetters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
  const lowerCaseLetters = 'abcdefghijklmnopqrstuvwxyz';
  return `translate(${text},"${upperCaseLetters}","${lowerCaseLetters}")`;
}
/**
 * Wraps an XPath expression in `normalize-space()`. Non-breaking spaces
 * (&nbsp;) are first turned into regular spaces via `replaceNonBreakingSpace`.
 *
 * @param text - XPath expression whose string value should be normalized.
 * @returns The wrapping `normalize-space(...)` expression.
 */
export function normalizeText(text: string) {
  const withRegularSpaces = replaceNonBreakingSpace(text);
  return 'normalize-space(' + withRegularSpaces + ')';
}
/**
 * Wraps an XPath expression in a `translate()` call that turns every
 * non-breaking space (U+00A0) into a regular space.
 *
 * @param text - XPath expression whose string value should be cleaned.
 * @returns The wrapping `translate(...)` expression.
 */
export function replaceNonBreakingSpace(text: string) {
  // Note: the second translate() argument is a non-breaking space character
  // (option + space on mac), written as an escape so it stays visible in the
  // source. See: https://goo.gl/BhMFDA
  const nonBreakingSpace = '\u00A0';
  return 'translate(' + text + ',"' + nonBreakingSpace + '", " ")';
}
/**
 * Waits until the page contains a text node matching any of the given
 * strings, using the selector built by `getNodesWithTextSelector`.
 *
 * @param page - Forwarded to `waitForXpath`.
 * @param textStrings - One string or a list of alternative strings.
 * @param waitOptions - Forwarded to `waitForXpath` as its options.
 * @returns A promise that resolves (with no value) once `waitForXpath` does.
 */
export async function waitForText(
  page,
  textStrings: string[] | string,
  waitOptions = {},
) {
  await waitForXpath(
    page,
    getNodesWithTextSelector(textStrings),
    waitOptions,
  );
}
/**
 * Waits until the page contains a text node that includes any of the given
 * strings, using the selector built by `getNodesContainingTextSelector`.
 *
 * @param page - Forwarded to `waitForXpath`.
 * @param textStrings - One string or a list of alternative strings.
 * @param waitOptions - Forwarded to `waitForXpath` as its options.
 * @returns A promise that resolves (with no value) once `waitForXpath` does.
 */
export async function waitForContainsText(
  page,
  textStrings: string[] | string,
  waitOptions = {},
) {
  await waitForXpath(
    page,
    getNodesContainingTextSelector(textStrings),
    waitOptions,
  );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment