Skip to content

Instantly share code, notes, and snippets.

@eric
Created November 16, 2010 09:49
Show Gist options
  • Save eric/701636 to your computer and use it in GitHub Desktop.
Save eric/701636 to your computer and use it in GitHub Desktop.
ReaderJSController.originalArticleFinder()
/* ReaderJSController.originalArticleFinder(): */
{
"contentDocument": [object HTMLDocument],
"didSearchForArticleNode": true,
"article": [object Object],
"didSearchForPrependedArticleNode": true,
"prependedArticle": null,
"_cachedScrollY": 0,
"_cachedScrollX": 0,
"_elementsWithCachedBoundingRects": [object HTMLDivElement],
[object HTMLDivElement],
[object HTMLDivElement],
[object HTMLDivElement],
[object HTMLDivElement],
[object HTMLTableCellElement],
[object HTMLTableRowElement],
[object HTMLTableSectionElement],
[object HTMLTableElement],
[object HTMLTableCellElement],
[object HTMLTableRowElement],
[object HTMLTableSectionElement],
[object HTMLHeadingElement],
[object HTMLTableElement],
[object HTMLTableSectionElement],
[object HTMLTableRowElement],
[object HTMLTableCellElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLHeadingElement],
[object HTMLImageElement],
"_cachedContentTextStyle": [object CSSStyleDeclaration],
"pageNumber": 1,
"_articleIsLTR": true,
"_nextPageURL": null,
"_adoptableArticle": [object HTMLDivElement],
"_articleTitle": 11 / 12 / 2010,
"_articleTitleElement": [object HTMLHeadingElement],
"isReaderModeAvailable": function isReaderModeAvailable() {
this.cacheWindowScrollPosition();
var article = this.articleNode();
return article != null;
},
"prepareToTransitionToReader": function prepareToTransitionToReader() {
clearCachedElementBoundingRects();
this.cacheWindowScrollPosition();
this.nextPageURL();
this.articleIsLTR();
this.adoptableArticle();
},
"nextPageURL": function nextPageURL() {
if (this._nextPageURL === undefined) this._nextPageURL = this.nextPageURLString();
return this._nextPageURL;
},
"articleNode": function articleNode() {
if (!this.didSearchForArticleNode) {
this.article = this.findArticle();
this.didSearchForArticleNode = true;
if (this.article) this.articleIsLTR();
}
return this.article ? this.article.element : null;
},
"prependedArticleNode": function prependedArticleNode() {
if (!this.didSearchForArticleNode) this.articleNode();
if (!this.didSearchForPrependedArticleNode) {
this.prependedArticle = this.findPrependedArticle();
this.didSearchForPrependedArticleNode = true;
}
return this.prependedArticle ? this.prependedArticle.element : null;
},
"cacheWindowScrollPosition": function cacheWindowScrollPosition() {
this._cachedScrollY = window.scrollY;
this._cachedScrollX = window.scrollX;
},
"contentTextStyle": function contentTextStyle() {
if (this._cachedContentTextStyle) return this._cachedContentTextStyle;
this._cachedContentTextStyle = contentTextStyleForNode(this.contentDocument, this.articleNode(), false);
if (!this._cachedContentTextStyle) this._cachedContentTextStyle = getComputedStyle(this.articleNode());
return this._cachedContentTextStyle;
},
"commaCountIsLessThan": function commaCountIsLessThan(node, limit) {
var count = 0;
var textContent = node.textContent;
var i = -1;
while (count < limit && (i = textContent.indexOf(',', i + 1)) >= 0)
count++;
return count < limit;
},
"calculateLinkDensity": function calculateLinkDensity(element) {
var textLength = removeWhitespace(element.textContent).length;
if (!textLength) return 0;
var links = element.querySelectorAll("a");
var linkCharacterCount = 0;
for (var i = 0; i < links.length; i++)
linkCharacterCount += removeWhitespace(links[i].textContent).length;
return linkCharacterCount / textLength;
},
"shouldPruneElement": function shouldPruneElement(element, originalElement) {
const MaxInputToParagraphRatio = 0.33;
const MaxPositiveWeightLinkDensity = 0.5;
const MaxStandardLinkDensity = 0.2;
const MinimumTextLength = 25;
const MinimumAverageImageArea = 200 * 200;
if (!element.parentElement) return false;
if (element.tagName !== "OBJECT" && element.tagName !== "EMBED") {
var childCount = element.childNodes.length;
var hasElementOrTextNodeChild = false;
for (var i = 0; i < childCount; i++) {
var node = element.childNodes[i];
var nodeType = node.nodeType;
if (nodeType === Node.ELEMENT_NODE || (nodeType === Node.TEXT_NODE && !isNodeWhitespace(node))) {
hasElementOrTextNodeChild = true;
break;
}
}
if (!hasElementOrTextNodeChild) return true;
}
var classIdWeight = 0;
if (originalElement) {
if (PositiveRegEx.test(originalElement.className)) classIdWeight++;
if (PositiveRegEx.test(originalElement.id)) classIdWeight++;
if (NegativeRegEx.test(originalElement.className)) classIdWeight--;
if (NegativeRegEx.test(originalElement.id)) classIdWeight--;
}
if (classIdWeight < 0) return true;
if (element.tagName === "UL") {
if (originalElement.querySelector("iframe") && originalElement.querySelector("script")) return true;
return false;
}
if (element.tagName === "OBJECT") {
const PlugInsToKeepRegEx = /youtube|vimeo|dailymotion/;
var embedElement = element.querySelector("embed[src]");
if (embedElement && PlugInsToKeepRegEx.test(embedElement.src)) return false;
var dataAttribute = element.getAttribute("data");
if (dataAttribute && PlugInsToKeepRegEx.test(dataAttribute)) return false;
return true;
}
if (element.childElementCount === 1) {
var childElement = element.firstElementChild;
if (childElement.tagName === "A") return false;
if (childElement.tagName === "SPAN" && childElement.className === "converted-anchor" && elementHasAncestorWithTagName(childElement, "TABLE")) return false;
}
var imageElements = element.querySelectorAll("img");
var imageElementCount = imageElements.length;
if (imageElementCount) {
var averageImageArea = 0;
for (var i = 0; i < imageElementCount; i++) {
var originalImage = imageElements[i].originalElement;
if (!isElementVisible(originalImage)) continue;
var originalRect = cachedElementBoundingRect(originalImage);
averageImageArea += (originalRect.width / imageElementCount) * (originalRect.height / imageElementCount);
}
if (averageImageArea > MinimumAverageImageArea) return false;
}
if (!this.commaCountIsLessThan(element, 10)) return false;
var p = element.querySelectorAll("p").length;
var br = element.querySelectorAll("br").length;
var numParagraphs = p + Math.floor(br / 2);
if (imageElementCount > numParagraphs) return true;
if (element.querySelectorAll("li").length > numParagraphs) return true;
if (element.querySelectorAll("input").length / numParagraphs > MaxInputToParagraphRatio) return true;
if (element.textContent.length < MinimumTextLength && (imageElementCount != 1)) return true;
if (element.querySelector("embed")) return true;
var linkDensity = this.calculateLinkDensity(element);
if (classIdWeight >= 1 && linkDensity > MaxPositiveWeightLinkDensity) return true;
if (classIdWeight < 1 && linkDensity > MaxStandardLinkDensity) return true;
if (element.tagName === "TABLE") {
var textLength = removeWhitespace(element.innerText).length;
var originalTextLength = removeWhitespace(originalElement.innerText).length;
if (textLength <= (originalTextLength * 0.5)) return true;
}
return false;
},
"wordCountIsLessThan": function wordCountIsLessThan(node, limit) {
var count = 0;
var textContent = node.textContent;
var i = -1;
while ((i = textContent.indexOf(' ', i + 1)) >= 0 && count < limit)
count++;
return count < limit;
},
"adoptableArticle": function adoptableArticle() {
if (this._adoptableArticle !== undefined) {
return this._adoptableArticle.cloneNode(true);
}
var rootElement = this.articleNode();
this._adoptableArticle = rootElement ? rootElement.cloneNode(true) : null;
if (!this._adoptableArticle) return this._adoptableArticle;
var articleToPrepend = this.prependedArticleNode();
var cleanedPrependNode = null;
this._adoptableArticle = this.cleanArticleNode(rootElement, this._adoptableArticle, false)
if (articleToPrepend) {
var cleanedPrependNode = this.cleanArticleNode(articleToPrepend, articleToPrepend.cloneNode(true), true);
if (cleanedPrependNode) this._adoptableArticle.insertBefore(cleanedPrependNode, this._adoptableArticle.firstChild);
}
return this._adoptableArticle;
},
"cleanArticleNode": function cleanArticleNode(originalArticleNode, clonedArticleNode, allowedToReturnNull) {
const tagNamesToAlwaysPrune = {
"FORM": 1,
"IFRAME": 1,
"SCRIPT": 1,
"STYLE": 1,
"LINK": 1
};
const tagNamesToConsiderPruning = {
"DIV": 1,
"TABLE": 1,
"OBJECT": 1,
"UL": 1
};
const tagNamesAffectingFontStyle = {
"I": 1,
"EM": 1
};
const tagNamesAffectingFontWeight = {
"B": 1,
"STRONG": 1,
"H1": 1,
"H2": 1,
"H3": 1,
"H4": 1,
"H5": 1,
"H6": 1
};
var elementsToConsiderPruning = [];
var depthInFloat = 0;
var depthInTable = 0;
var depthInFontStyle = 0;
var depthInFontWeight = 0;
var currentElement = originalArticleNode;
var view = currentElement.ownerDocument.defaultView;
var currentCloneElement = clonedArticleNode;
var articleTitle = this.articleTitle();
var articleTitleElement = this._articleTitleElement;
function incrementDepthLevels(delta) {
if (depthInFloat) depthInFloat += delta;
if (depthInTable) depthInTable += delta;
if (depthInFontStyle) depthInFontStyle += delta;
if (depthInFontWeight) depthInFontWeight += delta;
};
function updateDepthLevelsAfterSiblingTraversal() {
if (depthInFloat === 1) depthInFloat = 0;
if (depthInTable === 1) depthInTable = 0;
if (depthInFontStyle === 1) depthInFontStyle = 0;
if (depthInFontWeight === 1) depthInFontWeight = 0;
};
while (currentElement) {
var prunedElement = null;
var tagName = currentCloneElement.tagName;
currentCloneElement.originalElement = currentElement;
if (tagName in tagNamesToAlwaysPrune) prunedElement = currentCloneElement;
if (!prunedElement && currentElement === articleTitleElement) prunedElement = currentCloneElement;
if (!prunedElement && (tagName === "H1" || tagName === "H2")) {
var distanceFromoriginalArticleNodeTop = currentElement.offsetTop - originalArticleNode.offsetTop;
if (distanceFromoriginalArticleNodeTop < HeaderMinimumDistanceFromArticleTop) {
var headerText = currentElement.innerText;
var maxDistanceToConsiderSimilar = headerText.length * HeaderLevenshteinDistanceToLengthRatio;
if (levenshteinDistance(articleTitle, headerText) <= maxDistanceToConsiderSimilar) prunedElement = currentCloneElement;
}
}
var computedStyle;
if (!prunedElement) computedStyle = getComputedStyle(currentElement);
if (!prunedElement && tagName === "DIV" && currentCloneElement.parentNode) {
var elements = currentElement.querySelectorAll("a, blockquote, dl, div, img, ol, p, pre, table, ul");
var inFloat = depthInFloat || computedStyle["float"] !== "none";
if (!inFloat && !elements.length) {
var parentNode = currentCloneElement.parentNode;
var replacementNode = this.contentDocument.createElement("p");
while (currentCloneElement.firstChild) {
var child = currentCloneElement.firstChild;
replacementNode.appendChild(child);
}
parentNode.replaceChild(replacementNode, currentCloneElement);
currentCloneElement = replacementNode;
currentCloneElement.originalElement = currentElement;
tagName = currentCloneElement.tagName;
}
}
if (!prunedElement && currentCloneElement.parentNode && tagName in tagNamesToConsiderPruning) elementsToConsiderPruning.push(currentCloneElement);
if (!prunedElement) {
if (computedStyle.display === "none") prunedElement = currentCloneElement;
else if (currentElement !== originalArticleNode && tagName !== "IMG" && !depthInFloat && computedStyle["float"] !== "none" && (cachedElementBoundingRect(currentElement).height >= FloatMinimumHeight || currentElement.childElementCount > 1)) depthInFloat = 1;
}
if (!prunedElement) {
var attributes = currentCloneElement.attributes;
for (var i = 0; i < attributes.length; i++) {
var attributeName = attributes[i].nodeName;
if (AttributesToRemoveRegEx.test(attributeName)) {
currentCloneElement.removeAttribute(attributeName);
i--;
}
}
if (!depthInFontStyle && computedStyle.fontStyle !== "normal") {
if (!(tagName in tagNamesAffectingFontStyle)) currentCloneElement.style.fontStyle = computedStyle.fontStyle;
depthInFontStyle = 1;
}
if (!depthInFontWeight && computedStyle.fontWeight !== "normal") {
if (!(tagName in tagNamesAffectingFontWeight)) currentCloneElement.style.fontWeight = computedStyle.fontWeight;
depthInFontWeight = 1;
}
if (depthInFloat) {
if (depthInFloat === 1) {
if (cachedElementBoundingRect(currentElement).width === cachedElementBoundingRect(originalArticleNode).width) currentCloneElement.setAttribute("class", "float full-width");
else currentCloneElement.setAttribute("class", "float " + computedStyle["float"]);
}
var widthValue = currentElement.style.getPropertyValue("width");
if (widthValue) currentCloneElement.style.width = widthValue;
else {
var rules = view.getMatchedCSSRules(currentElement, "", true);
if (rules) {
for (var i = rules.length - 1; i >= 0; i--) {
widthValue = rules[i].style.getPropertyValue("width");
if (widthValue) {
currentCloneElement.style.width = widthValue;
break;
}
}
}
}
if (depthInFloat === 1 && !widthValue) currentCloneElement.style.width = cachedElementBoundingRect(currentElement).width + "px";
}
if (tagName === "TABLE") {
if (!depthInTable) depthInTable = 1;
} else if (tagName === "IMG") {
currentCloneElement.removeAttribute("border");
currentCloneElement.removeAttribute("hspace");
currentCloneElement.removeAttribute("vspace");
currentCloneElement.removeAttribute("align");
if (!depthInFloat) {
var imageBoundingRect = cachedElementBoundingRect(currentElement);
if (imageBoundingRect.width < ImageSizeTiny && imageBoundingRect.height < ImageSizeTiny) currentCloneElement.setAttribute("class", "reader-image-tiny");
else if ((imageBoundingRect.width / originalArticleNode.clientWidth) > ImageWidthToParentWidthRatio) {
currentCloneElement.setAttribute("class", "reader-image-large");
}
} else {
currentCloneElement.style.float = computedStyle.float;
}
} else if (tagName === "FONT") {
currentCloneElement.removeAttribute("size");
currentCloneElement.removeAttribute("face");
currentCloneElement.removeAttribute("color");
} else if (tagName === "A" && currentCloneElement.parentNode) {
var href = currentCloneElement.getAttribute("href");
if (href && href.length && (href[0] === "#" || href.substring(0, 11) === "javascript:")) {
if (!depthInTable && !currentCloneElement.childElementCount && currentCloneElement.parentElement.childElementCount === 1) {
var xPathResult = this.contentDocument.evaluate("text()", currentCloneElement.parentElement, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
if (!xPathResult.snapshotLength) prunedElement = currentCloneElement;
}
if (!prunedElement) {
var replacementNode = this.contentDocument.createElement("span");
if (currentCloneElement.childElementCount === 1 && currentCloneElement.firstElementChild.tagName === "IMG") {
var imageElement = currentCloneElement.firstElementChild;
if (imageElement.width > AnchorImageMinimumWidth && imageElement.height > AnchorImageMinimumHeight) replacementNode.setAttribute("class", "converted-image-anchor");
}
if (!replacementNode.className) replacementNode.setAttribute("class", "converted-anchor");
while (currentCloneElement.firstChild)
replacementNode.appendChild(currentCloneElement.firstChild);
currentCloneElement.parentNode.replaceChild(replacementNode, currentCloneElement);
currentCloneElement = replacementNode;
}
}
}
}
var firstElementChild = prunedElement ? null : currentElement.firstElementChild;
if (firstElementChild) {
currentElement = firstElementChild;
currentCloneElement = currentCloneElement.firstElementChild;
incrementDepthLevels(1);
} else {
var nextElementSibling;
while (currentElement !== originalArticleNode && !(nextElementSibling = currentElement.nextElementSibling)) {
currentElement = currentElement.parentElement;
currentCloneElement = currentCloneElement.parentElement;
incrementDepthLevels(-1);
}
if (currentElement === originalArticleNode) {
if (prunedElement) {
if (prunedElement.parentElement) prunedElement.parentElement.removeChild(prunedElement);
else if (allowedToReturnNull) return null;
}
break;
}
currentElement = nextElementSibling;
currentCloneElement = currentCloneElement.nextElementSibling;
updateDepthLevelsAfterSiblingTraversal();
}
if (prunedElement && !prunedElement.parentElement && shouldDoSOmething) return null;
if (prunedElement) {
if (prunedElement.parentElement) prunedElement.parentElement.removeChild(prunedElement);
else if (allowedToReturnNull) return null;
}
}
for (var i = elementsToConsiderPruning.length - 1; i >= 0; i--) {
var element = elementsToConsiderPruning[i];
if (element.parentNode && this.shouldPruneElement(element, element.originalElement)) element.parentNode.removeChild(element);
}
var floatElements = this._adoptableArticle.querySelectorAll(".float");
for (var i = 0; i < floatElements.length; i++) {
var pruneFloatedElement = false;
var floatElement = floatElements[i];
if (!pruneFloatedElement) {
var anchors = floatElement.querySelectorAll("a, span.converted-image-anchor");
var replacedAnchors = floatElement.querySelectorAll("span.converted-anchor");
pruneFloatedElement = floatElement.parentNode && replacedAnchors.length > anchors.length;
}
if (!pruneFloatedElement) {
var plugInsInClonedElement = floatElement.querySelectorAll("embed, object").length;
var plugInsInOriginalElement = floatElement.originalElement.querySelectorAll("embed, object").length;
if (!plugInsInClonedElement && plugInsInOriginalElement) pruneFloatedElement = true;
}
if (pruneFloatedElement) floatElement.parentNode.removeChild(floatElement);
}
if (allowedToReturnNull && !removeWhitespace(clonedArticleNode.innerText).length) return null;
return clonedArticleNode;
},
"articleTitle": function articleTitle() {
if (this._articleTitle !== undefined) return this._articleTitle;
const HeaderMaximumDistance = 500;
const HeaderMinimumTextLength = 8;
const HeaderMinimumFontSize = 12;
const HeaderFontSizeBonusMinimumRatio = 1.1;
const HeaderFontSizeBonusMultiplier = 1.25;
const HeaderBonusRegEx = /header|title|headline/i;
const HeaderRegexBonusMultiplier = 1.5;
const HeaderLargeImageCheckOffsetY = 150;
const HeaderLargeImageMinimumHeight = 300;
const HeaderLargeImageMinimumWidthRatio = 0.5;
const HeaderMaximumDOMDistance = 8;
const HeaderMinimumFontSizeDifference = 1.5;
var articleRect = cachedElementBoundingRect(this.prependedArticleNode() ? this.prependedArticleNode() : this.articleNode());
var articleCenterX = articleRect.left + (articleRect.width / 2);
var articleTopY = articleRect.top;
var articleAdjustedTopY = articleTopY;
var potentialLeadingImage = this.contentDocument.elementFromPoint(articleCenterX, articleTopY - HeaderLargeImageCheckOffsetY);
if (potentialLeadingImage && potentialLeadingImage.tagName === "IMG") {
var imageRect = cachedElementBoundingRect(potentialLeadingImage);
if (imageRect.height >= HeaderLargeImageMinimumHeight && imageRect.width >= articleRect.width * HeaderLargeImageMinimumWidthRatio) articleAdjustedTopY = imageRect.top;
}
var allHeaders = this.contentDocument.querySelectorAll("h1, h2, h3, h4, h5, .headline, .article_title, #hn-headline");
var bestHeader;
for (var i = 0; i < allHeaders.length; i++) {
var header = allHeaders[i];
var headerRect = cachedElementBoundingRect(header);
var headerCenterX = headerRect.left + (headerRect.width / 2);
var headerCenterY = headerRect.top + (headerRect.height / 2);
var deltaX = headerCenterX - articleCenterX;
var deltaY = headerCenterY - articleAdjustedTopY;
var distance = Math.sqrt((deltaX * deltaX) + (deltaY * deltaY));
var headerScore = Math.max(HeaderMaximumDistance - distance, 0);
if (distance > HeaderMaximumDistance) continue;
if (headerRect.width < articleRect.width * 0.5) continue;
if (headerCenterX < articleRect.left || headerCenterX > articleRect.right) continue;
var headerFontSize = fontSizeFromComputedStyle(getComputedStyle(header));
if (headerFontSize < HeaderMinimumFontSize) continue;
var headerText = header.innerText;
if (headerText.length < HeaderMinimumTextLength) continue;
headerScore *= 1 + TitleCandidateDepthScoreMultiplier * elementDepth(header);
headerScore *= (headerFontSize / BaseFontSize);
var fontSize = parseInt(this.contentTextStyle().fontSize);
if (parseInt(headerFontSize) > fontSize * HeaderFontSizeBonusMinimumRatio) headerScore *= HeaderFontSizeBonusMultiplier;
if (HeaderBonusRegEx.test(header.className) || HeaderBonusRegEx.test(header.id)) headerScore *= HeaderRegexBonusMultiplier;
if (!bestHeader || headerScore > bestHeader.headerScore) {
bestHeader = header;
bestHeader.headerScore = headerScore;
bestHeader.headerText = headerText;
}
}
if (bestHeader && domDistance(bestHeader, this.articleNode(), HeaderMaximumDOMDistance + 1) > HeaderMaximumDOMDistance) {
if (parseInt(getComputedStyle(bestHeader).fontSize) < HeaderMinimumFontSizeDifference * fontSize) bestHeader = null;
}
if (bestHeader) {
this._articleTitle = bestHeader.headerText;
this._articleTitleElement = bestHeader;
}
if (!this._articleTitle) this._articleTitle = this.contentDocument.title;
return this._articleTitle;
},
"articleIsLTR": function articleIsLTR() {
if (this._articleIsLTR === undefined) {
var computedStyle = getComputedStyle(this.articleNode());
this._articleIsLTR = computedStyle ? computedStyle.direction === "ltr" : true;
}
return this._articleIsLTR;
},
"findSuggestedCandidate": function findSuggestedCandidate() {
var route = this.suggestedRouteToArticle;
if (!route || !route.length) return null;
var node;
var i;
for (i = route.length - 1; i >= 0; i--) {
if (route[i].id) {
node = this.contentDocument.getElementById(route[i].id);
if (node) break;
}
}
i++;
if (!node) node = this.contentDocument;
while (i < route.length) {
var step = route[i];
var child = node.nodeType === Node.DOCUMENT_NODE ? node.documentElement : node.firstElementChild;
for (var j = 1; child && j < step.index; j++) {
child = child.nextElementSibling;
}
if (!child) return null;
if (child.tagName !== step.tagName) return null;
if (step.className && child.className !== step.className) return null;
node = child;
i++;
}
if (!isElementVisible(node)) return null;
return new CandidateElement(node, this.contentDocument);
},
"findArticle": function findArticle() {
if (BlacklistedHostsRegEx.test(this.contentDocument.location.hostname)) return null;
var suggestedCandidate = this.findSuggestedCandidate();
var candidateElements = this.findCandidateElements();
if (!candidateElements || !candidateElements.length) return suggestedCandidate;
if (suggestedCandidate && suggestedCandidate.basicScore() >= ReaderMinimumScore) return suggestedCandidate;
var highestScoringElement = this.highestScoringCandidateFromCandidates(candidateElements);
if (highestScoringElement.finalScore() < ReaderMinimumScore) return suggestedCandidate;
if (highestScoringElement.shouldDisqualifyDueToScoreDensity()) return null;
if (highestScoringElement.shouldDisqualifyDueToHorizontalRuleDensity()) return null;
if (highestScoringElement.shouldDisqualifyDueToHeaderDensity()) return null;
if (highestScoringElement.shouldDisqualifyDueToSimilarElements(candidateElements)) return null;
return highestScoringElement;
},
"findPrependedArticle": function findPrependedArticle() {
if (!this.article) return null;
for (var i = 0, candidateSearchScope = this.article.element; i < 3 && candidateSearchScope; i++, candidateSearchScope = candidateSearchScope.parentNode) {
var candidateElements = this.findPrependedArticleCandidateElements(candidateSearchScope);
if (!candidateElements || !candidateElements.length) continue;
var sortedCandidateElements = this.sortCandidateElementsInDescendingScoreOrder(candidateElements);
var highestScoringCandidate;
for (var candidateIndex = 0; candidateIndex < sortedCandidateElements.length; candidateIndex++) {
highestScoringCandidate = sortedCandidateElements[candidateIndex];
if (!highestScoringCandidate || !highestScoringCandidate.basicScore()) break;
if (highestScoringCandidate.shouldDisqualifyDueToScoreDensity()) continue;
if (highestScoringCandidate.shouldDisqualifyDueToHorizontalRuleDensity()) continue;
if (highestScoringCandidate.shouldDisqualifyDueToHeaderDensity()) continue;
if (cachedElementBoundingRect(highestScoringCandidate.element).height < PrependedArticleCandidateMinimumHeight && cachedElementBoundingRect(this.article.element).width != cachedElementBoundingRect(highestScoringCandidate.element).width) continue;
var textNodeStyle = contentTextStyleForNode(this.contentDocument, highestScoringCandidate.element, true);
if (!textNodeStyle) continue;
if (textNodeStyle.fontFamily !== this.contentTextStyle().fontFamily || textNodeStyle.fontSize !== this.contentTextStyle().fontSize) continue;
if (highestScoringCandidate) return highestScoringCandidate;
}
}
return null;
},
"highestScoringCandidateFromCandidates": function highestScoringCandidateFromCandidates(candidateElements) {
var highestScore = 0;
var highestScoringElement = null;
for (var i = 0; i < candidateElements.length; i++) {
var candidateElement = candidateElements[i];
var score = candidateElement.basicScore();
if (score >= highestScore) {
highestScore = score;
highestScoringElement = candidateElement;
}
}
return highestScoringElement;
},
"sortCandidateElementsInDescendingScoreOrder": function sortCandidateElementsInDescendingScoreOrder(candidateElements) {
function sortByScore(candidate1, candidate2) {
if (candidate1.basicScore() != candidate2.basicScore()) return candidate2.basicScore() - candidate1.basicScore();
return candidate2.depth() - candidate1.depth();
}
return candidateElements.sort(sortByScore);
},
"findCandidateElements": function findCandidateElements() {
const MaximumCandidateDetectionTimeInterval = 1000;
var findCandidateElementsTimeoutDate = Date.now() + MaximumCandidateDetectionTimeInterval;
var elements = this.contentDocument.getElementsByTagName("*");
var candidateElements = [];
var elementsLength = elements.length;
for (var i = 0; i < elementsLength; i++) {
var element = elements[i];
if (CandidateTagNamesToIgnore[element.tagName]) continue;
var candidate = CandidateElement.candidateIfElementIsViable(element, this.contentDocument);
if (candidate) candidateElements.push(candidate);
if (Date.now() > findCandidateElementsTimeoutDate) {
console.assert(false, "ReaderArticleFinder aborting CandidateElement detection due to timeout");
candidateElements = [];
break;
}
}
return candidateElements;
},
"findPrependedArticleCandidateElements": function findPrependedArticleCandidateElements(searchScope) {
if (!this.article) return [];
if (!searchScope) searchScope = this.article.element;
var xPathQuery = "preceding-sibling::*/descendant-or-self::*";
var xPathResults = this.contentDocument.evaluate(xPathQuery, searchScope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
var possibleCandidateCount = xPathResults.snapshotLength;
var candidateElements = [];
for (var i = 0; i < possibleCandidateCount; i++) {
var element = xPathResults.snapshotItem(i);
if (CandidateTagNamesToIgnore[element.tagName]) continue;
var candidate = CandidateElement.prependedArticleCandidateIfElementIsViable(element, this.article, this.contentDocument);
if (candidate) candidateElements.push(candidate);
}
return candidateElements;
},
"nextPageURLString": function nextPageURLString() {
if (!this.article) return null;
var bestLink;
var bestLinkScore = 0;
var searchScope = this.article.element;
if (searchScope.parentNode && getComputedStyle(searchScope).display === "inline") searchScope = searchScope.parentNode;
var possibleSearchScope = searchScope;
var minimumBottomOffset = cachedElementBoundingRect(searchScope).bottom + LinkMaxVerticalDistanceFromArticle;
while (isElementNode(possibleSearchScope) && cachedElementBoundingRect(possibleSearchScope).bottom <= minimumBottomOffset)
possibleSearchScope = possibleSearchScope.parentNode;
if (isElementNode(possibleSearchScope) && possibleSearchScope != searchScope) searchScope = possibleSearchScope;
var anchorElements = this.contentDocument.evaluate(LinkCandidateXPathQuery, searchScope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
var numberOfLinks = anchorElements.snapshotLength;
for (var i = 0; i < numberOfLinks; i++) {
var link = anchorElements.snapshotItem(i);
var score = this.scoreNextPageLinkCandidate(link);
if (score > bestLinkScore) {
bestLink = link;
bestLinkScore = score;
}
}
return bestLink ? bestLink.href : null;
},
"scoreNextPageLinkCandidate": function scoreNextPageLinkCandidate(link) {
function isNextOrdinal(referenceString, linkString, linkText, pageNumber) {
if (linkString.substring(0, referenceString.length) === referenceString) {
linkString = linkString.substring(referenceString.length);
referenceString = "";
}
var linkOrdinal = linkString.lastInteger();
if (isNaN(linkOrdinal)) return false;
var referenceOrdinal = referenceString ? referenceString.lastInteger() : NaN;
if (isNaN(referenceOrdinal) || referenceOrdinal >= MaximumExactIntegralValue) referenceOrdinal = pageNumber;
if (linkOrdinal == referenceOrdinal) return linkText.lastInteger() === referenceOrdinal + 1;
return linkOrdinal === referenceOrdinal + 1;
}
function parametersFromSearch(search) {
var map = {};
var parameters = search.substring(1).split("&");
for (var i = 0; i < parameters.length; i++) {
var parameter = parameters[i];
var equalsIndex = parameter.indexOf("=");
if (equalsIndex === -1) map[parameter] = null;
else map[parameter.substring(0, equalsIndex)] = parameter.substring(equalsIndex + 1);
}
return map;
}
var referenceLocation = this.contentDocument.location;
if (link.host !== referenceLocation.host) return 0;
if (link.pathname === referenceLocation.pathname && link.search === referenceLocation.search) return 0;
if (link.toString().indexOf("#") != -1) return 0;
if (!isElementVisible(link)) return 0;
var linkBoundingRect = cachedElementBoundingRect(link);
var articleBoundingRect = cachedElementBoundingRect(this.article.element);
var verticalDistanceFromArticle = Math.max(0, Math.max(articleBoundingRect.top - (linkBoundingRect.top + linkBoundingRect.height), linkBoundingRect.top - (articleBoundingRect.top + articleBoundingRect.height)));
if (verticalDistanceFromArticle > LinkMaxVerticalDistanceFromArticle) return 0;
var horizontalDistanceFromArticle = Math.max(0, Math.max(articleBoundingRect.left - (linkBoundingRect.left + linkBoundingRect.width), linkBoundingRect.left - (articleBoundingRect.left + articleBoundingRect.width)));
if (horizontalDistanceFromArticle > 0) return 0;
var linkPathComponents = link.pathname.substring(1).split("/");
if (!linkPathComponents[linkPathComponents.length - 1]) linkPathComponents.pop();
var referencePathComponents = referenceLocation.pathname.substring(1).split("/");
if (!referencePathComponents[referencePathComponents.length - 1]) referencePathComponents.pop();
if (linkPathComponents.length < referencePathComponents.length) return 0;
var mismatchCount = 0;
var nextOrdinalMatchValue = 0;
var linkText = link.textContent;
for (var i = 0; i < linkPathComponents.length; i++) {
var linkComponent = linkPathComponents[i];
var referenceComponent = i < referencePathComponents.length ? referencePathComponents[i] : "";
if (referenceComponent !== linkComponent) {
if (i < referencePathComponents.length - 2) return 0;
if (linkComponent.length >= referenceComponent.length) {
var commonSuffixLength = 0;
while (linkComponent[linkComponent.length - 1 - commonSuffixLength] === referenceComponent[referenceComponent.length - 1 - commonSuffixLength])
commonSuffixLength++;
if (commonSuffixLength) {
linkComponent = linkComponent.substring(0, linkComponent.length - commonSuffixLength);
referenceComponent = referenceComponent.substring(0, referenceComponent.length - commonSuffixLength);
}
}
if (isNextOrdinal(referenceComponent, linkComponent, linkText, this.pageNumber)) nextOrdinalMatchValue = Math.pow(LinkNextOrdinalValueBase, (i - linkPathComponents.length + 1));
else mismatchCount++;
}
if (mismatchCount > 1) return 0;
}
if (link.search) {
linkParameters = parametersFromSearch(link.search);
referenceParameters = parametersFromSearch(referenceLocation.search);
for (var key in linkParameters) {
var linkValue = linkParameters[key];
var referenceValue = key in referenceParameters ? referenceParameters[key] : null;
if (referenceValue !== linkValue) {
if (referenceValue === null) referenceValue = "";
if (linkValue === null) linkValue = "";
if (linkValue.length < referenceValue.length) mismatchCount++;
else if (isNextOrdinal(referenceValue, linkValue, linkText, this.pageNumber)) nextOrdinalMatchValue = Math.max(nextOrdinalMatchValue, 1 / LinkNextOrdinalValueBase);
else mismatchCount++;
}
}
}
if (!nextOrdinalMatchValue) return 0;
var score = LinkMatchWeight * (Math.pow(LinkMismatchValueBase, -mismatchCount) + nextOrdinalMatchValue) + LinkVerticalDistanceFromArticleWeight * verticalDistanceFromArticle / LinkMaxVerticalDistanceFromArticle;
if (link.parentNode.tagName === "LI") score += LinkListItemBonus;
var linkText = link.innerText;
if (LinkNextMatchRegEx.test(linkText)) score += LinkNextMatchBonus;
if (LinkPageMatchRegEx.test(linkText)) score += LinkPageMatchBonus;
if (LinkContinueMatchRegEx.test(linkText)) score += LinkContinueMatchBonus;
return score;
},
"isReaderModeAvailable": function isReaderModeAvailable() {
this.cacheWindowScrollPosition();
var article = this.articleNode();
return article != null;
},
/**
 * Primes the finder's memoized state ahead of a switch to Reader.
 *
 * Clears the cached element bounding rects, re-caches the window scroll
 * position, then invokes nextPageURL(), articleIsLTR() and adoptableArticle()
 * purely for their caching side effects (their return values are discarded;
 * each stores its result on this._nextPageURL, this._articleIsLTR and
 * this._adoptableArticle respectively).
 */
"prepareToTransitionToReader": function prepareToTransitionToReader() {
// Rects are dropped before anything below measures elements again.
clearCachedElementBoundingRects();
this.cacheWindowScrollPosition();
this.nextPageURL();
this.articleIsLTR();
this.adoptableArticle();
},
"nextPageURL": function nextPageURL() {
if (this._nextPageURL === undefined) this._nextPageURL = this.nextPageURLString();
return this._nextPageURL;
},
"articleNode": function articleNode() {
if (!this.didSearchForArticleNode) {
this.article = this.findArticle();
this.didSearchForArticleNode = true;
if (this.article) this.articleIsLTR();
}
return this.article ? this.article.element : null;
},
"prependedArticleNode": function prependedArticleNode() {
if (!this.didSearchForArticleNode) this.articleNode();
if (!this.didSearchForPrependedArticleNode) {
this.prependedArticle = this.findPrependedArticle();
this.didSearchForPrependedArticleNode = true;
}
return this.prependedArticle ? this.prependedArticle.element : null;
},
"cacheWindowScrollPosition": function cacheWindowScrollPosition() {
this._cachedScrollY = window.scrollY;
this._cachedScrollX = window.scrollX;
},
"contentTextStyle": function contentTextStyle() {
if (this._cachedContentTextStyle) return this._cachedContentTextStyle;
this._cachedContentTextStyle = contentTextStyleForNode(this.contentDocument, this.articleNode(), false);
if (!this._cachedContentTextStyle) this._cachedContentTextStyle = getComputedStyle(this.articleNode());
return this._cachedContentTextStyle;
},
"commaCountIsLessThan": function commaCountIsLessThan(node, limit) {
var count = 0;
var textContent = node.textContent;
var i = -1;
while (count < limit && (i = textContent.indexOf(',', i + 1)) >= 0)
count++;
return count < limit;
},
"calculateLinkDensity": function calculateLinkDensity(element) {
var textLength = removeWhitespace(element.textContent).length;
if (!textLength) return 0;
var links = element.querySelectorAll("a");
var linkCharacterCount = 0;
for (var i = 0; i < links.length; i++)
linkCharacterCount += removeWhitespace(links[i].textContent).length;
return linkCharacterCount / textLength;
},
"shouldPruneElement": function shouldPruneElement(element, originalElement) {
const MaxInputToParagraphRatio = 0.33;
const MaxPositiveWeightLinkDensity = 0.5;
const MaxStandardLinkDensity = 0.2;
const MinimumTextLength = 25;
const MinimumAverageImageArea = 200 * 200;
if (!element.parentElement) return false;
if (element.tagName !== "OBJECT" && element.tagName !== "EMBED") {
var childCount = element.childNodes.length;
var hasElementOrTextNodeChild = false;
for (var i = 0; i < childCount; i++) {
var node = element.childNodes[i];
var nodeType = node.nodeType;
if (nodeType === Node.ELEMENT_NODE || (nodeType === Node.TEXT_NODE && !isNodeWhitespace(node))) {
hasElementOrTextNodeChild = true;
break;
}
}
if (!hasElementOrTextNodeChild) return true;
}
var classIdWeight = 0;
if (originalElement) {
if (PositiveRegEx.test(originalElement.className)) classIdWeight++;
if (PositiveRegEx.test(originalElement.id)) classIdWeight++;
if (NegativeRegEx.test(originalElement.className)) classIdWeight--;
if (NegativeRegEx.test(originalElement.id)) classIdWeight--;
}
if (classIdWeight < 0) return true;
if (element.tagName === "UL") {
if (originalElement.querySelector("iframe") && originalElement.querySelector("script")) return true;
return false;
}
if (element.tagName === "OBJECT") {
const PlugInsToKeepRegEx = /youtube|vimeo|dailymotion/;
var embedElement = element.querySelector("embed[src]");
if (embedElement && PlugInsToKeepRegEx.test(embedElement.src)) return false;
var dataAttribute = element.getAttribute("data");
if (dataAttribute && PlugInsToKeepRegEx.test(dataAttribute)) return false;
return true;
}
if (element.childElementCount === 1) {
var childElement = element.firstElementChild;
if (childElement.tagName === "A") return false;
if (childElement.tagName === "SPAN" && childElement.className === "converted-anchor" && elementHasAncestorWithTagName(childElement, "TABLE")) return false;
}
var imageElements = element.querySelectorAll("img");
var imageElementCount = imageElements.length;
if (imageElementCount) {
var averageImageArea = 0;
for (var i = 0; i < imageElementCount; i++) {
var originalImage = imageElements[i].originalElement;
if (!isElementVisible(originalImage)) continue;
var originalRect = cachedElementBoundingRect(originalImage);
averageImageArea += (originalRect.width / imageElementCount) * (originalRect.height / imageElementCount);
}
if (averageImageArea > MinimumAverageImageArea) return false;
}
if (!this.commaCountIsLessThan(element, 10)) return false;
var p = element.querySelectorAll("p").length;
var br = element.querySelectorAll("br").length;
var numParagraphs = p + Math.floor(br / 2);
if (imageElementCount > numParagraphs) return true;
if (element.querySelectorAll("li").length > numParagraphs) return true;
if (element.querySelectorAll("input").length / numParagraphs > MaxInputToParagraphRatio) return true;
if (element.textContent.length < MinimumTextLength && (imageElementCount != 1)) return true;
if (element.querySelector("embed")) return true;
var linkDensity = this.calculateLinkDensity(element);
if (classIdWeight >= 1 && linkDensity > MaxPositiveWeightLinkDensity) return true;
if (classIdWeight < 1 && linkDensity > MaxStandardLinkDensity) return true;
if (element.tagName === "TABLE") {
var textLength = removeWhitespace(element.innerText).length;
var originalTextLength = removeWhitespace(originalElement.innerText).length;
if (textLength <= (originalTextLength * 0.5)) return true;
}
return false;
},
"wordCountIsLessThan": function wordCountIsLessThan(node, limit) {
var count = 0;
var textContent = node.textContent;
var i = -1;
while ((i = textContent.indexOf(' ', i + 1)) >= 0 && count < limit)
count++;
return count < limit;
},
"adoptableArticle": function adoptableArticle() {
if (this._adoptableArticle !== undefined) {
return this._adoptableArticle.cloneNode(true);
}
var rootElement = this.articleNode();
this._adoptableArticle = rootElement ? rootElement.cloneNode(true) : null;
if (!this._adoptableArticle) return this._adoptableArticle;
var articleToPrepend = this.prependedArticleNode();
var cleanedPrependNode = null;
this._adoptableArticle = this.cleanArticleNode(rootElement, this._adoptableArticle, false)
if (articleToPrepend) {
var cleanedPrependNode = this.cleanArticleNode(articleToPrepend, articleToPrepend.cloneNode(true), true);
if (cleanedPrependNode) this._adoptableArticle.insertBefore(cleanedPrependNode, this._adoptableArticle.firstChild);
}
return this._adoptableArticle;
},
"cleanArticleNode": function cleanArticleNode(originalArticleNode, clonedArticleNode, allowedToReturnNull) {
const tagNamesToAlwaysPrune = {
"FORM": 1,
"IFRAME": 1,
"SCRIPT": 1,
"STYLE": 1,
"LINK": 1
};
const tagNamesToConsiderPruning = {
"DIV": 1,
"TABLE": 1,
"OBJECT": 1,
"UL": 1
};
const tagNamesAffectingFontStyle = {
"I": 1,
"EM": 1
};
const tagNamesAffectingFontWeight = {
"B": 1,
"STRONG": 1,
"H1": 1,
"H2": 1,
"H3": 1,
"H4": 1,
"H5": 1,
"H6": 1
};
var elementsToConsiderPruning = [];
var depthInFloat = 0;
var depthInTable = 0;
var depthInFontStyle = 0;
var depthInFontWeight = 0;
var currentElement = originalArticleNode;
var view = currentElement.ownerDocument.defaultView;
var currentCloneElement = clonedArticleNode;
var articleTitle = this.articleTitle();
var articleTitleElement = this._articleTitleElement;
function incrementDepthLevels(delta) {
if (depthInFloat) depthInFloat += delta;
if (depthInTable) depthInTable += delta;
if (depthInFontStyle) depthInFontStyle += delta;
if (depthInFontWeight) depthInFontWeight += delta;
};
function updateDepthLevelsAfterSiblingTraversal() {
if (depthInFloat === 1) depthInFloat = 0;
if (depthInTable === 1) depthInTable = 0;
if (depthInFontStyle === 1) depthInFontStyle = 0;
if (depthInFontWeight === 1) depthInFontWeight = 0;
};
while (currentElement) {
var prunedElement = null;
var tagName = currentCloneElement.tagName;
currentCloneElement.originalElement = currentElement;
if (tagName in tagNamesToAlwaysPrune) prunedElement = currentCloneElement;
if (!prunedElement && currentElement === articleTitleElement) prunedElement = currentCloneElement;
if (!prunedElement && (tagName === "H1" || tagName === "H2")) {
var distanceFromoriginalArticleNodeTop = currentElement.offsetTop - originalArticleNode.offsetTop;
if (distanceFromoriginalArticleNodeTop < HeaderMinimumDistanceFromArticleTop) {
var headerText = currentElement.innerText;
var maxDistanceToConsiderSimilar = headerText.length * HeaderLevenshteinDistanceToLengthRatio;
if (levenshteinDistance(articleTitle, headerText) <= maxDistanceToConsiderSimilar) prunedElement = currentCloneElement;
}
}
var computedStyle;
if (!prunedElement) computedStyle = getComputedStyle(currentElement);
if (!prunedElement && tagName === "DIV" && currentCloneElement.parentNode) {
var elements = currentElement.querySelectorAll("a, blockquote, dl, div, img, ol, p, pre, table, ul");
var inFloat = depthInFloat || computedStyle["float"] !== "none";
if (!inFloat && !elements.length) {
var parentNode = currentCloneElement.parentNode;
var replacementNode = this.contentDocument.createElement("p");
while (currentCloneElement.firstChild) {
var child = currentCloneElement.firstChild;
replacementNode.appendChild(child);
}
parentNode.replaceChild(replacementNode, currentCloneElement);
currentCloneElement = replacementNode;
currentCloneElement.originalElement = currentElement;
tagName = currentCloneElement.tagName;
}
}
if (!prunedElement && currentCloneElement.parentNode && tagName in tagNamesToConsiderPruning) elementsToConsiderPruning.push(currentCloneElement);
if (!prunedElement) {
if (computedStyle.display === "none") prunedElement = currentCloneElement;
else if (currentElement !== originalArticleNode && tagName !== "IMG" && !depthInFloat && computedStyle["float"] !== "none" && (cachedElementBoundingRect(currentElement).height >= FloatMinimumHeight || currentElement.childElementCount > 1)) depthInFloat = 1;
}
if (!prunedElement) {
var attributes = currentCloneElement.attributes;
for (var i = 0; i < attributes.length; i++) {
var attributeName = attributes[i].nodeName;
if (AttributesToRemoveRegEx.test(attributeName)) {
currentCloneElement.removeAttribute(attributeName);
i--;
}
}
if (!depthInFontStyle && computedStyle.fontStyle !== "normal") {
if (!(tagName in tagNamesAffectingFontStyle)) currentCloneElement.style.fontStyle = computedStyle.fontStyle;
depthInFontStyle = 1;
}
if (!depthInFontWeight && computedStyle.fontWeight !== "normal") {
if (!(tagName in tagNamesAffectingFontWeight)) currentCloneElement.style.fontWeight = computedStyle.fontWeight;
depthInFontWeight = 1;
}
if (depthInFloat) {
if (depthInFloat === 1) {
if (cachedElementBoundingRect(currentElement).width === cachedElementBoundingRect(originalArticleNode).width) currentCloneElement.setAttribute("class", "float full-width");
else currentCloneElement.setAttribute("class", "float " + computedStyle["float"]);
}
var widthValue = currentElement.style.getPropertyValue("width");
if (widthValue) currentCloneElement.style.width = widthValue;
else {
var rules = view.getMatchedCSSRules(currentElement, "", true);
if (rules) {
for (var i = rules.length - 1; i >= 0; i--) {
widthValue = rules[i].style.getPropertyValue("width");
if (widthValue) {
currentCloneElement.style.width = widthValue;
break;
}
}
}
}
if (depthInFloat === 1 && !widthValue) currentCloneElement.style.width = cachedElementBoundingRect(currentElement).width + "px";
}
if (tagName === "TABLE") {
if (!depthInTable) depthInTable = 1;
} else if (tagName === "IMG") {
currentCloneElement.removeAttribute("border");
currentCloneElement.removeAttribute("hspace");
currentCloneElement.removeAttribute("vspace");
currentCloneElement.removeAttribute("align");
if (!depthInFloat) {
var imageBoundingRect = cachedElementBoundingRect(currentElement);
if (imageBoundingRect.width < ImageSizeTiny && imageBoundingRect.height < ImageSizeTiny) currentCloneElement.setAttribute("class", "reader-image-tiny");
else if ((imageBoundingRect.width / originalArticleNode.clientWidth) > ImageWidthToParentWidthRatio) {
currentCloneElement.setAttribute("class", "reader-image-large");
}
} else {
currentCloneElement.style.float = computedStyle.float;
}
} else if (tagName === "FONT") {
currentCloneElement.removeAttribute("size");
currentCloneElement.removeAttribute("face");
currentCloneElement.removeAttribute("color");
} else if (tagName === "A" && currentCloneElement.parentNode) {
var href = currentCloneElement.getAttribute("href");
if (href && href.length && (href[0] === "#" || href.substring(0, 11) === "javascript:")) {
if (!depthInTable && !currentCloneElement.childElementCount && currentCloneElement.parentElement.childElementCount === 1) {
var xPathResult = this.contentDocument.evaluate("text()", currentCloneElement.parentElement, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
if (!xPathResult.snapshotLength) prunedElement = currentCloneElement;
}
if (!prunedElement) {
var replacementNode = this.contentDocument.createElement("span");
if (currentCloneElement.childElementCount === 1 && currentCloneElement.firstElementChild.tagName === "IMG") {
var imageElement = currentCloneElement.firstElementChild;
if (imageElement.width > AnchorImageMinimumWidth && imageElement.height > AnchorImageMinimumHeight) replacementNode.setAttribute("class", "converted-image-anchor");
}
if (!replacementNode.className) replacementNode.setAttribute("class", "converted-anchor");
while (currentCloneElement.firstChild)
replacementNode.appendChild(currentCloneElement.firstChild);
currentCloneElement.parentNode.replaceChild(replacementNode, currentCloneElement);
currentCloneElement = replacementNode;
}
}
}
}
var firstElementChild = prunedElement ? null : currentElement.firstElementChild;
if (firstElementChild) {
currentElement = firstElementChild;
currentCloneElement = currentCloneElement.firstElementChild;
incrementDepthLevels(1);
} else {
var nextElementSibling;
while (currentElement !== originalArticleNode && !(nextElementSibling = currentElement.nextElementSibling)) {
currentElement = currentElement.parentElement;
currentCloneElement = currentCloneElement.parentElement;
incrementDepthLevels(-1);
}
if (currentElement === originalArticleNode) {
if (prunedElement) {
if (prunedElement.parentElement) prunedElement.parentElement.removeChild(prunedElement);
else if (allowedToReturnNull) return null;
}
break;
}
currentElement = nextElementSibling;
currentCloneElement = currentCloneElement.nextElementSibling;
updateDepthLevelsAfterSiblingTraversal();
}
if (prunedElement && !prunedElement.parentElement && shouldDoSOmething) return null;
if (prunedElement) {
if (prunedElement.parentElement) prunedElement.parentElement.removeChild(prunedElement);
else if (allowedToReturnNull) return null;
}
}
for (var i = elementsToConsiderPruning.length - 1; i >= 0; i--) {
var element = elementsToConsiderPruning[i];
if (element.parentNode && this.shouldPruneElement(element, element.originalElement)) element.parentNode.removeChild(element);
}
var floatElements = this._adoptableArticle.querySelectorAll(".float");
for (var i = 0; i < floatElements.length; i++) {
var pruneFloatedElement = false;
var floatElement = floatElements[i];
if (!pruneFloatedElement) {
var anchors = floatElement.querySelectorAll("a, span.converted-image-anchor");
var replacedAnchors = floatElement.querySelectorAll("span.converted-anchor");
pruneFloatedElement = floatElement.parentNode && replacedAnchors.length > anchors.length;
}
if (!pruneFloatedElement) {
var plugInsInClonedElement = floatElement.querySelectorAll("embed, object").length;
var plugInsInOriginalElement = floatElement.originalElement.querySelectorAll("embed, object").length;
if (!plugInsInClonedElement && plugInsInOriginalElement) pruneFloatedElement = true;
}
if (pruneFloatedElement) floatElement.parentNode.removeChild(floatElement);
}
if (allowedToReturnNull && !removeWhitespace(clonedArticleNode.innerText).length) return null;
return clonedArticleNode;
},
"articleTitle": function articleTitle() {
if (this._articleTitle !== undefined) return this._articleTitle;
const HeaderMaximumDistance = 500;
const HeaderMinimumTextLength = 8;
const HeaderMinimumFontSize = 12;
const HeaderFontSizeBonusMinimumRatio = 1.1;
const HeaderFontSizeBonusMultiplier = 1.25;
const HeaderBonusRegEx = /header|title|headline/i;
const HeaderRegexBonusMultiplier = 1.5;
const HeaderLargeImageCheckOffsetY = 150;
const HeaderLargeImageMinimumHeight = 300;
const HeaderLargeImageMinimumWidthRatio = 0.5;
const HeaderMaximumDOMDistance = 8;
const HeaderMinimumFontSizeDifference = 1.5;
var articleRect = cachedElementBoundingRect(this.prependedArticleNode() ? this.prependedArticleNode() : this.articleNode());
var articleCenterX = articleRect.left + (articleRect.width / 2);
var articleTopY = articleRect.top;
var articleAdjustedTopY = articleTopY;
var potentialLeadingImage = this.contentDocument.elementFromPoint(articleCenterX, articleTopY - HeaderLargeImageCheckOffsetY);
if (potentialLeadingImage && potentialLeadingImage.tagName === "IMG") {
var imageRect = cachedElementBoundingRect(potentialLeadingImage);
if (imageRect.height >= HeaderLargeImageMinimumHeight && imageRect.width >= articleRect.width * HeaderLargeImageMinimumWidthRatio) articleAdjustedTopY = imageRect.top;
}
var allHeaders = this.contentDocument.querySelectorAll("h1, h2, h3, h4, h5, .headline, .article_title, #hn-headline");
var bestHeader;
for (var i = 0; i < allHeaders.length; i++) {
var header = allHeaders[i];
var headerRect = cachedElementBoundingRect(header);
var headerCenterX = headerRect.left + (headerRect.width / 2);
var headerCenterY = headerRect.top + (headerRect.height / 2);
var deltaX = headerCenterX - articleCenterX;
var deltaY = headerCenterY - articleAdjustedTopY;
var distance = Math.sqrt((deltaX * deltaX) + (deltaY * deltaY));
var headerScore = Math.max(HeaderMaximumDistance - distance, 0);
if (distance > HeaderMaximumDistance) continue;
if (headerRect.width < articleRect.width * 0.5) continue;
if (headerCenterX < articleRect.left || headerCenterX > articleRect.right) continue;
var headerFontSize = fontSizeFromComputedStyle(getComputedStyle(header));
if (headerFontSize < HeaderMinimumFontSize) continue;
var headerText = header.innerText;
if (headerText.length < HeaderMinimumTextLength) continue;
headerScore *= 1 + TitleCandidateDepthScoreMultiplier * elementDepth(header);
headerScore *= (headerFontSize / BaseFontSize);
var fontSize = parseInt(this.contentTextStyle().fontSize);
if (parseInt(headerFontSize) > fontSize * HeaderFontSizeBonusMinimumRatio) headerScore *= HeaderFontSizeBonusMultiplier;
if (HeaderBonusRegEx.test(header.className) || HeaderBonusRegEx.test(header.id)) headerScore *= HeaderRegexBonusMultiplier;
if (!bestHeader || headerScore > bestHeader.headerScore) {
bestHeader = header;
bestHeader.headerScore = headerScore;
bestHeader.headerText = headerText;
}
}
if (bestHeader && domDistance(bestHeader, this.articleNode(), HeaderMaximumDOMDistance + 1) > HeaderMaximumDOMDistance) {
if (parseInt(getComputedStyle(bestHeader).fontSize) < HeaderMinimumFontSizeDifference * fontSize) bestHeader = null;
}
if (bestHeader) {
this._articleTitle = bestHeader.headerText;
this._articleTitleElement = bestHeader;
}
if (!this._articleTitle) this._articleTitle = this.contentDocument.title;
return this._articleTitle;
},
"articleIsLTR": function articleIsLTR() {
if (this._articleIsLTR === undefined) {
var computedStyle = getComputedStyle(this.articleNode());
this._articleIsLTR = computedStyle ? computedStyle.direction === "ltr" : true;
}
return this._articleIsLTR;
},
"findSuggestedCandidate": function findSuggestedCandidate() {
var route = this.suggestedRouteToArticle;
if (!route || !route.length) return null;
var node;
var i;
for (i = route.length - 1; i >= 0; i--) {
if (route[i].id) {
node = this.contentDocument.getElementById(route[i].id);
if (node) break;
}
}
i++;
if (!node) node = this.contentDocument;
while (i < route.length) {
var step = route[i];
var child = node.nodeType === Node.DOCUMENT_NODE ? node.documentElement : node.firstElementChild;
for (var j = 1; child && j < step.index; j++) {
child = child.nextElementSibling;
}
if (!child) return null;
if (child.tagName !== step.tagName) return null;
if (step.className && child.className !== step.className) return null;
node = child;
i++;
}
if (!isElementVisible(node)) return null;
return new CandidateElement(node, this.contentDocument);
},
"findArticle": function findArticle() {
if (BlacklistedHostsRegEx.test(this.contentDocument.location.hostname)) return null;
var suggestedCandidate = this.findSuggestedCandidate();
var candidateElements = this.findCandidateElements();
if (!candidateElements || !candidateElements.length) return suggestedCandidate;
if (suggestedCandidate && suggestedCandidate.basicScore() >= ReaderMinimumScore) return suggestedCandidate;
var highestScoringElement = this.highestScoringCandidateFromCandidates(candidateElements);
if (highestScoringElement.finalScore() < ReaderMinimumScore) return suggestedCandidate;
if (highestScoringElement.shouldDisqualifyDueToScoreDensity()) return null;
if (highestScoringElement.shouldDisqualifyDueToHorizontalRuleDensity()) return null;
if (highestScoringElement.shouldDisqualifyDueToHeaderDensity()) return null;
if (highestScoringElement.shouldDisqualifyDueToSimilarElements(candidateElements)) return null;
return highestScoringElement;
},
"findPrependedArticle": function findPrependedArticle() {
if (!this.article) return null;
for (var i = 0, candidateSearchScope = this.article.element; i < 3 && candidateSearchScope; i++, candidateSearchScope = candidateSearchScope.parentNode) {
var candidateElements = this.findPrependedArticleCandidateElements(candidateSearchScope);
if (!candidateElements || !candidateElements.length) continue;
var sortedCandidateElements = this.sortCandidateElementsInDescendingScoreOrder(candidateElements);
var highestScoringCandidate;
for (var candidateIndex = 0; candidateIndex < sortedCandidateElements.length; candidateIndex++) {
highestScoringCandidate = sortedCandidateElements[candidateIndex];
if (!highestScoringCandidate || !highestScoringCandidate.basicScore()) break;
if (highestScoringCandidate.shouldDisqualifyDueToScoreDensity()) continue;
if (highestScoringCandidate.shouldDisqualifyDueToHorizontalRuleDensity()) continue;
if (highestScoringCandidate.shouldDisqualifyDueToHeaderDensity()) continue;
if (cachedElementBoundingRect(highestScoringCandidate.element).height < PrependedArticleCandidateMinimumHeight && cachedElementBoundingRect(this.article.element).width != cachedElementBoundingRect(highestScoringCandidate.element).width) continue;
var textNodeStyle = contentTextStyleForNode(this.contentDocument, highestScoringCandidate.element, true);
if (!textNodeStyle) continue;
if (textNodeStyle.fontFamily !== this.contentTextStyle().fontFamily || textNodeStyle.fontSize !== this.contentTextStyle().fontSize) continue;
if (highestScoringCandidate) return highestScoringCandidate;
}
}
return null;
},
"highestScoringCandidateFromCandidates": function highestScoringCandidateFromCandidates(candidateElements) {
var highestScore = 0;
var highestScoringElement = null;
for (var i = 0; i < candidateElements.length; i++) {
var candidateElement = candidateElements[i];
var score = candidateElement.basicScore();
if (score >= highestScore) {
highestScore = score;
highestScoringElement = candidateElement;
}
}
return highestScoringElement;
},
"sortCandidateElementsInDescendingScoreOrder": function sortCandidateElementsInDescendingScoreOrder(candidateElements) {
function sortByScore(candidate1, candidate2) {
if (candidate1.basicScore() != candidate2.basicScore()) return candidate2.basicScore() - candidate1.basicScore();
return candidate2.depth() - candidate1.depth();
}
return candidateElements.sort(sortByScore);
},
"findCandidateElements": function findCandidateElements() {
const MaximumCandidateDetectionTimeInterval = 1000;
var findCandidateElementsTimeoutDate = Date.now() + MaximumCandidateDetectionTimeInterval;
var elements = this.contentDocument.getElementsByTagName("*");
var candidateElements = [];
var elementsLength = elements.length;
for (var i = 0; i < elementsLength; i++) {
var element = elements[i];
if (CandidateTagNamesToIgnore[element.tagName]) continue;
var candidate = CandidateElement.candidateIfElementIsViable(element, this.contentDocument);
if (candidate) candidateElements.push(candidate);
if (Date.now() > findCandidateElementsTimeoutDate) {
console.assert(false, "ReaderArticleFinder aborting CandidateElement detection due to timeout");
candidateElements = [];
break;
}
}
return candidateElements;
},
"findPrependedArticleCandidateElements": function findPrependedArticleCandidateElements(searchScope) {
if (!this.article) return [];
if (!searchScope) searchScope = this.article.element;
var xPathQuery = "preceding-sibling::*/descendant-or-self::*";
var xPathResults = this.contentDocument.evaluate(xPathQuery, searchScope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
var possibleCandidateCount = xPathResults.snapshotLength;
var candidateElements = [];
for (var i = 0; i < possibleCandidateCount; i++) {
var element = xPathResults.snapshotItem(i);
if (CandidateTagNamesToIgnore[element.tagName]) continue;
var candidate = CandidateElement.prependedArticleCandidateIfElementIsViable(element, this.article, this.contentDocument);
if (candidate) candidateElements.push(candidate);
}
return candidateElements;
},
"nextPageURLString": function nextPageURLString() {
if (!this.article) return null;
var bestLink;
var bestLinkScore = 0;
var searchScope = this.article.element;
if (searchScope.parentNode && getComputedStyle(searchScope).display === "inline") searchScope = searchScope.parentNode;
var possibleSearchScope = searchScope;
var minimumBottomOffset = cachedElementBoundingRect(searchScope).bottom + LinkMaxVerticalDistanceFromArticle;
while (isElementNode(possibleSearchScope) && cachedElementBoundingRect(possibleSearchScope).bottom <= minimumBottomOffset)
possibleSearchScope = possibleSearchScope.parentNode;
if (isElementNode(possibleSearchScope) && possibleSearchScope != searchScope) searchScope = possibleSearchScope;
var anchorElements = this.contentDocument.evaluate(LinkCandidateXPathQuery, searchScope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
var numberOfLinks = anchorElements.snapshotLength;
for (var i = 0; i < numberOfLinks; i++) {
var link = anchorElements.snapshotItem(i);
var score = this.scoreNextPageLinkCandidate(link);
if (score > bestLinkScore) {
bestLink = link;
bestLinkScore = score;
}
}
return bestLink ? bestLink.href : null;
},
"scoreNextPageLinkCandidate": function scoreNextPageLinkCandidate(link) {
function isNextOrdinal(referenceString, linkString, linkText, pageNumber) {
if (linkString.substring(0, referenceString.length) === referenceString) {
linkString = linkString.substring(referenceString.length);
referenceString = "";
}
var linkOrdinal = linkString.lastInteger();
if (isNaN(linkOrdinal)) return false;
var referenceOrdinal = referenceString ? referenceString.lastInteger() : NaN;
if (isNaN(referenceOrdinal) || referenceOrdinal >= MaximumExactIntegralValue) referenceOrdinal = pageNumber;
if (linkOrdinal == referenceOrdinal) return linkText.lastInteger() === referenceOrdinal + 1;
return linkOrdinal === referenceOrdinal + 1;
}
function parametersFromSearch(search) {
var map = {};
var parameters = search.substring(1).split("&");
for (var i = 0; i < parameters.length; i++) {
var parameter = parameters[i];
var equalsIndex = parameter.indexOf("=");
if (equalsIndex === -1) map[parameter] = null;
else map[parameter.substring(0, equalsIndex)] = parameter.substring(equalsIndex + 1);
}
return map;
}
var referenceLocation = this.contentDocument.location;
if (link.host !== referenceLocation.host) return 0;
if (link.pathname === referenceLocation.pathname && link.search === referenceLocation.search) return 0;
if (link.toString().indexOf("#") != -1) return 0;
if (!isElementVisible(link)) return 0;
var linkBoundingRect = cachedElementBoundingRect(link);
var articleBoundingRect = cachedElementBoundingRect(this.article.element);
var verticalDistanceFromArticle = Math.max(0, Math.max(articleBoundingRect.top - (linkBoundingRect.top + linkBoundingRect.height), linkBoundingRect.top - (articleBoundingRect.top + articleBoundingRect.height)));
if (verticalDistanceFromArticle > LinkMaxVerticalDistanceFromArticle) return 0;
var horizontalDistanceFromArticle = Math.max(0, Math.max(articleBoundingRect.left - (linkBoundingRect.left + linkBoundingRect.width), linkBoundingRect.left - (articleBoundingRect.left + articleBoundingRect.width)));
if (horizontalDistanceFromArticle > 0) return 0;
var linkPathComponents = link.pathname.substring(1).split("/");
if (!linkPathComponents[linkPathComponents.length - 1]) linkPathComponents.pop();
var referencePathComponents = referenceLocation.pathname.substring(1).split("/");
if (!referencePathComponents[referencePathComponents.length - 1]) referencePathComponents.pop();
if (linkPathComponents.length < referencePathComponents.length) return 0;
var mismatchCount = 0;
var nextOrdinalMatchValue = 0;
var linkText = link.textContent;
for (var i = 0; i < linkPathComponents.length; i++) {
var linkComponent = linkPathComponents[i];
var referenceComponent = i < referencePathComponents.length ? referencePathComponents[i] : "";
if (referenceComponent !== linkComponent) {
if (i < referencePathComponents.length - 2) return 0;
if (linkComponent.length >= referenceComponent.length) {
var commonSuffixLength = 0;
while (linkComponent[linkComponent.length - 1 - commonSuffixLength] === referenceComponent[referenceComponent.length - 1 - commonSuffixLength])
commonSuffixLength++;
if (commonSuffixLength) {
linkComponent = linkComponent.substring(0, linkComponent.length - commonSuffixLength);
referenceComponent = referenceComponent.substring(0, referenceComponent.length - commonSuffixLength);
}
}
if (isNextOrdinal(referenceComponent, linkComponent, linkText, this.pageNumber)) nextOrdinalMatchValue = Math.pow(LinkNextOrdinalValueBase, (i - linkPathComponents.length + 1));
else mismatchCount++;
}
if (mismatchCount > 1) return 0;
}
if (link.search) {
linkParameters = parametersFromSearch(link.search);
referenceParameters = parametersFromSearch(referenceLocation.search);
for (var key in linkParameters) {
var linkValue = linkParameters[key];
var referenceValue = key in referenceParameters ? referenceParameters[key] : null;
if (referenceValue !== linkValue) {
if (referenceValue === null) referenceValue = "";
if (linkValue === null) linkValue = "";
if (linkValue.length < referenceValue.length) mismatchCount++;
else if (isNextOrdinal(referenceValue, linkValue, linkText, this.pageNumber)) nextOrdinalMatchValue = Math.max(nextOrdinalMatchValue, 1 / LinkNextOrdinalValueBase);
else mismatchCount++;
}
}
}
if (!nextOrdinalMatchValue) return 0;
var score = LinkMatchWeight * (Math.pow(LinkMismatchValueBase, -mismatchCount) + nextOrdinalMatchValue) + LinkVerticalDistanceFromArticleWeight * verticalDistanceFromArticle / LinkMaxVerticalDistanceFromArticle;
if (link.parentNode.tagName === "LI") score += LinkListItemBonus;
var linkText = link.innerText;
if (LinkNextMatchRegEx.test(linkText)) score += LinkNextMatchBonus;
if (LinkPageMatchRegEx.test(linkText)) score += LinkPageMatchBonus;
if (LinkContinueMatchRegEx.test(linkText)) score += LinkContinueMatchBonus;
return score;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment