Created
November 16, 2010 09:49
-
-
Save eric/701636 to your computer and use it in GitHub Desktop.
ReaderJSController.originalArticleFinder()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* ReaderJSController.originalArticleFinder(): */ | |
{ | |
"contentDocument": [object HTMLDocument], | |
"didSearchForArticleNode": true, | |
"article": [object Object], | |
"didSearchForPrependedArticleNode": true, | |
"prependedArticle": null, | |
"_cachedScrollY": 0, | |
"_cachedScrollX": 0, | |
"_elementsWithCachedBoundingRects": [object HTMLDivElement], | |
[object HTMLDivElement], | |
[object HTMLDivElement], | |
[object HTMLDivElement], | |
[object HTMLDivElement], | |
[object HTMLTableCellElement], | |
[object HTMLTableRowElement], | |
[object HTMLTableSectionElement], | |
[object HTMLTableElement], | |
[object HTMLTableCellElement], | |
[object HTMLTableRowElement], | |
[object HTMLTableSectionElement], | |
[object HTMLHeadingElement], | |
[object HTMLTableElement], | |
[object HTMLTableSectionElement], | |
[object HTMLTableRowElement], | |
[object HTMLTableCellElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLHeadingElement], | |
[object HTMLImageElement], | |
"_cachedContentTextStyle": [object CSSStyleDeclaration], | |
"pageNumber": 1, | |
"_articleIsLTR": true, | |
"_nextPageURL": null, | |
"_adoptableArticle": [object HTMLDivElement], | |
"_articleTitle": 11 / 12 / 2010, | |
"_articleTitleElement": [object HTMLHeadingElement], | |
"isReaderModeAvailable": function isReaderModeAvailable() { | |
this.cacheWindowScrollPosition(); | |
var article = this.articleNode(); | |
return article != null; | |
}, | |
"prepareToTransitionToReader": function prepareToTransitionToReader() { | |
clearCachedElementBoundingRects(); | |
this.cacheWindowScrollPosition(); | |
this.nextPageURL(); | |
this.articleIsLTR(); | |
this.adoptableArticle(); | |
}, | |
"nextPageURL": function nextPageURL() { | |
if (this._nextPageURL === undefined) this._nextPageURL = this.nextPageURLString(); | |
return this._nextPageURL; | |
}, | |
"articleNode": function articleNode() { | |
if (!this.didSearchForArticleNode) { | |
this.article = this.findArticle(); | |
this.didSearchForArticleNode = true; | |
if (this.article) this.articleIsLTR(); | |
} | |
return this.article ? this.article.element : null; | |
}, | |
"prependedArticleNode": function prependedArticleNode() { | |
if (!this.didSearchForArticleNode) this.articleNode(); | |
if (!this.didSearchForPrependedArticleNode) { | |
this.prependedArticle = this.findPrependedArticle(); | |
this.didSearchForPrependedArticleNode = true; | |
} | |
return this.prependedArticle ? this.prependedArticle.element : null; | |
}, | |
"cacheWindowScrollPosition": function cacheWindowScrollPosition() { | |
this._cachedScrollY = window.scrollY; | |
this._cachedScrollX = window.scrollX; | |
}, | |
"contentTextStyle": function contentTextStyle() { | |
if (this._cachedContentTextStyle) return this._cachedContentTextStyle; | |
this._cachedContentTextStyle = contentTextStyleForNode(this.contentDocument, this.articleNode(), false); | |
if (!this._cachedContentTextStyle) this._cachedContentTextStyle = getComputedStyle(this.articleNode()); | |
return this._cachedContentTextStyle; | |
}, | |
"commaCountIsLessThan": function commaCountIsLessThan(node, limit) { | |
var count = 0; | |
var textContent = node.textContent; | |
var i = -1; | |
while (count < limit && (i = textContent.indexOf(',', i + 1)) >= 0) | |
count++; | |
return count < limit; | |
}, | |
"calculateLinkDensity": function calculateLinkDensity(element) { | |
var textLength = removeWhitespace(element.textContent).length; | |
if (!textLength) return 0; | |
var links = element.querySelectorAll("a"); | |
var linkCharacterCount = 0; | |
for (var i = 0; i < links.length; i++) | |
linkCharacterCount += removeWhitespace(links[i].textContent).length; | |
return linkCharacterCount / textLength; | |
}, | |
"shouldPruneElement": function shouldPruneElement(element, originalElement) { | |
const MaxInputToParagraphRatio = 0.33; | |
const MaxPositiveWeightLinkDensity = 0.5; | |
const MaxStandardLinkDensity = 0.2; | |
const MinimumTextLength = 25; | |
const MinimumAverageImageArea = 200 * 200; | |
if (!element.parentElement) return false; | |
if (element.tagName !== "OBJECT" && element.tagName !== "EMBED") { | |
var childCount = element.childNodes.length; | |
var hasElementOrTextNodeChild = false; | |
for (var i = 0; i < childCount; i++) { | |
var node = element.childNodes[i]; | |
var nodeType = node.nodeType; | |
if (nodeType === Node.ELEMENT_NODE || (nodeType === Node.TEXT_NODE && !isNodeWhitespace(node))) { | |
hasElementOrTextNodeChild = true; | |
break; | |
} | |
} | |
if (!hasElementOrTextNodeChild) return true; | |
} | |
var classIdWeight = 0; | |
if (originalElement) { | |
if (PositiveRegEx.test(originalElement.className)) classIdWeight++; | |
if (PositiveRegEx.test(originalElement.id)) classIdWeight++; | |
if (NegativeRegEx.test(originalElement.className)) classIdWeight--; | |
if (NegativeRegEx.test(originalElement.id)) classIdWeight--; | |
} | |
if (classIdWeight < 0) return true; | |
if (element.tagName === "UL") { | |
if (originalElement.querySelector("iframe") && originalElement.querySelector("script")) return true; | |
return false; | |
} | |
if (element.tagName === "OBJECT") { | |
const PlugInsToKeepRegEx = /youtube|vimeo|dailymotion/; | |
var embedElement = element.querySelector("embed[src]"); | |
if (embedElement && PlugInsToKeepRegEx.test(embedElement.src)) return false; | |
var dataAttribute = element.getAttribute("data"); | |
if (dataAttribute && PlugInsToKeepRegEx.test(dataAttribute)) return false; | |
return true; | |
} | |
if (element.childElementCount === 1) { | |
var childElement = element.firstElementChild; | |
if (childElement.tagName === "A") return false; | |
if (childElement.tagName === "SPAN" && childElement.className === "converted-anchor" && elementHasAncestorWithTagName(childElement, "TABLE")) return false; | |
} | |
var imageElements = element.querySelectorAll("img"); | |
var imageElementCount = imageElements.length; | |
if (imageElementCount) { | |
var averageImageArea = 0; | |
for (var i = 0; i < imageElementCount; i++) { | |
var originalImage = imageElements[i].originalElement; | |
if (!isElementVisible(originalImage)) continue; | |
var originalRect = cachedElementBoundingRect(originalImage); | |
averageImageArea += (originalRect.width / imageElementCount) * (originalRect.height / imageElementCount); | |
} | |
if (averageImageArea > MinimumAverageImageArea) return false; | |
} | |
if (!this.commaCountIsLessThan(element, 10)) return false; | |
var p = element.querySelectorAll("p").length; | |
var br = element.querySelectorAll("br").length; | |
var numParagraphs = p + Math.floor(br / 2); | |
if (imageElementCount > numParagraphs) return true; | |
if (element.querySelectorAll("li").length > numParagraphs) return true; | |
if (element.querySelectorAll("input").length / numParagraphs > MaxInputToParagraphRatio) return true; | |
if (element.textContent.length < MinimumTextLength && (imageElementCount != 1)) return true; | |
if (element.querySelector("embed")) return true; | |
var linkDensity = this.calculateLinkDensity(element); | |
if (classIdWeight >= 1 && linkDensity > MaxPositiveWeightLinkDensity) return true; | |
if (classIdWeight < 1 && linkDensity > MaxStandardLinkDensity) return true; | |
if (element.tagName === "TABLE") { | |
var textLength = removeWhitespace(element.innerText).length; | |
var originalTextLength = removeWhitespace(originalElement.innerText).length; | |
if (textLength <= (originalTextLength * 0.5)) return true; | |
} | |
return false; | |
}, | |
"wordCountIsLessThan": function wordCountIsLessThan(node, limit) { | |
var count = 0; | |
var textContent = node.textContent; | |
var i = -1; | |
while ((i = textContent.indexOf(' ', i + 1)) >= 0 && count < limit) | |
count++; | |
return count < limit; | |
}, | |
"adoptableArticle": function adoptableArticle() { | |
if (this._adoptableArticle !== undefined) { | |
return this._adoptableArticle.cloneNode(true); | |
} | |
var rootElement = this.articleNode(); | |
this._adoptableArticle = rootElement ? rootElement.cloneNode(true) : null; | |
if (!this._adoptableArticle) return this._adoptableArticle; | |
var articleToPrepend = this.prependedArticleNode(); | |
var cleanedPrependNode = null; | |
this._adoptableArticle = this.cleanArticleNode(rootElement, this._adoptableArticle, false) | |
if (articleToPrepend) { | |
var cleanedPrependNode = this.cleanArticleNode(articleToPrepend, articleToPrepend.cloneNode(true), true); | |
if (cleanedPrependNode) this._adoptableArticle.insertBefore(cleanedPrependNode, this._adoptableArticle.firstChild); | |
} | |
return this._adoptableArticle; | |
}, | |
"cleanArticleNode": function cleanArticleNode(originalArticleNode, clonedArticleNode, allowedToReturnNull) { | |
const tagNamesToAlwaysPrune = { | |
"FORM": 1, | |
"IFRAME": 1, | |
"SCRIPT": 1, | |
"STYLE": 1, | |
"LINK": 1 | |
}; | |
const tagNamesToConsiderPruning = { | |
"DIV": 1, | |
"TABLE": 1, | |
"OBJECT": 1, | |
"UL": 1 | |
}; | |
const tagNamesAffectingFontStyle = { | |
"I": 1, | |
"EM": 1 | |
}; | |
const tagNamesAffectingFontWeight = { | |
"B": 1, | |
"STRONG": 1, | |
"H1": 1, | |
"H2": 1, | |
"H3": 1, | |
"H4": 1, | |
"H5": 1, | |
"H6": 1 | |
}; | |
var elementsToConsiderPruning = []; | |
var depthInFloat = 0; | |
var depthInTable = 0; | |
var depthInFontStyle = 0; | |
var depthInFontWeight = 0; | |
var currentElement = originalArticleNode; | |
var view = currentElement.ownerDocument.defaultView; | |
var currentCloneElement = clonedArticleNode; | |
var articleTitle = this.articleTitle(); | |
var articleTitleElement = this._articleTitleElement; | |
function incrementDepthLevels(delta) { | |
if (depthInFloat) depthInFloat += delta; | |
if (depthInTable) depthInTable += delta; | |
if (depthInFontStyle) depthInFontStyle += delta; | |
if (depthInFontWeight) depthInFontWeight += delta; | |
}; | |
function updateDepthLevelsAfterSiblingTraversal() { | |
if (depthInFloat === 1) depthInFloat = 0; | |
if (depthInTable === 1) depthInTable = 0; | |
if (depthInFontStyle === 1) depthInFontStyle = 0; | |
if (depthInFontWeight === 1) depthInFontWeight = 0; | |
}; | |
while (currentElement) { | |
var prunedElement = null; | |
var tagName = currentCloneElement.tagName; | |
currentCloneElement.originalElement = currentElement; | |
if (tagName in tagNamesToAlwaysPrune) prunedElement = currentCloneElement; | |
if (!prunedElement && currentElement === articleTitleElement) prunedElement = currentCloneElement; | |
if (!prunedElement && (tagName === "H1" || tagName === "H2")) { | |
var distanceFromoriginalArticleNodeTop = currentElement.offsetTop - originalArticleNode.offsetTop; | |
if (distanceFromoriginalArticleNodeTop < HeaderMinimumDistanceFromArticleTop) { | |
var headerText = currentElement.innerText; | |
var maxDistanceToConsiderSimilar = headerText.length * HeaderLevenshteinDistanceToLengthRatio; | |
if (levenshteinDistance(articleTitle, headerText) <= maxDistanceToConsiderSimilar) prunedElement = currentCloneElement; | |
} | |
} | |
var computedStyle; | |
if (!prunedElement) computedStyle = getComputedStyle(currentElement); | |
if (!prunedElement && tagName === "DIV" && currentCloneElement.parentNode) { | |
var elements = currentElement.querySelectorAll("a, blockquote, dl, div, img, ol, p, pre, table, ul"); | |
var inFloat = depthInFloat || computedStyle["float"] !== "none"; | |
if (!inFloat && !elements.length) { | |
var parentNode = currentCloneElement.parentNode; | |
var replacementNode = this.contentDocument.createElement("p"); | |
while (currentCloneElement.firstChild) { | |
var child = currentCloneElement.firstChild; | |
replacementNode.appendChild(child); | |
} | |
parentNode.replaceChild(replacementNode, currentCloneElement); | |
currentCloneElement = replacementNode; | |
currentCloneElement.originalElement = currentElement; | |
tagName = currentCloneElement.tagName; | |
} | |
} | |
if (!prunedElement && currentCloneElement.parentNode && tagName in tagNamesToConsiderPruning) elementsToConsiderPruning.push(currentCloneElement); | |
if (!prunedElement) { | |
if (computedStyle.display === "none") prunedElement = currentCloneElement; | |
else if (currentElement !== originalArticleNode && tagName !== "IMG" && !depthInFloat && computedStyle["float"] !== "none" && (cachedElementBoundingRect(currentElement).height >= FloatMinimumHeight || currentElement.childElementCount > 1)) depthInFloat = 1; | |
} | |
if (!prunedElement) { | |
var attributes = currentCloneElement.attributes; | |
for (var i = 0; i < attributes.length; i++) { | |
var attributeName = attributes[i].nodeName; | |
if (AttributesToRemoveRegEx.test(attributeName)) { | |
currentCloneElement.removeAttribute(attributeName); | |
i--; | |
} | |
} | |
if (!depthInFontStyle && computedStyle.fontStyle !== "normal") { | |
if (!(tagName in tagNamesAffectingFontStyle)) currentCloneElement.style.fontStyle = computedStyle.fontStyle; | |
depthInFontStyle = 1; | |
} | |
if (!depthInFontWeight && computedStyle.fontWeight !== "normal") { | |
if (!(tagName in tagNamesAffectingFontWeight)) currentCloneElement.style.fontWeight = computedStyle.fontWeight; | |
depthInFontWeight = 1; | |
} | |
if (depthInFloat) { | |
if (depthInFloat === 1) { | |
if (cachedElementBoundingRect(currentElement).width === cachedElementBoundingRect(originalArticleNode).width) currentCloneElement.setAttribute("class", "float full-width"); | |
else currentCloneElement.setAttribute("class", "float " + computedStyle["float"]); | |
} | |
var widthValue = currentElement.style.getPropertyValue("width"); | |
if (widthValue) currentCloneElement.style.width = widthValue; | |
else { | |
var rules = view.getMatchedCSSRules(currentElement, "", true); | |
if (rules) { | |
for (var i = rules.length - 1; i >= 0; i--) { | |
widthValue = rules[i].style.getPropertyValue("width"); | |
if (widthValue) { | |
currentCloneElement.style.width = widthValue; | |
break; | |
} | |
} | |
} | |
} | |
if (depthInFloat === 1 && !widthValue) currentCloneElement.style.width = cachedElementBoundingRect(currentElement).width + "px"; | |
} | |
if (tagName === "TABLE") { | |
if (!depthInTable) depthInTable = 1; | |
} else if (tagName === "IMG") { | |
currentCloneElement.removeAttribute("border"); | |
currentCloneElement.removeAttribute("hspace"); | |
currentCloneElement.removeAttribute("vspace"); | |
currentCloneElement.removeAttribute("align"); | |
if (!depthInFloat) { | |
var imageBoundingRect = cachedElementBoundingRect(currentElement); | |
if (imageBoundingRect.width < ImageSizeTiny && imageBoundingRect.height < ImageSizeTiny) currentCloneElement.setAttribute("class", "reader-image-tiny"); | |
else if ((imageBoundingRect.width / originalArticleNode.clientWidth) > ImageWidthToParentWidthRatio) { | |
currentCloneElement.setAttribute("class", "reader-image-large"); | |
} | |
} else { | |
currentCloneElement.style.float = computedStyle.float; | |
} | |
} else if (tagName === "FONT") { | |
currentCloneElement.removeAttribute("size"); | |
currentCloneElement.removeAttribute("face"); | |
currentCloneElement.removeAttribute("color"); | |
} else if (tagName === "A" && currentCloneElement.parentNode) { | |
var href = currentCloneElement.getAttribute("href"); | |
if (href && href.length && (href[0] === "#" || href.substring(0, 11) === "javascript:")) { | |
if (!depthInTable && !currentCloneElement.childElementCount && currentCloneElement.parentElement.childElementCount === 1) { | |
var xPathResult = this.contentDocument.evaluate("text()", currentCloneElement.parentElement, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | |
if (!xPathResult.snapshotLength) prunedElement = currentCloneElement; | |
} | |
if (!prunedElement) { | |
var replacementNode = this.contentDocument.createElement("span"); | |
if (currentCloneElement.childElementCount === 1 && currentCloneElement.firstElementChild.tagName === "IMG") { | |
var imageElement = currentCloneElement.firstElementChild; | |
if (imageElement.width > AnchorImageMinimumWidth && imageElement.height > AnchorImageMinimumHeight) replacementNode.setAttribute("class", "converted-image-anchor"); | |
} | |
if (!replacementNode.className) replacementNode.setAttribute("class", "converted-anchor"); | |
while (currentCloneElement.firstChild) | |
replacementNode.appendChild(currentCloneElement.firstChild); | |
currentCloneElement.parentNode.replaceChild(replacementNode, currentCloneElement); | |
currentCloneElement = replacementNode; | |
} | |
} | |
} | |
} | |
var firstElementChild = prunedElement ? null : currentElement.firstElementChild; | |
if (firstElementChild) { | |
currentElement = firstElementChild; | |
currentCloneElement = currentCloneElement.firstElementChild; | |
incrementDepthLevels(1); | |
} else { | |
var nextElementSibling; | |
while (currentElement !== originalArticleNode && !(nextElementSibling = currentElement.nextElementSibling)) { | |
currentElement = currentElement.parentElement; | |
currentCloneElement = currentCloneElement.parentElement; | |
incrementDepthLevels(-1); | |
} | |
if (currentElement === originalArticleNode) { | |
if (prunedElement) { | |
if (prunedElement.parentElement) prunedElement.parentElement.removeChild(prunedElement); | |
else if (allowedToReturnNull) return null; | |
} | |
break; | |
} | |
currentElement = nextElementSibling; | |
currentCloneElement = currentCloneElement.nextElementSibling; | |
updateDepthLevelsAfterSiblingTraversal(); | |
} | |
if (prunedElement && !prunedElement.parentElement && shouldDoSOmething) return null; | |
if (prunedElement) { | |
if (prunedElement.parentElement) prunedElement.parentElement.removeChild(prunedElement); | |
else if (allowedToReturnNull) return null; | |
} | |
} | |
for (var i = elementsToConsiderPruning.length - 1; i >= 0; i--) { | |
var element = elementsToConsiderPruning[i]; | |
if (element.parentNode && this.shouldPruneElement(element, element.originalElement)) element.parentNode.removeChild(element); | |
} | |
var floatElements = this._adoptableArticle.querySelectorAll(".float"); | |
for (var i = 0; i < floatElements.length; i++) { | |
var pruneFloatedElement = false; | |
var floatElement = floatElements[i]; | |
if (!pruneFloatedElement) { | |
var anchors = floatElement.querySelectorAll("a, span.converted-image-anchor"); | |
var replacedAnchors = floatElement.querySelectorAll("span.converted-anchor"); | |
pruneFloatedElement = floatElement.parentNode && replacedAnchors.length > anchors.length; | |
} | |
if (!pruneFloatedElement) { | |
var plugInsInClonedElement = floatElement.querySelectorAll("embed, object").length; | |
var plugInsInOriginalElement = floatElement.originalElement.querySelectorAll("embed, object").length; | |
if (!plugInsInClonedElement && plugInsInOriginalElement) pruneFloatedElement = true; | |
} | |
if (pruneFloatedElement) floatElement.parentNode.removeChild(floatElement); | |
} | |
if (allowedToReturnNull && !removeWhitespace(clonedArticleNode.innerText).length) return null; | |
return clonedArticleNode; | |
}, | |
"articleTitle": function articleTitle() { | |
if (this._articleTitle !== undefined) return this._articleTitle; | |
const HeaderMaximumDistance = 500; | |
const HeaderMinimumTextLength = 8; | |
const HeaderMinimumFontSize = 12; | |
const HeaderFontSizeBonusMinimumRatio = 1.1; | |
const HeaderFontSizeBonusMultiplier = 1.25; | |
const HeaderBonusRegEx = /header|title|headline/i; | |
const HeaderRegexBonusMultiplier = 1.5; | |
const HeaderLargeImageCheckOffsetY = 150; | |
const HeaderLargeImageMinimumHeight = 300; | |
const HeaderLargeImageMinimumWidthRatio = 0.5; | |
const HeaderMaximumDOMDistance = 8; | |
const HeaderMinimumFontSizeDifference = 1.5; | |
var articleRect = cachedElementBoundingRect(this.prependedArticleNode() ? this.prependedArticleNode() : this.articleNode()); | |
var articleCenterX = articleRect.left + (articleRect.width / 2); | |
var articleTopY = articleRect.top; | |
var articleAdjustedTopY = articleTopY; | |
var potentialLeadingImage = this.contentDocument.elementFromPoint(articleCenterX, articleTopY - HeaderLargeImageCheckOffsetY); | |
if (potentialLeadingImage && potentialLeadingImage.tagName === "IMG") { | |
var imageRect = cachedElementBoundingRect(potentialLeadingImage); | |
if (imageRect.height >= HeaderLargeImageMinimumHeight && imageRect.width >= articleRect.width * HeaderLargeImageMinimumWidthRatio) articleAdjustedTopY = imageRect.top; | |
} | |
var allHeaders = this.contentDocument.querySelectorAll("h1, h2, h3, h4, h5, .headline, .article_title, #hn-headline"); | |
var bestHeader; | |
for (var i = 0; i < allHeaders.length; i++) { | |
var header = allHeaders[i]; | |
var headerRect = cachedElementBoundingRect(header); | |
var headerCenterX = headerRect.left + (headerRect.width / 2); | |
var headerCenterY = headerRect.top + (headerRect.height / 2); | |
var deltaX = headerCenterX - articleCenterX; | |
var deltaY = headerCenterY - articleAdjustedTopY; | |
var distance = Math.sqrt((deltaX * deltaX) + (deltaY * deltaY)); | |
var headerScore = Math.max(HeaderMaximumDistance - distance, 0); | |
if (distance > HeaderMaximumDistance) continue; | |
if (headerRect.width < articleRect.width * 0.5) continue; | |
if (headerCenterX < articleRect.left || headerCenterX > articleRect.right) continue; | |
var headerFontSize = fontSizeFromComputedStyle(getComputedStyle(header)); | |
if (headerFontSize < HeaderMinimumFontSize) continue; | |
var headerText = header.innerText; | |
if (headerText.length < HeaderMinimumTextLength) continue; | |
headerScore *= 1 + TitleCandidateDepthScoreMultiplier * elementDepth(header); | |
headerScore *= (headerFontSize / BaseFontSize); | |
var fontSize = parseInt(this.contentTextStyle().fontSize); | |
if (parseInt(headerFontSize) > fontSize * HeaderFontSizeBonusMinimumRatio) headerScore *= HeaderFontSizeBonusMultiplier; | |
if (HeaderBonusRegEx.test(header.className) || HeaderBonusRegEx.test(header.id)) headerScore *= HeaderRegexBonusMultiplier; | |
if (!bestHeader || headerScore > bestHeader.headerScore) { | |
bestHeader = header; | |
bestHeader.headerScore = headerScore; | |
bestHeader.headerText = headerText; | |
} | |
} | |
if (bestHeader && domDistance(bestHeader, this.articleNode(), HeaderMaximumDOMDistance + 1) > HeaderMaximumDOMDistance) { | |
if (parseInt(getComputedStyle(bestHeader).fontSize) < HeaderMinimumFontSizeDifference * fontSize) bestHeader = null; | |
} | |
if (bestHeader) { | |
this._articleTitle = bestHeader.headerText; | |
this._articleTitleElement = bestHeader; | |
} | |
if (!this._articleTitle) this._articleTitle = this.contentDocument.title; | |
return this._articleTitle; | |
}, | |
"articleIsLTR": function articleIsLTR() { | |
if (this._articleIsLTR === undefined) { | |
var computedStyle = getComputedStyle(this.articleNode()); | |
this._articleIsLTR = computedStyle ? computedStyle.direction === "ltr" : true; | |
} | |
return this._articleIsLTR; | |
}, | |
"findSuggestedCandidate": function findSuggestedCandidate() { | |
var route = this.suggestedRouteToArticle; | |
if (!route || !route.length) return null; | |
var node; | |
var i; | |
for (i = route.length - 1; i >= 0; i--) { | |
if (route[i].id) { | |
node = this.contentDocument.getElementById(route[i].id); | |
if (node) break; | |
} | |
} | |
i++; | |
if (!node) node = this.contentDocument; | |
while (i < route.length) { | |
var step = route[i]; | |
var child = node.nodeType === Node.DOCUMENT_NODE ? node.documentElement : node.firstElementChild; | |
for (var j = 1; child && j < step.index; j++) { | |
child = child.nextElementSibling; | |
} | |
if (!child) return null; | |
if (child.tagName !== step.tagName) return null; | |
if (step.className && child.className !== step.className) return null; | |
node = child; | |
i++; | |
} | |
if (!isElementVisible(node)) return null; | |
return new CandidateElement(node, this.contentDocument); | |
}, | |
"findArticle": function findArticle() { | |
if (BlacklistedHostsRegEx.test(this.contentDocument.location.hostname)) return null; | |
var suggestedCandidate = this.findSuggestedCandidate(); | |
var candidateElements = this.findCandidateElements(); | |
if (!candidateElements || !candidateElements.length) return suggestedCandidate; | |
if (suggestedCandidate && suggestedCandidate.basicScore() >= ReaderMinimumScore) return suggestedCandidate; | |
var highestScoringElement = this.highestScoringCandidateFromCandidates(candidateElements); | |
if (highestScoringElement.finalScore() < ReaderMinimumScore) return suggestedCandidate; | |
if (highestScoringElement.shouldDisqualifyDueToScoreDensity()) return null; | |
if (highestScoringElement.shouldDisqualifyDueToHorizontalRuleDensity()) return null; | |
if (highestScoringElement.shouldDisqualifyDueToHeaderDensity()) return null; | |
if (highestScoringElement.shouldDisqualifyDueToSimilarElements(candidateElements)) return null; | |
return highestScoringElement; | |
}, | |
"findPrependedArticle": function findPrependedArticle() { | |
if (!this.article) return null; | |
for (var i = 0, candidateSearchScope = this.article.element; i < 3 && candidateSearchScope; i++, candidateSearchScope = candidateSearchScope.parentNode) { | |
var candidateElements = this.findPrependedArticleCandidateElements(candidateSearchScope); | |
if (!candidateElements || !candidateElements.length) continue; | |
var sortedCandidateElements = this.sortCandidateElementsInDescendingScoreOrder(candidateElements); | |
var highestScoringCandidate; | |
for (var candidateIndex = 0; candidateIndex < sortedCandidateElements.length; candidateIndex++) { | |
highestScoringCandidate = sortedCandidateElements[candidateIndex]; | |
if (!highestScoringCandidate || !highestScoringCandidate.basicScore()) break; | |
if (highestScoringCandidate.shouldDisqualifyDueToScoreDensity()) continue; | |
if (highestScoringCandidate.shouldDisqualifyDueToHorizontalRuleDensity()) continue; | |
if (highestScoringCandidate.shouldDisqualifyDueToHeaderDensity()) continue; | |
if (cachedElementBoundingRect(highestScoringCandidate.element).height < PrependedArticleCandidateMinimumHeight && cachedElementBoundingRect(this.article.element).width != cachedElementBoundingRect(highestScoringCandidate.element).width) continue; | |
var textNodeStyle = contentTextStyleForNode(this.contentDocument, highestScoringCandidate.element, true); | |
if (!textNodeStyle) continue; | |
if (textNodeStyle.fontFamily !== this.contentTextStyle().fontFamily || textNodeStyle.fontSize !== this.contentTextStyle().fontSize) continue; | |
if (highestScoringCandidate) return highestScoringCandidate; | |
} | |
} | |
return null; | |
}, | |
"highestScoringCandidateFromCandidates": function highestScoringCandidateFromCandidates(candidateElements) { | |
var highestScore = 0; | |
var highestScoringElement = null; | |
for (var i = 0; i < candidateElements.length; i++) { | |
var candidateElement = candidateElements[i]; | |
var score = candidateElement.basicScore(); | |
if (score >= highestScore) { | |
highestScore = score; | |
highestScoringElement = candidateElement; | |
} | |
} | |
return highestScoringElement; | |
}, | |
"sortCandidateElementsInDescendingScoreOrder": function sortCandidateElementsInDescendingScoreOrder(candidateElements) { | |
function sortByScore(candidate1, candidate2) { | |
if (candidate1.basicScore() != candidate2.basicScore()) return candidate2.basicScore() - candidate1.basicScore(); | |
return candidate2.depth() - candidate1.depth(); | |
} | |
return candidateElements.sort(sortByScore); | |
}, | |
"findCandidateElements": function findCandidateElements() { | |
const MaximumCandidateDetectionTimeInterval = 1000; | |
var findCandidateElementsTimeoutDate = Date.now() + MaximumCandidateDetectionTimeInterval; | |
var elements = this.contentDocument.getElementsByTagName("*"); | |
var candidateElements = []; | |
var elementsLength = elements.length; | |
for (var i = 0; i < elementsLength; i++) { | |
var element = elements[i]; | |
if (CandidateTagNamesToIgnore[element.tagName]) continue; | |
var candidate = CandidateElement.candidateIfElementIsViable(element, this.contentDocument); | |
if (candidate) candidateElements.push(candidate); | |
if (Date.now() > findCandidateElementsTimeoutDate) { | |
console.assert(false, "ReaderArticleFinder aborting CandidateElement detection due to timeout"); | |
candidateElements = []; | |
break; | |
} | |
} | |
return candidateElements; | |
}, | |
"findPrependedArticleCandidateElements": function findPrependedArticleCandidateElements(searchScope) { | |
if (!this.article) return []; | |
if (!searchScope) searchScope = this.article.element; | |
var xPathQuery = "preceding-sibling::*/descendant-or-self::*"; | |
var xPathResults = this.contentDocument.evaluate(xPathQuery, searchScope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | |
var possibleCandidateCount = xPathResults.snapshotLength; | |
var candidateElements = []; | |
for (var i = 0; i < possibleCandidateCount; i++) { | |
var element = xPathResults.snapshotItem(i); | |
if (CandidateTagNamesToIgnore[element.tagName]) continue; | |
var candidate = CandidateElement.prependedArticleCandidateIfElementIsViable(element, this.article, this.contentDocument); | |
if (candidate) candidateElements.push(candidate); | |
} | |
return candidateElements; | |
}, | |
"nextPageURLString": function nextPageURLString() { | |
if (!this.article) return null; | |
var bestLink; | |
var bestLinkScore = 0; | |
var searchScope = this.article.element; | |
if (searchScope.parentNode && getComputedStyle(searchScope).display === "inline") searchScope = searchScope.parentNode; | |
var possibleSearchScope = searchScope; | |
var minimumBottomOffset = cachedElementBoundingRect(searchScope).bottom + LinkMaxVerticalDistanceFromArticle; | |
while (isElementNode(possibleSearchScope) && cachedElementBoundingRect(possibleSearchScope).bottom <= minimumBottomOffset) | |
possibleSearchScope = possibleSearchScope.parentNode; | |
if (isElementNode(possibleSearchScope) && possibleSearchScope != searchScope) searchScope = possibleSearchScope; | |
var anchorElements = this.contentDocument.evaluate(LinkCandidateXPathQuery, searchScope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | |
var numberOfLinks = anchorElements.snapshotLength; | |
for (var i = 0; i < numberOfLinks; i++) { | |
var link = anchorElements.snapshotItem(i); | |
var score = this.scoreNextPageLinkCandidate(link); | |
if (score > bestLinkScore) { | |
bestLink = link; | |
bestLinkScore = score; | |
} | |
} | |
return bestLink ? bestLink.href : null; | |
}, | |
"scoreNextPageLinkCandidate": function scoreNextPageLinkCandidate(link) { | |
function isNextOrdinal(referenceString, linkString, linkText, pageNumber) { | |
if (linkString.substring(0, referenceString.length) === referenceString) { | |
linkString = linkString.substring(referenceString.length); | |
referenceString = ""; | |
} | |
var linkOrdinal = linkString.lastInteger(); | |
if (isNaN(linkOrdinal)) return false; | |
var referenceOrdinal = referenceString ? referenceString.lastInteger() : NaN; | |
if (isNaN(referenceOrdinal) || referenceOrdinal >= MaximumExactIntegralValue) referenceOrdinal = pageNumber; | |
if (linkOrdinal == referenceOrdinal) return linkText.lastInteger() === referenceOrdinal + 1; | |
return linkOrdinal === referenceOrdinal + 1; | |
} | |
function parametersFromSearch(search) { | |
var map = {}; | |
var parameters = search.substring(1).split("&"); | |
for (var i = 0; i < parameters.length; i++) { | |
var parameter = parameters[i]; | |
var equalsIndex = parameter.indexOf("="); | |
if (equalsIndex === -1) map[parameter] = null; | |
else map[parameter.substring(0, equalsIndex)] = parameter.substring(equalsIndex + 1); | |
} | |
return map; | |
} | |
var referenceLocation = this.contentDocument.location; | |
if (link.host !== referenceLocation.host) return 0; | |
if (link.pathname === referenceLocation.pathname && link.search === referenceLocation.search) return 0; | |
if (link.toString().indexOf("#") != -1) return 0; | |
if (!isElementVisible(link)) return 0; | |
var linkBoundingRect = cachedElementBoundingRect(link); | |
var articleBoundingRect = cachedElementBoundingRect(this.article.element); | |
var verticalDistanceFromArticle = Math.max(0, Math.max(articleBoundingRect.top - (linkBoundingRect.top + linkBoundingRect.height), linkBoundingRect.top - (articleBoundingRect.top + articleBoundingRect.height))); | |
if (verticalDistanceFromArticle > LinkMaxVerticalDistanceFromArticle) return 0; | |
var horizontalDistanceFromArticle = Math.max(0, Math.max(articleBoundingRect.left - (linkBoundingRect.left + linkBoundingRect.width), linkBoundingRect.left - (articleBoundingRect.left + articleBoundingRect.width))); | |
if (horizontalDistanceFromArticle > 0) return 0; | |
var linkPathComponents = link.pathname.substring(1).split("/"); | |
if (!linkPathComponents[linkPathComponents.length - 1]) linkPathComponents.pop(); | |
var referencePathComponents = referenceLocation.pathname.substring(1).split("/"); | |
if (!referencePathComponents[referencePathComponents.length - 1]) referencePathComponents.pop(); | |
if (linkPathComponents.length < referencePathComponents.length) return 0; | |
var mismatchCount = 0; | |
var nextOrdinalMatchValue = 0; | |
var linkText = link.textContent; | |
for (var i = 0; i < linkPathComponents.length; i++) { | |
var linkComponent = linkPathComponents[i]; | |
var referenceComponent = i < referencePathComponents.length ? referencePathComponents[i] : ""; | |
if (referenceComponent !== linkComponent) { | |
if (i < referencePathComponents.length - 2) return 0; | |
if (linkComponent.length >= referenceComponent.length) { | |
var commonSuffixLength = 0; | |
while (linkComponent[linkComponent.length - 1 - commonSuffixLength] === referenceComponent[referenceComponent.length - 1 - commonSuffixLength]) | |
commonSuffixLength++; | |
if (commonSuffixLength) { | |
linkComponent = linkComponent.substring(0, linkComponent.length - commonSuffixLength); | |
referenceComponent = referenceComponent.substring(0, referenceComponent.length - commonSuffixLength); | |
} | |
} | |
if (isNextOrdinal(referenceComponent, linkComponent, linkText, this.pageNumber)) nextOrdinalMatchValue = Math.pow(LinkNextOrdinalValueBase, (i - linkPathComponents.length + 1)); | |
else mismatchCount++; | |
} | |
if (mismatchCount > 1) return 0; | |
} | |
if (link.search) { | |
linkParameters = parametersFromSearch(link.search); | |
referenceParameters = parametersFromSearch(referenceLocation.search); | |
for (var key in linkParameters) { | |
var linkValue = linkParameters[key]; | |
var referenceValue = key in referenceParameters ? referenceParameters[key] : null; | |
if (referenceValue !== linkValue) { | |
if (referenceValue === null) referenceValue = ""; | |
if (linkValue === null) linkValue = ""; | |
if (linkValue.length < referenceValue.length) mismatchCount++; | |
else if (isNextOrdinal(referenceValue, linkValue, linkText, this.pageNumber)) nextOrdinalMatchValue = Math.max(nextOrdinalMatchValue, 1 / LinkNextOrdinalValueBase); | |
else mismatchCount++; | |
} | |
} | |
} | |
if (!nextOrdinalMatchValue) return 0; | |
var score = LinkMatchWeight * (Math.pow(LinkMismatchValueBase, -mismatchCount) + nextOrdinalMatchValue) + LinkVerticalDistanceFromArticleWeight * verticalDistanceFromArticle / LinkMaxVerticalDistanceFromArticle; | |
if (link.parentNode.tagName === "LI") score += LinkListItemBonus; | |
var linkText = link.innerText; | |
if (LinkNextMatchRegEx.test(linkText)) score += LinkNextMatchBonus; | |
if (LinkPageMatchRegEx.test(linkText)) score += LinkPageMatchBonus; | |
if (LinkContinueMatchRegEx.test(linkText)) score += LinkContinueMatchBonus; | |
return score; | |
}, | |
"isReaderModeAvailable": function isReaderModeAvailable() { | |
this.cacheWindowScrollPosition(); | |
var article = this.articleNode(); | |
return article != null; | |
}, | |
/**
 * Warms up the values needed before switching to Reader.
 * Discards cached bounding rects, re-reads the window scroll offsets, then
 * calls nextPageURL(), articleIsLTR() and adoptableArticle(); each of those
 * stores its result on this object, so their return values are ignored here.
 */
"prepareToTransitionToReader": function prepareToTransitionToReader() { | |
// Cached rects are dropped first so the computations below measure afresh.
clearCachedElementBoundingRects(); | |
this.cacheWindowScrollPosition(); | |
this.nextPageURL(); | |
this.articleIsLTR(); | |
this.adoptableArticle(); | |
}, | |
"nextPageURL": function nextPageURL() { | |
if (this._nextPageURL === undefined) this._nextPageURL = this.nextPageURLString(); | |
return this._nextPageURL; | |
}, | |
"articleNode": function articleNode() { | |
if (!this.didSearchForArticleNode) { | |
this.article = this.findArticle(); | |
this.didSearchForArticleNode = true; | |
if (this.article) this.articleIsLTR(); | |
} | |
return this.article ? this.article.element : null; | |
}, | |
"prependedArticleNode": function prependedArticleNode() { | |
if (!this.didSearchForArticleNode) this.articleNode(); | |
if (!this.didSearchForPrependedArticleNode) { | |
this.prependedArticle = this.findPrependedArticle(); | |
this.didSearchForPrependedArticleNode = true; | |
} | |
return this.prependedArticle ? this.prependedArticle.element : null; | |
}, | |
/**
 * Copies the window's current scroll offsets into this._cachedScrollY and
 * this._cachedScrollX.
 */
"cacheWindowScrollPosition": function cacheWindowScrollPosition() { | |
this._cachedScrollY = window.scrollY; | |
this._cachedScrollX = window.scrollX; | |
}, | |
"contentTextStyle": function contentTextStyle() { | |
if (this._cachedContentTextStyle) return this._cachedContentTextStyle; | |
this._cachedContentTextStyle = contentTextStyleForNode(this.contentDocument, this.articleNode(), false); | |
if (!this._cachedContentTextStyle) this._cachedContentTextStyle = getComputedStyle(this.articleNode()); | |
return this._cachedContentTextStyle; | |
}, | |
"commaCountIsLessThan": function commaCountIsLessThan(node, limit) { | |
var count = 0; | |
var textContent = node.textContent; | |
var i = -1; | |
while (count < limit && (i = textContent.indexOf(',', i + 1)) >= 0) | |
count++; | |
return count < limit; | |
}, | |
"calculateLinkDensity": function calculateLinkDensity(element) { | |
var textLength = removeWhitespace(element.textContent).length; | |
if (!textLength) return 0; | |
var links = element.querySelectorAll("a"); | |
var linkCharacterCount = 0; | |
for (var i = 0; i < links.length; i++) | |
linkCharacterCount += removeWhitespace(links[i].textContent).length; | |
return linkCharacterCount / textLength; | |
}, | |
"shouldPruneElement": function shouldPruneElement(element, originalElement) { | |
const MaxInputToParagraphRatio = 0.33; | |
const MaxPositiveWeightLinkDensity = 0.5; | |
const MaxStandardLinkDensity = 0.2; | |
const MinimumTextLength = 25; | |
const MinimumAverageImageArea = 200 * 200; | |
if (!element.parentElement) return false; | |
if (element.tagName !== "OBJECT" && element.tagName !== "EMBED") { | |
var childCount = element.childNodes.length; | |
var hasElementOrTextNodeChild = false; | |
for (var i = 0; i < childCount; i++) { | |
var node = element.childNodes[i]; | |
var nodeType = node.nodeType; | |
if (nodeType === Node.ELEMENT_NODE || (nodeType === Node.TEXT_NODE && !isNodeWhitespace(node))) { | |
hasElementOrTextNodeChild = true; | |
break; | |
} | |
} | |
if (!hasElementOrTextNodeChild) return true; | |
} | |
var classIdWeight = 0; | |
if (originalElement) { | |
if (PositiveRegEx.test(originalElement.className)) classIdWeight++; | |
if (PositiveRegEx.test(originalElement.id)) classIdWeight++; | |
if (NegativeRegEx.test(originalElement.className)) classIdWeight--; | |
if (NegativeRegEx.test(originalElement.id)) classIdWeight--; | |
} | |
if (classIdWeight < 0) return true; | |
if (element.tagName === "UL") { | |
if (originalElement.querySelector("iframe") && originalElement.querySelector("script")) return true; | |
return false; | |
} | |
if (element.tagName === "OBJECT") { | |
const PlugInsToKeepRegEx = /youtube|vimeo|dailymotion/; | |
var embedElement = element.querySelector("embed[src]"); | |
if (embedElement && PlugInsToKeepRegEx.test(embedElement.src)) return false; | |
var dataAttribute = element.getAttribute("data"); | |
if (dataAttribute && PlugInsToKeepRegEx.test(dataAttribute)) return false; | |
return true; | |
} | |
if (element.childElementCount === 1) { | |
var childElement = element.firstElementChild; | |
if (childElement.tagName === "A") return false; | |
if (childElement.tagName === "SPAN" && childElement.className === "converted-anchor" && elementHasAncestorWithTagName(childElement, "TABLE")) return false; | |
} | |
var imageElements = element.querySelectorAll("img"); | |
var imageElementCount = imageElements.length; | |
if (imageElementCount) { | |
var averageImageArea = 0; | |
for (var i = 0; i < imageElementCount; i++) { | |
var originalImage = imageElements[i].originalElement; | |
if (!isElementVisible(originalImage)) continue; | |
var originalRect = cachedElementBoundingRect(originalImage); | |
averageImageArea += (originalRect.width / imageElementCount) * (originalRect.height / imageElementCount); | |
} | |
if (averageImageArea > MinimumAverageImageArea) return false; | |
} | |
if (!this.commaCountIsLessThan(element, 10)) return false; | |
var p = element.querySelectorAll("p").length; | |
var br = element.querySelectorAll("br").length; | |
var numParagraphs = p + Math.floor(br / 2); | |
if (imageElementCount > numParagraphs) return true; | |
if (element.querySelectorAll("li").length > numParagraphs) return true; | |
if (element.querySelectorAll("input").length / numParagraphs > MaxInputToParagraphRatio) return true; | |
if (element.textContent.length < MinimumTextLength && (imageElementCount != 1)) return true; | |
if (element.querySelector("embed")) return true; | |
var linkDensity = this.calculateLinkDensity(element); | |
if (classIdWeight >= 1 && linkDensity > MaxPositiveWeightLinkDensity) return true; | |
if (classIdWeight < 1 && linkDensity > MaxStandardLinkDensity) return true; | |
if (element.tagName === "TABLE") { | |
var textLength = removeWhitespace(element.innerText).length; | |
var originalTextLength = removeWhitespace(originalElement.innerText).length; | |
if (textLength <= (originalTextLength * 0.5)) return true; | |
} | |
return false; | |
}, | |
"wordCountIsLessThan": function wordCountIsLessThan(node, limit) { | |
var count = 0; | |
var textContent = node.textContent; | |
var i = -1; | |
while ((i = textContent.indexOf(' ', i + 1)) >= 0 && count < limit) | |
count++; | |
return count < limit; | |
}, | |
"adoptableArticle": function adoptableArticle() { | |
if (this._adoptableArticle !== undefined) { | |
return this._adoptableArticle.cloneNode(true); | |
} | |
var rootElement = this.articleNode(); | |
this._adoptableArticle = rootElement ? rootElement.cloneNode(true) : null; | |
if (!this._adoptableArticle) return this._adoptableArticle; | |
var articleToPrepend = this.prependedArticleNode(); | |
var cleanedPrependNode = null; | |
this._adoptableArticle = this.cleanArticleNode(rootElement, this._adoptableArticle, false) | |
if (articleToPrepend) { | |
var cleanedPrependNode = this.cleanArticleNode(articleToPrepend, articleToPrepend.cloneNode(true), true); | |
if (cleanedPrependNode) this._adoptableArticle.insertBefore(cleanedPrependNode, this._adoptableArticle.firstChild); | |
} | |
return this._adoptableArticle; | |
}, | |
"cleanArticleNode": function cleanArticleNode(originalArticleNode, clonedArticleNode, allowedToReturnNull) { | |
const tagNamesToAlwaysPrune = { | |
"FORM": 1, | |
"IFRAME": 1, | |
"SCRIPT": 1, | |
"STYLE": 1, | |
"LINK": 1 | |
}; | |
const tagNamesToConsiderPruning = { | |
"DIV": 1, | |
"TABLE": 1, | |
"OBJECT": 1, | |
"UL": 1 | |
}; | |
const tagNamesAffectingFontStyle = { | |
"I": 1, | |
"EM": 1 | |
}; | |
const tagNamesAffectingFontWeight = { | |
"B": 1, | |
"STRONG": 1, | |
"H1": 1, | |
"H2": 1, | |
"H3": 1, | |
"H4": 1, | |
"H5": 1, | |
"H6": 1 | |
}; | |
var elementsToConsiderPruning = []; | |
var depthInFloat = 0; | |
var depthInTable = 0; | |
var depthInFontStyle = 0; | |
var depthInFontWeight = 0; | |
var currentElement = originalArticleNode; | |
var view = currentElement.ownerDocument.defaultView; | |
var currentCloneElement = clonedArticleNode; | |
var articleTitle = this.articleTitle(); | |
var articleTitleElement = this._articleTitleElement; | |
function incrementDepthLevels(delta) { | |
if (depthInFloat) depthInFloat += delta; | |
if (depthInTable) depthInTable += delta; | |
if (depthInFontStyle) depthInFontStyle += delta; | |
if (depthInFontWeight) depthInFontWeight += delta; | |
}; | |
function updateDepthLevelsAfterSiblingTraversal() { | |
if (depthInFloat === 1) depthInFloat = 0; | |
if (depthInTable === 1) depthInTable = 0; | |
if (depthInFontStyle === 1) depthInFontStyle = 0; | |
if (depthInFontWeight === 1) depthInFontWeight = 0; | |
}; | |
while (currentElement) { | |
var prunedElement = null; | |
var tagName = currentCloneElement.tagName; | |
currentCloneElement.originalElement = currentElement; | |
if (tagName in tagNamesToAlwaysPrune) prunedElement = currentCloneElement; | |
if (!prunedElement && currentElement === articleTitleElement) prunedElement = currentCloneElement; | |
if (!prunedElement && (tagName === "H1" || tagName === "H2")) { | |
var distanceFromoriginalArticleNodeTop = currentElement.offsetTop - originalArticleNode.offsetTop; | |
if (distanceFromoriginalArticleNodeTop < HeaderMinimumDistanceFromArticleTop) { | |
var headerText = currentElement.innerText; | |
var maxDistanceToConsiderSimilar = headerText.length * HeaderLevenshteinDistanceToLengthRatio; | |
if (levenshteinDistance(articleTitle, headerText) <= maxDistanceToConsiderSimilar) prunedElement = currentCloneElement; | |
} | |
} | |
var computedStyle; | |
if (!prunedElement) computedStyle = getComputedStyle(currentElement); | |
if (!prunedElement && tagName === "DIV" && currentCloneElement.parentNode) { | |
var elements = currentElement.querySelectorAll("a, blockquote, dl, div, img, ol, p, pre, table, ul"); | |
var inFloat = depthInFloat || computedStyle["float"] !== "none"; | |
if (!inFloat && !elements.length) { | |
var parentNode = currentCloneElement.parentNode; | |
var replacementNode = this.contentDocument.createElement("p"); | |
while (currentCloneElement.firstChild) { | |
var child = currentCloneElement.firstChild; | |
replacementNode.appendChild(child); | |
} | |
parentNode.replaceChild(replacementNode, currentCloneElement); | |
currentCloneElement = replacementNode; | |
currentCloneElement.originalElement = currentElement; | |
tagName = currentCloneElement.tagName; | |
} | |
} | |
if (!prunedElement && currentCloneElement.parentNode && tagName in tagNamesToConsiderPruning) elementsToConsiderPruning.push(currentCloneElement); | |
if (!prunedElement) { | |
if (computedStyle.display === "none") prunedElement = currentCloneElement; | |
else if (currentElement !== originalArticleNode && tagName !== "IMG" && !depthInFloat && computedStyle["float"] !== "none" && (cachedElementBoundingRect(currentElement).height >= FloatMinimumHeight || currentElement.childElementCount > 1)) depthInFloat = 1; | |
} | |
if (!prunedElement) { | |
var attributes = currentCloneElement.attributes; | |
for (var i = 0; i < attributes.length; i++) { | |
var attributeName = attributes[i].nodeName; | |
if (AttributesToRemoveRegEx.test(attributeName)) { | |
currentCloneElement.removeAttribute(attributeName); | |
i--; | |
} | |
} | |
if (!depthInFontStyle && computedStyle.fontStyle !== "normal") { | |
if (!(tagName in tagNamesAffectingFontStyle)) currentCloneElement.style.fontStyle = computedStyle.fontStyle; | |
depthInFontStyle = 1; | |
} | |
if (!depthInFontWeight && computedStyle.fontWeight !== "normal") { | |
if (!(tagName in tagNamesAffectingFontWeight)) currentCloneElement.style.fontWeight = computedStyle.fontWeight; | |
depthInFontWeight = 1; | |
} | |
if (depthInFloat) { | |
if (depthInFloat === 1) { | |
if (cachedElementBoundingRect(currentElement).width === cachedElementBoundingRect(originalArticleNode).width) currentCloneElement.setAttribute("class", "float full-width"); | |
else currentCloneElement.setAttribute("class", "float " + computedStyle["float"]); | |
} | |
var widthValue = currentElement.style.getPropertyValue("width"); | |
if (widthValue) currentCloneElement.style.width = widthValue; | |
else { | |
var rules = view.getMatchedCSSRules(currentElement, "", true); | |
if (rules) { | |
for (var i = rules.length - 1; i >= 0; i--) { | |
widthValue = rules[i].style.getPropertyValue("width"); | |
if (widthValue) { | |
currentCloneElement.style.width = widthValue; | |
break; | |
} | |
} | |
} | |
} | |
if (depthInFloat === 1 && !widthValue) currentCloneElement.style.width = cachedElementBoundingRect(currentElement).width + "px"; | |
} | |
if (tagName === "TABLE") { | |
if (!depthInTable) depthInTable = 1; | |
} else if (tagName === "IMG") { | |
currentCloneElement.removeAttribute("border"); | |
currentCloneElement.removeAttribute("hspace"); | |
currentCloneElement.removeAttribute("vspace"); | |
currentCloneElement.removeAttribute("align"); | |
if (!depthInFloat) { | |
var imageBoundingRect = cachedElementBoundingRect(currentElement); | |
if (imageBoundingRect.width < ImageSizeTiny && imageBoundingRect.height < ImageSizeTiny) currentCloneElement.setAttribute("class", "reader-image-tiny"); | |
else if ((imageBoundingRect.width / originalArticleNode.clientWidth) > ImageWidthToParentWidthRatio) { | |
currentCloneElement.setAttribute("class", "reader-image-large"); | |
} | |
} else { | |
currentCloneElement.style.float = computedStyle.float; | |
} | |
} else if (tagName === "FONT") { | |
currentCloneElement.removeAttribute("size"); | |
currentCloneElement.removeAttribute("face"); | |
currentCloneElement.removeAttribute("color"); | |
} else if (tagName === "A" && currentCloneElement.parentNode) { | |
var href = currentCloneElement.getAttribute("href"); | |
if (href && href.length && (href[0] === "#" || href.substring(0, 11) === "javascript:")) { | |
if (!depthInTable && !currentCloneElement.childElementCount && currentCloneElement.parentElement.childElementCount === 1) { | |
var xPathResult = this.contentDocument.evaluate("text()", currentCloneElement.parentElement, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | |
if (!xPathResult.snapshotLength) prunedElement = currentCloneElement; | |
} | |
if (!prunedElement) { | |
var replacementNode = this.contentDocument.createElement("span"); | |
if (currentCloneElement.childElementCount === 1 && currentCloneElement.firstElementChild.tagName === "IMG") { | |
var imageElement = currentCloneElement.firstElementChild; | |
if (imageElement.width > AnchorImageMinimumWidth && imageElement.height > AnchorImageMinimumHeight) replacementNode.setAttribute("class", "converted-image-anchor"); | |
} | |
if (!replacementNode.className) replacementNode.setAttribute("class", "converted-anchor"); | |
while (currentCloneElement.firstChild) | |
replacementNode.appendChild(currentCloneElement.firstChild); | |
currentCloneElement.parentNode.replaceChild(replacementNode, currentCloneElement); | |
currentCloneElement = replacementNode; | |
} | |
} | |
} | |
} | |
var firstElementChild = prunedElement ? null : currentElement.firstElementChild; | |
if (firstElementChild) { | |
currentElement = firstElementChild; | |
currentCloneElement = currentCloneElement.firstElementChild; | |
incrementDepthLevels(1); | |
} else { | |
var nextElementSibling; | |
while (currentElement !== originalArticleNode && !(nextElementSibling = currentElement.nextElementSibling)) { | |
currentElement = currentElement.parentElement; | |
currentCloneElement = currentCloneElement.parentElement; | |
incrementDepthLevels(-1); | |
} | |
if (currentElement === originalArticleNode) { | |
if (prunedElement) { | |
if (prunedElement.parentElement) prunedElement.parentElement.removeChild(prunedElement); | |
else if (allowedToReturnNull) return null; | |
} | |
break; | |
} | |
currentElement = nextElementSibling; | |
currentCloneElement = currentCloneElement.nextElementSibling; | |
updateDepthLevelsAfterSiblingTraversal(); | |
} | |
if (prunedElement && !prunedElement.parentElement && shouldDoSOmething) return null; | |
if (prunedElement) { | |
if (prunedElement.parentElement) prunedElement.parentElement.removeChild(prunedElement); | |
else if (allowedToReturnNull) return null; | |
} | |
} | |
for (var i = elementsToConsiderPruning.length - 1; i >= 0; i--) { | |
var element = elementsToConsiderPruning[i]; | |
if (element.parentNode && this.shouldPruneElement(element, element.originalElement)) element.parentNode.removeChild(element); | |
} | |
var floatElements = this._adoptableArticle.querySelectorAll(".float"); | |
for (var i = 0; i < floatElements.length; i++) { | |
var pruneFloatedElement = false; | |
var floatElement = floatElements[i]; | |
if (!pruneFloatedElement) { | |
var anchors = floatElement.querySelectorAll("a, span.converted-image-anchor"); | |
var replacedAnchors = floatElement.querySelectorAll("span.converted-anchor"); | |
pruneFloatedElement = floatElement.parentNode && replacedAnchors.length > anchors.length; | |
} | |
if (!pruneFloatedElement) { | |
var plugInsInClonedElement = floatElement.querySelectorAll("embed, object").length; | |
var plugInsInOriginalElement = floatElement.originalElement.querySelectorAll("embed, object").length; | |
if (!plugInsInClonedElement && plugInsInOriginalElement) pruneFloatedElement = true; | |
} | |
if (pruneFloatedElement) floatElement.parentNode.removeChild(floatElement); | |
} | |
if (allowedToReturnNull && !removeWhitespace(clonedArticleNode.innerText).length) return null; | |
return clonedArticleNode; | |
}, | |
"articleTitle": function articleTitle() { | |
if (this._articleTitle !== undefined) return this._articleTitle; | |
const HeaderMaximumDistance = 500; | |
const HeaderMinimumTextLength = 8; | |
const HeaderMinimumFontSize = 12; | |
const HeaderFontSizeBonusMinimumRatio = 1.1; | |
const HeaderFontSizeBonusMultiplier = 1.25; | |
const HeaderBonusRegEx = /header|title|headline/i; | |
const HeaderRegexBonusMultiplier = 1.5; | |
const HeaderLargeImageCheckOffsetY = 150; | |
const HeaderLargeImageMinimumHeight = 300; | |
const HeaderLargeImageMinimumWidthRatio = 0.5; | |
const HeaderMaximumDOMDistance = 8; | |
const HeaderMinimumFontSizeDifference = 1.5; | |
var articleRect = cachedElementBoundingRect(this.prependedArticleNode() ? this.prependedArticleNode() : this.articleNode()); | |
var articleCenterX = articleRect.left + (articleRect.width / 2); | |
var articleTopY = articleRect.top; | |
var articleAdjustedTopY = articleTopY; | |
var potentialLeadingImage = this.contentDocument.elementFromPoint(articleCenterX, articleTopY - HeaderLargeImageCheckOffsetY); | |
if (potentialLeadingImage && potentialLeadingImage.tagName === "IMG") { | |
var imageRect = cachedElementBoundingRect(potentialLeadingImage); | |
if (imageRect.height >= HeaderLargeImageMinimumHeight && imageRect.width >= articleRect.width * HeaderLargeImageMinimumWidthRatio) articleAdjustedTopY = imageRect.top; | |
} | |
var allHeaders = this.contentDocument.querySelectorAll("h1, h2, h3, h4, h5, .headline, .article_title, #hn-headline"); | |
var bestHeader; | |
for (var i = 0; i < allHeaders.length; i++) { | |
var header = allHeaders[i]; | |
var headerRect = cachedElementBoundingRect(header); | |
var headerCenterX = headerRect.left + (headerRect.width / 2); | |
var headerCenterY = headerRect.top + (headerRect.height / 2); | |
var deltaX = headerCenterX - articleCenterX; | |
var deltaY = headerCenterY - articleAdjustedTopY; | |
var distance = Math.sqrt((deltaX * deltaX) + (deltaY * deltaY)); | |
var headerScore = Math.max(HeaderMaximumDistance - distance, 0); | |
if (distance > HeaderMaximumDistance) continue; | |
if (headerRect.width < articleRect.width * 0.5) continue; | |
if (headerCenterX < articleRect.left || headerCenterX > articleRect.right) continue; | |
var headerFontSize = fontSizeFromComputedStyle(getComputedStyle(header)); | |
if (headerFontSize < HeaderMinimumFontSize) continue; | |
var headerText = header.innerText; | |
if (headerText.length < HeaderMinimumTextLength) continue; | |
headerScore *= 1 + TitleCandidateDepthScoreMultiplier * elementDepth(header); | |
headerScore *= (headerFontSize / BaseFontSize); | |
var fontSize = parseInt(this.contentTextStyle().fontSize); | |
if (parseInt(headerFontSize) > fontSize * HeaderFontSizeBonusMinimumRatio) headerScore *= HeaderFontSizeBonusMultiplier; | |
if (HeaderBonusRegEx.test(header.className) || HeaderBonusRegEx.test(header.id)) headerScore *= HeaderRegexBonusMultiplier; | |
if (!bestHeader || headerScore > bestHeader.headerScore) { | |
bestHeader = header; | |
bestHeader.headerScore = headerScore; | |
bestHeader.headerText = headerText; | |
} | |
} | |
if (bestHeader && domDistance(bestHeader, this.articleNode(), HeaderMaximumDOMDistance + 1) > HeaderMaximumDOMDistance) { | |
if (parseInt(getComputedStyle(bestHeader).fontSize) < HeaderMinimumFontSizeDifference * fontSize) bestHeader = null; | |
} | |
if (bestHeader) { | |
this._articleTitle = bestHeader.headerText; | |
this._articleTitleElement = bestHeader; | |
} | |
if (!this._articleTitle) this._articleTitle = this.contentDocument.title; | |
return this._articleTitle; | |
}, | |
"articleIsLTR": function articleIsLTR() { | |
if (this._articleIsLTR === undefined) { | |
var computedStyle = getComputedStyle(this.articleNode()); | |
this._articleIsLTR = computedStyle ? computedStyle.direction === "ltr" : true; | |
} | |
return this._articleIsLTR; | |
}, | |
"findSuggestedCandidate": function findSuggestedCandidate() { | |
var route = this.suggestedRouteToArticle; | |
if (!route || !route.length) return null; | |
var node; | |
var i; | |
for (i = route.length - 1; i >= 0; i--) { | |
if (route[i].id) { | |
node = this.contentDocument.getElementById(route[i].id); | |
if (node) break; | |
} | |
} | |
i++; | |
if (!node) node = this.contentDocument; | |
while (i < route.length) { | |
var step = route[i]; | |
var child = node.nodeType === Node.DOCUMENT_NODE ? node.documentElement : node.firstElementChild; | |
for (var j = 1; child && j < step.index; j++) { | |
child = child.nextElementSibling; | |
} | |
if (!child) return null; | |
if (child.tagName !== step.tagName) return null; | |
if (step.className && child.className !== step.className) return null; | |
node = child; | |
i++; | |
} | |
if (!isElementVisible(node)) return null; | |
return new CandidateElement(node, this.contentDocument); | |
}, | |
"findArticle": function findArticle() { | |
if (BlacklistedHostsRegEx.test(this.contentDocument.location.hostname)) return null; | |
var suggestedCandidate = this.findSuggestedCandidate(); | |
var candidateElements = this.findCandidateElements(); | |
if (!candidateElements || !candidateElements.length) return suggestedCandidate; | |
if (suggestedCandidate && suggestedCandidate.basicScore() >= ReaderMinimumScore) return suggestedCandidate; | |
var highestScoringElement = this.highestScoringCandidateFromCandidates(candidateElements); | |
if (highestScoringElement.finalScore() < ReaderMinimumScore) return suggestedCandidate; | |
if (highestScoringElement.shouldDisqualifyDueToScoreDensity()) return null; | |
if (highestScoringElement.shouldDisqualifyDueToHorizontalRuleDensity()) return null; | |
if (highestScoringElement.shouldDisqualifyDueToHeaderDensity()) return null; | |
if (highestScoringElement.shouldDisqualifyDueToSimilarElements(candidateElements)) return null; | |
return highestScoringElement; | |
}, | |
"findPrependedArticle": function findPrependedArticle() { | |
if (!this.article) return null; | |
for (var i = 0, candidateSearchScope = this.article.element; i < 3 && candidateSearchScope; i++, candidateSearchScope = candidateSearchScope.parentNode) { | |
var candidateElements = this.findPrependedArticleCandidateElements(candidateSearchScope); | |
if (!candidateElements || !candidateElements.length) continue; | |
var sortedCandidateElements = this.sortCandidateElementsInDescendingScoreOrder(candidateElements); | |
var highestScoringCandidate; | |
for (var candidateIndex = 0; candidateIndex < sortedCandidateElements.length; candidateIndex++) { | |
highestScoringCandidate = sortedCandidateElements[candidateIndex]; | |
if (!highestScoringCandidate || !highestScoringCandidate.basicScore()) break; | |
if (highestScoringCandidate.shouldDisqualifyDueToScoreDensity()) continue; | |
if (highestScoringCandidate.shouldDisqualifyDueToHorizontalRuleDensity()) continue; | |
if (highestScoringCandidate.shouldDisqualifyDueToHeaderDensity()) continue; | |
if (cachedElementBoundingRect(highestScoringCandidate.element).height < PrependedArticleCandidateMinimumHeight && cachedElementBoundingRect(this.article.element).width != cachedElementBoundingRect(highestScoringCandidate.element).width) continue; | |
var textNodeStyle = contentTextStyleForNode(this.contentDocument, highestScoringCandidate.element, true); | |
if (!textNodeStyle) continue; | |
if (textNodeStyle.fontFamily !== this.contentTextStyle().fontFamily || textNodeStyle.fontSize !== this.contentTextStyle().fontSize) continue; | |
if (highestScoringCandidate) return highestScoringCandidate; | |
} | |
} | |
return null; | |
}, | |
"highestScoringCandidateFromCandidates": function highestScoringCandidateFromCandidates(candidateElements) { | |
var highestScore = 0; | |
var highestScoringElement = null; | |
for (var i = 0; i < candidateElements.length; i++) { | |
var candidateElement = candidateElements[i]; | |
var score = candidateElement.basicScore(); | |
if (score >= highestScore) { | |
highestScore = score; | |
highestScoringElement = candidateElement; | |
} | |
} | |
return highestScoringElement; | |
}, | |
"sortCandidateElementsInDescendingScoreOrder": function sortCandidateElementsInDescendingScoreOrder(candidateElements) { | |
function sortByScore(candidate1, candidate2) { | |
if (candidate1.basicScore() != candidate2.basicScore()) return candidate2.basicScore() - candidate1.basicScore(); | |
return candidate2.depth() - candidate1.depth(); | |
} | |
return candidateElements.sort(sortByScore); | |
}, | |
"findCandidateElements": function findCandidateElements() { | |
const MaximumCandidateDetectionTimeInterval = 1000; | |
var findCandidateElementsTimeoutDate = Date.now() + MaximumCandidateDetectionTimeInterval; | |
var elements = this.contentDocument.getElementsByTagName("*"); | |
var candidateElements = []; | |
var elementsLength = elements.length; | |
for (var i = 0; i < elementsLength; i++) { | |
var element = elements[i]; | |
if (CandidateTagNamesToIgnore[element.tagName]) continue; | |
var candidate = CandidateElement.candidateIfElementIsViable(element, this.contentDocument); | |
if (candidate) candidateElements.push(candidate); | |
if (Date.now() > findCandidateElementsTimeoutDate) { | |
console.assert(false, "ReaderArticleFinder aborting CandidateElement detection due to timeout"); | |
candidateElements = []; | |
break; | |
} | |
} | |
return candidateElements; | |
}, | |
"findPrependedArticleCandidateElements": function findPrependedArticleCandidateElements(searchScope) { | |
if (!this.article) return []; | |
if (!searchScope) searchScope = this.article.element; | |
var xPathQuery = "preceding-sibling::*/descendant-or-self::*"; | |
var xPathResults = this.contentDocument.evaluate(xPathQuery, searchScope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | |
var possibleCandidateCount = xPathResults.snapshotLength; | |
var candidateElements = []; | |
for (var i = 0; i < possibleCandidateCount; i++) { | |
var element = xPathResults.snapshotItem(i); | |
if (CandidateTagNamesToIgnore[element.tagName]) continue; | |
var candidate = CandidateElement.prependedArticleCandidateIfElementIsViable(element, this.article, this.contentDocument); | |
if (candidate) candidateElements.push(candidate); | |
} | |
return candidateElements; | |
}, | |
"nextPageURLString": function nextPageURLString() { | |
if (!this.article) return null; | |
var bestLink; | |
var bestLinkScore = 0; | |
var searchScope = this.article.element; | |
if (searchScope.parentNode && getComputedStyle(searchScope).display === "inline") searchScope = searchScope.parentNode; | |
var possibleSearchScope = searchScope; | |
var minimumBottomOffset = cachedElementBoundingRect(searchScope).bottom + LinkMaxVerticalDistanceFromArticle; | |
while (isElementNode(possibleSearchScope) && cachedElementBoundingRect(possibleSearchScope).bottom <= minimumBottomOffset) | |
possibleSearchScope = possibleSearchScope.parentNode; | |
if (isElementNode(possibleSearchScope) && possibleSearchScope != searchScope) searchScope = possibleSearchScope; | |
var anchorElements = this.contentDocument.evaluate(LinkCandidateXPathQuery, searchScope, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | |
var numberOfLinks = anchorElements.snapshotLength; | |
for (var i = 0; i < numberOfLinks; i++) { | |
var link = anchorElements.snapshotItem(i); | |
var score = this.scoreNextPageLinkCandidate(link); | |
if (score > bestLinkScore) { | |
bestLink = link; | |
bestLinkScore = score; | |
} | |
} | |
return bestLink ? bestLink.href : null; | |
}, | |
"scoreNextPageLinkCandidate": function scoreNextPageLinkCandidate(link) { | |
function isNextOrdinal(referenceString, linkString, linkText, pageNumber) { | |
if (linkString.substring(0, referenceString.length) === referenceString) { | |
linkString = linkString.substring(referenceString.length); | |
referenceString = ""; | |
} | |
var linkOrdinal = linkString.lastInteger(); | |
if (isNaN(linkOrdinal)) return false; | |
var referenceOrdinal = referenceString ? referenceString.lastInteger() : NaN; | |
if (isNaN(referenceOrdinal) || referenceOrdinal >= MaximumExactIntegralValue) referenceOrdinal = pageNumber; | |
if (linkOrdinal == referenceOrdinal) return linkText.lastInteger() === referenceOrdinal + 1; | |
return linkOrdinal === referenceOrdinal + 1; | |
} | |
function parametersFromSearch(search) { | |
var map = {}; | |
var parameters = search.substring(1).split("&"); | |
for (var i = 0; i < parameters.length; i++) { | |
var parameter = parameters[i]; | |
var equalsIndex = parameter.indexOf("="); | |
if (equalsIndex === -1) map[parameter] = null; | |
else map[parameter.substring(0, equalsIndex)] = parameter.substring(equalsIndex + 1); | |
} | |
return map; | |
} | |
var referenceLocation = this.contentDocument.location; | |
if (link.host !== referenceLocation.host) return 0; | |
if (link.pathname === referenceLocation.pathname && link.search === referenceLocation.search) return 0; | |
if (link.toString().indexOf("#") != -1) return 0; | |
if (!isElementVisible(link)) return 0; | |
var linkBoundingRect = cachedElementBoundingRect(link); | |
var articleBoundingRect = cachedElementBoundingRect(this.article.element); | |
var verticalDistanceFromArticle = Math.max(0, Math.max(articleBoundingRect.top - (linkBoundingRect.top + linkBoundingRect.height), linkBoundingRect.top - (articleBoundingRect.top + articleBoundingRect.height))); | |
if (verticalDistanceFromArticle > LinkMaxVerticalDistanceFromArticle) return 0; | |
var horizontalDistanceFromArticle = Math.max(0, Math.max(articleBoundingRect.left - (linkBoundingRect.left + linkBoundingRect.width), linkBoundingRect.left - (articleBoundingRect.left + articleBoundingRect.width))); | |
if (horizontalDistanceFromArticle > 0) return 0; | |
var linkPathComponents = link.pathname.substring(1).split("/"); | |
if (!linkPathComponents[linkPathComponents.length - 1]) linkPathComponents.pop(); | |
var referencePathComponents = referenceLocation.pathname.substring(1).split("/"); | |
if (!referencePathComponents[referencePathComponents.length - 1]) referencePathComponents.pop(); | |
if (linkPathComponents.length < referencePathComponents.length) return 0; | |
var mismatchCount = 0; | |
var nextOrdinalMatchValue = 0; | |
var linkText = link.textContent; | |
for (var i = 0; i < linkPathComponents.length; i++) { | |
var linkComponent = linkPathComponents[i]; | |
var referenceComponent = i < referencePathComponents.length ? referencePathComponents[i] : ""; | |
if (referenceComponent !== linkComponent) { | |
if (i < referencePathComponents.length - 2) return 0; | |
if (linkComponent.length >= referenceComponent.length) { | |
var commonSuffixLength = 0; | |
while (linkComponent[linkComponent.length - 1 - commonSuffixLength] === referenceComponent[referenceComponent.length - 1 - commonSuffixLength]) | |
commonSuffixLength++; | |
if (commonSuffixLength) { | |
linkComponent = linkComponent.substring(0, linkComponent.length - commonSuffixLength); | |
referenceComponent = referenceComponent.substring(0, referenceComponent.length - commonSuffixLength); | |
} | |
} | |
if (isNextOrdinal(referenceComponent, linkComponent, linkText, this.pageNumber)) nextOrdinalMatchValue = Math.pow(LinkNextOrdinalValueBase, (i - linkPathComponents.length + 1)); | |
else mismatchCount++; | |
} | |
if (mismatchCount > 1) return 0; | |
} | |
if (link.search) { | |
linkParameters = parametersFromSearch(link.search); | |
referenceParameters = parametersFromSearch(referenceLocation.search); | |
for (var key in linkParameters) { | |
var linkValue = linkParameters[key]; | |
var referenceValue = key in referenceParameters ? referenceParameters[key] : null; | |
if (referenceValue !== linkValue) { | |
if (referenceValue === null) referenceValue = ""; | |
if (linkValue === null) linkValue = ""; | |
if (linkValue.length < referenceValue.length) mismatchCount++; | |
else if (isNextOrdinal(referenceValue, linkValue, linkText, this.pageNumber)) nextOrdinalMatchValue = Math.max(nextOrdinalMatchValue, 1 / LinkNextOrdinalValueBase); | |
else mismatchCount++; | |
} | |
} | |
} | |
if (!nextOrdinalMatchValue) return 0; | |
var score = LinkMatchWeight * (Math.pow(LinkMismatchValueBase, -mismatchCount) + nextOrdinalMatchValue) + LinkVerticalDistanceFromArticleWeight * verticalDistanceFromArticle / LinkMaxVerticalDistanceFromArticle; | |
if (link.parentNode.tagName === "LI") score += LinkListItemBonus; | |
var linkText = link.innerText; | |
if (LinkNextMatchRegEx.test(linkText)) score += LinkNextMatchBonus; | |
if (LinkPageMatchRegEx.test(linkText)) score += LinkPageMatchBonus; | |
if (LinkContinueMatchRegEx.test(linkText)) score += LinkContinueMatchBonus; | |
return score; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment