Created
September 22, 2016 17:17
-
-
Save liushooter/4e52754ecd136ec1b0ebf596dd698cf0 to your computer and use it in GitHub Desktop.
A prototype main-text search algorithm that depends on the browser's rendering tree.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'
// Ramda supplies the functional helpers (compose, curry, pick, ...) used below.
import R from 'ramda'
// Layout, visibility and text-measuring helpers used by findMainElement.
// They read live DOM state (element sizes, computed style, scroll position),
// so they are only meaningful inside a browser.

/**
 * Chooses the effective size from a `[clientValue, scrollValue]` pair: the
 * scroll size is used only when it is more than twice the client size,
 * otherwise the client size is used. Judging by the name, this is meant for
 * content that overflows visibly beyond the element's own client box.
 */
const fixOverflowVisible = ([clientValue, scrollValue]) =>
  scrollValue / clientValue > 2 ? scrollValue : clientValue

/** Effective height of an element (client height, or scroll height per fixOverflowVisible). */
const getHeight = R.compose(fixOverflowVisible, e => [e.clientHeight, e.scrollHeight])

/** Effective width of an element (client width, or scroll width per fixOverflowVisible). */
const getWidth = R.compose(fixOverflowVisible, e => [e.clientWidth, e.scrollWidth])

/** Client height of the document's root element. */
const getWindowHeight = () => document.documentElement.clientHeight

/** Client width of the document's root element. */
const getWindowWidth = () => document.documentElement.clientWidth

/** Current horizontal scroll offset of the window. */
const getScrollX = () => window.scrollX

/** Current vertical scroll offset of the window. */
const getScrollY = () => window.scrollY

/**
 * Document-relative X coordinate of the element's left edge.
 * `offsetLeft` is measured from the element's `offsetParent`, not from the
 * document, so it gives the wrong position for elements nested inside
 * positioned containers. The viewport-relative bounding rect plus the scroll
 * offset is used instead.
 */
const getX = e => e.getBoundingClientRect().left + getScrollX()

/** Document-relative Y coordinate of the element's top edge (see getX). */
const getY = e => e.getBoundingClientRect().top + getScrollY()

/** X coordinate of the element's left edge relative to the current scroll position. */
const getRelativeX = e => getX(e) - getScrollX()

/** Y coordinate of the element's top edge relative to the current scroll position. */
const getRelativeY = e => getY(e) - getScrollY()

/**
 * Curried: length of the part of an element that lies inside the window along
 * one axis.
 *
 * @param {Function} partFunc - element => its size along the axis
 * @param {Function} windowPartFunc - () => the window size along the axis
 * @param {Function} relativePointFunc - element => its start coordinate relative to the window
 * @param {Element} e - element to measure
 * @returns {number} overlap of [start, start + size] with [0, window size];
 *   0 when the element lies entirely outside the window
 */
const getVisiblePart = R.curry((partFunc, windowPartFunc, relativePointFunc, e) => {
  const size = partFunc(e)
  const windowSize = windowPartFunc()
  const start = relativePointFunc(e)
  const finish = start + size
  if (start > windowSize || finish < 0) {
    return 0
  }
  // Clamp both ends of the element to the window before measuring.
  return Math.min(finish, windowSize) - Math.max(start, 0)
})

/** Height of the element's part that lies inside the window. */
const getVisibleHeight = getVisiblePart(getHeight, getWindowHeight, getRelativeY)

/** Width of the element's part that lies inside the window. */
const getVisibleWidth = getVisiblePart(getWidth, getWindowWidth, getRelativeX)

/** Visible height multiplied by visible width. */
const getVisibleArea = e => getVisibleHeight(e) * getVisibleWidth(e)

/** Curried: picks the listed properties from the element's computed style. */
const getComputedStyleByList = R.curry((styleList, e) => R.pick(styleList, getComputedStyle(e)))

/** Maps a function over a list or object and returns the mapped values as a list. */
const mapToList = R.compose(R.values, R.map)

/** Curried: sum of `f(x)` over every value of a list or object. */
const sumBy = R.curry(R.compose(R.sum, mapToList))

/** Sum of the computed left and right margins, each parsed with parseFloat. */
const getHorizontalMargin = R.compose(sumBy(parseFloat), getComputedStyleByList(['marginLeft', 'marginRight']))

/** Sum of the computed top and bottom margins, each parsed with parseFloat. */
const getVerticalMargin = R.compose(sumBy(parseFloat), getComputedStyleByList(['marginTop', 'marginBottom']))

/** Effective width plus horizontal margins. */
const getWidthWithMargin = e => getWidth(e) + getHorizontalMargin(e)

/** Effective height plus vertical margins. */
const getHeightWithMargin = e => getHeight(e) + getVerticalMargin(e)

/** Effective width multiplied by effective height. */
const getArea = e => getWidth(e) * getHeight(e)

/** Area of the element including its margins. */
const getAreaWithMargin = e => getWidthWithMargin(e) * getHeightWithMargin(e)

/**
 * Number of non-whitespace characters in the element's `innerText`.
 * Returns 0 for a missing element or an empty/missing `innerText`.
 */
const getTextLength = e => (e && e.innerText ? e.innerText.replace(/\s/g, '').length : 0)

/**
 * Curried: `f(a) / f(b)`.
 * NOTE: nothing guards a zero denominator, so the result is NaN (0 / 0) or
 * Infinity (x / 0) when `f(b)` is 0.
 */
const divideBy = R.curry((f, a, b) => R.divide(f(a), f(b)))

/** Ratio of the areas of two elements. */
const divideByArea = divideBy(getArea)

/** Ratio of the visible areas of two elements. */
const divideByVisibleArea = divideBy(getVisibleArea)

/** Ratio of the text lengths of two elements. */
const divideByTextLength = divideBy(getTextLength)
/**
 * Recursively searches below `parent` for the element that most likely holds
 * the main content.
 *
 * Every eligible child is scored with a weighted average of ratios (area,
 * area including margins, visible area and text length, measured against the
 * parent or the document root). If the best child scores above the threshold,
 * the search continues inside it; otherwise `parent` itself is the result.
 *
 * Relies on the layout/text helpers defined earlier in this file and on the
 * browser globals `Node` and `document`.
 *
 * @param {Element} parent - node whose children are examined
 * @returns {Element} the deepest dominant descendant found, or `parent`
 */
function findMainElement(parent) {
  // A child must score above this value for the search to descend into it.
  const SCORE_THRESHOLD = 0.3
  const candidateNodeTypes = [Node.ELEMENT_NODE, Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]
  const ignoredNodeNames = ['SCRIPT', 'STYLE', 'NOSCRIPT']

  // Each indicator maps a child to a ratio; `weight` is its share in the score.
  const indicators = [
    {name: 'AreaRatioOfParent', weight: 100, func: child => divideByArea(child, parent)},
    {name: 'AreaWithMarginRatioOfParent', weight: 50, func: child => getAreaWithMargin(child) / getArea(parent)},
    {name: 'VisibleAreaRatioOfScreen', weight: 200, func: child => divideByVisibleArea(child, document.documentElement)},
    {name: 'VisibleAreaRatioOfParent', weight: 150, func: child => divideByVisibleArea(child, parent)},
    {name: 'TextLengthRatioOfParent', weight: 50, func: child => divideByTextLength(child, parent)}
  ]
  const totalWeight = indicators.reduce((sum, indicator) => sum + indicator.weight, 0)

  // Weighted average of all indicator ratios for one element.
  const computeScore = e =>
    indicators.reduce((sum, indicator) => sum + indicator.func(e) * indicator.weight, 0) / totalWeight

  // The checks run in this order so the later ones never see a nil or
  // text-less node.
  const maybeMainElement = e =>
    e != null &&
    getTextLength(e) > 0 &&
    candidateNodeTypes.indexOf(e.nodeType) !== -1 &&
    ignoredNodeNames.indexOf(e.nodeName) === -1 &&
    getArea(e) !== 0

  // A node without a `children` collection simply has no candidates.
  const children = Array.from(parent.children || []).filter(maybeMainElement)
  if (children.length === 0) {
    return parent
  }

  // Score every candidate exactly once: each score reads layout and computed
  // style, so recomputing it per comparison is wasteful. On ties the earliest
  // child wins because only a strictly greater score replaces the current best.
  let mainElement = children[0]
  let bestScore = computeScore(mainElement)
  for (const child of children.slice(1)) {
    const score = computeScore(child)
    if (score > bestScore) {
      mainElement = child
      bestScore = score
    }
  }

  return bestScore > SCORE_THRESHOLD ? findMainElement(mainElement) : parent
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment