Skip to content

Instantly share code, notes, and snippets.

@liushooter
Created September 22, 2016 17:17
Show Gist options
  • Save liushooter/4e52754ecd136ec1b0ebf596dd698cf0 to your computer and use it in GitHub Desktop.
Save liushooter/4e52754ecd136ec1b0ebf596dd698cf0 to your computer and use it in GitHub Desktop.
A prototype of a main-text search algorithm that depends on the browser's rendering tree.
'use strict'
import R from 'ramda'
let fixOverflowVisible = ([clientValue, scrollValue]) => scrollValue / clientValue > 2 ? scrollValue : clientValue
, getHeight = R.compose(fixOverflowVisible, e => [e.clientHeight, e.scrollHeight])
, getWidth = R.compose(fixOverflowVisible, e => [e.clientWidth, e.scrollWidth])
, getWindowHeight = () => document.documentElement.clientHeight
, getWindowWidth = () => document.documentElement.clientWidth
, getScrollX = () => window.scrollX
, getScrollY = () => window.scrollY
, getX = e => e.offsetLeft
, getY = e => e.offsetTop
, getRelativeX = e => getX(e) - getScrollX()
, getRelativeY = e => getY(e) - getScrollY()
, getVisiblePart = R.curry((partFunc, windowPartFunc, relativePointFunc, e) => {
let part = partFunc(e)
, wPart = windowPartFunc()
, begin = relativePointFunc(e)
, end = begin + part
if (begin > wPart || end < 0) {return 0}
if (begin < 0) {begin = 0}
if (end > wPart) {end = wPart}
return end - begin
})
, getVisibleHeight = getVisiblePart(getHeight, getWindowHeight, getRelativeY)
, getVisibleWidth = getVisiblePart(getWidth, getWindowWidth, getRelativeX)
, getVisibleArea = e => getVisibleHeight(e) * getVisibleWidth(e)
, getComputedStyleByList = R.curry((styleList, e) => R.pick(styleList, getComputedStyle(e)))
, mapToList = R.compose(R.values, R.map)
, sumBy = R.curry(R.compose(R.sum, mapToList))
, getHorizontalMargin = R.compose(sumBy(parseFloat), getComputedStyleByList(['marginLeft', 'marginRight']))
, getVerticalMargin = R.compose(sumBy(parseFloat), getComputedStyleByList(['marginTop', 'marginBottom']))
, getWidthWithMargin = e => getWidth(e) + getHorizontalMargin(e)
, getHeightWithMargin = e => getHeight(e) + getVerticalMargin(e)
, getArea = e => getWidth(e) * getHeight(e)
, getAreaWithMargin = e => getWidthWithMargin(e) * getHeightWithMargin(e)
, getTextLength = e => {
if (e && e.innerText) {
return e.innerText.replace(/\s/g, '').length
} else {
return 0
}
}
, divideBy = R.curry((f, a, b) => R.divide(f(a), f(b)))
, divideByArea = divideBy(getArea)
, divideByVisibleArea = divideBy(getVisibleArea)
, divideByTextLength = divideBy(getTextLength)
function findMainElement(parent) {
const Indicators = R.mapObjIndexed((value, name) => R.merge(value, {name}), {
AreaRatioOfParent: {func: divideByArea(R.__, parent), weight: 100}
, AreaWithMarginRatioOfParent: {func: R.curry((child, parent) => getAreaWithMargin(child) / getArea(parent))(R.__, parent), weight: 50}
, VisibleAreaRatioOfScreen: {func: divideByVisibleArea(R.__, document.documentElement), weight: 200}
, VisibleAreaRatioOfParent: {func: divideByVisibleArea(R.__, parent), weight: 150}
, TextLengthRatioOfParent: {func: divideByTextLength(R.__, parent), weight: 50}
})
let maybeMainElement = R.allPass([
R.compose(R.not, R.isNil)
, R.compose(x => x > 0, getTextLength)
, R.compose(R.contains(R.__, [Node.ELEMENT_NODE, Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE]), R.prop('nodeType'))
, R.compose(R.not, R.contains(R.__, R.map(R.toUpper, ['script', 'style', 'noscript'])), R.prop('nodeName'))
, R.compose(R.not, R.equals(0), getArea)
])
, computeScore = R.curry((indicators, e) => {
let score = sumBy(R.identity, R.map(x => {
let result = x.func(e) * x.weight
return result
}, indicators)) / sumBy(x => x.weight, indicators)
return score
})
let children = R.filter(maybeMainElement, R.values(parent.children))
if (R.isEmpty(children)) {
return parent
}
let mainElement = R.reduce(R.maxBy(computeScore(Indicators)), R.head(children), children)
if (computeScore(Indicators, mainElement) > 0.3) {
return findMainElement(mainElement)
}
return parent
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment