Caldis · August 13, 2018 10:55
diff --git a/getWikipediaRelatedTopics.js b/getWikipediaRelatedTopics.js
 // Select the candidate link tags inside the given scope.
 // fragement: element or document fragment to search; may be null/undefined
 // type:      site type, "wikiwand" or "wikipedia"
 // Returns whatever querySelectorAll() yields for the site-specific selector,
 // or an empty array when there is no scope or the type is unknown.
 function findValidLinkTags(fragement, type) {
    // document.querySelector() yields null when nothing matched; treat a
    // missing scope as "no links" instead of throwing on null.querySelectorAll
    if (!fragement) {
        return []
    }
    switch (type) {
        case "wikiwand":
            // only tags carrying the "int-link" class are considered on wikiwand
            return fragement.querySelectorAll(".int-link")
        case "wikipedia":
            return fragement.querySelectorAll("a")
        default:
            return []
    }
 }

 // Extract the href values from the given collection of link tags.
 // linkTags: array-like collection of elements (NodeList or array)
 // Returns an array of href strings, in the order the tags were given.
 // A tag is skipped when it has no href, when its href contains ":" or "#",
 // or when its parent element is <small> or <sup>.
 function extractLinks(linkTags) {
    const links = []
    // the index is block-scoped so it does not leak into (or depend on) a global "i"
    for (let i = 0; i < linkTags.length; i++) {
        const tag = linkTags[i]
        const href = tag.getAttribute("href")
        // getAttribute() yields null for a tag without an href attribute
        if (href == null) {
            continue
        }
        if (href.includes(":") || href.includes("#")) {
            continue
        }
        // a detached tag has no parent element; it cannot sit inside <small>/<sup>
        const parentName = tag.parentElement ? tag.parentElement.nodeName : ""
        if (parentName === "SMALL" || parentName === "SUP") {
            continue
        }
        links.push(href)
    }
    return links
 }

 // Analyse the links: derive a readable title for each one and count how often
 // that title occurs in the page text.
 // links:       array of href strings (duplicates are collapsed, first position kept)
 // prefixCode:  URL prefix to strip from each href
 // fullContent: object whose innerText is the text to search; may be null
 // Returns an array of { link, title, value } sorted by value, highest first,
 // where value is the number of case-insensitive occurrences of title.
 function decodeLinks(links, prefixCode, fullContent) {
    // page text; a missing element simply yields no occurrences
    const contentText = String((fullContent && fullContent.innerText) || "").toLowerCase()
    // a Map keeps insertion order and is safe for keys such as "__proto__"
    const decodedLinks = new Map()
    links.forEach(link => {
        // strip the prefix, turn underscores into spaces, then decode
        let title = link.split(prefixCode).join("").split("_").join(" ")
        try {
            title = decodeURIComponent(title)
        } catch (err) {
            if (!(err instanceof URIError)) {
                throw err
            }
            // malformed percent-escape: keep the undecoded title rather than abort the whole run
        }
        // split() yields one more piece than there are separators, hence the -1;
        // an empty title would split between every character, so it counts as 0
        const needle = title.toLowerCase()
        const value = needle ? contentText.split(needle).length - 1 : 0
        decodedLinks.set(link, { link, title, value })
    })
    // turn the collected entries into an array ordered by occurrence count
    return Array.from(decodedLinks.values())
        .sort((a, b) => b.value - a.value)
 }

 // Build the base data
 // type:        site type ("wikiwand" or "wikipedia")
 // prefixCode:  URL prefix (a language code such as /zh/ on wikiwand, /wiki/ on wikipedia)
 // firstPara:   first paragraph of the introduction
 // overview:    the whole introduction
 // fullContent: all the valid text
 // res:         final result
 // prefixCode is declared here so it is no longer created as an implicit global
 let type, prefixCode, firstPara, overview, fullContent, res = []
 if (location.host.match("wikiwand")) {
    type = "wikiwand"
    // articleData is expected to be provided by the wikiwand page itself
    prefixCode = "/" + articleData.langCodeUrl + "/"
    firstPara = document.querySelector("#overview p.first-paragraph")
    overview = document.querySelector("#overview")
    fullContent = document.querySelector("#fullContent")
 }
 if (location.host.match("wikipedia")) {
    type = "wikipedia"
    prefixCode = "/wiki/"
    firstPara = document.querySelector(".infobox+p")
    overview = new DocumentFragment()
    const tags = document.querySelectorAll(".infobox~*")
    // gather the run of <p> siblings that directly follows the infobox
    for (let i = 0; i < tags.length; i++) {
        if (tags[i].nodeName !== "P") {
            break
        }
        // NOTE(review): append() moves the live node out of the page into the
        // fragment, so the fullContent query below no longer sees these
        // paragraphs - confirm that this side effect is intended
        overview.append(tags[i])
    }
    fullContent = document.querySelector(".infobox~p")
 }
 // Take the keywords of the first paragraph (skipped when the selector matched nothing)
 firstPara && (res = res.concat(extractLinks(findValidLinkTags(firstPara, type))))
 // With fewer than 10 results, also take everything from the overview
 res.length<10 && overview && (res = res.concat(extractLinks(findValidLinkTags(overview, type))))
 // Still fewer than 10: also take everything from fullContent
 res.length<10 && fullContent && (res = res.concat(extractLinks(findValidLinkTags(fullContent, type))))
 // Analyse the links to get their titles and occurrence counts; when no content
 // element was found the whole page body is used as the text to count in
 res = res.length ? decodeLinks(res, prefixCode, fullContent || document.body) : []
 // With more than 10 entries take positions 3-6 (indices 3..5), otherwise the first three.
 // This stays the last statement: its value is the script's completion value,
 // presumably what the host evaluating this script reads - confirm with the caller
 res.length>10 ? (res.slice(3, 6)) : (res.slice(0, 3))
	// Select the candidate link tags inside the given scope.
	// fragement: element or document fragment to search; may be null/undefined
	// type:      site type, "wikiwand" or "wikipedia"
	// Returns whatever querySelectorAll() yields for the site-specific selector,
	// or an empty array when there is no scope or the type is unknown.
	function findValidLinkTags(fragement, type) {
		// document.querySelector() yields null when nothing matched; treat a
		// missing scope as "no links" instead of throwing on null.querySelectorAll
		if (!fragement) {
			return []
		}
		switch (type) {
			case "wikiwand":
				// only tags carrying the "int-link" class are considered on wikiwand
				return fragement.querySelectorAll(".int-link")
			case "wikipedia":
				return fragement.querySelectorAll("a")
			default:
				return []
		}
	}

	// Extract the href values from the given collection of link tags.
	// linkTags: array-like collection of elements (NodeList or array)
	// Returns an array of href strings, in the order the tags were given.
	// A tag is skipped when it has no href, when its href contains ":" or "#",
	// or when its parent element is <small> or <sup>.
	function extractLinks(linkTags) {
		const links = []
		// the index is block-scoped so it does not leak into (or depend on) a global "i"
		for (let i = 0; i < linkTags.length; i++) {
			const tag = linkTags[i]
			const href = tag.getAttribute("href")
			// getAttribute() yields null for a tag without an href attribute
			if (href == null) {
				continue
			}
			if (href.includes(":") || href.includes("#")) {
				continue
			}
			// a detached tag has no parent element; it cannot sit inside <small>/<sup>
			const parentName = tag.parentElement ? tag.parentElement.nodeName : ""
			if (parentName === "SMALL" || parentName === "SUP") {
				continue
			}
			links.push(href)
		}
		return links
	}

	// Analyse the links: derive a readable title for each one and count how often
	// that title occurs in the page text.
	// links:       array of href strings (duplicates are collapsed, first position kept)
	// prefixCode:  URL prefix to strip from each href
	// fullContent: object whose innerText is the text to search; may be null
	// Returns an array of { link, title, value } sorted by value, highest first,
	// where value is the number of case-insensitive occurrences of title.
	function decodeLinks(links, prefixCode, fullContent) {
		// page text; a missing element simply yields no occurrences
		const contentText = String((fullContent && fullContent.innerText) || "").toLowerCase()
		// a Map keeps insertion order and is safe for keys such as "__proto__"
		const decodedLinks = new Map()
		links.forEach(link => {
			// strip the prefix, turn underscores into spaces, then decode
			let title = link.split(prefixCode).join("").split("_").join(" ")
			try {
				title = decodeURIComponent(title)
			} catch (err) {
				if (!(err instanceof URIError)) {
					throw err
				}
				// malformed percent-escape: keep the undecoded title rather than abort the whole run
			}
			// split() yields one more piece than there are separators, hence the -1;
			// an empty title would split between every character, so it counts as 0
			const needle = title.toLowerCase()
			const value = needle ? contentText.split(needle).length - 1 : 0
			decodedLinks.set(link, { link, title, value })
		})
		// turn the collected entries into an array ordered by occurrence count
		return Array.from(decodedLinks.values())
			.sort((a, b) => b.value - a.value)
	}

	// Build the base data
	// type:        site type ("wikiwand" or "wikipedia")
	// prefixCode:  URL prefix (a language code such as /zh/ on wikiwand, /wiki/ on wikipedia)
	// firstPara:   first paragraph of the introduction
	// overview:    the whole introduction
	// fullContent: all the valid text
	// res:         final result
	// prefixCode is declared here so it is no longer created as an implicit global
	let type, prefixCode, firstPara, overview, fullContent, res = []
	if (location.host.match("wikiwand")) {
		type = "wikiwand"
		// articleData is expected to be provided by the wikiwand page itself
		prefixCode = "/" + articleData.langCodeUrl + "/"
		firstPara = document.querySelector("#overview p.first-paragraph")
		overview = document.querySelector("#overview")
		fullContent = document.querySelector("#fullContent")
	}
	if (location.host.match("wikipedia")) {
		type = "wikipedia"
		prefixCode = "/wiki/"
		firstPara = document.querySelector(".infobox+p")
		overview = new DocumentFragment()
		const tags = document.querySelectorAll(".infobox~*")
		// gather the run of <p> siblings that directly follows the infobox
		for (let i = 0; i < tags.length; i++) {
			if (tags[i].nodeName !== "P") {
				break
			}
			// NOTE(review): append() moves the live node out of the page into the
			// fragment, so the fullContent query below no longer sees these
			// paragraphs - confirm that this side effect is intended
			overview.append(tags[i])
		}
		fullContent = document.querySelector(".infobox~p")
	}
	// Take the keywords of the first paragraph (skipped when the selector matched nothing)
	firstPara && (res = res.concat(extractLinks(findValidLinkTags(firstPara, type))))
	// With fewer than 10 results, also take everything from the overview
	res.length<10 && overview && (res = res.concat(extractLinks(findValidLinkTags(overview, type))))
	// Still fewer than 10: also take everything from fullContent
	res.length<10 && fullContent && (res = res.concat(extractLinks(findValidLinkTags(fullContent, type))))
	// Analyse the links to get their titles and occurrence counts; when no content
	// element was found the whole page body is used as the text to count in
	res = res.length ? decodeLinks(res, prefixCode, fullContent || document.body) : []
	// With more than 10 entries take positions 3-6 (indices 3..5), otherwise the first three.
	// This stays the last statement: its value is the script's completion value,
	// presumably what the host evaluating this script reads - confirm with the caller
	res.length>10 ? (res.slice(3, 6)) : (res.slice(0, 3))