Created
June 20, 2012 05:56
-
-
Save steckel/2958353 to your computer and use it in GitHub Desktop.
Readability Proof Of Concept (in CoffeeScript)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Write a prefixed debug message to the browser console.
# Does nothing in environments that expose no `console` object.
# @param string s - the message to log
dbg = (s) ->
  return if typeof console is "undefined"
  console.log "Readability: " + s
readability =
  version: "0.5.1"
  # Endpoints loaded into the popup iframes by emailBox / kindleBox.
  emailSrc: "http://lab.arc90.com/experiments/readability/email.php"
  kindleSrc: "http://lab.arc90.com/experiments/readability/kindle.php"
  # Counts popup-iframe load events; see removeFrame.
  iframeLoads: 0
  # The frame hack is to workaround a firefox bug where if you
  # pull content out of a frame and stick it into the parent element, the scrollbar won't appear.
  # So we fake a scrollbar in the wrapping div.
  frameHack: false
  # Cache the body HTML in case we need to re-use it later
  bodyCache: null
  # All of the regular expressions in use within readability.
  # Defined up here so we don't instantiate them repeatedly in loops.
  regexps:
    unlikelyCandidatesRe: /combx|comment|disqus|foot|header|menu|meta|nav|rss|shoutbox|sidebar|sponsor/i
    okMaybeItsACandidateRe: /and|article|body|column|main/i
    positiveRe: /article|body|content|entry|hentry|page|pagination|post|text/i
    negativeRe: /combx|comment|contact|foot|footer|footnote|link|media|meta|promo|related|scroll|shoutbox|sponsor|tags|widget/i
    divToPElementsRe: /<(a|blockquote|dl|div|img|ol|p|pre|table|ul)/i
    replaceBrsRe: /(<br[^>]*>[ \n\r\t]*){2,}/g
    replaceFontsRe: /<(\/?)font[^>]*>/g
    trimRe: /^\s+|\s+$/g
    normalizeRe: /\s{2,}/g
    # innerHTML serializes non-breaking spaces as the literal text "&nbsp;",
    # so the entity has to be spelled out here; \s alone never matches it.
    killBreaksRe: /(<br\s*\/?>(\s|&nbsp;?)*){1,}/g
    # Accept both http and https embeds.
    videoRe: /https?:\/\/(www\.)?(youtube|vimeo)\.com/i
# Entry point: rebuild the current page as a readable article overlay.
#
# Steps:
#  1. Cache the body HTML (first run only) and prep the document.
#  2. Build the tools bar, title, article content and footer elements.
#  3. If no text was extracted, restore the cached body and retry once with
#     unlikely candidates preserved; if that also fails, show an apology.
#  4. Replace the body contents with the assembled overlay.
#
# Reads the globals readStyle, readMargin and readSize, which are not defined
# in this file (presumably set by the launching bookmarklet - TODO confirm).
#
# @param boolean preserveUnlikelyCandidates - defaults to false
init: (preserveUnlikelyCandidates) ->
  preserveUnlikelyCandidates = false if typeof preserveUnlikelyCandidates is "undefined"
  if document.body and not readability.bodyCache
    readability.bodyCache = document.body.innerHTML
  readability.prepDocument()

  overlay = document.createElement("DIV")
  innerDiv = document.createElement("DIV")
  toolsBar = readability.getArticleTools()
  titleElem = readability.getArticleTitle()
  contentElem = readability.grabArticle(preserveUnlikelyCandidates)
  footerElem = readability.getArticleFooter()

  # Stripping unlikely candidates may have thrown away the real content, so an
  # empty result on the first pass earns one retry without the stripping.
  extractedNothing = readability.getInnerText(contentElem, false) is ""
  if extractedNothing and not preserveUnlikelyCandidates
    document.body.innerHTML = readability.bodyCache
    return readability.init(true)
  if extractedNothing
    contentElem.innerHTML = "<p>Sorry, readability was unable to parse this page for content. If you feel like it should have been able to, please <a href='http://code.google.com/p/arc90labs-readability/issues/entry'>let us know by submitting an issue.</a></p>"

  overlay.id = "readOverlay"
  innerDiv.id = "readInner"

  # Apply user-selected styling
  document.body.className = readStyle
  overlay.className = readStyle
  innerDiv.className = readMargin + " " + readSize

  # Glue the structure of our document together
  contentElem.appendChild footerElem
  innerDiv.appendChild titleElem
  innerDiv.appendChild contentElem
  overlay.appendChild toolsBar
  overlay.appendChild innerDiv

  # Clear the old HTML, insert the new content.
  document.body.innerHTML = ""
  document.body.insertBefore overlay, document.body.firstChild

  if readability.frameHack
    scroller = document.getElementById("readOverlay")
    scroller.style.height = "100%"
    scroller.style.overflow = "auto"
# Build the toolbar holding the reload, print, email and kindle links.
# @return Element - a DIV with id "readTools"
getArticleTools: ->
  toolLinks = [
    "\t\t\t<a href='#' onclick='return window.location.reload()' title='Reload original page' id='reload-page'>Reload Original Page</a>",
    "\t\t\t<a href='#' onclick='javascript:window.print();' title='Print page' id='print-page'>Print Page</a>",
    "\t\t\t<a href='#' onclick='readability.emailBox(); return false;' title='Email page' id='email-page'>Email Page</a>",
    "\t\t\t<a href='#' onclick='readability.kindleBox(); return false;' title='Send to Amazon Kindle' id='kindle-page'>Send to Kindle</a>\t\t"
  ]
  toolsBar = document.createElement("DIV")
  toolsBar.id = "readTools"
  toolsBar.innerHTML = toolLinks.join("")
  toolsBar
# Get the article title as an H1. Currently just uses document.title,
# we might want to be smarter in the future.
#
# The title is inserted as a text node rather than through innerHTML:
# document.title is plain text, and re-parsing it as markup would swallow or
# mangle titles that contain "<" or "&".
# @return Element - the H1 element
getArticleTitle: ->
  articleTitle = document.createElement("H1")
  articleTitle.appendChild document.createTextNode(document.title)
  articleTitle
# Build the footer with the readability and arc90 marks, the twitter link
# and the version stamp.
# @return Element - a DIV with id "readFooter"
getArticleFooter: ->
  footerParts = [
    "\t\t\t<a href='http://lab.arc90.com/experiments/readability'><img src='http://lab.arc90.com/experiments/readability/images/footer-readability.png'></a>",
    "\t\t\t<a href='http://www.arc90.com'><img src='http://lab.arc90.com/experiments/readability/images/footer-arc90.png'></a>",
    "\t\t\t<a href='http://www.twitter.com/arc90' class='footer-twitterLink'>Follow us on Twitter »</a>",
    "\t <div class='footer-right' >",
    "\t <span class='version'>Readability version " + readability.version + "</span>",
    "\t\t\t\t\t</div>\t\t"
  ]
  footer = document.createElement("DIV")
  footer.id = "readFooter"
  footer.innerHTML = footerParts.join("")
  footer
# Get the document ready for scraping: guarantee a body, pull content out of
# the largest accessible frame, drop foreign scripts, disable foreign
# stylesheets, blank inline style tags and normalise <br>/<font> markup.
prepDocument: ->
  # In some cases a body element can't be found (if the HTML is totally hosed
  # for example), so create one and attach it to the document.
  if document.body is null
    freshBody = document.createElement("body")
    try
      document.body = freshBody
    catch err
      document.documentElement.appendChild freshBody

  frameList = document.getElementsByTagName("frame")
  if frameList.length > 0
    largestFrame = null
    largestSize = 0
    for frame in frameList
      size = frame.offsetWidth + frame.offsetHeight
      reachable = false
      # Touching the frame's document throws when access is denied; the
      # bare try swallows that and leaves `reachable` false.
      try
        frame.contentWindow.document.body
        reachable = true
      if reachable and size > largestSize
        largestFrame = frame
        largestSize = size
    if largestFrame
      replacementBody = document.createElement("body")
      replacementBody.innerHTML = largestFrame.contentWindow.document.body.innerHTML
      replacementBody.style.overflow = "scroll"
      document.body = replacementBody
      frameset = document.getElementsByTagName("frameset")[0]
      frameset.parentNode.removeChild frameset if frameset
      readability.frameHack = true

  # If we're using a typekit style, inject the JS for it.
  if readStyle is "style-classy"
    kitScript = document.createElement("script")
    kitScript.type = "text/javascript"
    kitScript.src = "http://use.typekit.com/sxt6vzy.js"
    document.body.appendChild kitScript
    # Done as a script elem so that it's ensured it will activate
    # after typekit is loaded from the previous script src.
    kitLoader = document.createElement("script")
    kitLoader.type = "text/javascript"
    kitLoader.appendChild document.createTextNode("try{Typekit.load();}catch(e){}")
    document.body.appendChild kitLoader

  # Remove every script whose src does not mention readability. Walk the live
  # collection backwards so removals do not shift unvisited entries.
  scriptTags = document.getElementsByTagName("script")
  for idx in [scriptTags.length - 1..0] by -1
    script = scriptTags[idx]
    isForeign = typeof (script.src) is "undefined" or script.src.indexOf("readability") is -1
    script.parentNode.removeChild script if isForeign

  # Disable every linked stylesheet that is not readability's own.
  for sheet in document.styleSheets
    sheet.disabled = true if sheet.href? and sheet.href.lastIndexOf("readability") is -1

  # Blank all style tags (not done on IE).
  styleTags = document.getElementsByTagName("style")
  unless navigator.appName is "Microsoft Internet Explorer"
    styleTag.textContent = "" for styleTag in styleTags

  # Turn all double br's into p's and font tags into spans.
  # Note, this is pretty costly as far as processing goes.
  markup = document.body.innerHTML
  markup = markup.replace(readability.regexps.replaceBrsRe, "</p><p>")
  markup = markup.replace(readability.regexps.replaceFontsRe, "<$1span>")
  document.body.innerHTML = markup
# Tidy the extracted article for display: strip inline styles, collapse
# breaks, remove junk elements, conditionally remove fishy containers and
# drop paragraphs that hold neither text nor media.
# @param Element articleContent
prepArticle: (articleContent) ->
  readability.cleanStyles articleContent
  readability.killBreaks articleContent

  # Clean out junk from the article content
  for junkTag in ["form", "object", "h1"]
    readability.clean articleContent, junkTag
  # A lone h2 is most likely acting as the page header, which we already have.
  if articleContent.getElementsByTagName("h2").length is 1
    readability.clean articleContent, "h2"
  readability.clean articleContent, "iframe"
  readability.cleanHeaders articleContent

  # Do these last as the previous stuff may have removed junk that will affect these
  for suspectTag in ["table", "ul", "div"]
    readability.cleanConditionally articleContent, suspectTag

  # Remove extra paragraphs; iterate the live list backwards so removals are safe.
  paragraphs = articleContent.getElementsByTagName("p")
  for idx in [paragraphs.length - 1..0] by -1
    para = paragraphs[idx]
    continue unless para.getElementsByTagName("img").length is 0
    continue unless para.getElementsByTagName("embed").length is 0
    continue unless para.getElementsByTagName("object").length is 0
    if readability.getInnerText(para, false) is ""
      para.parentNode.removeChild para

  try
    articleContent.innerHTML = articleContent.innerHTML.replace(/<br[^>]*>\s*<p/g, "<p")
  catch err
    dbg "Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring."
# Attach a fresh `readability` score record to a node. The starting score
# depends on the tag name and is then adjusted by the class/id weight.
# @param Element node
# @return number - the node's resulting contentScore
initializeNode: (node) ->
  # [points, tag names that earn them]; unlisted tags start at 0.
  tagScores = [
    [5, ["DIV"]],
    [3, ["PRE", "TD", "BLOCKQUOTE"]],
    [-3, ["ADDRESS", "OL", "UL", "DL", "DD", "DT", "LI", "FORM"]],
    [-5, ["H1", "H2", "H3", "H4", "H5", "H6", "TH"]]
  ]
  startingScore = 0
  for [points, tagNames] in tagScores
    startingScore = points if node.tagName in tagNames
  node.readability = contentScore: startingScore
  node.readability.contentScore += readability.getClassWeight(node)
# grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
# most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
#
# Passes:
#  1. Node prepping: remove unlikely candidates (unless told to preserve
#     them) and turn misused divs into paragraphs.
#  2. Score every paragraph of 25+ characters and credit its parent and
#     grandparent.
#  3. Pick the highest scoring candidate after scaling by link density.
#  4. Collect the top candidate and its related siblings into a new div and
#     clean that div with prepArticle.
#
# @param boolean preserveUnlikelyCandidates - skip the unlikely-candidate removal
# @return Element - a DIV with id "readability-content"
grabArticle: (preserveUnlikelyCandidates) ->
  regexps = readability.regexps

  # --- Pass 1 ---------------------------------------------------------------
  # The collection is live, so it is fetched once and re-indexed each turn.
  allElements = document.getElementsByTagName("*")
  nodeIndex = 0
  while (node = allElements[nodeIndex])
    # Remove unlikely candidates
    unless preserveUnlikelyCandidates
      unlikelyMatchString = node.className + node.id
      if unlikelyMatchString.search(regexps.unlikelyCandidatesRe) isnt -1 and unlikelyMatchString.search(regexps.okMaybeItsACandidateRe) is -1 and node.tagName isnt "BODY"
        dbg "Removing unlikely candidate - " + unlikelyMatchString
        node.parentNode.removeChild node
        # The live collection has shifted down, so the same index now holds
        # the next unvisited node. Do not touch nodeIndex: decrementing it
        # here (with no increment on this path) would walk backwards and
        # could end the scan early at index -1.
        continue

    # Turn all divs that don't have children block level elements into p's
    if node.tagName is "DIV"
      if node.innerHTML.search(regexps.divToPElementsRe) is -1
        dbg "Altering div to p"
        newNode = document.createElement("p")
        try
          newNode.innerHTML = node.innerHTML
          node.parentNode.replaceChild newNode, node
          # Cancels the increment below so the new <p> is examined next.
          nodeIndex--
        catch e
          dbg "Could not alter div to p, probably an IE restriction, reverting back to div."
      else
        # EXPERIMENTAL: wrap bare text children in inline paragraphs so they
        # take part in paragraph scoring.
        for childNode in node.childNodes when childNode.nodeType is Node.TEXT_NODE
          dbg "replacing text node with a p tag with the same content."
          p = document.createElement("p")
          # Copy the text as text; assigning it to innerHTML would re-parse
          # characters such as "<" as markup.
          p.appendChild document.createTextNode(childNode.nodeValue)
          p.style.display = "inline"
          p.className = "readability-styled"
          node.replaceChild p, childNode
    nodeIndex++

  # --- Pass 2 ---------------------------------------------------------------
  # Loop through all paragraphs, and assign a score to them based on how content-y they look.
  # Then add their score to their parent node.
  candidates = []
  for paragraph in document.getElementsByTagName("p")
    parentNode = paragraph.parentNode
    grandParentNode = parentNode.parentNode
    innerText = readability.getInnerText(paragraph)

    # If this paragraph is less than 25 characters, don't even count it.
    # (A for loop is used so that this skip still advances to the next
    # paragraph instead of spinning on the same one.)
    continue if innerText.length < 25

    # Initialize readability data for the parent and the grandparent.
    for ancestor in [parentNode, grandParentNode]
      if typeof ancestor.readability is "undefined"
        readability.initializeNode ancestor
        candidates.push ancestor

    # One base point for the paragraph itself ...
    contentScore = 1
    # ... plus the number of comma-separated pieces in this paragraph ...
    contentScore += innerText.split(",").length
    # ... plus one point per 100 characters, up to 3 points.
    contentScore += Math.min(Math.floor(innerText.length / 100), 3)

    # Add the score to the parent. The grandparent gets half.
    parentNode.readability.contentScore += contentScore
    grandParentNode.readability.contentScore += contentScore / 2

  # --- Pass 3 ---------------------------------------------------------------
  # Scale each candidate's score by link density and keep the best one. Good
  # content should have a relatively small link density and be mostly
  # unaffected by this operation.
  topCandidate = null
  for candidate in candidates
    candidate.readability.contentScore = candidate.readability.contentScore * (1 - readability.getLinkDensity(candidate))
    dbg "Candidate: " + candidate + " (" + candidate.className + ":" + candidate.id + ") with score " + candidate.readability.contentScore
    if not topCandidate or candidate.readability.contentScore > topCandidate.readability.contentScore
      topCandidate = candidate

  # If we still have no top candidate, just use the body as a last resort.
  # We also have to copy the body node so it is something we can modify.
  if not topCandidate? or topCandidate.tagName is "BODY"
    topCandidate = document.createElement("DIV")
    topCandidate.innerHTML = document.body.innerHTML
    document.body.innerHTML = ""
    document.body.appendChild topCandidate
    readability.initializeNode topCandidate

  # --- Pass 4 ---------------------------------------------------------------
  # Now that we have the top candidate, look through its siblings for content that might also be related.
  # Things like preambles, content split by ads that we removed, etc.
  articleContent = document.createElement("DIV")
  articleContent.id = "readability-content"
  siblingScoreThreshold = Math.max(10, topCandidate.readability.contentScore * 0.2)
  siblingNodes = topCandidate.parentNode.childNodes
  i = 0
  while i < siblingNodes.length
    siblingNode = siblingNodes[i]
    append = false
    dbg "Looking at sibling node: " + siblingNode + " (" + siblingNode.className + ":" + siblingNode.id + ")" + (if (typeof siblingNode.readability isnt "undefined") then (" with score " + siblingNode.readability.contentScore) else "")
    dbg "Sibling has score " + (if siblingNode.readability then siblingNode.readability.contentScore else "Unknown")
    append = true if siblingNode is topCandidate
    append = true if typeof siblingNode.readability isnt "undefined" and siblingNode.readability.contentScore >= siblingScoreThreshold
    if siblingNode.nodeName is "P"
      linkDensity = readability.getLinkDensity(siblingNode)
      nodeContent = readability.getInnerText(siblingNode)
      nodeLength = nodeContent.length
      if nodeLength > 80 and linkDensity < 0.25
        append = true
      else if nodeLength < 80 and linkDensity is 0 and nodeContent.search(/\.( |$)/) isnt -1
        append = true
    if append
      dbg "Appending node: " + siblingNode
      # Moving the node out of the live list shifts the next sibling into
      # slot i, so the index only advances when nothing was moved.
      articleContent.appendChild siblingNode
    else
      i++

  # So we have all of the content that we need. Now we clean it up for presentation.
  readability.prepArticle articleContent
  articleContent
# Return the trimmed text of a node, reading innerText on Internet Explorer
# and textContent elsewhere. Runs of two or more whitespace characters are
# collapsed to a single space unless normalizeSpaces is false.
# @param Element e
# @param boolean normalizeSpaces - defaults to true
# @return string
getInnerText: (e, normalizeSpaces) ->
  normalizeSpaces = true if typeof normalizeSpaces is "undefined"
  onExplorer = navigator.appName is "Microsoft Internet Explorer"
  rawText = if onExplorer then e.innerText else e.textContent
  trimmed = rawText.replace(readability.regexps.trimRe, "")
  return trimmed unless normalizeSpaces
  trimmed.replace readability.regexps.normalizeRe, " "
# Get the number of times a string s appears in the node e.
# Splitting on s yields one more piece than there are occurrences, so the
# piece count is reduced by one to give the actual number of occurrences.
# @param Element
# @param string - what to split on. Default is ","
# @return number (integer)
getCharCount: (e, s) ->
  s = s or ","
  readability.getInnerText(e).split(s).length - 1
# Strip the style attribute from e and from every element beneath it.
# Anything whose className is exactly "readability-styled" keeps its style.
# Falls back to the whole document when e is not given.
# @param Element e
cleanStyles: (e) ->
  root = e or document
  # The document node itself has no removeAttribute, hence the type check.
  if typeof root.removeAttribute is "function" and root.className isnt "readability-styled"
    root.removeAttribute "style"
  child = root.firstChild
  while child?
    # nodeType 1 is an element node; text and comment nodes are skipped.
    if child.nodeType is 1
      child.removeAttribute "style" unless child.className is "readability-styled"
      readability.cleanStyles child
    child = child.nextSibling
# Get the density of links as a fraction of the content.
# This is the amount of text that is inside a link divided by the total text in the node.
# A node with no text at all has a density of 0 (instead of the NaN that
# 0 / 0 would produce, which silently fails every later comparison).
#
# @param Element
# @return number (float)
getLinkDensity: (e) ->
  textLength = readability.getInnerText(e).length
  return 0 if textLength is 0
  linkLength = 0
  for link in e.getElementsByTagName("a")
    linkLength += readability.getInnerText(link).length
  linkLength / textLength
# Weigh an element by its class name and id: each of the two loses 25 when
# it matches negativeRe and gains 25 when it matches positiveRe.
#
# @param Element e
# @return number (Integer)
getClassWeight: (e) ->
  regexps = readability.regexps
  # Score a single label (a class name or an id).
  weigh = (label) ->
    delta = 0
    delta -= 25 unless label.search(regexps.negativeRe) is -1
    delta += 25 unless label.search(regexps.positiveRe) is -1
    delta

  weight = 0
  # Look for a special classname
  weight += weigh(e.className) if e.className isnt ""
  # Look for a special ID
  weight += weigh(e.id) if typeof (e.id) is "string" and e.id isnt ""
  weight
# Collapse every run of break tags inside a node to a single "<br />".
# A failure while rewriting innerHTML is logged and otherwise ignored.
#
# @param Element e
killBreaks: (e) ->
  try
    collapsed = e.innerHTML.replace(readability.regexps.killBreaksRe, "<br />")
    e.innerHTML = collapsed
  catch err
    dbg "KillBreaks failed - this is an IE bug. Ignoring."
# Clean a node of all elements of type "tag".
# (Unless it's a youtube/vimeo video. People love movies.)
# @param Element
# @param string tag to clean
clean: (e, tag) ->
  targetList = e.getElementsByTagName(tag)
  isEmbed = (tag is "object" or tag is "embed")
  # Walk the live list backwards so removals never shift unvisited entries.
  # A range loop is used because its `continue` still moves to the next
  # index; in a hand-rolled while loop, skipping a kept video also skipped
  # the decrement and never terminated.
  for y in [targetList.length - 1..0] by -1
    target = targetList[y]
    # Allow youtube and vimeo videos through as people usually want to see those.
    continue if isEmbed and target.innerHTML.search(readability.regexps.videoRe) isnt -1
    target.parentNode.removeChild target
  return
# Remove the "tag" elements under e that look fishy. An element goes when
# its class/id weight is negative, or when getCharCount for "," is below 10
# and any of the image, list item, input, length, link density or embed
# checks below trips.
# @param Element e
# @param string tag - tag name to inspect
cleanConditionally: (e, tag) ->
  tagsList = e.getElementsByTagName(tag)
  countOf = (node, name) -> node.getElementsByTagName(name).length

  # Traverse backwards so we can remove nodes at the same time without
  # affecting the traversal of the live list.
  for idx in [tagsList.length - 1..0] by -1
    node = tagsList[idx]
    weight = readability.getClassWeight(node)
    scoreNote = if (typeof node.readability isnt "undefined") then (" with score " + node.readability.contentScore) else ""
    dbg "Cleaning Conditionally " + node + " (" + node.className + ":" + node.id + ")" + scoreNote

    if weight < 0
      node.parentNode.removeChild node
      continue
    continue unless readability.getCharCount(node, ",") < 10

    # Gather counts for other typical elements embedded within.
    p = countOf(node, "p")
    img = countOf(node, "img")
    li = countOf(node, "li") - 100
    input = countOf(node, "input")

    # Only embeds that are not youtube/vimeo videos count against the node.
    embedCount = 0
    for embed in node.getElementsByTagName("embed")
      embedCount++ if embed.src.search(readability.regexps.videoRe) is -1

    linkDensity = readability.getLinkDensity(node)
    contentLength = readability.getInnerText(node).length

    toRemove =
      img > p or
      (li > p and tag isnt "ul" and tag isnt "ol") or
      input > Math.floor(p / 3) or
      (contentLength < 25 and (img is 0 or img > 2)) or
      (weight < 25 and linkDensity > .2) or
      (weight >= 25 and linkDensity > .5) or
      (embedCount is 1 and contentLength < 75) or
      embedCount > 1
    node.parentNode.removeChild node if toRemove
# Remove h1 through h6 elements under e whose class/id weight is negative
# or whose link density exceeds 0.33.
# @param Element e
cleanHeaders: (e) ->
  for level in [1..6]
    headers = e.getElementsByTagName("h" + level)
    # Backwards, because the list is live and shrinks as headers are removed.
    for idx in [headers.length - 1..0] by -1
      header = headers[idx]
      isSpurious = readability.getClassWeight(header) < 0 or readability.getLinkDensity(header) > 0.33
      header.parentNode.removeChild header if isSpurious
# Show the email popup: an iframe pointing at emailSrc with the page URL and
# title passed as query parameters. Does nothing if the popup already exists.
#
# Query values are encoded with encodeURIComponent. The deprecated escape()
# leaves "+" untouched and emits non-standard %uXXXX sequences for non-ASCII
# characters, both of which corrupt the values in a query string.
emailBox: ->
  return if document.getElementById("email-container")?
  pageUrl = encodeURIComponent(String(window.location))
  pageTitle = encodeURIComponent(document.title)
  emailContainer = document.createElement("div")
  emailContainer.setAttribute "id", "email-container"
  emailContainer.innerHTML = "<iframe src=\"" + readability.emailSrc + "?pageUrl=" + pageUrl + "&amp;pageTitle=" + pageTitle + "\" scrolling=\"no\" onload=\"readability.removeFrame()\" style=\"width:500px; height: 490px; border: 0;\"></iframe>"
  document.body.appendChild emailContainer
# Show the Kindle popup: an empty iframe plus a hidden form that POSTs the
# article HTML, page URL and page title to kindleSrc with the iframe as
# target. Does nothing if the popup already exists.
#
# The form is built with DOM calls rather than an HTML string so that:
#  - field values need no manual escaping,
#  - each input gets its own id (no duplicated "pageUrl" id),
#  - the body is not re-parsed (body.innerHTML += ... would recreate the
#    iframe that was just inserted and fire its onload handler again).
kindleBox: ->
  return if document.getElementById("kindle-container")?
  kindleContainer = document.createElement("div")
  kindleContainer.setAttribute "id", "kindle-container"
  kindleContainer.innerHTML = "<iframe id=\"readabilityKindleIframe\" name=\"readabilityKindleIframe\" scrolling=\"no\" onload=\"readability.removeFrame()\" style=\"width:500px; height: 490px; border: 0;\"></iframe>"
  document.body.appendChild kindleContainer

  articleNode = document.getElementById("readability-content")
  fields =
    bodyContent: if articleNode? then articleNode.innerHTML else ""
    pageUrl: String(window.location)
    pageTitle: document.title

  # Dynamically create a form to be POSTed to the iframe
  form = document.createElement("form")
  form.id = "readabilityKindleForm"
  form.style.display = "none"
  form.target = "readabilityKindleIframe"
  form.method = "post"
  form.action = readability.kindleSrc
  for own fieldName, fieldValue of fields
    input = document.createElement("input")
    input.type = "hidden"
    input.name = fieldName
    input.id = fieldName
    input.value = fieldValue
    form.appendChild input
  document.body.appendChild form
  form.submit()
# onload handler shared by the email and kindle iframes. Their documents are
# cross-domain, so the only observable signal is another load event. Loads
# are counted, and after more than 3 both popup containers are removed and
# the counter is reset.
removeFrame: ->
  readability.iframeLoads++
  return unless readability.iframeLoads > 3
  for containerId in ["email-container", "kindle-container"]
    container = document.getElementById(containerId)
    container.parentNode.removeChild container if container isnt null
  readability.iframeLoads = 0
# Escape the HTML-significant characters & " ' < > so a string can be placed
# safely inside markup or an attribute value. The ampersand is replaced
# first so the entities produced by the later steps are not escaped again.
# Non-string input is returned unchanged.
# @param string s
# @return string
htmlspecialchars: (s) ->
  return s unless typeof (s) is "string"
  s = s.replace(/&/g, "&amp;")
  s = s.replace(/"/g, "&quot;")
  s = s.replace(/'/g, "&#039;")
  s = s.replace(/</g, "&lt;")
  s = s.replace(/>/g, "&gt;")
  s
# Run readability as soon as this script has been evaluated.
readability.init()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment