Created
April 1, 2019 18:28
-
-
Save Rulexec/fb6a2ab794198219e229f5d8c7e95e47 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(function(){ | |
// Patterns tested against the path part of each discovered link (see
// processDocument); a matching link is recorded in ignoredUrlsSet and skipped.
const IGNORE_URLS = [
  /^\/..\/blog/,
];

// Milliseconds processUrl waits for a page before reporting it as timed out.
const TIMEOUT = 30000;

// Pool of iframes that pages are loaded into.
const framesPool = new Pool({
  maxCount: 4,
  maxCountForSlow: 8,
  slowTimeout: 10000,
  create() {
    // appendChild returns the node it attached, i.e. the new iframe.
    return document.body.appendChild(document.createElement('iframe'));
  },
  dispose(frame) {
    document.body.removeChild(frame);
  },
});

// Matches URLs that start with the current page's protocol and host.
const currentDomainRegexp = createCurrentDomainRegexp();

// Crawl bookkeeping shared by processDocument and processUrl.
const processingSet = new Set(); // URLs whose processUrl call has not finished
const processedUrlsSet = new Set(); // URLs that must not be scheduled again
const ignoredUrlsSet = new Set(); // URLs rejected by IGNORE_URLS
const errorUrls = []; // pages that failed to load or look like error pages
const timeoutUrls = []; // pages that exceeded TIMEOUT

// Limits how many processUrl calls are in flight at the same time.
const tasksQueue = new TasksProcessor({
  maxCount: 64,
});

// Exposed on window so the crawl state can be inspected while it runs.
window._links = {
  processingSet,
  processedUrlsSet,
  ignoredUrlsSet,
  errorUrls,
  timeoutUrls,
  startTime: performance.now(),
  // Overwritten with performance.now() each time processUrl releases a frame.
  lastTime: 0,
  // Size of processedUrlsSet divided by the minutes elapsed since startTime.
  getRPM() {
    const elapsedMinutes = (performance.now() - this.startTime) / (1000 * 60);
    return processedUrlsSet.size / elapsedMinutes;
  },
};

// Start the crawl from the page this script is running on.
processDocument(document);
/**
 * Schedules a crawl task for every not-yet-seen, same-site link in a document.
 *
 * For each `<a>` element the link is rewritten from http: to https:, then
 * skipped when it does not match `currentDomainRegexp`, is not an http(s) URL,
 * or is already in `processedUrlsSet`. Links whose path matches one of
 * `IGNORE_URLS` are added to `ignoredUrlsSet` instead of being crawled.
 *
 * A link is added to `processedUrlsSet` at the moment it is scheduled, not
 * when its task starts. Otherwise a link repeated on the same page, or a link
 * still waiting in `tasksQueue`, would pass the "already seen" check again and
 * be crawled several times.
 *
 * @param {Document} document - Document whose links are collected; its
 *   `location.href` is passed to `processUrl` as the parent URL.
 */
function processDocument(document) {
  const parentUrl = document.location.href;

  for (const link of document.querySelectorAll('a')) {
    const href = link.href.replace(/^http:/, 'https:');

    if (!currentDomainRegexp.test(href)) continue;
    if (processedUrlsSet.has(href)) continue;
    if (!/^https?:/.test(href)) continue;

    // Everything after the host; null for a bare "https://host/" URL, which
    // therefore can never be ignored.
    const match = /:\/\/[^\/]+(\/.+)$/.exec(href);
    const path = match && match[1];

    if (path && IGNORE_URLS.some(regexp => regexp.test(path))) {
      ignoredUrlsSet.add(href);
      continue;
    }

    // Mark before scheduling so later duplicates are filtered out above.
    processedUrlsSet.add(href);
    tasksQueue.addTask(() => processUrl(href, parentUrl));
  }
}
/**
 * Loads `url` in a pooled iframe and crawls the links of the loaded page.
 *
 * Outcomes:
 * - load event: after a 500 ms settle delay the frame's document is either
 *   recorded in `errorUrls` (when `isErrorDocument` says so) or passed to
 *   `processDocument`;
 * - error event: the URL is recorded in `errorUrls`;
 * - no load event within `TIMEOUT` ms: the URL is recorded in `timeoutUrls`.
 *
 * In every case the frame is handed back to `framesPool` exactly once and the
 * returned promise resolves (it never rejects after the frame was obtained).
 *
 * @param {string} url - Page to load.
 * @param {string} parentUrl - Page on which the link to `url` was found;
 *   stored alongside `url` in the error/timeout records.
 * @returns {Promise<void>} Resolves once the frame has been released.
 */
async function processUrl(url, parentUrl) {
  processedUrlsSet.add(url);
  processingSet.add(url);

  const frame = await framesPool.get();

  console.log('processing', url);

  // Set once the frame has been released; guards against a second release.
  let settled = false;
  let onResolve;
  const promise = new Promise(resolve => { onResolve = resolve; });

  const timeoutId = setTimeout(() => {
    console.error('timeout', url);
    timeoutUrls.push({ url, parentUrl });
    freeFrame();
  }, TIMEOUT);

  frame.addEventListener('load', onLoad);
  frame.addEventListener('error', onError);
  frame.src = url;

  return promise;

  async function onLoad() {
    frame.removeEventListener('load', onLoad);
    // The page did load, so the timeout must not fire during the settle delay
    // below: that would report a loaded page as timed out and release the
    // frame while it is still being read.
    clearTimeout(timeoutId);

    try {
      await sleep(500); // just to be sure/js scripts/etc

      // The error listener may have released the frame during the delay.
      if (settled) return;

      const frameDocument = frame.contentDocument;

      if (isErrorDocument(frameDocument)) {
        errorUrls.push({ url, parentUrl });
      } else {
        processDocument(frameDocument);
      }
    } catch (error) {
      // A failure while reading the page must not leak the frame.
      console.error(error);
    } finally {
      freeFrame();
    }
  }

  function onError(e) {
    console.error(e);
    // Same record shape as the entries pushed from onLoad.
    errorUrls.push({ url, parentUrl });
    freeFrame();
  }

  function freeFrame() {
    if (settled) return;
    settled = true;

    clearTimeout(timeoutId);
    frame.removeEventListener('load', onLoad);
    frame.removeEventListener('error', onError);
    frame.src = '';

    framesPool.free(frame);
    processingSet.delete(url);
    onResolve();

    _links.lastTime = performance.now();
  }
}
/**
 * Decides whether a loaded document should be treated as an error page.
 *
 * A document counts as an error page when it is missing altogether, when its
 * title contains "error" (case-insensitive), or when any element with the
 * class `error-page-title` contains the text "404".
 *
 * @param {Document|null|undefined} document - Document to inspect.
 * @returns {boolean} true when the document looks like an error page.
 */
function isErrorDocument(document) {
  if (!document) return true;
  if (/error/i.test(document.title)) return true;

  const titles = Array.from(document.querySelectorAll('.error-page-title'));

  return titles.some(el => el.textContent.includes('404'));
}
/**
 * Builds a regexp that matches URLs starting with the current page's
 * protocol and host, i.e. `<protocol>//<host>/`.
 *
 * Every regexp metacharacter in that prefix is escaped, not just dots, so
 * hosts containing characters such as `[` and `]` (IPv6 literals) are matched
 * literally instead of being interpreted as pattern syntax.
 *
 * @returns {RegExp} Regexp anchored at the start of the string.
 */
function createCurrentDomainRegexp() {
  const { protocol, host } = document.location;
  const prefix = `${protocol}//${host}/`;

  return new RegExp('^' + prefix.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
}
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  // Plain function: the promise is built explicitly to adapt setTimeout, so
  // an `async` wrapper around it would add nothing.
  return new Promise(resolve => {
    setTimeout(() => { resolve(); }, ms);
  });
}
/**
 * Runs asynchronous tasks with a cap on how many are in flight at once.
 *
 * @param {{maxCount: number}} options - `maxCount` is the maximum number of
 *   tasks that may run concurrently.
 *
 * Instances expose `addTask(task)`, where `task` is a function that starts the
 * work and returns a promise. The task starts immediately (synchronously)
 * while fewer than `maxCount` tasks are running; otherwise it waits until a
 * running task finishes.
 *
 * A task that rejects or throws is logged with console.error and counts as
 * finished, so one failing task can neither leak its slot nor stall the tasks
 * waiting behind it.
 */
function TasksProcessor(options) {
  const { maxCount } = options;

  const queue = [];
  let count = 0; // number of occupied slots

  this.addTask = function(task) {
    if (count < maxCount) {
      count++;
      processTask(task);
    } else {
      queue.push(task);
    }
  };

  // Runs `task` in the slot the caller already holds, then reuses that slot
  // for a waiting task or gives it back when nothing is waiting.
  function processTask(task) {
    let running;

    try {
      running = Promise.resolve(task());
    } catch (error) {
      // A synchronous throw is handled exactly like a rejection.
      running = Promise.reject(error);
    }

    running
      .catch(error => { console.error(error); })
      .then(() => {
        if (queue.length) {
          // NOTE(review): pop() takes the most recently added task (LIFO).
          // Kept as-is; confirm whether first-in-first-out was intended.
          processTask(queue.pop());
        } else {
          count--;
        }
      });
  }
}
/**
 * Pool of reusable resources (iframes in this file) with a soft upper limit.
 *
 * @param {object} options
 * @param {number} options.maxCount - Normal limit of resources handed out at
 *   the same time.
 * @param {number} options.maxCountForSlow - Higher limit that applies to a
 *   caller that has already waited `slowTimeout` ms.
 * @param {number} options.slowTimeout - Milliseconds a caller waits before an
 *   extra resource may be created for it; a falsy value disables this.
 * @param {function(): *} options.create - Creates a new resource.
 * @param {function(*): void} options.dispose - Destroys a surplus resource.
 *
 * Instances expose:
 * - `get()`: returns a promise for a resource. An idle resource is reused
 *   first; otherwise a new one is created while fewer than `maxCount` are
 *   handed out; otherwise the caller waits for `free()`.
 * - `free(resource)`: gives a resource back. It goes straight to a waiting
 *   caller if there is one, is disposed when more than `maxCount` resources
 *   are handed out, and is kept idle for reuse otherwise.
 */
function Pool(options) {
  const {
    maxCount,
    maxCountForSlow,
    slowTimeout,
    create,
    dispose,
  } = options;

  const pool = []; // idle resources ready for reuse
  const poolWaiters = []; // callbacks of callers waiting for a resource
  // Number of resources currently handed out. Every path that hands one out
  // increments it and every path that takes one back decrements it; handing a
  // freed resource directly to a waiter leaves it unchanged.
  let count = 0;

  this.get = async () => {
    if (pool.length) {
      // Reusing an idle resource hands it out again, so it must be counted;
      // otherwise the counter drifts below zero and maxCount stops applying.
      count++;
      return pool.pop();
    }

    if (count < maxCount) {
      count++;
      return create();
    }

    return new Promise(onResolve => {
      let called = false;
      let slowTimeoutId = 0;

      // Returns true when this waiter accepted the resource, false when it
      // had already been served (by the slow path below).
      const resolve = function(frame) {
        if (called) return false;
        called = true;

        if (slowTimeoutId) clearTimeout(slowTimeoutId);

        onResolve(frame);
        return true;
      };

      poolWaiters.push(resolve);

      if (slowTimeout) {
        slowTimeoutId = setTimeout(() => {
          slowTimeoutId = 0;

          // Waited long enough: create an extra resource if the higher limit
          // allows it, otherwise keep waiting for free().
          if (count < maxCountForSlow) {
            count++;
            resolve(create());
          }
        }, slowTimeout);
      }
    });
  };

  this.free = frame => {
    // Most recently queued waiter first; entries already served by the slow
    // path return false and are simply dropped.
    while (poolWaiters.length) {
      const consumed = poolWaiters.pop()(frame);
      if (consumed) return;
    }

    // Only resources beyond maxCount (created for slow waiters) are surplus.
    const isSurplus = count > maxCount;
    count--;

    if (isSurplus) {
      dispose(frame);
      return;
    }

    pool.push(frame);
  };
}
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment