Created
March 27, 2019 21:22
-
-
Save vingkan/f3c5546cf09c966754809bff6232cde6 to your computer and use it in GitHub Desktop.
Roster scraper for HawkLink (2019).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * 1. Go to https://iit.campuslabs.com/engage/organization/acm/roster
 * 2. Open the browser console
 * 3. Copy and paste this script into the console and run
 * 4. Keep tab open and do not click anywhere until the script completes
 * 5. Save the page output as a .csv
 */
// Entry point: read the roster summary label, expand the full member list,
// then visit each member box to collect name/email pairs.
//
// The label is split on spaces; the 4th word is used as the total member count
// and the number after '-' in the 2nd word as the page size (presumably a label
// of the form "Showing 1-N of TOTAL" -- confirm against the live page).
// `var` is kept at the top level so the script can be pasted into the console
// more than once without redeclaration errors.
var showing = document.querySelector('#roster-members').parentElement.children[1].children[0].innerText.split(' ');
// Strip thousands separators so e.g. "1,234" is not parsed as 1; always pass a radix.
var total = parseInt(String(showing[3]).replace(/,/g, ''), 10);
var chunk = parseInt(String(showing[1]).split('-')[1], 10);
loadMembersThen(total, chunk, 500, function() {
    console.log('done opening');
    // Positional walk from the main landmark down to the element whose
    // children are the individual member boxes.
    var container = document.querySelector('[role="main"]').children[0].children[3].children[0].children[1].children[0].children[0];
    var boxes = Array.from(container.children);
    var results = [];
    getEmailByBoxIndex({
        container: container,
        results: results,
        boxes: boxes,
        index: 0,
        time: 250,
        callback: printResults
    });
});
/**
 * Writes the scraped members to the page, one "name,email" row per line.
 *
 * Each field is CSV-quoted when it contains a comma, double quote, or line
 * break (embedded quotes are doubled), so a name such as "Doe, John" does not
 * split into two columns. The row is then HTML-escaped before being passed to
 * document.write, so scraped text cannot be interpreted as markup.
 *
 * @param {Array<{name: *, email: *}>} results - Scraped member records.
 */
function printResults(results) {
    console.log('printing');
    const csvField = (value) => {
        const text = String(value);
        return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    };
    const htmlText = (text) => text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
    results.forEach((res) => {
        const row = csvField(res.name) + ',' + csvField(res.email);
        document.write(htmlText(row) + '<br>');
    });
}
/**
 * Scrapes one member box and then recurses to the next one.
 *
 * Clicks the box at params.index, polls (via querySelectorThen) for an element
 * matching [role="alert"], reads the name and email from that element's
 * siblings, clicks the element used to dismiss the profile, then advances
 * params.index and calls itself again. When no box exists at the current
 * index, params.callback is invoked with the accumulated results.
 *
 * A box that lacks the expected clickable descendant is logged and skipped
 * instead of throwing, so one malformed entry does not discard every result
 * collected so far.
 *
 * @param {Object} params
 * @param {Array} params.results - Accumulator; {name, email} records are pushed onto it.
 * @param {Array} params.boxes - Member box elements to visit.
 * @param {number} params.index - Index of the box to process; incremented in place.
 * @param {number} params.time - Polling interval in ms passed to querySelectorThen.
 * @param {function(Array)} params.callback - Called with results once all boxes are done.
 */
function getEmailByBoxIndex(params) {
    const results = params.results;
    const box = params.boxes[params.index];
    if (!box) {
        console.log('Done scraping.');
        params.callback(results);
        return;
    }
    const wrapper = box.children[0];
    const subBox = wrapper && wrapper.children[0];
    if (!subBox) {
        console.log('Error: No clickable member entry found.');
        params.index++;
        getEmailByBoxIndex(params);
        return;
    }
    subBox.click();
    querySelectorThen('[role="alert"]', params.time, function(alertBox) {
        // Name and email are read positionally from the alert's siblings.
        const siblings = alertBox.parentElement.children;
        const nameBox = siblings[2];
        const emailBox = siblings[3];
        if (nameBox && emailBox) {
            const res = {
                name: nameBox.innerText.trim(),
                email: emailBox.innerText.trim()
            };
            results.push(res);
            console.log(res);
        } else {
            console.log('Error: No profile found.');
        }
        // Second child of the 8th ancestor; clicking it is presumably what
        // dismisses the open profile -- confirm against the live DOM.
        const holder = alertBox.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.children[1];
        // Kept from the original: exposes the last alert element on window
        // (apparently a debugging aid).
        window.alertBox = alertBox;
        holder.click();
        params.index++;
        getEmailByBoxIndex(params);
    });
}
/**
 * Polls the document until at least one element matches a selector, then
 * hands the LAST matching element to the callback and stops polling.
 *
 * By default polling continues indefinitely, as before. Passing
 * options.maxAttempts bounds the number of polls; when that many polls have
 * found nothing, polling stops and options.onTimeout (if given) is called.
 *
 * @param {string} queryString - CSS selector passed to document.querySelectorAll.
 * @param {number} time - Polling interval in milliseconds.
 * @param {function(Element)} callback - Receives the last matching element.
 * @param {Object} [options]
 * @param {number} [options.maxAttempts=Infinity] - Maximum number of polls.
 * @param {function()} [options.onTimeout] - Called when maxAttempts is exhausted.
 */
function querySelectorThen(queryString, time, callback, options = {}) {
    const maxAttempts = options.maxAttempts === undefined ? Infinity : options.maxAttempts;
    let attempts = 0;
    const interval = setInterval(function() {
        const nodes = document.querySelectorAll(queryString);
        const el = nodes[nodes.length - 1];
        if (el) {
            clearInterval(interval);
            callback(el);
            return;
        }
        attempts++;
        if (attempts >= maxAttempts) {
            clearInterval(interval);
            if (typeof options.onTimeout === 'function') {
                options.onTimeout();
            }
        }
    }, time);
}
/**
 * Repeatedly clicks the "load more members" control until the roster is
 * expected to be fully expanded, then invokes the callback once.
 *
 * On every tick the last <span> whose text (trimmed, case-insensitive) equals
 * "load more members" is clicked and a running count is advanced by `chunk`.
 * Polling stops when the count reaches `total` or when no such span exists.
 *
 * @param {number} total - Total number of members expected.
 * @param {number} chunk - Number of members assumed to load per click.
 * @param {number} time - Interval between clicks in milliseconds.
 * @param {function()} callback - Called once after polling stops.
 */
function loadMembersThen(total, chunk, time, callback) {
    let loaded = 0;
    const finish = () => {
        clearInterval(loadInterval);
        callback();
    };
    const loadInterval = setInterval(function() {
        const candidates = Array.from(document.querySelectorAll('span'))
            .filter((span) => span.innerText.trim().toLowerCase() === 'load more members');
        const loader = candidates[candidates.length - 1];
        if (!loader) {
            // Nothing left to expand.
            finish();
            return;
        }
        loader.click();
        document.body.click();
        loaded += chunk;
        console.log(`${loaded} / ${total}`);
        if (loaded >= total) {
            console.log(loaded, total);
            finish();
        }
    }, time);
}
Oh, and for future reference, https://github.com/Supermas123/Engage-Campus-Labs-University-Contact-Information-Webscraper seems quite close to this gist.
Hi @aubertc, I have since graduated and my university has switched from Campus Labs to a new system, but if you send me an example https://<institution>.campuslabs.com/faculty/trends#/
page, I can check it out!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for working on it!
It no longer seems to work (at least, not on my institution's instance), but I appreciate the idea. Do you know if a similar scraper exists for e.g. https://<institution>.campuslabs.com/faculty/trends#/ ?
Thanks!