Created
March 27, 2019 21:22
-
-
Save vingkan/f3c5546cf09c966754809bff6232cde6 to your computer and use it in GitHub Desktop.
Roster scraper for HawkLink (2019).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * 1. Go to https://iit.campuslabs.com/engage/organization/acm/roster
 * 2. Open the browser console
 * 3. Copy and paste this script into the console and run it
 * 4. Keep the tab open and do not click anywhere until the script completes
 * 5. Save the page output as a .csv
 */
// Read the roster status label and split it into words. The indices used below
// imply a label shaped like "Showing 1-20 of 250" (TODO confirm on the live
// page): word 1 is the displayed range and word 3 is the total member count.
// `var` is kept for these top-level names so the script can be pasted into the
// same console session again without a redeclaration error.
var showing = document.querySelector('#roster-members').parentElement.children[1].children[0].innerText.split(' ');
// Strip thousands separators before parsing so "1,250" is not read as 1, and
// always pass the radix explicitly.
var total = parseInt(String(showing[3]).replace(/,/g, ''), 10);
var chunk = parseInt(String(showing[1].split('-')[1]).replace(/,/g, ''), 10);

// Expand the roster first (polling every 500 ms), then visit the entries one by
// one (polling every 250 ms) and finally print everything that was collected.
loadMembersThen(total, chunk, 500, function () {
  console.log('done opening');
  // Walk from the main landmark down to the element whose children are handed
  // to the scraper (presumably one child per roster member).
  const container = document.querySelector('[role="main"]').children[0].children[3].children[0].children[1].children[0].children[0];
  getEmailByBoxIndex({
    container: container,
    results: [],
    boxes: Array.from(container.children),
    index: 0,
    time: 250,
    callback: printResults,
  });
});
/**
 * Writes the scraped entries into the page as CSV rows (`name,email`), one row
 * per line, each terminated by `<br>`.
 *
 * Fields containing a comma, a double quote or a line break are wrapped in
 * double quotes (embedded quotes doubled) so the saved output stays valid CSV,
 * and markup characters are escaped because `document.write` parses its
 * argument as HTML.
 *
 * @param {Array<{name: string, email: string}>} results - Scraped entries.
 */
function printResults(results) {
  console.log('printing');

  // Quote a field only when it contains a character that would break the row.
  const toCsvField = (value) => {
    const text = String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  // Escape the characters the HTML parser would otherwise treat as markup.
  const toHtmlText = (text) =>
    text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  results.forEach((res) => {
    const row = `${toCsvField(res.name)},${toCsvField(res.email)}`;
    document.write(`${toHtmlText(row)}<br>`);
  });
}
/**
 * Scrapes roster entries one at a time, starting at `params.index`.
 *
 * For the current box it clicks the box's first grandchild, waits for an element
 * with `role="alert"` to exist, reads the 3rd and 4th children of that element's
 * parent as name and email, clicks an element eight levels further up
 * (presumably to dismiss the opened profile — TODO confirm), and then moves on
 * to the next box. When no box is left, `params.callback` receives the results.
 *
 * @param {Object} params
 * @param {Array<{name: string, email: string}>} params.results - Accumulator; entries are pushed onto it.
 * @param {Array<Element>} params.boxes - Roster boxes to visit.
 * @param {number} params.index - Index of the box to process on this call.
 * @param {number} params.time - Polling period in ms passed to `querySelectorThen`.
 * @param {function(Array): void} params.callback - Called once with `results` when all boxes are done.
 */
function getEmailByBoxIndex(params) {
  const results = params.results;
  const box = params.boxes[params.index];

  if (!box) {
    console.log('Done scraping.');
    params.callback(results);
    return;
  }

  // Advance with a copy so the caller's params object is never mutated.
  const processNext = () => {
    getEmailByBoxIndex(Object.assign({}, params, { index: params.index + 1 }));
  };

  // A box without the expected clickable descendant is skipped instead of
  // throwing, so one malformed entry cannot abort the run and lose the results.
  const firstChild = box.children && box.children[0];
  const subBox = firstChild && firstChild.children && firstChild.children[0];
  if (!subBox) {
    console.log('Error: No profile found.');
    processNext();
    return;
  }

  subBox.click();
  querySelectorThen('[role="alert"]', params.time, function (alertBox) {
    const siblings = alertBox.parentElement.children;
    const nameBox = siblings[2];
    const emailBox = siblings[3];

    if (nameBox && emailBox) {
      const res = {
        name: nameBox.innerText.trim(),
        email: emailBox.innerText.trim(),
      };
      results.push(res);
      console.log(res);
    } else {
      console.log('Error: No profile found.');
    }

    // Click the second child of the eighth ancestor before moving on.
    const holder = alertBox.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.parentElement.children[1];
    holder.click();
    processNext();
  });
}
/**
 * Polls the document every `time` ms until `queryString` matches at least one
 * element, then stops polling and passes the LAST matching element to `callback`.
 *
 * The first check happens after one full period, not immediately.
 *
 * @param {string} queryString - CSS selector to look for.
 * @param {number} time - Polling period in milliseconds.
 * @param {function(Element): void} callback - Receives the last matching element.
 * @param {number} [maxAttempts=Infinity] - Give up after this many unsuccessful polls.
 *   The default keeps polling until a match appears.
 * @param {function(): void} [onTimeout] - Called once if polling gives up.
 */
function querySelectorThen(queryString, time, callback, maxAttempts = Infinity, onTimeout) {
  let attempts = 0;
  const interval = setInterval(function () {
    attempts += 1;
    const nodes = document.querySelectorAll(queryString);
    const el = nodes[nodes.length - 1];

    if (el) {
      clearInterval(interval);
      callback(el);
      return;
    }

    // Stop the timer once the attempt budget is spent so it cannot run forever.
    if (attempts >= maxAttempts) {
      clearInterval(interval);
      if (typeof onTimeout === 'function') {
        onTimeout();
      }
    }
  }, time);
}
/**
 * Repeatedly clicks the "load more members" control until the roster is expanded,
 * then calls `callback` exactly once.
 *
 * Every `time` ms it looks for the last `<span>` whose text (trimmed,
 * case-insensitive) is "load more members". If none exists, loading is treated
 * as finished. Otherwise it clicks that span and the document body, adds `chunk`
 * to a running count, and finishes once the count reaches `total`.
 *
 * @param {number} total - Total number of members expected.
 * @param {number} chunk - Amount added to the running count per click.
 * @param {number} time - Polling period in milliseconds.
 * @param {function(): void} callback - Called once when loading stops.
 */
function loadMembersThen(total, chunk, time, callback) {
  let requested = 0;
  const loadInterval = setInterval(function () {
    // Trim before comparing so surrounding whitespace in the label does not
    // prevent the control from being found.
    const loaders = Array.from(document.querySelectorAll('span')).filter(
      (span) => (span.innerText || '').trim().toLowerCase() === 'load more members'
    );
    const loader = loaders[loaders.length - 1];

    if (!loader) {
      clearInterval(loadInterval);
      callback();
      return;
    }

    loader.click();
    document.body.click();
    requested += chunk;
    console.log(`${requested} / ${total}`);

    if (requested >= total) {
      clearInterval(loadInterval);
      callback();
    }
  }, time);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi @aubertc I have since graduated and my university switched from Campus Labs to a new system, but if you send me an example
https://<institution>.campuslabs.com/faculty/trends#/
page, I can check it out!