Skip to content

Instantly share code, notes, and snippets.

@AshleyGrant
Last active January 22, 2019 19:13
Show Gist options
  • Save AshleyGrant/69762f2f91a3ca3b7108567a45666fbc to your computer and use it in GitHub Desktop.
Save AshleyGrant/69762f2f91a3ca3b7108567a45666fbc to your computer and use it in GitHub Desktop.
/**
 * Splits a recruit's hometown label into its parts.
 *
 * The text before the first "(" becomes `highSchool`, the text between "(" and
 * the next ")" becomes `homeTown`, and when `homeTown` contains a comma the
 * segment after the first comma becomes `state`
 * (e.g. "Some High (Springfield, IL)").
 *
 * Declared outside the scraper closure because it is pure string handling and
 * needs no DOM.
 *
 * @param {string} text - Raw label text.
 * @returns {{highSchool?: string, homeTown?: string, state?: string}} The
 *   fields that could be extracted; an empty object when the text has no "(".
 */
function parseHometown(text) {
  if (typeof text !== 'string') {
    return {};
  }
  const match = /([^(]*)\(([^)]*)/.exec(text);
  if (!match) {
    // No "school (town)" shape: report nothing rather than throwing.
    return {};
  }
  const parsed = {
    highSchool: match[1].trim(),
    homeTown: match[2].trim(),
  };
  const segments = parsed.homeTown.split(',');
  if (segments.length > 1) {
    parsed.state = segments[1].trim();
  }
  return parsed;
}

/**
 * Console scraper: loads every page of the rankings list, expands each
 * recruit's flyout, then builds one plain object per recruit and logs the
 * result both as a JSON string and as a live array.
 *
 * Relies on a page-provided `$` whose result exposes `.length` and `.click()`.
 */
(async function () {
  const LOADING_SELECTOR = 'div#global-loading';
  const POLL_INTERVAL_MS = 50;

  /** Polls until the global loading indicator is no longer in the page. */
  const waitForLoading = async () => {
    while ($(LOADING_SELECTOR).length) {
      await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
    }
  };

  /** Returns the innerText of the first match under `root`, or '' if absent. */
  const textOf = (root, selector) => {
    const el = root.querySelector(selector);
    return el ? el.innerText : '';
  };

  /** Clicks every not-yet-expanded recruit caret and waits for loading to end. */
  const expandRecruitDetails = async () => {
    const anchors = [...document.querySelectorAll('a.icon-caret-down.expand-anchor:not(.clicked)')];
    console.log(`Loading additional info for ${anchors.length} recruits`);
    anchors.forEach(anchor => {
      // Mark first so the anchor is never clicked twice on a later pass.
      anchor.classList.add('clicked');
      anchor.click();
    });
    await waitForLoading();
    console.log('Finished loading additional info');
  };

  /** Converts one list row into a plain recruit record. */
  const buildPlayerInfo = (row, year) => {
    const p = row.querySelector('div.wrapper');

    const primaryBlock = p.querySelector('div.rank-column > div.primary');
    const primaryText = primaryBlock && primaryBlock.childNodes[0];
    const primaryRanking = primaryText && primaryText.wholeText ? primaryText.wholeText.trim() : '';

    const starBlock = p.querySelector('div.rankings-page__star-and-score');
    const stars = starBlock ? starBlock.querySelectorAll('span.icon-starsolid.yellow').length : 0;

    const imageElement = p.querySelector('div.circle-image-block > img');
    const schoolImageElement = p.querySelector('div.status a.img-link img');
    const [height, weight] = textOf(p, '.metrics').split('/').map(x => x.trim());

    const playerInfo = {
      year,
      name: textOf(p, 'a.rankings-page__name-link'),
      stars,
      score: textOf(p, 'span.score'),
      school: schoolImageElement ? schoolImageElement.title : '',
      primaryRanking,
      stateRanking: textOf(p, 'a.sttrank'),
      otherRanking: textOf(p, 'div.rank-column > div.other'),
      position: textOf(p, 'div.position'),
      positionRanking: textOf(p, 'a.posrank'),
      // Prefer data-src when present, otherwise fall back to the plain src.
      image: imageElement ? imageElement.dataset.src || imageElement.src : '',
      height,
      weight,
      hasCommitInfo: false,
    };

    const hometownInfo = p.querySelector('.recruit span.meta');
    if (hometownInfo) {
      Object.assign(playerInfo, parseHometown(hometownInfo.innerText));
    }

    const commitInfo = row.querySelector('div.flyout div.commit-block span');
    if (commitInfo && commitInfo.childNodes.length === 3) {
      playerInfo.hasCommitInfo = true;
      playerInfo.status = commitInfo.childNodes[0].data;
      playerInfo.statusDate = commitInfo.childNodes[2].data;
    } else {
      const topFiveSchools = [...row.querySelectorAll('.flyout .ranking-flyout .top-five-choices li')];
      if (topFiveSchools.length) {
        playerInfo.topFiveSchools = topFiveSchools.map(school => {
          const logo = school.querySelector('.image-container img');
          return {
            school: logo ? logo.title : '',
            interest: textOf(school, '.interest-level span'),
            percentage: textOf(school, '.percent'),
          };
        });
      }
    }
    return playerInfo;
  };

  const year = document.querySelector('h1.rankings-page__heading').innerText.split(' ')[0];

  let page = 2;
  while (true) {
    // Expand before looking for "show more" so the rows already on screen are
    // covered even when the list has a single page and no link exists at all.
    await expandRecruitDetails();
    window.scrollTo(0, document.body.scrollHeight);

    const link = $('a[data-js="showmore"]');
    if (!link.length) {
      break;
    }
    console.log(`Loading page: ${page++}`);
    link.click();
    await waitForLoading();
    console.log('Finished loading recruits');
  }

  console.log('Building JSON');
  const data = [...document.querySelectorAll('li.rankings-page__list-item:not(.rankings-page__showmore)')]
    .filter(row => row.querySelector('div.wrapper'))
    .map(row => buildPlayerInfo(row, year));

  console.log(JSON.stringify(data));
  console.log('Scraped data', data);
})().catch(err => {
  // Surface failures explicitly instead of leaving a floating rejected promise.
  console.error('Scrape failed', err);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment