Last active
January 22, 2019 19:13
-
-
Save AshleyGrant/69762f2f91a3ca3b7108567a45666fbc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Browser-console scraper for a recruit rankings page.
 *
 * Steps:
 *   1. Reads the year from the first word of `h1.rankings-page__heading`.
 *   2. Repeatedly clicks the `a[data-js="showmore"]` link until none is left,
 *      expanding every not-yet-clicked recruit flyout after each page load.
 *   3. Converts every list item that contains a `div.wrapper` into a plain
 *      object and logs the result both as a JSON string and as a live array.
 *
 * NOTE(review): `$` is called with a selector and its result is used through
 * `.length` and `.click()` - presumably the page's own jQuery; confirm before
 * running on a page that does not provide it.
 */
(async function () {
  const POLL_INTERVAL_MS = 50;

  /** Resolves after `ms` milliseconds. */
  const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

  /**
   * Polls until `div#global-loading` no longer matches anything.
   * NOTE(review): if the page inserts the indicator asynchronously after a
   * click, this can return before loading has started - verify on the page.
   */
  const waitForLoadingToFinish = async () => {
    while ($('div#global-loading').length) {
      await sleep(POLL_INTERVAL_MS);
    }
  };

  /**
   * Splits hometown text into its parts: the text before the first `(`
   * becomes `highSchool`, the text inside the parentheses becomes `homeTown`,
   * and the second comma-separated segment of `homeTown` becomes `state`.
   *
   * @param {string} text - Raw text of the hometown element.
   * @returns {{highSchool?: string, homeTown?: string, state?: string}}
   *   Only the keys that could be extracted; empty when `text` has no `(`.
   */
  const parseHometown = text => {
    const match = text.match(/([^\(]*)\(([^\)]*)/);
    if (!match) {
      // String.prototype.match returns null on no match; skip instead of crashing.
      console.warn(`Unrecognized hometown format: ${text}`);
      return {};
    }
    const highSchool = match[1].trim();
    const homeTown = match[2].trim();
    const statePart = homeTown.split(',')[1];
    if (statePart === undefined) {
      // No comma in the hometown, so there is no state segment to report.
      return { highSchool, homeTown };
    }
    return { highSchool, homeTown, state: statePart.trim() };
  };

  /**
   * Builds the record for one recruit row.
   *
   * @param {Element} row - A rankings list item that contains `div.wrapper`.
   * @param {string} year - Year taken from the page heading.
   * @returns {object} Recruit record; `hasCommitInfo` tells whether
   *   `status`/`statusDate` were filled in from the commit block.
   */
  const parseRecruit = (row, year) => {
    const wrapper = row.querySelector('div.wrapper');
    const imageElement = wrapper.querySelector('div.circle-image-block > img');
    const schoolImageElement = wrapper.querySelector('div.status a.img-link img');
    const hometownInfo = wrapper.querySelector('.recruit span.meta');
    const [height, weight] = wrapper
      .querySelector('.metrics')
      .innerText.split('/')
      .map(part => part.trim());

    let playerInfo = {
      year,
      name: wrapper.querySelector('a.rankings-page__name-link').innerText,
      stars: wrapper
        .querySelector('div.rankings-page__star-and-score')
        .querySelectorAll('span.icon-starsolid.yellow').length,
      score: wrapper.querySelector('span.score').innerText,
      school: schoolImageElement ? schoolImageElement.title : '',
      // Only the first child node of the primary rank cell is read.
      primaryRanking: wrapper
        .querySelector('div.rank-column > div.primary')
        .childNodes[0].wholeText.trim(),
      stateRanking: wrapper.querySelector('a.sttrank').innerText,
      otherRanking: wrapper.querySelector('div.rank-column > div.other').innerText,
      position: wrapper.querySelector('div.position').innerText,
      positionRanking: wrapper.querySelector('a.posrank').innerText,
      // Guarded like `school`: a row without an image yields '' instead of throwing.
      image: imageElement ? imageElement.dataset.src || imageElement.src : '',
      height,
      weight,
      hasCommitInfo: false,
    };

    if (hometownInfo) {
      playerInfo = { ...playerInfo, ...parseHometown(hometownInfo.innerText) };
    }

    const commitInfo = row.querySelector('div.flyout div.commit-block span');
    if (commitInfo && commitInfo.childNodes.length === 3) {
      // With exactly three child nodes, the first and last carry the data read here.
      playerInfo.hasCommitInfo = true;
      playerInfo.status = commitInfo.childNodes[0].data;
      playerInfo.statusDate = commitInfo.childNodes[2].data;
      return playerInfo;
    }

    const topFiveSchools = [
      ...row.querySelectorAll('.flyout .ranking-flyout .top-five-choices li'),
    ];
    if (topFiveSchools.length > 0) {
      playerInfo.topFiveSchools = topFiveSchools.map(school => ({
        school: school.querySelector('.image-container img').title,
        interest: school.querySelector('.interest-level span').innerText,
        percentage: school.querySelector('.percent').innerText,
      }));
    }
    return playerInfo;
  };

  const year = document
    .querySelector('h1.rankings-page__heading')
    .innerText.split(' ')[0];

  // The counter starts at 2, so the first "show more" click is logged as page 2.
  let nextPage = 2;
  while (true) {
    const showMore = $('a[data-js="showmore"]');
    if (!(showMore.length > 0)) {
      break;
    }
    console.log(`Loading page: ${nextPage++}`);
    showMore.click();
    await waitForLoadingToFinish();
    console.log('Finished loading recruits');

    // The `clicked` class marks anchors already expanded on an earlier pass.
    const recruitsWithMoreData = [
      ...document.querySelectorAll('a.icon-caret-down.expand-anchor:not(.clicked)'),
    ];
    console.log(`Loading additional info for ${recruitsWithMoreData.length} recruits`);
    recruitsWithMoreData.forEach(anchor => {
      anchor.classList.add('clicked');
      anchor.click();
    });
    await waitForLoadingToFinish();
    console.log('Finished loading additional info');

    window.scrollTo(0, document.body.scrollHeight);
  }

  console.log('Building JSON');
  const data = [
    ...document.querySelectorAll('li.rankings-page__list-item:not(.rankings-page__showmore)'),
  ]
    .filter(row => row.querySelector('div.wrapper'))
    .map(row => parseRecruit(row, year));

  console.log(JSON.stringify(data));
  console.log('Scraped data', data);
})().catch(err => {
  // Report failures explicitly instead of leaving the promise floating.
  console.error('Scrape failed', err);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment