-
-
Save andykais/04a02b61bb3b6d92aa3388c45ea816bd to your computer and use it in GitHub Desktop.
looping config using scrapeNext
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module.exports = { | |
input: 'username', | |
scrape: { | |
name: 'home', | |
download: 'https://ifunny.co/user/{{ username }}', | |
// annoying bit, the 'first loop step' is here | |
// alternative could be calling scrapeNext before 'grid's download, seems less straigtforward though | |
parse: { | |
name: 'batch-id', | |
selector: '.stream__item:first-child', | |
attribute: 'data-next' | |
}, | |
scrapeEach: { | |
name: 'gallery', | |
download: | |
'https://ifunny.co/user/{{ username }}/timeline/{{ value }}?batch=2?mode=grid', | |
scrapeNext: { | |
name: 'next-batch-id', | |
parse: { | |
selector: '.stream__item:first-child', | |
attribute: 'data-next' | |
} | |
}, | |
scrapeEach: { | |
name: 'batch-page', | |
parse: { | |
selector: '.post a', | |
attribute: 'href' | |
}, | |
scrapeEach: { | |
name: 'image-page', | |
download: 'https://ifunny.co{{ value }}', | |
parse: { | |
selector: '.post .media__image', | |
attribute: 'src' | |
}, | |
scrapeEach: { | |
name: 'image', | |
download: '{{ value }}' | |
} | |
} | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment