Skip to content

Instantly share code, notes, and snippets.

@imneonizer
Last active August 13, 2020 02:23
Show Gist options
  • Save imneonizer/23d2faa12833716e22830f807b082a58 to your computer and use it in GitHub Desktop.
Save imneonizer/23d2faa12833716e22830f807b082a58 to your computer and use it in GitHub Desktop.
/**
 * Collect the image links of every search result currently in the page.
 *
 * Each element with class "serp-item" carries a JSON "data-bem" attribute;
 * the link is read from its ["serp-item"]["img_href"] entry.
 *
 * @returns {Array} one img_href value per "serp-item" element, in DOM order
 * @throws {SyntaxError|TypeError} if an item's "data-bem" attribute is not
 *   valid JSON or has no "serp-item" entry
 */
function collect() {
  // Array.from walks the live collection by index, and nothing is assigned to
  // an undeclared name, so no implicit global `images` is created.
  return Array.from(
    document.getElementsByClassName("serp-item"),
    (item) => JSON.parse(item.getAttribute("data-bem"))["serp-item"]["img_href"]
  );
}
// returs boolean value to check if page end has reached
function footer_hidden(){
return document.getElementsByClassName('footer')[0].getAttribute('class').includes('footer_hidden_yes')
}
/**
 * Scroll to the bottom of the page by setting the document's scroll offset
 * to its full height. Relies on the page providing jQuery as `$`.
 */
function scroll() {
  const $page = $(document);
  const page_height = $page.height();
  $page.scrollTop(page_height);
}
/**
 * Download a list of links as a text file, one link per line.
 *
 * Builds a data: URL holding the text and clicks a temporary anchor whose
 * `download` attribute is the requested file name.
 *
 * @param {string} file_name - name suggested for the downloaded file
 * @param {string[]} links - links to write, joined with "\n"
 */
function save(file_name, links) {
  const textToSave = links.join("\n");
  const hiddenElement = document.createElement('a');
  // encodeURIComponent (not encodeURI) is required here: encodeURI leaves
  // "#", "?" and "&" untouched, and a "#" inside a data: URL starts the
  // fragment, cutting the file off at the first link that contains one.
  hiddenElement.href = `data:text/plain;charset=utf-8,${encodeURIComponent(textToSave)}`;
  hiddenElement.target = '_blank';
  hiddenElement.download = file_name;
  hiddenElement.click();
}
// Placeholder for a setTimeout handle; nothing in the visible code assigns it.
var timeoutId = null;
// Image links gathered by collect_images once scrolling has finished; null until then.
var links = null;
/**
 * Scroll the results page until no more images load, then download every
 * collected image link as a text file.
 *
 * Each call is one polling tick. While the footer is hidden (results still
 * loading) it logs the running count, scrolls to the bottom and schedules the
 * next tick. Once the footer is visible it stores the links in the global
 * `links`, saves them and stops.
 *
 * @param {string} file_name - name of the text file to download
 * @param {number} [delay_ms=3000] - pause between polling ticks, in milliseconds
 */
function collect_images(file_name, delay_ms = 3000) {
  // Sample the footer state exactly once per tick. Checking it a second time
  // after scrolling could disagree with the first check, which either stopped
  // the crawl without saving or saved the file twice.
  if (!footer_hidden()) {
    // when all the images are loaded
    console.log("finished");
    links = collect();
    save(file_name, links);
    return;
  }
  // while images are loading
  console.log("collected images: ", collect().length);
  // scroll page down so the next batch of results is requested
  scroll();
  // reschedule function call
  setTimeout(function () {
    collect_images(file_name, delay_ms);
  }, delay_ms);
}
// Start the crawl; the collected links are downloaded as "urls.txt".
collect_images("urls.txt")
@imneonizer
Copy link
Author

imneonizer commented Apr 12, 2020

Yandex images crawler

  1. Visit https://yandex.com/images/search and search for the images you want to collect.
  2. Open the developer tools with Ctrl+Shift+J and switch to the JS Console tab.
  3. Paste the above JavaScript into the console. It will automatically start scrolling the page and, once all results have loaded, save the links to a text file.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment