uluQulu · April 11, 2018 02:01
diff --git a/get_links_for_location.py b/get_links_for_location.py
 def _unique_in_order(items):
     """Return a new list holding ``items`` without duplicates.

     The first occurrence of every value is kept and the relative order of
     the survivors is unchanged. Items must be hashable.
     """
     seen = set()
     unique = []
     for item in items:
         if item not in seen:
             seen.add(item)
             unique.append(item)
     return unique


 def get_links_for_location(browser,
                            location,
                            amount,
                            logger,
                            media=None,
                            skip_top_posts=True):
     """Collect up to ``amount`` post links from a location explore page.

     The page ``https://www.instagram.com/explore/locations/<location>`` is
     opened and scrolled to the bottom repeatedly; after every round of
     scrolling the links reported by ``get_links`` are merged into the
     running, de-duplicated result.

     Args:
         browser: driver object; must provide ``get``,
             ``find_element_by_xpath``, ``find_element_by_tag_name`` and
             ``execute_script``.
         location: string appended verbatim to the locations explore URL.
         amount: maximum number of links to return.
         logger: object with an ``info`` method, used for progress messages.
         media: media filter forwarded to ``get_links``. ``None`` becomes
             ``['', 'Post', 'Video']``, ``'Photo'`` becomes ``['', 'Post']``
             and any other value is wrapped in a one-element list.
         skip_top_posts: when true, links are read from
             ``//main/article/div[2]`` instead of the whole ``main`` element.

     Returns:
         ``links[:amount]`` - at most ``amount`` links. Nothing is scrolled
         (and the first batch is returned as is) when the first batch is
         empty or already holds ``amount`` links.

     Note:
         The function sleeps between most steps, and for 600 seconds after
         more than 100 scrolls or when scrolling seems to be stuck on a
         short result list.
     """
     if media is None:
         # All known media types
         media = ['', 'Post', 'Video']
     elif media == 'Photo':
         # Include posts with multiple images in it
         media = ['', 'Post']
     else:
         # Wrap a single media type so the downstream code can iterate it
         media = [media]

     browser.get('https://www.instagram.com/explore/locations/' + location)
     # update server calls
     update_activity()
     sleep(2)

     # NOTE(review): the anchors found here are never used. The lookup is
     # kept so a page lacking the first article section still raises exactly
     # as before - confirm whether it can be dropped entirely.
     browser.find_element_by_xpath(
         '//main/article/div[1]').find_elements_by_tag_name('a')
     sleep(1)

     if skip_top_posts:
         main_elem = browser.find_element_by_xpath('//main/article/div[2]')
     else:
         main_elem = browser.find_element_by_tag_name('main')
     link_elems = main_elem.find_elements_by_tag_name('a')
     sleep(1)

     if not link_elems:
         # this location does not have `Top Posts` or it really is empty..
         main_elem = browser.find_element_by_xpath('//main/article/div[1]')
     sleep(2)

     # Get links
     links = get_links(browser, location, logger, media, main_elem)
     filtered_links = len(links)   # unique links known after the last round
     try_again = 0                 # consecutive rounds without new links
     sc_rolled = 0                 # scroll commands since the last long rest
     nap = 1.5                     # pause after each scroll command
     put_sleep = 0                 # number of long "rate limit" rests taken

     # Keep scrolling while there is at least one link but fewer than wanted.
     while 1 <= filtered_links < amount:
         if sc_rolled > 100:
             logger.info("Scrolled too much! ~ sleeping a bit :>")
             sleep(600)
             sc_rolled = 0

         for _ in range(3):
             browser.execute_script(
                 "window.scrollTo(0, document.body.scrollHeight);")
             sc_rolled += 1
             update_activity()
             # if not slept, and internet speed is low, instagram will only
             # scroll one time, instead of many times you sent scroll command
             sleep(nap)
         sleep(3)

         links.extend(get_links(browser, location, logger, media, main_elem))
         links = _unique_in_order(links)

         if len(links) != filtered_links:
             # Progress was made: remember the new total, reset the back-off.
             filtered_links = len(links)
             try_again = 0
             nap = 1.5
             continue

         # No new links this round: slow down and retry.
         try_again += 1
         nap = 3 if try_again == 1 else 5
         logger.info("Insufficient amount of links ~ trying again: {}".format(try_again))
         sleep(3)

         # you can try again as much as you want by changing this number
         if try_again <= 2:
             continue

         # NOTE(review): 21 is presumably about one initial page of posts,
         # i.e. scrolling never loaded anything - confirm the threshold.
         if put_sleep < 1 and filtered_links <= 21:
             logger.info("Cor! Did you send too many requests? ~ let's rest some")
             sleep(600)
             put_sleep += 1
             browser.execute_script("location.reload()")
             try_again = 0
             sleep(10)
             # The reload invalidates the old element, so look it up again
             # with the same rules that picked it the first time.
             if not link_elems:
                 main_elem = browser.find_element_by_xpath('//main/article/div[1]')
             elif skip_top_posts:
                 main_elem = browser.find_element_by_xpath('//main/article/div[2]')
             else:
                 main_elem = browser.find_element_by_tag_name('main')
         else:
             logger.info("'{}' location POSSIBLY has less images than desired...".format(location))
             break

     sleep(4)

     return links[:amount]
	def get_links_for_location(browser,
	                           location,
	                           amount,
	                           logger,
	                           media=None,
	                           skip_top_posts=True):
	    """Fetch up to ``amount`` links from a location explore page.

	    Opens ``https://www.instagram.com/explore/locations/<location>``,
	    scrolls to the bottom in rounds of three and, after each round, merges
	    the links returned by ``get_links`` into an order-preserving,
	    duplicate-free list.

	    Parameters:
	        browser: driver object offering ``get``, ``find_element_by_xpath``,
	            ``find_element_by_tag_name`` and ``execute_script``.
	        location: string appended unchanged to the explore URL.
	        amount: upper bound on the number of links returned.
	        logger: object whose ``info`` method receives progress messages.
	        media: filter handed to ``get_links``; ``None`` expands to
	            ``['', 'Post', 'Video']``, ``'Photo'`` to ``['', 'Post']``,
	            anything else is wrapped in a list.
	        skip_top_posts: if true, read links from ``//main/article/div[2]``
	            rather than from the whole ``main`` element.

	    Returns:
	        ``links[:amount]``. No scrolling happens when the first batch is
	        empty or already contains ``amount`` links.

	    The call blocks for a long time: it sleeps between steps and rests for
	    600 seconds after more than 100 scrolls or when a short result list
	    stops growing.
	    """

	    if media is None:
	        # All known media types
	        media = ['', 'Post', 'Video']
	    elif media == 'Photo':
	        # Include posts with multiple images in it
	        media = ['', 'Post']
	    else:
	        # Make it a list to use it in the following part
	        media = [media]

	    browser.get('https://www.instagram.com/explore/locations/' + location)
	    # update server calls
	    update_activity()
	    sleep(2)

	    # NOTE(review): the result of this lookup is unused; the calls are kept
	    # so a missing first section raises as before - confirm if removable.
	    top_elements = browser.find_element_by_xpath('//main/article/div[1]')
	    top_elements.find_elements_by_tag_name('a')
	    sleep(1)

	    if skip_top_posts:
	        main_elem = browser.find_element_by_xpath('//main/article/div[2]')
	    else:
	        main_elem = browser.find_element_by_tag_name('main')
	    link_elems = main_elem.find_elements_by_tag_name('a')
	    sleep(1)

	    if not link_elems:
	        # this location does not have `Top Posts` or it really is empty..
	        main_elem = browser.find_element_by_xpath('//main/article/div[1]')
	    sleep(2)

	    # Get links
	    links = get_links(browser, location, logger, media, main_elem)
	    filtered_links = len(links)
	    try_again = 0    # rounds in a row that produced no new link
	    sc_rolled = 0    # scrolls issued since the last long rest
	    nap = 1.5        # seconds to wait after each scroll
	    put_sleep = 0    # long rests already taken because of a stall

	    # Continue only while some links exist but fewer than requested.
	    while 1 <= filtered_links < amount:
	        if sc_rolled > 100:
	            logger.info("Scrolled too much! ~ sleeping a bit :>")
	            sleep(600)
	            sc_rolled = 0

	        for _ in range(3):
	            browser.execute_script(
	                "window.scrollTo(0, document.body.scrollHeight);")
	            sc_rolled += 1
	            update_activity()
	            # if not slept, and internet speed is low, instagram will only
	            # scroll one time, instead of many times you sent scroll command
	            sleep(nap)
	        sleep(3)
	        links.extend(get_links(browser, location, logger, media, main_elem))

	        # uniqify links while preserving order
	        links_all = links
	        seen = set()
	        links = []
	        for link in links_all:
	            if link not in seen:
	                seen.add(link)
	                links.append(link)

	        if len(links) == filtered_links:
	            # Nothing new was loaded: back off and count the failed round.
	            try_again += 1
	            nap = 3 if try_again == 1 else 5
	            logger.info("Insufficient amount of links ~ trying again: {}".format(try_again))
	            sleep(3)
	            # you can try again as much as you want by changing this number
	            if try_again > 2:
	                # NOTE(review): 21 presumably matches one initial page of
	                # posts - confirm the threshold.
	                if put_sleep < 1 and filtered_links <= 21:
	                    logger.info("Cor! Did you send too many requests? ~ let's rest some")
	                    sleep(600)
	                    put_sleep += 1
	                    browser.execute_script("location.reload()")
	                    try_again = 0
	                    sleep(10)
	                    # Re-locate the container after the reload using the
	                    # same choice that was made before scrolling started.
	                    if not link_elems:
	                        main_elem = browser.find_element_by_xpath('//main/article/div[1]')
	                    elif skip_top_posts:
	                        main_elem = browser.find_element_by_xpath('//main/article/div[2]')
	                    else:
	                        main_elem = browser.find_element_by_tag_name('main')
	                else:
	                    logger.info("'{}' location POSSIBLY has less images than desired...".format(location))
	                    break
	        else:
	            # New links arrived: record the total and reset the back-off.
	            filtered_links = len(links)
	            try_again = 0
	            nap = 1.5

	    sleep(4)

	    return links[:amount]