yfe404 · May 25, 2018 16:57
diff --git a/main.py b/main.py
 """
 Scrape captions and images from Instagram using https://deskgram.org
 """
 import requests
 from bs4 import BeautifulSoup

 BASE_URL = 'https://deskgram.org'
 USER = 'healthymealsberlin'
 # Seconds to wait for the server; without a timeout requests.get can block
 # indefinitely on a stalled connection.
 REQUEST_TIMEOUT = 30

 start_url = BASE_URL + '/' + USER


 def fetch_page(url):
     """Download one page, print its caption/image counts and return the results.

     Args:
         url: Absolute URL of the page to download.

     Returns:
         A ``(page, captions, images)`` tuple: the parsed document, the
         ``div.post-caption`` elements and the ``div.post-img`` elements.

     Raises:
         requests.HTTPError: the server answered with a 4xx/5xx status, so an
             error page is not silently reported as a page with zero posts.
     """
     response = requests.get(url, timeout=REQUEST_TIMEOUT)
     response.raise_for_status()
     page = BeautifulSoup(response.text, 'html.parser')
     page_captions = page.find_all("div", {"class": "post-caption"})
     page_images = page.find_all("div", {"class": "post-img"})
     print('Found {0} captions.'.format(len(page_captions)))
     print('Found {0} images.'.format(len(page_images)))
     return page, page_captions, page_images


 soup, captions, images = fetch_page(start_url)
 # URLs already downloaded, used to stop if pagination ever points backwards.
 visited = {start_url}

 while True:
     # href=True skips anchors that have no href attribute; indexing those
     # with ['href'] would raise KeyError.
     next_links = [
         anchor for anchor in soup.find_all('a', href=True)
         if 'next_id' in anchor['href']
     ]
     if not next_links:
         break
     next_url = BASE_URL + next_links[0]['href']
     if next_url in visited:
         # The "next" link leads to a page that was already fetched; stop
         # instead of requesting the same pages forever.
         break
     visited.add(next_url)
     print('fetching {0}'.format(next_url))
     soup, captions, images = fetch_page(next_url)
	"""
	Scrape captions and images from Instagram using https://deskgram.org
	"""
	import requests
	from bs4 import BeautifulSoup

	BASE_URL = 'https://deskgram.org'
	USER = 'healthymealsberlin'
	# Seconds to wait for the server; without a timeout requests.get can block
	# indefinitely on a stalled connection.
	REQUEST_TIMEOUT = 30

	start_url = BASE_URL + '/' + USER


	def fetch_page(url):
	    """Download one page, print its caption/image counts and return the results.

	    Args:
	        url: Absolute URL of the page to download.

	    Returns:
	        A ``(page, captions, images)`` tuple: the parsed document, the
	        ``div.post-caption`` elements and the ``div.post-img`` elements.

	    Raises:
	        requests.HTTPError: the server answered with a 4xx/5xx status, so an
	            error page is not silently reported as a page with zero posts.
	    """
	    response = requests.get(url, timeout=REQUEST_TIMEOUT)
	    response.raise_for_status()
	    page = BeautifulSoup(response.text, 'html.parser')
	    page_captions = page.find_all("div", {"class": "post-caption"})
	    page_images = page.find_all("div", {"class": "post-img"})
	    print('Found {0} captions.'.format(len(page_captions)))
	    print('Found {0} images.'.format(len(page_images)))
	    return page, page_captions, page_images


	soup, captions, images = fetch_page(start_url)
	# URLs already downloaded, used to stop if pagination ever points backwards.
	visited = {start_url}

	while True:
	    # href=True skips anchors that have no href attribute; indexing those
	    # with ['href'] would raise KeyError.
	    next_links = [
	        anchor for anchor in soup.find_all('a', href=True)
	        if 'next_id' in anchor['href']
	    ]
	    if not next_links:
	        break
	    next_url = BASE_URL + next_links[0]['href']
	    if next_url in visited:
	        # The "next" link leads to a page that was already fetched; stop
	        # instead of requesting the same pages forever.
	        break
	    visited.add(next_url)
	    print('fetching {0}'.format(next_url))
	    soup, captions, images = fetch_page(next_url)
No results found