sinebeef · November 28, 2019 17:39
diff --git a/get-indexed-urls.py b/get-indexed-urls.py
 import requests
 import csv
 from   bs4        import BeautifulSoup
 #from   decimal    import *
 from   decimal    import Decimal

 # Local HTML files to scan (presumably saved Google result pages -- the
 # filter below drops hrefs containing 'google'; confirm against the data).
 product = ['index1.html', 'index2.html', 'index3.html']

 for prod in product:
     # The with-statement closes the file on exit, so no explicit close()
     # call is needed afterwards.
     with open(prod, "r") as f:
         page_html = f.read()

     soup = BeautifulSoup(page_html, 'html.parser')

     # A string passed as the second argument of find_all() matches on CSS
     # class, so this selects every <div class="r">.
     for div in soup.find_all('div', 'r'):
         # href=True limits the search to anchors that actually carry an
         # href attribute; indexing link['href'] on one without it would
         # raise KeyError.
         for link in div.find_all('a', href=True):
             href = link['href']
             # Skip any href containing '#' or 'google'; print the rest.
             if '#' in href or 'google' in href:
                 continue
             print(href)
	import requests
	import csv
	from bs4 import BeautifulSoup
	#from decimal import *
	from decimal import Decimal

	# Local HTML files to scan (presumably saved Google result pages -- the
	# filter below drops hrefs containing 'google'; confirm against the data).
	product = ['index1.html', 'index2.html', 'index3.html']

	for prod in product:
	    # The with-statement closes the file on exit, so no explicit close()
	    # call is needed afterwards.
	    with open(prod, "r") as f:
	        page_html = f.read()

	    soup = BeautifulSoup(page_html, 'html.parser')

	    # A string passed as the second argument of find_all() matches on CSS
	    # class, so this selects every <div class="r">.
	    for div in soup.find_all('div', 'r'):
	        # href=True limits the search to anchors that actually carry an
	        # href attribute; indexing link['href'] on one without it would
	        # raise KeyError.
	        for link in div.find_all('a', href=True):
	            href = link['href']
	            # Skip any href containing '#' or 'google'; print the rest.
	            if '#' in href or 'google' in href:
	                continue
	            print(href)