nsdevaraj · March 3, 2024 12:06
diff --git a/scrap.py b/scrap.py
 """
 This script scrapes a Blogspot blog by iterating back in its history.

 Usage:
    1. Provide blogspot links
    2. Press CTRL-C when you want to stop it.

 Note: Your IP-number may be temporarily banned from the Blogger service if over-used.
 Use on your own risk.
 """

 import requests
 import io
 import re
 from bs4 import BeautifulSoup 
 file1 = open('link.txt', 'r')
 lines = file1.readlines()
 counter = 0

 for l in lines:
    counter += 1 #Update the counter from proper filenames 
    page = requests.get(l)
    soup = BeautifulSoup(page.content, "html.parser") 
    div = soup.find_all("div", {"class": "entry"})
    with open(str(counter) + ".html", "w") as outputfile: #open file
        outputfile.write(str(div)) #write to file         
        print("Press CTRL-C to exit the program.")
	"""
	This script scrapes a Blogspot blog by iterating back in its history.

	Usage:
	1. Provide blogspot links
	2. Press CTRL-C when you want to stop it.

	Note: Your IP-number may be temporarily banned from the Blogger service if over-used.
	Use on your own risk.
	"""

	import requests
	import io
	import re
	from bs4 import BeautifulSoup
	file1 = open('link.txt', 'r')
	lines = file1.readlines()
	counter = 0

	for l in lines:
	counter += 1 #Update the counter from proper filenames
	page = requests.get(l)
	soup = BeautifulSoup(page.content, "html.parser")
	div = soup.find_all("div", {"class": "entry"})
	with open(str(counter) + ".html", "w") as outputfile: #open file
	outputfile.write(str(div)) #write to file
	print("Press CTRL-C to exit the program.")