Mahdisadjadi · October 22, 2016 23:28
diff --git a/html_remover.py b/html_remover.py
 import codecs # to read html
 from bs4 import BeautifulSoup # to parse html
 import glob # to find all files with a pattern
 # Rewrite every HTML file in the current working directory in place,
 # removing ".html" from the href of each anchor.
 pages = glob.glob("*.html")

 for page in pages:
     # Drop only the final extension so that a name containing extra dots
     # (e.g. "a.b.html") is reported as "a.b" rather than cut at the first dot.
     name = page.rsplit(".", 1)[0]
     print(name)

     # Read the whole document; the context manager closes the handle even
     # if reading fails. No encoding is given, so the platform default is
     # used, exactly as the previous codecs.open(page, 'r') call did.
     with open(page, "r") as source:
         text = source.read()

     soup = BeautifulSoup(text, "html.parser")  # parse html

     # href=True skips anchors that carry no href attribute (such as
     # <a name="...">), which would otherwise raise KeyError below.
     for link in soup.find_all("a", href=True):
         # Remove every ".html" occurrence from the link target.
         link["href"] = link["href"].replace(".html", "")

     html = soup.prettify()  # re-style the output

     # Write back to the very file that was read, not to a name rebuilt
     # from a truncated stem.
     with open(page, "w") as f:
         f.write(html)
	import codecs # to read html
	from bs4 import BeautifulSoup # to parse html
	import glob # to find all files with a pattern
	# Rewrite every HTML file in the current working directory in place,
	# removing ".html" from the href of each anchor.
	pages = glob.glob("*.html")

	for page in pages:
		# Drop only the final extension so that a name containing extra dots
		# (e.g. "a.b.html") is reported as "a.b" rather than cut at the first dot.
		name = page.rsplit(".", 1)[0]
		print(name)

		# Read the whole document; the context manager closes the handle even
		# if reading fails. No encoding is given, so the platform default is
		# used, exactly as the previous codecs.open(page, 'r') call did.
		with open(page, "r") as source:
			text = source.read()

		soup = BeautifulSoup(text, "html.parser")  # parse html

		# href=True skips anchors that carry no href attribute (such as
		# <a name="...">), which would otherwise raise KeyError below.
		for link in soup.find_all("a", href=True):
			# Remove every ".html" occurrence from the link target.
			link["href"] = link["href"].replace(".html", "")

		html = soup.prettify()  # re-style the output

		# Write back to the very file that was read, not to a name rebuilt
		# from a truncated stem.
		with open(page, "w") as f:
			f.write(html)