""" | |
# Setup: | |
`python3 -m pip install bs4` | |
# Usage: | |
```python3 | |
# Change the episode_id_param and num_pages | |
python3 transcripts.py | |
``` | |
""" | |
from bs4 import BeautifulSoup
import urllib.request

episode_id_param = 104  # Forum id for the show on foreverdreaming.org; it's the `f` query parameter in the forum URL (matches `.*/?f=(\d+).*`).
num_pages = 7  # Number of listing pages for the show's forum.
start_param = 0  # (don't change) The first page of results starts at offset 0.
increment_by = 25  # (don't change) Each page lists 25 topics, so offsets go 0, 25, 50, ...
page = "https://transcripts.foreverdreaming.org/viewforum.php?f={}&start={}"  # (don't change)

# Build one listing URL per page, e.g. ...?f=104&start=0, ...?f=104&start=25, and so on.
page_urls = [page.format(episode_id_param, page_start) for page_start in range(start_param, num_pages * increment_by, increment_by)]
each_pages_html = [urllib.request.urlopen(_url) for _url in page_urls]

# Collect every href on the listing pages; topic links are filtered out below.
links = []
for _html in each_pages_html:
    soup = BeautifulSoup(_html, "html.parser")
    links.extend(link.get('href') for link in soup.findAll('a'))

# Fetch each transcript topic and write its paragraph text to a file named after the link.
for link in links:
    if link and "./viewtopic" in link:
        full_url = "https://transcripts.foreverdreaming.org/" + link.replace("./", "")
        episode_html = urllib.request.urlopen(full_url)
        soup = BeautifulSoup(episode_html, 'html.parser')
        p_elements = [elem.text for elem in soup.findAll('p')]
        with open(link.replace("./", ""), "w") as out_file:
            print("\n".join(p_elements), file=out_file)