Created
February 10, 2024 13:18
-
-
Save hansen033/1bb348879fabd51ba0491ee32bc5abd2 to your computer and use it in GitHub Desktop.
Open 5 random books in the New Taipei City Library WEBPAC catalog
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import requests | |
from bs4 import BeautifulSoup | |
import webbrowser | |
def get_random_mid(max_mid=2000000):
    """Return a random catalog record id ("mid") to probe.

    Args:
        max_mid: Largest id that may be returned (inclusive). The default of
            2000000 leaves headroom above 1107787, the biggest entry known
            when this script was written.

    Returns:
        A random integer ``n`` with ``0 <= n <= max_mid``.

    Raises:
        ValueError: If ``max_mid`` is negative (raised by ``random.randint``
            because the range is empty).
    """
    # randint includes both endpoints, so max_mid itself can be drawn.
    return random.randint(0, max_mid)
def check_title_exists(mid, timeout=10):
    """Report whether the WEBPAC catalog page for *mid* shows a title.

    The detail page for the given id is downloaded and its first ``<h2>``
    element is inspected; an entry is considered to exist when that element
    is present and contains non-blank text (the book title).

    Args:
        mid: Catalog record id to look up.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        True if the request succeeded and the page's first ``<h2>`` has
        non-blank text, otherwise False.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = f"https://webpac.tphcc.gov.tw/webpac/content.cfm?mid={mid}"
    response = requests.get(url, timeout=timeout)
    # An HTTP error page may carry its own <h2>; do not mistake it for a book.
    if not response.ok:
        return False
    soup = BeautifulSoup(response.content, "html.parser")
    # Look the heading up once instead of searching the document twice.
    heading = soup.find("h2")
    return heading is not None and bool(heading.text.strip())
def open_random_catalog_links(num_links):
    """Open *num_links* randomly chosen, existing catalog pages in the browser.

    Random ids are drawn with ``get_random_mid`` and probed with
    ``check_title_exists`` until enough existing entries have been found.
    Each hit is reported on stdout together with the number of probes it
    took since the previous hit, and its page is opened via ``webbrowser``.

    Args:
        num_links: Number of existing catalog pages to open.

    Returns:
        The total number of ids probed, hits and misses combined.
    """
    url_prefix = "https://webpac.tphcc.gov.tw/webpac/content.cfm?mid="
    hits = 0
    attempts_total = 0
    attempts_since_hit = 0

    while hits < num_links:
        candidate = get_random_mid()
        attempts_since_hit += 1

        # Miss: keep drawing; the attempt stays counted toward the next hit.
        if not check_title_exists(candidate):
            continue

        print(f"Found after {attempts_since_hit} tries")
        webbrowser.open(url_prefix + str(candidate))
        hits += 1

        # Fold this streak into the running total and start a new streak.
        attempts_total += attempts_since_hit
        attempts_since_hit = 0

    return attempts_total
# Driver: open five random catalog entries that actually exist, then report
# the share of probed ids that turned out to be real entries.
n = 5
ttl = open_random_catalog_links(n)
print(f"Success rate: {n / ttl}")

# Wait for a line of input before exiting (presumably so a console window
# launched by double-click stays open long enough to read the output).
print("Press a key to continue...")
input()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment