HariharanUmapathi · September 2, 2024 18:46
diff --git a/lwn_calendar_extract.py b/lwn_calendar_extract.py
 import requests
 from lxml import html
 import re
 # Calendar month to fetch; both values are interpolated into the request URL.
 year = "2024"
 month = "09"

 # Absolute XPath selecting every <a> inside the calendar table.
 # NOTE: this path is tied to the page's current markup and will silently
 # return no links if the layout changes.
 CALENDAR_LINKS_XPATH = '/html/body/div[4]/div[1]/div[2]/div/table//a'


 def is_external_url(href):
     """Return True when *href* is an absolute ``http://`` or ``https://`` URL.

     The previous check used a JavaScript-style pattern (``"/^http|https://(.*)/"``)
     with ``re.match``; the literal slashes meant plain ``http://`` links never
     matched and ``https://`` links only matched when followed by another ``/``.
     """
     return href.startswith(("http://", "https://"))


 def collect_external_links(pairs):
     """Return the unique external ``(href, text)`` pairs in sorted order.

     Args:
         pairs: iterable of ``(href, text)`` tuples; ``href`` may be ``None``
             or empty for anchors without a target, and such pairs are skipped.

     Returns:
         A sorted list of de-duplicated ``(href, text)`` tuples whose href is
         an absolute http(s) URL. Sorting keeps the output stable between runs
         (iterating a set directly yields an arbitrary order).
     """
     unique_links = {
         (href, text)
         for href, text in pairs
         if href and is_external_url(href)
     }
     return sorted(unique_links)


 def main():
     """Fetch the monthly calendar page and print its external links.

     On a non-200 response only the HTTP status code is printed.
     """
     # A timeout keeps the script from hanging forever on a stalled connection.
     response = requests.get(
         f'https://lwn.net/Calendar/Monthly/{year}-{month}/', timeout=30
     )

     # The status report used to sit in a for/else clause, so it printed 200
     # after every successful run and nothing at all on failure.
     if response.status_code != 200:
         print(response.status_code)
         return

     document_root = html.fromstring(response.content)
     links = document_root.xpath(CALENDAR_LINKS_XPATH)
     pairs = ((link.get('href'), link.text_content().strip()) for link in links)

     # Only links pointing outside the calendar itself are reported.
     for href, text in collect_external_links(pairs):
         print(f"Link: {href}, Text: {text}")


 if __name__ == "__main__":
     main()
	import requests
	from lxml import html
	import re
	# Calendar month to fetch; both values are interpolated into the request URL.
	year = "2024"
	month = "09"

	# Absolute XPath selecting every <a> inside the calendar table.
	# NOTE: this path is tied to the page's current markup and will silently
	# return no links if the layout changes.
	CALENDAR_LINKS_XPATH = '/html/body/div[4]/div[1]/div[2]/div/table//a'


	def is_external_url(href):
	    """Return True when *href* is an absolute ``http://`` or ``https://`` URL.

	    The previous check used a JavaScript-style pattern with literal slashes
	    and an escaped ``\\|`` passed to ``re.match``, which could not match any
	    real URL; a plain prefix test expresses the intent directly.
	    """
	    return href.startswith(("http://", "https://"))


	def collect_external_links(pairs):
	    """Return the unique external ``(href, text)`` pairs in sorted order.

	    Args:
	        pairs: iterable of ``(href, text)`` tuples; ``href`` may be ``None``
	            or empty for anchors without a target, and such pairs are skipped.

	    Returns:
	        A sorted list of de-duplicated ``(href, text)`` tuples whose href is
	        an absolute http(s) URL. Sorting keeps the output stable between runs
	        (iterating a set directly yields an arbitrary order).
	    """
	    unique_links = {
	        (href, text)
	        for href, text in pairs
	        if href and is_external_url(href)
	    }
	    return sorted(unique_links)


	def main():
	    """Fetch the monthly calendar page and print its external links.

	    On a non-200 response only the HTTP status code is printed.
	    """
	    # A timeout keeps the script from hanging forever on a stalled connection.
	    response = requests.get(
	        f'https://lwn.net/Calendar/Monthly/{year}-{month}/', timeout=30
	    )

	    # Report the status code only when the request failed; the block nesting
	    # was lost in this copy, so the if/else pairing is restored explicitly.
	    if response.status_code != 200:
	        print(response.status_code)
	        return

	    document_root = html.fromstring(response.content)
	    links = document_root.xpath(CALENDAR_LINKS_XPATH)
	    pairs = ((link.get('href'), link.text_content().strip()) for link in links)

	    # Only links pointing outside the calendar itself are reported.
	    for href, text in collect_external_links(pairs):
	        print(f"Link: {href}, Text: {text}")


	if __name__ == "__main__":
	    main()