Skip to content

Instantly share code, notes, and snippets.

@p-i-
Last active October 15, 2024 18:47
Show Gist options
  • Save p-i-/b2637eae7138ea547906aa3a14d0ffc7 to your computer and use it in GitHub Desktop.
Save p-i-/b2637eae7138ea547906aa3a14d0ffc7 to your computer and use it in GitHub Desktop.
Message titles scraped from Apple's coreaudio-api mailing list archive

🔸2001

🔹May 2001

🔹Jun 2001

🔹Jul 2001

🔹Aug 2001

🔹Sep 2001

🔹Oct 2001

🔹Nov 2001

🔹Dec 2001

🔸2002

🔹Jan 2002

🔹Feb 2002

🔹Mar 2002

🔹Apr 2002

🔹May 2002

🔹Jun 2002

🔹Jul 2002

🔹Aug 2002

🔹Sep 2002

🔹Oct 2002

🔹Nov 2002

🔹Dec 2002

🔸2003

🔹Jan 2003

🔹Feb 2003

🔹Mar 2003

🔹Apr 2003

🔹May 2003

🔹Jun 2003

🔹Jul 2003

🔹Aug 2003

🔹Sep 2003

🔹Oct 2003

🔹Nov 2003

🔹Dec 2003

🔸2004

🔹Jan 2004

🔹Feb 2004

🔹Mar 2004

🔹Apr 2004

🔹May 2004

🔹Jun 2004

🔹Jul 2004

🔹Aug 2004

🔹Sep 2004

🔹Oct 2004

🔹Nov 2004

🔹Dec 2004

🔸2005

🔹Jan 2005

🔹Feb 2005

🔹Mar 2005

🔹Apr 2005

🔹May 2005

🔹Jun 2005

🔹Jul 2005

🔹Aug 2005

🔹Sep 2005

🔹Oct 2005

🔹Nov 2005

🔹Dec 2005

🔸2006

🔹Jan 2006

🔹Feb 2006

🔹Mar 2006

🔹Apr 2006

🔹May 2006

🔹Jun 2006

🔹Jul 2006

🔹Aug 2006

🔹Sep 2006

🔹Oct 2006

🔹Nov 2006

🔹Dec 2006

🔸2007

🔹Jan 2007

🔹Feb 2007

🔹Mar 2007

🔹Apr 2007

🔹May 2007

🔹Jun 2007

🔹Jul 2007

🔹Aug 2007

🔹Sep 2007

🔹Oct 2007

🔹Nov 2007

🔹Dec 2007

🔸2008

🔹Jan 2008

🔹Feb 2008

🔹Mar 2008

🔹Apr 2008

🔹May 2008

🔹Jun 2008

🔹Jul 2008

🔹Aug 2008

🔹Sep 2008

🔹Oct 2008

🔹Nov 2008

🔹Dec 2008

🔸2009

🔹Jan 2009

🔹Feb 2009

🔹Mar 2009

🔹Apr 2009

🔹May 2009

🔹Jun 2009

🔹Jul 2009

🔹Aug 2009

🔹Sep 2009

🔹Oct 2009

🔹Nov 2009

🔹Dec 2009

🔸2010

🔹Jan 2010

🔹Feb 2010

🔹Mar 2010

🔹Apr 2010

🔹May 2010

🔹Jun 2010

🔹Jul 2010

🔹Aug 2010

🔹Sep 2010

🔹Oct 2010

🔹Nov 2010

🔹Dec 2010

🔸2011

🔹Jan 2011

🔹Feb 2011

🔹Mar 2011

🔹Apr 2011

🔹May 2011

🔹Jun 2011

🔹Jul 2011

🔹Aug 2011

🔹Sep 2011

🔹Oct 2011

🔹Nov 2011

🔹Dec 2011

🔸2012

🔹Jan 2012

🔹Feb 2012

🔹Mar 2012

🔹Apr 2012

🔹May 2012

🔹Jun 2012

🔹Jul 2012

🔹Aug 2012

🔹Sep 2012

🔹Oct 2012

🔹Nov 2012

🔹Dec 2012

🔸2013

🔹Jan 2013

🔹Feb 2013

🔹Mar 2013

🔹Apr 2013

🔹May 2013

🔹Jun 2013

🔹Jul 2013

🔹Aug 2013

🔹Sep 2013

🔹Oct 2013

🔹Nov 2013

🔹Dec 2013

🔸2014

🔹Jan 2014

🔹Feb 2014

🔹Mar 2014

🔹Apr 2014

🔹May 2014

🔹Jun 2014

🔹Jul 2014

🔹Aug 2014

🔹Sep 2014

🔹Oct 2014

🔹Nov 2014

🔹Dec 2014

🔸2015

🔹Jan 2015

🔹Feb 2015

🔹Mar 2015

🔹Apr 2015

🔹May 2015

🔹Jun 2015

🔹Jul 2015

🔹Aug 2015

🔹Sep 2015

🔹Oct 2015

🔹Nov 2015

🔹Dec 2015

🔸2016

🔹Jan 2016

🔹Feb 2016

🔹Mar 2016

🔹Apr 2016

🔹May 2016

🔹Jun 2016

🔹Jul 2016

🔹Aug 2016

🔹Sep 2016

🔹Oct 2016

🔹Nov 2016

🔹Dec 2016

🔸2017

🔹Jan 2017

🔹Feb 2017

🔹Mar 2017

🔹May 2017

🔹Jun 2017

🔹Jul 2017

🔹Aug 2017

🔹Sep 2017

🔹Oct 2017

🔹Nov 2017

🔹Dec 2017

🔸2018

🔹Jan 2018

🔹Feb 2018

🔹Mar 2018

🔹Apr 2018

🔹May 2018

🔹Jun 2018

🔹Jul 2018

🔹Aug 2018

🔹Sep 2018

🔹Oct 2018

🔹Nov 2018

🔹Dec 2018

🔸2019

🔹Jan 2019

🔹Feb 2019

🔹Mar 2019

🔹Apr 2019

🔹May 2019

🔹Jun 2019

🔹Jul 2019

🔹Aug 2019

🔹Sep 2019

🔹Oct 2019

🔹Nov 2019

🔹Dec 2019

🔸2020

🔹Jan 2020

🔹Feb 2020

🔹Mar 2020

🔹Apr 2020

🔹May 2020

🔹Jun 2020

🔹Jul 2020

🔹Sep 2020

🔹Oct 2020

🔹Nov 2020

🔸2021

🔹Jun 2021

🔹Jul 2021

🔹Aug 2021

🔹Dec 2021

🔸2022

🔹Jan 2022

🔹Feb 2022

🔹Mar 2022

🔹Apr 2022

🔹May 2022

🔹Jun 2022

🔹Jul 2022

🔸2023

🔹Jan 2023

🔹Feb 2023

🔹Jun 2023

🔹Jul 2023

🔸2024

🔹Jan 2024

🔹Oct 2024

# pip install requests beautifulsoup4
import requests
from bs4 import BeautifulSoup
import time
# Root URL of the coreaudio-api list archive; each month's index page is
# fetched from "{BASE_URL}/{year}/{month}/index.html".
BASE_URL = "https://lists.apple.com/archives/coreaudio-api"
# Inclusive range of years to scrape (iterated as range(START_YEAR, END_YEAR + 1)).
START_YEAR = 2000
END_YEAR = 2024 # Update to the current year
# Month abbreviations, used both as the URL path segment and in the output headings.
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Function to scrape messages from a single month
def scrape_year_month(year, month):
    """Scrape the thread-starting message titles for one month of the archive.

    Fetches ``{BASE_URL}/{year}/{month}/index.html`` and inspects the first
    link of every ``<li>`` element. A link is kept when its ``href`` contains
    ``'msg'`` and its title does not start with "Re:" or "Fwd:" (compared
    case-insensitively).

    Args:
        year: Year used as a URL path segment (e.g. ``2005``).
        month: Month abbreviation used as a URL path segment (e.g. ``"Jan"``).

    Returns:
        A list of ``{"topic": title, "url": absolute_url}`` dicts in page
        order. The list is empty when the request fails or the server
        answers with anything other than HTTP 200.
    """
    url = f"{BASE_URL}/{year}/{month}/index.html"
    print(f"⚡️ Fetching: {url}")
    try:
        # A timeout keeps a single stalled connection from hanging the run.
        response = requests.get(url, timeout=30)
    except requests.RequestException as e:
        print(f"❌ Error accessing {url}: {e}")
        return []

    if response.status_code != 200:
        print(f"❌ Page not found: {url}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    topic_list = []
    # The topics are in <li> elements inside <ul>
    for li in soup.find_all('li'):
        topic_link = li.find('a')
        if not topic_link:
            continue
        # .get() avoids a KeyError on anchors without an href (e.g. named
        # anchors), which previously discarded the whole month's results.
        href = topic_link.get('href')
        if not href or 'msg' not in href:
            continue
        topic_name = topic_link.get_text().strip()
        # Replies and forwards are skipped so only thread starters remain.
        if topic_name.lower().startswith(("re:", "fwd:")):
            continue
        topic_list.append({
            "topic": topic_name,
            "url": f"{BASE_URL}/{year}/{month}/{href}",  # Full URL assembly
        })
    return topic_list
# Function to scrape all years and months, and write the results to messages.md
def scrape_all_years():
    """Scrape every month from START_YEAR through END_YEAR into messages.md.

    For each month that yields at least one topic, writes a month heading
    followed by one Markdown link per topic. A year heading is written once,
    before the first non-empty month of that year; years with no topics
    produce no output at all. ``messages.md`` in the current working
    directory is overwritten.
    """
    # Explicit UTF-8: the headings contain emoji, which raise
    # UnicodeEncodeError under non-UTF-8 locale encodings (e.g. cp1252).
    with open("messages.md", "w", encoding="utf-8") as f:
        for year in range(START_YEAR, END_YEAR + 1):
            year_has_content = False  # Track if there are messages for this year
            for month in MONTHS:
                topics = scrape_year_month(year, month)
                if topics:
                    # Write the year header only once
                    if not year_has_content:
                        f.write(f"# 🔸{year}\n")
                        year_has_content = True
                    # Write the month header
                    f.write(f"### 🔹{month} {year}\n")
                    # Write each topic as a markdown link
                    f.writelines(
                        f"[{topic['topic']}]({topic['url']})\n" for topic in topics
                    )
                # Pause after every request to avoid overwhelming the server
                time.sleep(1)
# Script entry point: run the full scrape only when this file is executed
# directly, then report where the results were written.
if __name__ == "__main__":
    scrape_all_years()
    print("Scraping complete. Results written to messages.md")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment