Skip to content

Instantly share code, notes, and snippets.

@lenosi
Last active May 19, 2023 11:06
Show Gist options
  • Save lenosi/b9f35f2c6c28daf226f3800221f855fb to your computer and use it in GitHub Desktop.
Save lenosi/b9f35f2c6c28daf226f3800221f855fb to your computer and use it in GitHub Desktop.
Scraper for ActiveMQ Artemis releases
2.28.0:
release_date: '2023-02-03'
jdk_compatibility: 11+
2.27.1:
release_date: '2022-12-01'
jdk_compatibility: 11+
2.27.0:
release_date: '2022-11-14'
jdk_compatibility: 11+
2.26.0:
release_date: '2022-09-28'
jdk_compatibility: 11+
2.25.0:
release_date: '2022-09-08'
jdk_compatibility: 11+
2.24.0:
release_date: '2022-08-04'
jdk_compatibility: 11+
2.23.1:
release_date: '2022-06-21'
jdk_compatibility: 11+
2.23.0:
release_date: '2022-06-13'
jdk_compatibility: 11+
2.22.0:
release_date: '2022-05-05'
jdk_compatibility: 11+
2.21.0:
release_date: '2022-03-28'
jdk_compatibility: 11+
2.20.0:
release_date: '2021-12-21'
jdk_compatibility: 11+
2.19.1:
release_date: '2022-02-01'
jdk_compatibility: 8+
2.19.0:
release_date: '2021-10-18'
jdk_compatibility: '8'
2.18.0:
release_date: '2021-08-12'
jdk_compatibility: '8'
2.17.0:
release_date: '2021-02-16'
jdk_compatibility: '8'
2.16.0:
release_date: '2020-11-10'
jdk_compatibility: '8'
2.15.0:
release_date: '2020-08-31'
jdk_compatibility: '8'
2.14.0:
release_date: '2020-07-20'
jdk_compatibility: '8'
2.13.0:
release_date: '2020-05-26'
jdk_compatibility: '8'
2.12.0:
release_date: '2020-04-29'
jdk_compatibility: '8'
2.11.0:
release_date: '2020-01-15'
jdk_compatibility: '8'
2.10.1:
release_date: '2019-09-26'
jdk_compatibility: '8'
2.10.0:
release_date: '2019-09-05'
jdk_compatibility: '8'
2.9.0:
release_date: '2019-06-06'
jdk_compatibility: '8'
2.8.1:
release_date: '2019-05-22'
jdk_compatibility: '8'
2.8.0:
release_date: '2019-05-08'
jdk_compatibility: '8'
2.7.0:
release_date: '2019-03-20'
jdk_compatibility: '8'
2.6.4:
release_date: '2019-01-28'
jdk_compatibility: '8'
2.6.3:
release_date: '2018-09-08'
jdk_compatibility: '8'
2.6.2:
release_date: '2018-06-25'
jdk_compatibility: '8'
2.6.1:
release_date: '2018-06-14'
jdk_compatibility: '8'
2.6.0:
release_date: '2018-05-22'
jdk_compatibility: '8'
2.5.0:
release_date: '2018-03-21'
jdk_compatibility: '8'
2.4.0:
release_date: '2017-11-07'
jdk_compatibility: '8'
2.3.0:
release_date: '2017-09-14'
jdk_compatibility: '8'
2.2.0:
release_date: '2017-07-31'
jdk_compatibility: '8'
2.1.0:
release_date: '2017-05-15'
jdk_compatibility: '8'
2.0.0:
release_date: '2017-03-22'
jdk_compatibility: '8'
1.5.6:
release_date: '2018-02-26'
jdk_compatibility: '8'
1.5.5:
release_date: '2017-05-15'
jdk_compatibility: '8'
1.5.4:
release_date: '2017-03-31'
jdk_compatibility: '8'
1.5.3:
release_date: '2017-02-20'
jdk_compatibility: '8'
1.5.2:
release_date: '2017-01-25'
jdk_compatibility: '8'
1.5.1:
release_date: '2016-12-14'
jdk_compatibility: '8'
1.5.0:
release_date: '2016-11-10'
jdk_compatibility: '8'
1.4.0:
release_date: '2016-09-13'
jdk_compatibility: '7'
1.3.0:
release_date: '2016-06-17'
jdk_compatibility: '7'
1.2.0:
release_date: '2016-01-11'
jdk_compatibility: '7'
1.1.0:
release_date: '2015-10-05'
jdk_compatibility: '7'
1.0.0:
release_date: '2015-06-02'
jdk_compatibility: '7'
import requests
from bs4 import BeautifulSoup
import re
from datetime import datetime
import yaml
from urllib.parse import urljoin
from dateutil.parser import parse
def get_html_content(url, timeout=30):
    """Fetch *url* over HTTP and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout, requests waits indefinitely on an
            unresponsive host.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    with requests.Session() as session:
        response = session.get(url, timeout=timeout)
        # Fail loudly on error pages instead of handing their HTML to the
        # parser, which would silently yield an empty release list.
        response.raise_for_status()
        return response.text
def extract_version_date_jdk(html_content):
    """Extract release entries from the HTML of a downloads page.

    Every ``<h4>`` whose ``id`` contains ``"activemq-artemis"`` is treated
    as a release heading. The version is the first ``X.Y.Z`` number in the
    heading text, the release date is the parenthesized part of the heading
    (normalized through ``standardize_date``), and the JDK compatibility is
    the text of the first ``<strong>`` inside the next sibling ``<p>``.

    Args:
        html_content: HTML document as a string.

    Returns:
        A list of dicts with the keys ``'version'``, ``'release_date'`` and
        ``'jdk_compatibility'``, in document order. ``'release_date'`` and
        ``'jdk_compatibility'`` are ``None`` when the corresponding markup
        is missing. Headings without a version number are skipped.
    """
    version_pattern = re.compile(r"\d+\.\d+\.\d+")
    # Compiled once here instead of on every loop iteration.
    date_pattern = re.compile(r"\((.+)\)")
    soup = BeautifulSoup(html_content, "html.parser")
    h4_tags = soup.find_all(
        "h4", id=lambda tag_id: tag_id and "activemq-artemis" in tag_id
    )
    releases = []
    for h4_tag in h4_tags:
        heading = h4_tag.text
        version_match = version_pattern.search(heading)
        if not version_match:
            continue
        version = version_match.group()

        date_match = date_pattern.search(heading)
        date = date_match.group(1).strip() if date_match else None
        if date:
            date = standardize_date(date)

        # A heading may have no following <p>; treat that the same as a
        # paragraph without <strong> rather than raising AttributeError.
        p_tag = h4_tag.find_next_sibling("p")
        jdk_strong_tag = p_tag.find("strong") if p_tag is not None else None
        jdk = jdk_strong_tag.text.strip() if jdk_strong_tag else None

        releases.append({'version': version, 'release_date': date, 'jdk_compatibility': jdk})
    return releases
def standardize_date(date_str):
    """Convert a free-form date string to ``YYYY-MM-DD``.

    Args:
        date_str: Date text in any format the dateutil parser understands.

    Returns:
        The date formatted as ``YYYY-MM-DD``, or ``None`` when the text
        cannot be interpreted as a date.
    """
    try:
        date_obj = parse(date_str)
        return date_obj.strftime("%Y-%m-%d")  # Standardize to YYYY-MM-DD format
    except (ValueError, OverflowError):
        # dateutil signals unparseable text with ValueError and out-of-range
        # numeric components with OverflowError; both mean "no usable date".
        return None
# Define the base URL of the Apache ActiveMQ Artemis downloads page
base_url = 'https://activemq.apache.org/components/artemis/download/'

# Get the HTML content from the releases page
releases_url = base_url
html_content = get_html_content(releases_url)

# Get the HTML content from the past releases page.
# base_url ends with a slash, so urljoin appends 'past_releases' to it
# instead of replacing the last path segment.
past_releases_url = urljoin(base_url, 'past_releases')
past_html_content = get_html_content(past_releases_url)

# Parse both pages to extract version numbers, release dates, and JDK
# compatibility; entries from the current page come first.
releases = extract_version_date_jdk(html_content)
releases.extend(extract_version_date_jdk(past_html_content))

# Create a dictionary keyed by version number. If a version occurs more than
# once, the values of the later entry win.
release_data = {
    release['version']: {
        'release_date': release['release_date'],
        'jdk_compatibility': release['jdk_compatibility'],
    }
    for release in releases
}

# Write release data to a YAML file. The encoding is pinned to UTF-8 because
# allow_unicode=True emits non-ASCII characters unescaped, which would
# otherwise depend on the platform's default encoding.
output_file = 'release_data.yaml'
with open(output_file, 'w', encoding='utf-8') as file:
    yaml.dump(release_data, file, default_flow_style=False, allow_unicode=True, sort_keys=False)
print(f"Release data has been written to {output_file}.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment