Skip to content

Instantly share code, notes, and snippets.

@naufalso
Created March 13, 2025 04:02
Show Gist options
  • Save naufalso/ae2d1dd3dcbbdbd726e6c51b6d116332 to your computer and use it in GitHub Desktop.
Save naufalso/ae2d1dd3dcbbdbd726e6c51b6d116332 to your computer and use it in GitHub Desktop.
A simple Python function to fetch the README of a Git repository.
# Author: Naufal Suryanto
import requests
import re
from markdownify import markdownify as md
from urllib.parse import urlparse
def _resolve_raw_readme_url(page_url, href):
    """Turn a README link found on ``page_url`` into an absolute raw-file URL."""
    # Local import: the module's top-level imports only bring in ``urlparse``.
    from urllib.parse import urljoin

    # urljoin handles absolute, root-relative, path-relative and
    # protocol-relative hrefs, unlike manual scheme/netloc concatenation.
    absolute_url = urljoin(page_url, href)

    # Rewrite only the first "/blob/" *path segment* to "/raw/". A plain
    # substring replace over the whole URL would also mangle hosts, owners,
    # repositories or file names that merely contain "blob".
    parts = urlparse(absolute_url)
    raw_path = parts.path.replace("/blob/", "/raw/", 1)
    return parts._replace(path=raw_path).geturl()


def fetch_url_readme_from_git(url):
    """
    Fetch the README file content from a Git repository given a URL.

    The repository page is downloaded and converted to markdown, the first
    link whose label is "readme" (case-insensitive) is located, its target is
    resolved against ``url`` and switched from the "blob" view to the "raw"
    view, and the resulting file is downloaded.

    Args:
        url (str): The URL of the Git repository page.

    Returns:
        str: The content of the README file if found, otherwise an empty
        string. Any error (network failure, HTTP error status, parsing
        problem) is printed and also results in an empty string.
    """
    if not url:
        return ""

    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/133.0.0.0 Safari/537.36'
        ),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }

    try:
        # Fetch the original URL and convert its HTML to markdown
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        markdown_content = md(response.text, heading_style="ATX")
        markdown_content = re.sub(r'(\n\s*){3,}', '\n\n', markdown_content).strip()

        # Search for a markdown link labeled "readme"
        readme_match = re.search(r'\[readme\]\(([^)]+)\)', markdown_content, re.IGNORECASE)
        if not readme_match:
            return ""

        # A markdown link target may carry a title: (href "title"); keep the href only.
        href = readme_match.group(1).split(" ")[0]
        if not href:
            return ""

        # Build the absolute raw-file URL for the README
        readme_url = _resolve_raw_readme_url(url, href)

        # Fetch and return the README file content
        readme_response = requests.get(readme_url, headers=headers, timeout=10)
        readme_response.raise_for_status()
        return readme_response.text
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a string back.
        print(f"Error fetching README from {url}: {e}")
        return ""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment