# A simple Python script that loads all pages of an Azure DevOps wiki
# through the Azure DevOps REST API, including their content (in Markdown)
# and their remote URLs. The result can then be used in an AI solution.
from azure.identity import DefaultAzureCredential
import requests
import json
# Run `az login` first so DefaultAzureCredential can pick up the CLI session
credential = DefaultAzureCredential()
# Get a bearer token for the Azure DevOps resource
# (499b84ac-1321-427f-aa17-267ca6975798 is the well-known Azure DevOps application ID)
azdo_scope = "499b84ac-1321-427f-aa17-267ca6975798/.default"
token = credential.get_token(azdo_scope).token
headers = {"Authorization": f"Bearer {token}"}
# Set the organization, project, and wiki to read from
organization = "tolle"
project = "my-project"
wiki_name = "my-knowledge"
base_url = f"https://dev.azure.com/{organization}/{project}"
pages_base_url = f"{base_url}/_apis/wiki/wikis/{wiki_name}/pages"
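# Optional sanity check (a sketch, not part of the original script): listing
# the project's wikis with the documented "Wikis - List" endpoint confirms
# that the token and URLs are correct. Uncomment to try it:
# wikis = requests.get(f"{base_url}/_apis/wiki/wikis?api-version=7.1", headers=headers)
# wikis.raise_for_status()
# print("Wikis in project:", [w["name"] for w in wikis.json()["value"]])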
# Recursive function to flatten the nested page tree returned by the API
def flatten_pages(pages, result=None):
    if result is None:
        result = []
    for page in pages:
        # Add the current page's path and remoteUrl to the result (skip the root "/")
        if page.get("path") != "/":
            print(f"Page path: {page.get('path')}")
            result.append(
                {
                    "path": page.get("path"),
                    "remoteUrl": page.get("remoteUrl"),
                }
            )
        # If there are subPages, recursively process them
        if "subPages" in page and page["subPages"]:
            flatten_pages(page["subPages"], result)
    return result
# Fetch the whole page tree in one call; the API returns content only for the
# requested page, so each page's content is fetched individually below
url = f"{pages_base_url}?path=/&recursionLevel=full&includeContent=True&api-version=7.1"
response = requests.get(url, headers=headers)
response.raise_for_status()
wiki_pages = response.json()
flat_pages = flatten_pages([wiki_pages])
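# flat_pages is now a flat list of dicts (illustrative values, not real data):
# [{"path": "/Home", "remoteUrl": "https://dev.azure.com/.../Home"}, ...]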
# Initialize a list to store the pages together with their content
pages_with_content = []
# Iterate through each page in the flattened list
for page in flat_pages:
    # Get the content of the page using the Azure DevOps API; passing the
    # path via params lets requests URL-encode spaces and special characters
    page_path = page["path"]
    response = requests.get(
        pages_base_url,
        headers=headers,
        params={
            "path": page_path,
            "includeContent": "True",
            "api-version": "7.1",
        },
    )
    # Check if the request was successful before parsing the body
    if response.status_code == 200:
        content = response.json().get("content")
        if content:
            pages_with_content.append(
                {
                    "content": content,
                    "path": page["path"],
                    "remoteUrl": page["remoteUrl"],
                }
            )
    else:
        print(
            f"Failed to fetch content for {page['remoteUrl']}: {response.status_code} - {response.text}"
        )
# Print the resulting list as JSON
print(json.dumps(pages_with_content, indent=2))
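# Example follow-up (a sketch, not part of the original script): persist the
# pages as JSON Lines so an embedding/RAG pipeline can ingest one record per
# line; the file name "wiki_pages.jsonl" is an arbitrary choice.
with open("wiki_pages.jsonl", "w", encoding="utf-8") as f:
    for page in pages_with_content:
        f.write(json.dumps(page) + "\n")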