Last active
April 30, 2025 02:29
-
-
Save mirontoli/d08db01fb854ad880baede3a74a3ab85 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a simple Python script
# that loads all the pages of an Azure DevOps wiki through the REST APIs,
# including their content (in Markdown) and their remote URLs.
# The output can then be used in an AI solution.
from azure.identity import DefaultAzureCredential
import requests
import json
# Authenticate with the ambient Azure identity chain — run `az login` first
# (DefaultAzureCredential will also pick up a managed identity or service
# principal if one is configured).
credential = DefaultAzureCredential()
# Get a bearer token for Azure DevOps. The GUID is the fixed, well-known
# resource ID of the Azure DevOps service (identical for every organization).
azdo_scope = "499b84ac-1321-427f-aa17-267ca6975798/.default"
token = credential.get_token(azdo_scope).token
headers = {"Authorization": f"Bearer {token}"}
# Wiki coordinates — change these to target your own organization,
# project, and wiki.
organization = "tolle"
project = "my-project"
wiki_name = "my-knowledge"
base_url = f"https://dev.azure.com/{organization}/{project}"
# Base endpoint of the Wiki Pages REST API; every request below builds on it.
pages_base_url = f"{base_url}/_apis/wiki/wikis/{wiki_name}/pages"
# Recursive function to flatten the JSON structure | |
def flatten_pages(pages, result=None):
    """Flatten a nested wiki-page tree into a flat list of page records.

    Walks *pages* depth-first and collects one ``{"path", "remoteUrl"}``
    dict per page, skipping the wiki root (``"/"``). Each visited page's
    path is printed as a progress indicator.

    Args:
        pages: List of page dicts, each optionally carrying a ``subPages``
            list of children in the same shape.
        result: Accumulator list appended to in place; a fresh list is
            created when omitted (``None`` sentinel avoids the shared
            mutable-default pitfall).

    Returns:
        The accumulator list containing all collected page records.
    """
    result = [] if result is None else result
    for node in pages:
        path = node.get("path")
        # The root page ("/") is a container, not real content — skip it.
        if path != "/":
            print(f"Page path: {path}")
            result.append({"path": path, "remoteUrl": node.get("remoteUrl")})
        # Descend into children, if any, sharing the same accumulator.
        children = node.get("subPages")
        if children:
            flatten_pages(children, result)
    return result
# Fetch the whole page tree (including content) in a single recursive call.
url = f"{pages_base_url}?path=/&recursionLevel=full&includeContent=True&api-version=7.1"
response = requests.get(url, headers=headers)
response.raise_for_status()
wiki_pages = response.json()
flat_pages = flatten_pages([wiki_pages])
# Accumulate one record per page that actually has content.
pages_with_content = []
for page in flat_pages:
    # Fetch the individual page. Pass the path via `params` so requests
    # URL-encodes it — raw interpolation breaks on paths containing
    # spaces, '&', '#', or non-ASCII characters.
    response = requests.get(
        pages_base_url,
        headers=headers,
        params={
            "path": page["path"],
            "includeContent": "True",
            "api-version": "7.1",
        },
    )
    if response.status_code == 200:
        # Only parse the body on success: error responses may not be JSON
        # and would raise before the failure branch below could run.
        content = response.json().get("content")
        if content:
            pages_with_content.append(
                {
                    "content": content,
                    "path": page["path"],
                    "remoteUrl": page["remoteUrl"],
                }
            )
    else:
        print(
            f"Failed to fetch content for {page['remoteUrl']}: {response.status_code} - {response.text}"
        )
# Print the resulting array
print(json.dumps(pages_with_content, indent=2))
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.