Last active
April 30, 2025 02:29
-
-
Save mirontoli/d08db01fb854ad880baede3a74a3ab85 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a simple Python script
# that loads all the pages of an Azure DevOps wiki through the REST APIs,
# including their content (in Markdown) and their remote URLs.
# The output can then be used in an AI solution.
from azure.identity import DefaultAzureCredential
import requests
import json
# Authenticate with the ambient Azure identity chain — run `az login` first
# (DefaultAzureCredential will also pick up a managed identity or service
# principal if one is configured).
credential = DefaultAzureCredential()
# Get a bearer token for Azure DevOps. The GUID is the fixed, well-known
# resource ID of the Azure DevOps service (identical for every organization).
azdo_scope = "499b84ac-1321-427f-aa17-267ca6975798/.default"
token = credential.get_token(azdo_scope).token
headers = {"Authorization": f"Bearer {token}"}
# Wiki coordinates — change these to target your own organization,
# project, and wiki.
organization = "tolle"
project = "my-project"
wiki_name = "my-knowledge"
base_url = f"https://dev.azure.com/{organization}/{project}"
# Base endpoint of the Wiki Pages REST API; every request below builds on it.
pages_base_url = f"{base_url}/_apis/wiki/wikis/{wiki_name}/pages"
# Recursive function to flatten the JSON structure | |
def flatten_pages(pages, result=None):
    """Flatten a nested wiki-page tree into a flat list of page records.

    Walks *pages* depth-first and collects one ``{"path", "remoteUrl"}``
    dict per page, skipping the wiki root (``"/"``). Each visited page's
    path is printed as a progress indicator.

    Args:
        pages: List of page dicts, each optionally carrying a ``subPages``
            list of children in the same shape.
        result: Accumulator list appended to in place; a fresh list is
            created when omitted (``None`` sentinel avoids the shared
            mutable-default pitfall).

    Returns:
        The accumulator list containing all collected page records.
    """
    result = [] if result is None else result
    for node in pages:
        path = node.get("path")
        # The root page ("/") is a container, not real content — skip it.
        if path != "/":
            print(f"Page path: {path}")
            result.append({"path": path, "remoteUrl": node.get("remoteUrl")})
        # Descend into children, if any, sharing the same accumulator.
        children = node.get("subPages")
        if children:
            flatten_pages(children, result)
    return result
# Fetch the whole page tree (including content) in a single recursive call.
url = f"{pages_base_url}?path=/&recursionLevel=full&includeContent=True&api-version=7.1"
response = requests.get(url, headers=headers)
response.raise_for_status()
wiki_pages = response.json()
flat_pages = flatten_pages([wiki_pages])
# Accumulate one record per page that actually has content.
pages_with_content = []
for page in flat_pages:
    # Fetch the individual page. Pass the path via `params` so requests
    # URL-encodes it — raw interpolation breaks on paths containing
    # spaces, '&', '#', or non-ASCII characters.
    response = requests.get(
        pages_base_url,
        headers=headers,
        params={
            "path": page["path"],
            "includeContent": "True",
            "api-version": "7.1",
        },
    )
    if response.status_code == 200:
        # Only parse the body on success: error responses may not be JSON
        # and would raise before the failure branch below could run.
        content = response.json().get("content")
        if content:
            pages_with_content.append(
                {
                    "content": content,
                    "path": page["path"],
                    "remoteUrl": page["remoteUrl"],
                }
            )
    else:
        print(
            f"Failed to fetch content for {page['remoteUrl']}: {response.status_code} - {response.text}"
        )
# Print the resulting array
print(json.dumps(pages_with_content, indent=2))
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.