Skip to content

Instantly share code, notes, and snippets.

@aufi
Last active October 8, 2024 20:04
Show Gist options
  • Save aufi/e94bce5a8c66e06e7935f61a96b128a4 to your computer and use it in GitHub Desktop.
Save aufi/e94bce5a8c66e06e7935f61a96b128a4 to your computer and use it in GitHub Desktop.
import argparse
import csv
import json
import os
import sys
from dataclasses import dataclass
from pprint import pprint

import requests
# Requires Python >= 3.9 (dataclasses and the built-in generic list[str] annotation).
# Command-line interface: which repository/organization to crawl and how
# verbose the run should be.
parser = argparse.ArgumentParser(description='Github markdown docs crawler.')
parser.add_argument(
    '-r', '--repo', type=str, nargs='?', default='',
    help='Repository name (example: org/reponame)',
)
parser.add_argument(
    '-o', '--org', type=str, nargs='?', default='',
    help='Organization name',
)
# store_true yields False by default and True when the flag is present.
parser.add_argument(
    '-v', '--verbose', dest='verbose', action='store_true',
    help='Print verbose output (including all API requests).',
)
# Parsed once at import time; the rest of the script reads `args` as a
# module-level global.
args = parser.parse_args()
############################################################
# Root URL of the GitHub REST API; every request URL is built on top of it.
BASE_HOSTNAME = "https://api.github.com"
# Access token sent as a Bearer credential. Read from the GITHUB_TOKEN
# environment variable so the secret does not have to be written into this
# file; stays an empty string when the variable is not set.
TOKEN = os.environ.get("GITHUB_TOKEN", "")
# File-name suffix that marks a documentation file.
EXT = ".md"  # consider adoc etc. or regex query expression
############################################################
@dataclass
class DocEntry:
    """Record describing a single documentation file found in a repository."""

    # Repository the file was found in (possibly redundant with `url`).
    repo: str
    # Path of the file inside the repository.
    path: str
    # API URL built for the file.
    url: str
    # Title derived from the path.
    title: str
    # Search keywords attached to the document.
    keywords: list[str]
    # File content; empty until something fills it in.
    content: str = ""
############################################################
def debugPrint(str):
    """Pretty-print a value, but only when the verbose flag is set.

    The parameter keeps its original name (which shadows the builtin) so
    existing keyword callers continue to work.
    """
    if not args.verbose:
        return
    pprint(str)
def req(url, data=None):
    """Send one API request and return the decoded JSON response body.

    A POST carrying ``data`` is issued when ``data`` is truthy; otherwise a
    GET is issued. The raw response and the decoded payload are passed to
    debugPrint.

    Args:
        url: Full URL to query.
        data: Optional request body; selects POST when truthy.

    Returns:
        The JSON-decoded response body.

    Exits the process with status 1 when the request cannot be completed or
    the server answers with a non-OK status.
    """
    print("Querying %s" % url)
    headers = {"Accept": "application/vnd.github+json"}
    if TOKEN:
        # Only attach credentials when a token is configured, rather than
        # sending an empty "Bearer " value.
        headers["Authorization"] = "Bearer %s" % TOKEN
    # Without a timeout a stalled connection would block the crawl forever.
    timeout_seconds = 30
    try:
        if data:
            r = requests.post(url, data=data, headers=headers, timeout=timeout_seconds)
        else:
            r = requests.get(url, headers=headers, timeout=timeout_seconds)
    except requests.RequestException as err:
        # Transport failures end the run the same way an HTTP error does.
        print("ERROR", err, data, file=sys.stderr)
        sys.exit(1)
    if not r.ok:
        # Diagnostics go to stderr so they do not mix with the CSV on stdout.
        print("ERROR", r, data, file=sys.stderr)
        sys.exit(1)
    respData = r.json()
    debugPrint(r)
    debugPrint(respData)
    return respData
# TODO: detect the repository's default branch instead of assuming "main".
#def find_default_branch(repo):
#    req("%s/git/trees/main?recursive=1" % repo)
def list_repo_docs(repo):
    """Return a DocEntry for every documentation file on a repository's main branch.

    Fetches the recursive git tree of the ``main`` branch and keeps the file
    entries whose path ends with EXT.

    Args:
        repo: Repository in "org/reponame" form.

    Returns:
        A list with one process_doc() result per matching file.
    """
    listing = req("%s/repos/%s/git/trees/main?recursive=1" % (BASE_HOSTNAME, repo))
    if listing.get('truncated'):
        # The API flags listings it had to cut short; the result is then incomplete.
        print(
            "WARNING: tree listing for %s was truncated; some docs may be missing" % repo,
            file=sys.stderr,
        )
    # Only 'blob' entries are files; a directory or submodule whose name
    # happens to end with EXT must not be reported as a document.
    return [
        process_doc(repo, entry)
        for entry in listing['tree']
        if entry.get('type') == 'blob' and entry['path'].endswith(EXT)
    ]
def process_doc(repo, doc):
    """Build the DocEntry describing one documentation file.

    Args:
        repo: Repository in "org/reponame" form.
        doc: Tree entry for the file; only its 'path' key is read.

    Returns:
        A DocEntry with url, title and keywords filled in; ``content`` keeps
        its default because the file body is not fetched yet.
    """
    from urllib.parse import quote

    path = doc['path']
    # Address the file through the repository contents endpoint, pinned to
    # the same branch the tree listing came from. quote() leaves "/" intact
    # and escapes characters that are not valid in a URL path.
    doc_url = "%s/repos/%s/contents/%s?ref=main" % (BASE_HOSTNAME, repo, quote(path))
    # Drop only the trailing extension; splitting on EXT would cut the title
    # at the first ".md" occurring anywhere in the path.
    title = path.removesuffix(EXT).replace("/", "_").lower()
    keywords = [
        repo.split("/")[-1],  # repository name without the org prefix
        title,  # path-derived title, extension omitted
    ]
    # TODO: fetch and analyze the file content (needs a raw, non-JSON
    # request, whereas req() decodes JSON).
    return DocEntry(repo=repo, path=path, url=doc_url, keywords=keywords, title=title)
def print_doc_list(docs):
    """Write the captured docs to stdout as CSV, preceded by a heading line.

    The header row holds the dataclass field names of the first entry and
    each following row holds one entry's values in the same field order.

    Args:
        docs: List of dataclass instances (DocEntry); may be empty, in which
            case only the heading line is printed.
    """
    from dataclasses import fields

    print("Captured docs:")
    if not docs:
        # Nothing to tabulate; indexing docs[0] below would raise IndexError.
        return
    # Take the column names from the dataclass definition so the header and
    # the row values are guaranteed to line up.
    columns = [f.name for f in fields(docs[0])]
    csvw = csv.writer(sys.stdout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csvw.writerow(columns)
    for doc in docs:
        csvw.writerow([getattr(doc, column) for column in columns])
#############################################################
# Script entry point: crawl the repository named on the command line, dump
# the collected entries, then print them as CSV.
if args.repo:
    docs = list_repo_docs(args.repo)
    pprint(docs)
    print_doc_list(docs)
elif args.org:
    # --org is accepted by the parser but nothing in this script crawls an
    # organization; report that instead of silently doing nothing.
    sys.exit("Crawling a whole organization (--org) is not implemented; pass --repo org/reponame instead.")
else:
    # No target given: show usage and exit with argparse's error status.
    parser.error("nothing to do: pass --repo org/reponame")
repo path url title keywords content
aufi/tackle2-hub README.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/README.md readme ['tackle2-hub', 'readme']
aufi/tackle2-hub docs/README.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/README.md docs_readme ['tackle2-hub', 'docs_readme']
aufi/tackle2-hub docs/addon-guide.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/addon-guide.md docs_addon-guide ['tackle2-hub', 'docs_addon-guide']
aufi/tackle2-hub docs/bucket.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/bucket.md docs_bucket ['tackle2-hub', 'docs_bucket']
aufi/tackle2-hub docs/files.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/files.md docs_files ['tackle2-hub', 'docs_files']
aufi/tackle2-hub docs/questionnaire-yaml.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/questionnaire-yaml.md docs_questionnaire-yaml ['tackle2-hub', 'docs_questionnaire-yaml']
aufi/tackle2-hub docs/reaping.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/reaping.md docs_reaping ['tackle2-hub', 'docs_reaping']
aufi/tackle2-hub docs/test-api-matrix.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/test-api-matrix.md docs_test-api-matrix ['tackle2-hub', 'docs_test-api-matrix']
aufi/tackle2-hub hack/README.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/hack/README.md hack_readme ['tackle2-hub', 'hack_readme']
aufi/tackle2-hub hack/cmd/addon/README.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/hack/cmd/addon/README.md hack_cmd_addon_readme ['tackle2-hub', 'hack_cmd_addon_readme']
aufi/tackle2-hub hack/tool/README.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/hack/tool/README.md hack_tool_readme ['tackle2-hub', 'hack_tool_readme']
aufi/tackle2-hub test/README.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/test/README.md test_readme ['tackle2-hub', 'test_readme']
aufi/tackle2-hub test/api/README.md https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/test/api/README.md test_api_readme ['tackle2-hub', 'test_api_readme']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment