Last active
October 8, 2024 20:04
-
-
Save aufi/e94bce5a8c66e06e7935f61a96b128a4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import csv
import json
import os
import sys
from dataclasses import dataclass
from pprint import pprint

import requests
# Requires Python >= 3.9 (dataclasses and built-in generic types such as list[str])
# Command-line interface. Parsed once at import time; the resulting `args`
# namespace is read by the rest of the script (args.repo, args.org, args.verbose).
parser = argparse.ArgumentParser(description='Github markdown docs crawler.')
parser.add_argument('-r', '--repo', type=str, nargs='?', default='',
                    help='Repository name (example: org/reponame)')
parser.add_argument('-o', '--org', type=str, nargs='?', default='',
                    help='Organization name')
# store_true is the built-in spelling of "const=True, default=False".
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                    help='Print verbose output (including all API requests).')
args = parser.parse_args()
############################################################
# Root URL that every API request path is appended to.
BASE_HOSTNAME = "https://api.github.com"
# API token, taken from the GITHUB_TOKEN environment variable so that no
# secret has to be written into this file. Empty string when the variable is unset.
TOKEN = os.environ.get("GITHUB_TOKEN", "")
# File-name suffix that marks a file as documentation.
EXT = ".md"  # consider adoc etc. or regex query expression
############################################################
@dataclass
class DocEntry:
    """Record describing one documentation file found in a repository.

    The field order is significant: it defines both the positional
    constructor arguments and the column order of the CSV listing.
    """

    # "org/reponame" the file belongs to (author's note: maybe a duplicate).
    repo: str
    # Path of the file inside the repository.
    path: str
    # API URL associated with the file.
    url: str
    # Title derived from the path.
    title: str
    # Search keywords attached to the entry.
    keywords: list[str]
    # File content; stays empty unless explicitly provided.
    content: str = ""
############################################################ | |
def debugPrint(str):
    """Pretty-print a value, but only when the --verbose flag was given.

    The parameter name shadows the builtin ``str``; it is kept as-is so
    existing callers are unaffected.
    """
    if not args.verbose:
        return
    pprint(str)
def req(url, data = None):
    """Call an API endpoint and return its JSON-decoded response body.

    Args:
        url: Full URL to request.
        data: Optional request body. When truthy the request is a POST
            carrying ``data``; otherwise it is a GET.

    Returns:
        The response body parsed with ``json.loads``.

    On an unsuccessful response (``r.ok`` is false) the error is printed
    and the process exits with status 1.
    """
    print("Querying %s" % url)
    headers = {"Accept": "application/vnd.github+json"}
    # Only authenticate when a token is configured: "Bearer " followed by
    # nothing is a malformed credential, not an anonymous request.
    if TOKEN:
        headers["Authorization"] = "Bearer %s" % TOKEN
    if data:
        r = requests.post(url, data=data, headers=headers)
    else:
        r = requests.get(url, headers=headers)
    if not r.ok:
        print("ERROR", r, data)
        # sys.exit rather than the interactive-only exit() helper from `site`.
        sys.exit(1)
    resp_data = json.loads(r.text)
    debugPrint(r)
    debugPrint(resp_data)
    return resp_data
# TODO: look up the repository's default branch instead of hard-coding "main":
#def find_default_branch(repo):
# req("%s/git/trees/main?recursive=1" % repo)
def list_repo_docs(repo):
    """Return a DocEntry for every documentation file on a repo's main branch.

    Requests the recursive git tree of branch ``main`` and keeps the file
    entries whose path ends with ``EXT``.

    Args:
        repo: Repository in "org/reponame" form.

    Returns:
        A list with one ``process_doc`` result per matching file.
    """
    files = req("%s/repos/%s/git/trees/main?recursive=1" % (BASE_HOSTNAME, repo))
    # A recursive listing that exceeds the API limits comes back incomplete
    # and flagged as truncated; say so instead of silently missing docs.
    if files.get('truncated'):
        print("WARNING: tree listing for %s was truncated; some docs may be missing" % repo,
              file=sys.stderr)
    # Tree entries also include directories ("tree"); only files ("blob")
    # can be documents, even if a directory name happens to end with EXT.
    return [
        process_doc(repo, entry)
        for entry in files['tree']
        if entry.get('type') == 'blob' and entry['path'].endswith(EXT)
    ]
def process_doc(repo, doc):
    """Build the DocEntry for one documentation file of a repository.

    Args:
        repo: Repository in "org/reponame" form.
        doc: Tree entry of the file; only its ``'path'`` key is read.

    Returns:
        A DocEntry whose title is the lower-cased path with the ``EXT``
        suffix removed and "/" replaced by "_", and whose keywords are the
        repository name (without the org) and that title. ``content`` is
        left at its default.
    """
    from urllib.parse import quote

    path = doc['path']
    # The previous ".../git/trees/main/<path>" form was marked as wrong by the
    # author; files are addressed through the contents endpoint. The path is
    # percent-encoded ("/" is kept) so spaces and similar characters are safe.
    doc_url = "%s/repos/%s/contents/%s?ref=main" % (BASE_HOSTNAME, repo, quote(path))
    # Strip only the trailing extension: split(EXT)[0] would cut the title at
    # the first ".md" occurring anywhere in the path.
    title = path.removesuffix(EXT).replace("/", "_").lower()
    keywords = [
        repo.split("/")[-1],  # skip org name
        title,  # omit extension
    ]
    # TODO: fetch and analyze the file to fill in `content`; this needs a raw
    # (non-JSON) request, which req() cannot do because it always JSON-decodes.
    return DocEntry(repo=repo, path=path, url=doc_url, keywords=keywords, title=title)
def print_doc_list(docs):
    """Write the captured docs to stdout as CSV, preceded by a heading line.

    The CSV header row is taken from the field annotations of the first
    entry, followed by one row of attribute values per entry.

    Args:
        docs: Sequence of dataclass-like records (e.g. DocEntry). May be
            empty, in which case only the heading line is printed.
    """
    print("Captured docs:")
    if not docs:
        # Nothing to list; docs[0] below would raise IndexError.
        return
    csvw = csv.writer(sys.stdout, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    csvw.writerow(docs[0].__annotations__.keys())
    csvw.writerows(vars(doc).values() for doc in docs)
############################################################# | |
# Script entry: crawl the repository named by --repo and print what was found,
# first as a pretty-printed list and then as CSV.
if args.repo:
    docs = list_repo_docs(args.repo)
    pprint(docs)
    print_doc_list(docs)
elif args.org:
    # --org is accepted by the parser but no organization crawl exists yet;
    # report that instead of silently doing nothing.
    print("Crawling a whole organization (--org) is not implemented yet; use --repo org/reponame.",
          file=sys.stderr)
else:
    # No target given: show how to invoke the script.
    parser.print_usage(sys.stderr)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
repo | path | url | title | keywords | content | |
---|---|---|---|---|---|---|
aufi/tackle2-hub | README.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/README.md | readme | ['tackle2-hub', 'readme'] | ||
aufi/tackle2-hub | docs/README.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/README.md | docs_readme | ['tackle2-hub', 'docs_readme'] | ||
aufi/tackle2-hub | docs/addon-guide.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/addon-guide.md | docs_addon-guide | ['tackle2-hub', 'docs_addon-guide'] | ||
aufi/tackle2-hub | docs/bucket.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/bucket.md | docs_bucket | ['tackle2-hub', 'docs_bucket'] | ||
aufi/tackle2-hub | docs/files.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/files.md | docs_files | ['tackle2-hub', 'docs_files'] | ||
aufi/tackle2-hub | docs/questionnaire-yaml.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/questionnaire-yaml.md | docs_questionnaire-yaml | ['tackle2-hub', 'docs_questionnaire-yaml'] | ||
aufi/tackle2-hub | docs/reaping.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/reaping.md | docs_reaping | ['tackle2-hub', 'docs_reaping'] | ||
aufi/tackle2-hub | docs/test-api-matrix.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/docs/test-api-matrix.md | docs_test-api-matrix | ['tackle2-hub', 'docs_test-api-matrix'] | ||
aufi/tackle2-hub | hack/README.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/hack/README.md | hack_readme | ['tackle2-hub', 'hack_readme'] | ||
aufi/tackle2-hub | hack/cmd/addon/README.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/hack/cmd/addon/README.md | hack_cmd_addon_readme | ['tackle2-hub', 'hack_cmd_addon_readme'] | ||
aufi/tackle2-hub | hack/tool/README.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/hack/tool/README.md | hack_tool_readme | ['tackle2-hub', 'hack_tool_readme'] | ||
aufi/tackle2-hub | test/README.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/test/README.md | test_readme | ['tackle2-hub', 'test_readme'] | ||
aufi/tackle2-hub | test/api/README.md | https://api.github.com/repos/aufi/tackle2-hub/git/trees/main/test/api/README.md | test_api_readme | ['tackle2-hub', 'test_api_readme'] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment