Created
May 29, 2022 18:49
-
-
Save niflostancu/999a2e2280ee043c0035fda73f7b15af to your computer and use it in GitHub Desktop.
Builds a DokuWiki table of contents from an exported projects .csv file (for the PM course): https://ocw.cs.pub.ro/courses/pm/prj2022
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import csv
import re
import sys
# Maps a regex matched (case-insensitively) against CSV header captions to the
# internal field name used for that column. Insertion order matters: when one
# caption matches several patterns, the first not-yet-mapped field wins.
COLUMN_PATTERNS = {
    r"nume\s+prenume": "name",
    r"grupa": "group",
    r"asistent": "assistant",
    r"titlu": "title",
    r"descriere": "description",
    r"wiki": "wiki_url",
}

# Recognizes a project page URL on the course wiki (scheme optional); group 1
# captures the slash-separated page path below the prj2022 namespace.
WIKI_URL_PATTERN = r"(?:https?://)?ocw\.cs\.pub\.ro/courses/pm/prj2022/(.+)"
def parse_csv_header(line, column_patterns=None):
    """Map each known field name to the index of its column in the CSV header.

    Args:
        line: The header row, as a sequence of caption strings.
        column_patterns: Optional mapping of regex -> field name. Defaults to
            the module-level ``COLUMN_PATTERNS`` table.

    Returns:
        A dict of field name -> zero-based column index.

    Raises:
        ValueError: If a field from the pattern table matched no caption.
    """
    if column_patterns is None:
        column_patterns = COLUMN_PATTERNS

    header_map = {}
    for index, caption in enumerate(line):
        caption = caption.strip()
        if not caption:
            continue
        for pattern, field in column_patterns.items():
            if field in header_map:
                continue  # already mapped by an earlier column
            if re.search(pattern, caption, re.IGNORECASE):
                header_map[field] = index
                break  # a column is assigned to at most one field

    for field in column_patterns.values():
        if field not in header_map:
            raise ValueError(
                f"CSV header didn't contain field '{field}'! Please check?"
            )
    return header_map
def parse_csv_line(line, header_map):
    """Convert one CSV data row into a dict keyed by field name.

    Args:
        line: The row, as a sequence of cell values.
        header_map: Mapping of field name -> column index, as returned by
            ``parse_csv_header``.

    Returns:
        A dict of field name -> stripped cell text. A field whose column
        index lies past the end of the row maps to the empty string.
    """
    return {
        field: str(line[index]).strip() if index < len(line) else ""
        for field, index in header_map.items()
    }
def build_doku_table(grouped_projects, wiki_url_pattern=None,
                     wiki_namespace="pm:prj2022"):
    """Render the grouped projects as DokuWiki markup, one table per group.

    Args:
        grouped_projects: Mapping of group name -> list of project dicts.
            Each project dict must provide the keys ``wiki_url``, ``title``,
            ``name``, ``assistant`` and ``description``.
        wiki_url_pattern: Regex whose group 1 captures the slash-separated
            page path of a wiki URL. Defaults to the module-level
            ``WIKI_URL_PATTERN``.
        wiki_namespace: DokuWiki namespace prepended to the page path when a
            URL matches the pattern.

    Returns:
        The markup as a single string (empty when there are no groups).
    """
    if wiki_url_pattern is None:
        wiki_url_pattern = WIKI_URL_PATTERN
    url_re = re.compile(wiki_url_pattern)

    chunks = []
    for group, items in grouped_projects.items():
        chunks.append(
            "\n"
            f"==== {group} ==== \n"
            "^ Proiect ^ Realizator ^ Asistent ^ Descriere ^ \n"
        )
        for item in items:
            url = item["wiki_url"]
            match = url_re.match(url)
            if match:
                # Turn the URL path into a page id; drop empty segments so a
                # trailing or doubled slash doesn't yield "foo:" or "a::b".
                segments = [part for part in match[1].split("/") if part]
                url = ":".join([wiki_namespace, *segments])
            # A table row must stay on one line: flatten newlines.
            description = item["description"].replace("\r", "").replace("\n", " ")
            chunks.append(
                f'| [[{url} | {item["title"]}]] | {item["name"]} '
                f'| {item["assistant"]} | {description} |\n'
            )
    return "".join(chunks)
def main(argv=None):
    """Read the projects CSV named on the command line and print the wiki table.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``.

    Returns:
        The process exit status: 0 on success, 1 when no CSV path was given.

    Raises:
        ValueError: If the CSV file has no header row.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        # Usage goes to stderr so it never mixes with the generated markup.
        print("Usage: make-wiki-table.py [PROJECTS_CSV]", file=sys.stderr)
        return 1

    grouped_projects = {}
    # utf-8-sig reads plain UTF-8 and also strips a leading BOM if present.
    with open(args[0], newline="", encoding="utf-8-sig") as f:
        reader = csv.reader(f, delimiter=",")
        header_row = next(reader, None)  # first row is the header
        if header_row is None:
            raise ValueError("Unable to extract header!")
        header = parse_csv_header(header_row)
        for row in reader:
            if len(row) < 3:
                continue  # skip blank / truncated rows
            project = parse_csv_line(row, header)
            if not project.get("wiki_url"):
                continue  # projects without a wiki page are not listed
            grouped_projects.setdefault(project["group"], []).append(project)

    print(build_doku_table(grouped_projects))
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment