lueck · October 23, 2025 10:04
diff --git a/.gitignore b/.gitignore
 *~

 /dist
 /output
 __pycache__
diff --git a/about.md b/about.md
diff --git a/cases.tsv b/cases.tsv
diff --git a/driver.py b/driver.py
 import subprocess


 def cmd(command, outfname, errfname, kase):
    """Run an endpoint command directly, without a shell.

    command is the argument vector as a list of strings, like in
    ["ls", "-l"]. Standard output of the process is written to the
    file outfname and standard error to the file errfname; both files
    are created or truncated. A non-zero exit status of the command is
    not treated as an error.

    kase is the test case. It is not used here; it is part of the
    signature shared by all driver functions.

    """
    with open(outfname, "w+") as stdout_file:
        with open(errfname, "w+") as stderr_file:
            subprocess.run(
                command,
                stdout = stdout_file,
                stderr = stderr_file,
                shell = False,
                check = False)


 def shell(command, outfname, errfname, kase):
    """Run an endpoint command through the system shell.

    command is a list of strings. Its first item is handed to the
    shell verbatim, so it may itself be a shell snippet like
    "java -jar app.jar". All further items are parameters: each one is
    quoted with shlex.quote (POSIX shell rules) before the items are
    joined with blanks into the single command string. Without the
    quoting, characters like "&", "?", "(" or blanks in test case
    values would be interpreted by the shell instead of reaching the
    command. Parameters made of harmless characters only are passed
    unchanged.

    Standard output of the process is written to the file outfname and
    standard error to the file errfname; both files are created or
    truncated. A non-zero exit status is not treated as an error.

    kase is the test case. It is not used here; it is part of the
    signature shared by all driver functions.

    """
    import shlex  # local import: the module header only imports subprocess

    # Keep the command itself shell-interpreted, protect only the parameters.
    words = list(command[:1]) + [shlex.quote(param) for param in command[1:]]
    with open(outfname, "w+") as outf, open(errfname, "w+") as errf:
        subprocess.run(" ".join(words), stdout = outf, stderr = errf, shell = True, check = False)



 # Registry of the available driver functions. They are looked up by
 # their __name__, so the functions themselves need not be imported by
 # the caller.
 DRIVER_FUNCTIONS = [ cmd, shell ]
diff --git a/params.py b/params.py
 # Columns of a test case that are never passed to the endpoint command.
 NO_PARAMS = ["endpoint", "project", "#project"]


 def equals(kase, quote = ""):
    """Makes parameters of style key1=value1 key2=value2 ...

    kase maps column names to values. Columns listed in NO_PARAMS and
    columns with an empty value are skipped. A leading "#" of a column
    name is dropped. Each value is wrapped in quote on both sides, the
    same way the other parameter functions do it.

    Returns a list with one "key=value" string per parameter.

    """
    rc = []
    for k, v in kase.items():
        if k in NO_PARAMS or not v:
            continue
        if k.startswith("#"):
            k = k[1:]
        # The quote goes on both sides of the value, not only after it.
        rc.append(k + "=" + quote + v + quote)
    return rc


 def query(kase, quote = ""):
    """Makes query-style parameters ?key1=value1&key2=value2 ...

    kase maps column names to values. Columns listed in NO_PARAMS and
    columns with an empty value are skipped. A leading "#" of a column
    name is dropped and each value is wrapped in quote.

    Returns a list holding exactly one string: a "?" followed by the
    key=value pairs joined with "&". Without any parameter the string
    is just "?".

    """
    pairs = []
    for name, value in kase.items():
        if name in NO_PARAMS or not value:
            continue
        key = name[1:] if name.startswith("#") else name
        pairs.append(key + "=" + quote + value + quote)
    return ["?" + "&".join(pairs)]


 def bash(kase, quote = ""):
    """Makes bash-style parameters: --key1 value1 --key2 value2 ...

    Every column of the test case kase that has a non-empty value and
    is not listed in NO_PARAMS contributes two list items: the option
    name "--key" and the value wrapped in quote. A leading "#" of the
    column name is dropped.

    Returns the flat list of all items.

    """
    args = []
    for name, value in kase.items():
        if name not in NO_PARAMS and value:
            option = name[1:] if name.startswith("#") else name
            args.extend(("--" + option, quote + value + quote))
    return args



 def java(kase, quote = ""):
    """Makes parameters in Java-properties style -Dkey1=value1 -Dkey2=value2 ...

    Columns of the test case kase that are listed in NO_PARAMS or have
    an empty value are left out. A leading "#" of a column name is
    dropped and each value is wrapped in quote.

    Returns a list with one "-Dkey=value" string per parameter.

    """
    return [
        "-D" + (name[1:] if name.startswith("#") else name) + "=" + quote + value + quote
        for name, value in kase.items()
        if name not in NO_PARAMS and value
    ]


 # Registry of the available parameter styles. The functions are looked
 # up by their __name__, so they need not be imported by the caller.
 PARAM_FUNCTIONS = [ equals, query, bash, java ]
diff --git a/run.py b/run.py
 import argparse
 import csv
 import sys
 import os

 # The functions do not need to be in scope! We only access them via the function lists.
 from params import PARAM_FUNCTIONS, NO_PARAMS
 from driver import DRIVER_FUNCTIONS

 # Names of the parameter functions and a lookup table from name to function.
 param_func_names = [ fun.__name__ for fun in PARAM_FUNCTIONS ]
 param_func_dict = { fun.__name__: fun for fun in PARAM_FUNCTIONS }

 # The same for the driver functions.
 driver_func_names = [ fun.__name__ for fun in DRIVER_FUNCTIONS ]
 driver_func_dict = { fun.__name__: fun for fun in DRIVER_FUNCTIONS }


 # URL appended to the epilog of the --help output.
 # NOTE(review): it is empty, so the epilog ends right after "For more info see ".
 PROJECT_URL = ""


 # Command line interface of the test runner.
 cli = argparse.ArgumentParser(
    prog = os.path.basename(__file__),
    description = "Make output for the DTS community test cases running the configured implementation",
    epilog = "For more info see " + PROJECT_URL)

 # Input with the test cases; argparse opens the file for reading.
 cli.add_argument(
    "-i",
    "--infile",
    metavar = "FILE",
    type = argparse.FileType("r"),
    default = sys.stdin,
    help = "CSV/TSV file with test cases, one case per line. Defaults to stdin")

 # Field delimiter of the input; handed to csv.DictReader by run().
 cli.add_argument(
    "--delim",
    metavar = "CHARACTER",
    default = "\t",
    help = "Delimiter. Default to TAB for TSV input.")

 # Command used for test cases whose endpoint column is "navigation".
 cli.add_argument(
    "-n",
    "--navigation",
    metavar = "COMMAND",
    default = "./navigation.sh",
    help = "Command to be called for a test case on the navigation endpoint. Defaults to ./navigation.sh")

 # Command used for test cases whose endpoint column is "document".
 cli.add_argument(
    "-d",
    "--document",
    metavar = "COMMAND",
    default = "./document.sh",
    help = "Command to be called for a test case on the document endpoint. Default to ./document.sh")

 # Base directory; run() creates one subdirectory per project below it.
 cli.add_argument(
    "-o",
    "--outputDir",
    metavar = "DIRECTORY",
    default = "output",
    help = "Base directory of the output files. Default to: output")


 # Parameter style: the name of one of the functions in PARAM_FUNCTIONS.
 # The first function of the list is the default.
 cli.add_argument(
    "-p",
    "--parameterStyle",
    metavar = "FUNCTION_NAME",
    default = param_func_names[0],
    choices = param_func_names,
    help = "How parameters are passed to the endpoint commands. Allowed values: " + str(param_func_names) + ". Default to: " + param_func_names[0])

 # Driver: the name of one of the functions in DRIVER_FUNCTIONS.
 # The first function of the list is the default.
 cli.add_argument(
    "-r",
    "--driver",
    metavar = "FUNCTION_NAME",
    default = driver_func_names[0],
    choices = driver_func_names,
    help = "How the endpoint command is called. Allowed values: " + str(driver_func_names) + ". Defaults to: " + driver_func_names[0])

 # Log destination; a given file is opened with mode "w+", i.e. an
 # existing file is truncated.
 cli.add_argument(
    "--log",
    metavar = "FILE",
    default = sys.stderr,
    type = argparse.FileType("w+"),
    help = "Log file. Defaults to stderr")


 def run(input, navigation, document, paramfun = PARAM_FUNCTIONS[0], driverfun = DRIVER_FUNCTIONS[0], caseParamDelim = "\t", outdir = "out", logger = sys.stderr):
    """Run the endpoint command for every test case read from input.

    input is an open file (or any iterable of lines) in CSV/TSV format
    with a header line; caseParamDelim is its field delimiter. Each
    further line is one test case. The columns "#project", "endpoint"
    and "resource" are read by name here and in outfile().

    navigation and document are the commands called for test cases
    whose endpoint column is "navigation" or "document". paramfun
    turns a test case into the list of parameters appended to the
    command and driverfun executes the resulting list.

    For each case the directory outdir/<#project> is created if needed.
    The output is written to a file named after outfile(kase) with the
    suffix ".json" (navigation) or ".xml" (document); the error output
    goes to a file with the same base name and the suffix ".log".

    The test case, the command line and a note about a test case with
    an unknown endpoint are all printed to logger. Such a test case is
    skipped.

    """
    # endpoint name -> (command to call, suffix of the output file)
    endpoints = {
        "navigation": (navigation, ".json"),
        "document": (document, ".xml"),
    }
    cases = csv.DictReader(input, delimiter = caseParamDelim)
    for kase in cases:
        print(kase, file = logger)
        case_dir = os.path.join(outdir, kase["#project"])
        out_basename = os.path.join(case_dir, outfile(kase))
        os.makedirs(case_dir, exist_ok = True)
        if kase["endpoint"] not in endpoints:
            # Diagnostics belong to the logger like all other messages.
            print("unknown endpoint: " + kase["endpoint"], file = logger)
            continue
        command, suffix = endpoints[kase["endpoint"]]
        proc = [command] + paramfun(kase)
        print(proc, file = logger)
        driverfun(proc, out_basename + suffix, out_basename + ".log", kase)


 def outfile(kase):
    """Make the base name of the output file of a test case.

    The full resource URI cannot be part of a file name because of its
    slashes. Only the last segment of its path is used as a stand-in
    for the resource. The name looks like a request:

    ENDPOINT?resource=...&key=value&...

    Columns listed in NO_PARAMS, the resource column and columns with
    an empty value do not show up in the name. A leading "#" of a
    column name is dropped.

    The returned name has no suffix; the caller appends it.

    """
    resource_basename = os.path.basename(kase["resource"])
    parts = [kase["endpoint"] + "?resource=" + resource_basename]
    skipped = NO_PARAMS + ["#resource", "resource"]
    for name, value in kase.items():
        if name in skipped or not value:
            continue
        key = name[1:] if name.startswith("#") else name
        parts.append("&" + key + "=" + value)
    return "".join(parts)


 if __name__ == "__main__":
    # Parse the command line, log the effective settings and run all
    # test cases with the selected parameter style and driver.
    options = cli.parse_args()
    print(options, file = options.log)
    run(
        options.infile,
        options.navigation,
        options.document,
        paramfun = param_func_dict[options.parameterStyle],
        driverfun = driver_func_dict[options.driver],
        caseParamDelim = options.delim,
        outdir = options.outputDir,
        logger = options.log)
#project	endpoint	ref	start	end	down	tree	page	mediaType	resource
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/base_tei.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/double_tree_lb.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/generated_lb.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/lb_diff_ab.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/lb_same_ab.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/lb_uneven_ab.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/lb_uneven_ab_ending_node.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/multiple_tree.xml
MyDapytains	navigation					alpha			https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/multiple_tree.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/simple_doc.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/tei_with_two_traversing.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/tei_with_two_traversing_with_n.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/test_citeData.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/test_citeData_two_levels.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/xml_entity.xml
MyDapytains	navigation								https://raw.githubusercontent.com/distributed-text-services/MyDapytains/refs/heads/main/tests/tei/xml_entity_tail.xml
dts-transformations	navigation								https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	navigation					wadm			https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	navigation					page-milestones			https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	navigation					page-content-by-intersection			https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	navigation					page-content-xquery-like			https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	navigation					page-content-by-intersection-2			https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	navigation					page-level2-start-end			https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	navigation					page-hateoas			https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	document		p.1.start	p.1.end		page-hateoas			https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/john.xml
dts-transformations	navigation		Matt:1:3	Matt:2:2					https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation		Matt:1:3	Matt:2:3					https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation		Matt:1:1	Matt:2:3					https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation	Matt:2							https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation				1				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation				2				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation				3				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation				4				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation		Matt:1:3	Matt:2:2	()				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation	Matt:2			0				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation	Matt:2:2			0				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation	Matt			0				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation	Matt:2			1				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation	Matt:2:2			1				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
dts-transformations	navigation	Matt			1				https://raw.githubusercontent.com/SCDH/dts-transformations/refs/heads/main/test/matt.xml
	import subprocess


	def cmd(command, outfname, errfname, kase):
	    """Call command directly. The command name and the parameters are
	    passed as a list of strings, like in ["ls", "-l" ].

	    Standard output of the process is written to the file outfname
	    and standard error to the file errfname; both files are created
	    or truncated. A non-zero exit status is not treated as an error.
	    kase is the test case; it is not used here.

	    """
	    with open(outfname, "w+") as outf, open(errfname, "w+") as errf:
	        subprocess.run(command, stdout = outf, stderr = errf, shell = False, check = False)


	def shell(command, outfname, errfname, kase):
	    """Call command via the system shell.

	    command is a list of strings. Its first item is handed to the
	    shell verbatim, so it may itself be a shell snippet. All further
	    items are parameters: each one is quoted with shlex.quote (POSIX
	    shell rules) before the items are joined with blanks into the
	    single command string, like in "ls -l". Without the quoting,
	    characters like "&", "?", "(" or blanks in test case values
	    would be interpreted by the shell.

	    Standard output of the process is written to the file outfname
	    and standard error to the file errfname; both files are created
	    or truncated. A non-zero exit status is not treated as an error.
	    kase is the test case; it is not used here.

	    """
	    import shlex  # local import: the module header only imports subprocess

	    # Keep the command itself shell-interpreted, protect only the parameters.
	    words = list(command[:1]) + [shlex.quote(param) for param in command[1:]]
	    with open(outfname, "w+") as outf, open(errfname, "w+") as errf:
	        subprocess.run(" ".join(words), stdout = outf, stderr = errf, shell = True, check = False)



	# Registry of the available driver functions. They are looked up by
	# their __name__, so the functions themselves need not be imported by
	# the caller.
	DRIVER_FUNCTIONS = [ cmd, shell ]
	# Columns of a test case that are never passed to the endpoint command.
	NO_PARAMS = ["endpoint", "project", "#project"]


	def equals(kase, quote = ""):
	    """Makes parameters of style key1=value1 key2=value2 ...

	    kase maps column names to values. Columns listed in NO_PARAMS
	    and columns with an empty value are skipped. A leading "#" of a
	    column name is dropped. Each value is wrapped in quote on both
	    sides, the same way the other parameter functions do it.

	    Returns a list with one "key=value" string per parameter.

	    """
	    rc = []
	    for k, v in kase.items():
	        if k in NO_PARAMS or not v:
	            continue
	        if k.startswith("#"):
	            k = k[1:]
	        # The quote goes on both sides of the value, not only after it.
	        rc.append(k + "=" + quote + v + quote)
	    return rc


	def query(kase, quote = ""):
	    """Makes query-style parameters ?key1=value1&key2=value2 ...

	    kase maps column names to values. Columns listed in NO_PARAMS
	    and columns with an empty value are skipped. A leading "#" of a
	    column name is dropped and each value is wrapped in quote.

	    Returns a list holding exactly one string: a "?" followed by the
	    key=value pairs joined with "&". Without any parameter the
	    string is just "?".

	    """
	    pairs = []
	    for k, v in kase.items():
	        if k in NO_PARAMS or not v:
	            continue
	        if k.startswith("#"):
	            k = k[1:]
	        pairs.append(k + "=" + quote + v + quote)
	    return ["?" + "&".join(pairs)]


	def bash(kase, quote = ""):
	    """Makes bash-style parameters: --key1 value1 --key2 value2 ...

	    Every column of the test case kase that has a non-empty value
	    and is not listed in NO_PARAMS contributes two list items: the
	    option name "--key" and the value wrapped in quote. A leading
	    "#" of the column name is dropped.

	    Returns the flat list of all items.

	    """
	    rc = []
	    for k, v in kase.items():
	        if k in NO_PARAMS or not v:
	            continue
	        if k.startswith("#"):
	            k = k[1:]
	        rc += ["--" + k, quote + v + quote]
	    return rc



	def java(kase, quote = ""):
	    """Makes parameters in Java-properties style -Dkey1=value1 -Dkey2=value2 ...

	    Columns of the test case kase that are listed in NO_PARAMS or
	    have an empty value are left out. A leading "#" of a column name
	    is dropped and each value is wrapped in quote.

	    Returns a list with one "-Dkey=value" string per parameter.

	    """
	    rc = []
	    for k, v in kase.items():
	        if k in NO_PARAMS or not v:
	            continue
	        if k.startswith("#"):
	            k = k[1:]
	        rc += ["-D" + k + "=" + quote + v + quote]
	    return rc


	# Registry of the available parameter styles. The functions are looked
	# up by their __name__, so they need not be imported by the caller.
	PARAM_FUNCTIONS = [ equals, query, bash, java ]
	import argparse
	import csv
	import sys
	import os

	# The functions do not need to be in scope! We only access them via the function lists.
	from params import PARAM_FUNCTIONS, NO_PARAMS
	from driver import DRIVER_FUNCTIONS

	# Names of the parameter functions and a lookup table from name to function.
	param_func_names = [ fun.__name__ for fun in PARAM_FUNCTIONS ]
	param_func_dict = { fun.__name__: fun for fun in PARAM_FUNCTIONS }

	# The same for the driver functions.
	driver_func_names = [ fun.__name__ for fun in DRIVER_FUNCTIONS ]
	driver_func_dict = { fun.__name__: fun for fun in DRIVER_FUNCTIONS }


	# URL appended to the epilog of the --help output.
	# NOTE(review): it is empty, so the epilog ends right after "For more info see ".
	PROJECT_URL = ""


	# Command line interface of the test runner.
	cli = argparse.ArgumentParser(
	prog = os.path.basename(__file__),
	description = "Make output for the DTS community test cases running the configured implementation",
	epilog = "For more info see " + PROJECT_URL)

	# Input with the test cases; argparse opens the file for reading.
	cli.add_argument(
	"-i",
	"--infile",
	metavar = "FILE",
	type = argparse.FileType("r"),
	default = sys.stdin,
	help = "CSV/TSV file with test cases, one case per line. Defaults to stdin")

	# Field delimiter of the input; handed to csv.DictReader by run().
	cli.add_argument(
	"--delim",
	metavar = "CHARACTER",
	default = "\t",
	help = "Delimiter. Default to TAB for TSV input.")

	# Command used for test cases whose endpoint column is "navigation".
	cli.add_argument(
	"-n",
	"--navigation",
	metavar = "COMMAND",
	default = "./navigation.sh",
	help = "Command to be called for a test case on the navigation endpoint. Defaults to ./navigation.sh")

	# Command used for test cases whose endpoint column is "document".
	cli.add_argument(
	"-d",
	"--document",
	metavar = "COMMAND",
	default = "./document.sh",
	help = "Command to be called for a test case on the document endpoint. Default to ./document.sh")

	# Base directory; run() creates one subdirectory per project below it.
	cli.add_argument(
	"-o",
	"--outputDir",
	metavar = "DIRECTORY",
	default = "output",
	help = "Base directory of the output files. Default to: output")


	# Parameter style: the name of one of the functions in PARAM_FUNCTIONS.
	# The first function of the list is the default.
	cli.add_argument(
	"-p",
	"--parameterStyle",
	metavar = "FUNCTION_NAME",
	default = param_func_names[0],
	choices = param_func_names,
	help = "How parameters are passed to the endpoint commands. Allowed values: " + str(param_func_names) + ". Default to: " + param_func_names[0])

	# Driver: the name of one of the functions in DRIVER_FUNCTIONS.
	# The first function of the list is the default.
	cli.add_argument(
	"-r",
	"--driver",
	metavar = "FUNCTION_NAME",
	default = driver_func_names[0],
	choices = driver_func_names,
	help = "How the endpoint command is called. Allowed values: " + str(driver_func_names) + ". Defaults to: " + driver_func_names[0])

	# Log destination; a given file is opened with mode "w+", i.e. an
	# existing file is truncated.
	cli.add_argument(
	"--log",
	metavar = "FILE",
	default = sys.stderr,
	type = argparse.FileType("w+"),
	help = "Log file. Defaults to stderr")


	def run(input, navigation, document, paramfun = PARAM_FUNCTIONS[0], driverfun = DRIVER_FUNCTIONS[0], caseParamDelim = "\t", outdir = "out", logger = sys.stderr):
	    """Run the endpoint command for every test case read from input.

	    input is an open file (or any iterable of lines) in CSV/TSV
	    format with a header line; caseParamDelim is its field
	    delimiter. Each further line is one test case. The columns
	    "#project", "endpoint" and "resource" are read by name here and
	    in outfile().

	    navigation and document are the commands called for test cases
	    whose endpoint column is "navigation" or "document". paramfun
	    turns a test case into the list of parameters appended to the
	    command and driverfun executes the resulting list.

	    For each case the directory outdir/<#project> is created if
	    needed. The output is written to a file named after
	    outfile(kase) with the suffix ".json" (navigation) or ".xml"
	    (document); the error output goes to a file with the same base
	    name and the suffix ".log".

	    The test case, the command line and a note about a test case
	    with an unknown endpoint are all printed to logger. Such a test
	    case is skipped.

	    """
	    # endpoint name -> (command to call, suffix of the output file)
	    endpoints = {
	        "navigation": (navigation, ".json"),
	        "document": (document, ".xml"),
	    }
	    cases = csv.DictReader(input, delimiter = caseParamDelim)
	    for kase in cases:
	        print(kase, file = logger)
	        case_dir = os.path.join(outdir, kase["#project"])
	        out_basename = os.path.join(case_dir, outfile(kase))
	        os.makedirs(case_dir, exist_ok = True)
	        if kase["endpoint"] not in endpoints:
	            # Diagnostics belong to the logger like all other messages.
	            print("unknown endpoint: " + kase["endpoint"], file = logger)
	            continue
	        command, suffix = endpoints[kase["endpoint"]]
	        proc = [command] + paramfun(kase)
	        print(proc, file = logger)
	        driverfun(proc, out_basename + suffix, out_basename + ".log", kase)


	def outfile(kase):
	    """The name of the output file must not contain the full resource
	    URI because of the slashes. We thus use the last segment of the
	    path part as fake resource in the file name.

	    ENDPOINT?resource=...&...

	    Columns listed in NO_PARAMS, the resource column and columns
	    with an empty value do not show up in the name. A leading "#" of
	    a column name is dropped.

	    The return value does not contain a suffix. It can be added by the
	    calling function.

	    """
	    resource_basename = os.path.basename(kase["resource"])
	    rc = kase["endpoint"] + "?resource=" + resource_basename
	    for k, v in kase.items():
	        if k in NO_PARAMS + ["#resource", "resource"] or not v:
	            continue
	        if k.startswith("#"):
	            k = k[1:]
	        rc += "&" + k + "=" + v
	    return rc


	if __name__ == "__main__":
	    # Parse the command line, log the effective settings and run all
	    # test cases with the selected parameter style and driver.
	    args = cli.parse_args()
	    print(args, file = args.log)
	    run(args.infile,
	        args.navigation,
	        args.document,
	        paramfun = param_func_dict[args.parameterStyle],
	        driverfun = driver_func_dict[args.driver],
	        caseParamDelim = args.delim,
	        outdir = args.outputDir,
	        logger = args.log)