-
-
Save ezfe/05ff86cb42ecdffcb9cc22f47664d4f7 to your computer and use it in GitHub Desktop.
import Foundation | |
import Ink | |
/// Normalizes an HTML fragment by piping it through the `tidy` command-line tool.
///
/// The tool is invoked with options that print only the document body and
/// suppress informational, warning, and error messages.
///
/// - Parameters:
///   - inputStr: The HTML fragment to feed to tidy on standard input.
///   - executablePath: Filesystem path of the `tidy` binary. Defaults to the
///     Homebrew Cellar location this script was originally written against.
/// - Returns: Whatever tidy wrote to standard output, decoded as UTF-8, or an
///   empty string when tidy could not be launched, its input could not be
///   written, or its output was not valid UTF-8.
func tidy(_ inputStr: String,
          executablePath: String = "/usr/local/Cellar/tidy-html5/5.6.0/bin/tidy") -> String {
    let task = Process()
    task.executableURL = URL(fileURLWithPath: executablePath)
    task.arguments = ["--show-body-only", "yes", "--show-info", "no", "--show-warnings", "no", "--show-errors", "0"]

    let input = Pipe()
    let output = Pipe()
    task.standardInput = input
    task.standardOutput = output
    // Nothing reads tidy's diagnostics, so discard them rather than leaving
    // them in a pipe that is never drained.
    task.standardError = FileHandle.nullDevice

    do {
        try task.run()
    } catch {
        // Missing or non-executable binary: report "no output" instead of crashing.
        return ""
    }

    let writer = input.fileHandleForWriting
    var wroteInput = true
    do {
        try writer.write(contentsOf: Data(inputStr.utf8))
    } catch {
        wroteInput = false
    }
    // Always close stdin so tidy sees end-of-file and terminates.
    try? writer.close()

    let outputData = output.fileHandleForReading.readDataToEndOfFile()
    task.waitUntilExit()

    guard wroteInput, let outputStr = String(data: outputData, encoding: .utf8) else {
        return ""
    }
    return outputStr
}
/// One entry decoded from the spec's JSON test list: a Markdown input paired
/// with the HTML it is expected to produce.
struct TestCase: Decodable {
    /// Markdown source handed to the parser.
    let markdown: String
    /// HTML the parser output is compared against.
    let html: String

    /// JSON keys read for each entry; any other keys in the payload are ignored.
    private enum CodingKeys: String, CodingKey {
        case markdown
        case html
    }
}
/// Fetches the JSON test list at `url` and decodes it into `TestCase` values.
///
/// On a download or decoding failure the error is written to standard error
/// and the process exits with status 1, since the script has nothing to run
/// without the test list.
func loadTestCases(from url: URL) -> [TestCase] {
    do {
        let data = try Data(contentsOf: url)
        return try JSONDecoder().decode([TestCase].self, from: data)
    } catch {
        FileHandle.standardError.write(Data("Failed to load test cases from \(url): \(error)\n".utf8))
        exit(1)
    }
}

// The URL is a compile-time literal, so a failed parse would be a programmer error.
let testURL = URL(string: "https://spec.commonmark.org/0.29/spec.json")!
let testCases = loadTestCases(from: testURL)
let mdParser = MarkdownParser()
let separator = "----------------------------------------------------------------"

var pass = 0
for testCase in testCases {
    let gotHTML = mdParser.html(from: testCase.markdown)

    // Exact match: no normalization needed.
    if gotHTML == testCase.html {
        pass += 1
        continue
    }

    // Otherwise compare the tidy-normalized forms. A blank tidy result is
    // never counted as a pass, so two blank outputs cannot match each other.
    let gotTidied = tidy(gotHTML)
    if !gotTidied.isEmpty && gotTidied == tidy(testCase.html) {
        pass += 1
        continue
    }

    // Failure report: input, expected HTML, actual HTML (newlines made visible).
    print("Input:\n\(testCase.markdown)")
    print(separator)
    print(testCase.html.replacingOccurrences(of: "\n", with: "\\n"))
    print(separator)
    print(gotHTML.replacingOccurrences(of: "\n", with: "\\n"))
    print("================================================================")
}
print("\(pass)/\(testCases.count) passed")
@mattt Definitely agree, that starts to become non-trivial to implement but I'm taking a look at at least reducing the number of failing tests that are trivial/non-consequential.
@mattt So I added a case checking for \n at the end versus not, bringing the passing count up to 122. Going to try running them through tidy then comparing next.
@mattt Looks like 202 pass when I run them through tidy. I've updated the script and the output gist to show these results.
Thanks for working on this! I just compared this to the output of the official tests on the CommonMark repo, and there are a few discrepancies. First, the repo has been updated with an additional test, meaning the test numbers are off by one for most of the file. After downloading the spec.txt that matches the json you're using, from the website, I get 189 out of 649 passing. It seems the problem is that if tidy hits an error, it outputs a blank string, leading `tidy(gotHTML) == tidy(testCase.html)` to return true, even though the HTML isn't actually being compared. The Python tests are using their own normalizing function. Look at test 609 as an example of the problem.
Here are my (bash) commands for running the python tests:
git clone https://github.com/commonmark/commonmark-spec.git
cd commonmark-spec
curl https://spec.commonmark.org/0.29/spec.txt -o spec.txt
python3 test/spec_tests.py -p "ink -" -s spec.txt
Note that this relies on a not-yet-merged modification to Ink to read stdin
@john-mueller Ah yes, HTML parsing errors would definitely be an issue, and this can definitely be improved. When blank tidy() output is treated as failing, the passing count decreases to 193.
@ezfe Thanks! Taking a quick look over everything, it looks like some of the failures here result from trivial differences in whitespace that wouldn't affect rendering (trailing newlines, spaces in self-closing `<hr />` tags, etc.). While instructive, I'd be interested to see a breakdown that distinguishes failures that affect how a document is rendered in a browser from those that don't.