Last active
July 17, 2021 11:38
-
-
Save ambv/01c2c4ef79515f158dea00ad93498696 to your computer and use it in GitHub Desktop.
Use Hypothesis for smoketests of https://github.com/python/cpython/pull/27091
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Runs PBTs on `bytes.count` and `bytes.find`. Also confirms results match Python 3.9. | |
How to use: | |
- build CPython 3.11 from source | |
- run ./python.exe -m ensurepip | |
- run ./python.exe -m pip install hypothesis | |
- run ./python.exe run_hypo.py # this file | |
Note: `MAX` value below generates tests that on my machine execute between | |
2 - 5 minutes. Decrease it if it's too slow for you. | |
""" | |
import atexit | |
from collections import Counter | |
import os | |
import subprocess | |
from tempfile import NamedTemporaryFile | |
from textwrap import dedent | |
import unittest | |
from hypothesis import given, settings, HealthCheck | |
from hypothesis.strategies import binary | |
# Size knob for the generated inputs: needles are at most MAX bytes and
# haystacks range from MAX + 1 to 16 * MAX bytes. Decrease it if the run is
# too slow.
MAX = 256

# Tallies how many times each test (and selected branches inside it) ran.
# Keys are string labels such as 't1' or 't2.count', hence Counter[str].
stats: Counter[str] = Counter()
# Short alias for the Hypothesis health check that is suppressed on the
# large-input test in this file.
too_slow = HealthCheck.too_slow
def confirm_count_on_python39(
    needle: bytes,
    haystack: bytes,
    count: int,
    found: int,
    *,
    interpreter: str = "python3.9",
) -> None:
    """Cross-check `bytes.count` / `bytes.find` results on another interpreter.

    Writes a tiny script that re-computes ``haystack.count(needle)`` and
    ``haystack.find(needle)`` and asserts they equal ``count`` and ``found``,
    then runs that script with ``interpreter``.

    Args:
        needle: The byte string that was searched for.
        haystack: The byte string that was searched in.
        count: Result of ``haystack.count(needle)`` on the interpreter under test.
        found: Result of ``haystack.find(needle)`` on the interpreter under test.
        interpreter: Executable used as the reference implementation.
            Defaults to ``"python3.9"``, looked up on ``PATH``.

    Raises:
        subprocess.CalledProcessError: The reference interpreter exited with a
            non-zero status, i.e. one of the assertions in the script failed.
        FileNotFoundError: ``interpreter`` could not be found.
    """
    content = dedent(
        f"""
        needle = {needle!r}
        haystack = {haystack!r}
        count = {count!r}
        found = {found!r}
        assert haystack.count(needle) == count
        assert haystack.find(needle) == found
        """
    ).lstrip()
    # delete=False: the file has to outlive the `with` block so it can be
    # closed (and therefore flushed) before the child process reads it.
    script = NamedTemporaryFile("w", suffix=".py", delete=False, encoding="utf-8")
    try:
        with script:
            script.write(content)
        subprocess.run([interpreter, script.name], check=True)
    finally:
        # Remove the script even when writing or the subprocess failed.
        os.unlink(script.name)
class TestCount(unittest.TestCase):
    """Property-based smoke tests for `bytes.count` and `bytes.find`."""

    @given(binary(max_size=MAX))
    def test_count_of_self_is_one(self, b):
        """Counting a byte string inside itself must yield exactly one."""
        stats['t1'] += 1
        self.assertEqual(b.count(b), 1)

    @settings(deadline=None, suppress_health_check=[too_slow])
    @given(binary(max_size=MAX), binary(min_size=MAX + 1, max_size=16 * MAX))
    def test_count_doesnt_crash(self, needle, haystack):
        """Exercise count/find on a short needle and a strictly longer haystack."""
        stats['t2'] += 1

        occurrences = haystack.count(needle)
        self.assertGreaterEqual(occurrences, 0)
        if occurrences:
            # A positive count means find() must locate the needle as well.
            stats['t2.count'] += 1
            first_index = haystack.find(needle)
            self.assertNotEqual(first_index, -1)
            confirm_count_on_python39(needle, haystack, occurrences, first_index)

        # The haystack is always longer than the needle, so searching for the
        # haystack inside the needle can never succeed.
        self.assertEqual(needle.count(haystack), 0)
        self.assertEqual(needle.find(haystack), -1)

        # Search for slices cut out of the haystack itself: each one is known
        # to occur at `middle`, so the first hit cannot come later than that.
        middle = len(haystack) // 2
        for width in range(1, 100, 3):
            window = haystack[middle:middle + width]
            first_hit = haystack.find(window)
            self.assertLessEqual(first_hit, middle)
            if first_hit == middle:
                stats['t2.found'] += 1
            # NOTE(review): nesting reconstructed from a flattened listing;
            # the cross-check is assumed to run on every iteration - confirm.
            window_count = haystack.count(window)
            confirm_count_on_python39(window, haystack, window_count, first_hit)
def print_stats():
    """Print the module-level `stats` counter to standard output."""
    print(stats)
if __name__ == "__main__":
    # Register the report before starting the tests so the counters are
    # printed when the interpreter exits after the unittest run.
    atexit.register(print_stats)
    unittest.main()
- `test_count_doesnt_crash` will have a very large minimum example size, with `len(haystack) >= 157`. Probably not worth doing anything about this, but FYI.
- Maybe restricting your bytestrings to a small alphabet could be interesting, by increasing collisions? e.g. with `from_regex(b"[abc]{257,}", fullmatch=True)`
- Driving this with a coverage-guided fuzzer would probably be interesting; I'd suggest Atheris since there's a lot of C code involved. If you want to try HypoFuzz for Python code though just let me know, OSS devs are welcome to a free copy.
Otherwise this looks good to me!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@Zac-HD, any simple ideas how to make those more interesting?