Created
November 17, 2015 14:39
-
-
Save aclements/cd6cd1a93732099ff982 to your computer and use it in GitHub Desktop.
Symbolic bug finder for golang/go#13143
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# PYTHONPATH=/home/austin/r/stp/build/bindings/python | |
import sys, collections | |
import sym | |
_PageSize = 8192 | |
_PageShift = 13 | |
_HeapAllocChunk = 1 << 20 | |
_MaxArena32 = 2 << 30 | |
ptrSize = 4 | |
_PhysPageSize = 4096 | |
PAGE_OFFSET = 0xC0000000 # Linux default 3G/1G split | |
UserTop = PAGE_OFFSET-1 | |
TASK_SIZE = PAGE_OFFSET - 0x01000000 | |
class H(object): | |
def __init__(self): | |
# mallocinit | |
arenaSize = 128 << 20 | |
bitmapSize = _MaxArena32 / (ptrSize * 8 / 4) | |
spansSize = _MaxArena32 / _PageSize * ptrSize | |
spansSize = round(spansSize, _PageSize) | |
firstmoduledata_end = 0x1000 | |
p = round(firstmoduledata_end + (1<<18), 1<<20) | |
pSize = bitmapSize + spansSize + arenaSize + _PageSize | |
p, pSize = sym.uintptr(p), sym.uintptr(pSize) | |
#p = sym.uintptr(0) # <- Quick way to the bug | |
# XXX Probably don't need arena_reserved | |
p, self.arena_reserved = sysReserve(p, pSize) | |
p1 = round(p, _PageSize) | |
self.arena_start = sym.uintptr(p1 + spansSize + bitmapSize) | |
self.arena_used = self.arena_start | |
self.arena_end = sym.uintptr(p + pSize) | |
self.spans_len = spansSize / ptrSize | |
def print_(self): | |
sym.print_("h.arena_start=%s h.arena_used=%s h.arena_end=%s", | |
self.arena_start, self.arena_used, self.arena_end) | |
def round(n, a): | |
return (n + (a - 1)) & ~sym.uintptr(a - 1) | |
def mHeap_Grow(npage): | |
sym.print_("mHeap_Grow %s", npage) | |
npage = round(npage, (64<<10)/_PageSize) | |
sym.print_("mHeap_Grow rounded %s", npage) | |
ask = npage << _PageShift | |
if ask < _HeapAllocChunk: | |
ask = _HeapAllocChunk | |
v = mHeap_SysAlloc(ask) | |
if v == 0: | |
if ask > npage<<_PageShift: | |
ask = npage << _PageShift | |
v = mHeap_SysAlloc(ask) | |
if v == 0: | |
print "out of memory" | |
return False | |
s_start, s_npages = v>>_PageShift, ask>>_PageShift | |
p = s_start | |
p -= h.arena_start >> _PageShift | |
min_i = p | |
max_i = p+s_npages-1 | |
sym.print_("min_i=%s max_i=%s", min_i, max_i) | |
assert min_i >= 0 | |
assert min_i < h.spans_len | |
assert max_i >= 0 | |
assert max_i < h.spans_len | |
return True | |
def mHeap_SysAlloc(n): | |
# n uintptr | |
# returns uintptr | |
sym.print_("mHeap_SysAlloc %s", n) | |
if n > h.arena_end - h.arena_used: | |
sym.print_("mHeap_SysAlloc overflow arena %d", h.arena_end - h.arena_used) | |
p_size = round(n + _PageSize, 256<<20) | |
new_end = h.arena_end + p_size # BUG: This can wrap around | |
if False and new_end < h.arena_end: # NEW NEW NEW | |
print "out of memory (large allocation 2)" | |
return 0 | |
#if new_end <= h.arena_start + _MaxArena32: | |
# NEW NEW NEW | |
if h.arena_end <= new_end and new_end <= h.arena_start + _MaxArena32: | |
sym.print_("mHeap_SysAlloc has space below _MaxArena32 %s <= %s", new_end, h.arena_start + _MaxArena32) | |
# BUG: sysReserve calls mmap *without* MAP_FIXED, so mmap | |
# can return an address *below* arena_start. Then we'll | |
# take the second path below (p+p_size <= ...), then we'll | |
# take the "has space" path below, return p, and then wrap | |
# around backwards when trying to compute the first index | |
# and use a huge index. | |
p, reserved = sysReserve(h.arena_end, p_size) | |
if p == 0: | |
return 0 | |
if p == h.arena_end: | |
h.arena_end = new_end | |
h.arena_reserved = reserved | |
#elif p+p_size <= h.arena_start+_MaxArena32: | |
# NEW NEW NEW | |
elif h.arena_start <= p and p+p_size <= h.arena_start+_MaxArena32: | |
h.arena_end = p + p_size | |
used = p + (-p & (_PageSize - 1)) | |
h.arena_used = used | |
h.arena_reserved = reserved | |
else: | |
sysFree(p, p_size) | |
if n <= h.arena_end - h.arena_used: | |
sym.print_("mHeap_SysAlloc has space") | |
p = h.arena_used | |
sysMap(p, n, h.arena_reserved) | |
h.arena_used = p + n | |
return p | |
sym.print_("mHeap_SysAlloc no space; sysAlloc %s", p_size) | |
p_size = round(n, _PageSize) + _PageSize | |
p = sysAlloc(p_size) | |
if p == 0: | |
return 0 | |
if p < h.arena_start or p+p_size-h.arena_start >= _MaxArena32: | |
print "runtime: memory allocated by OS not in usable range" | |
sysFree(p, p_size) | |
return 0 | |
p_end = p + p_size | |
p += (0-p) & (_PageSize - 1) | |
if p + n > h.arena_used: | |
h.arena_used = p + n | |
if p_end > h.arena_end: | |
h.arena_end = p_end | |
return p | |
def largeAloc(size): | |
# size uintptr | |
if size == 0: | |
return 0 | |
if size + _PageSize < size: | |
print "out of memory (large allocation)" | |
return 0 | |
npages = size >> _PageShift | |
if size & (_PageSize - 1) != 0: | |
npages += 1 | |
return mHeap_Grow(npages) | |
def sysReserve(v, n): | |
p = mmap(v, n, False) # _PROT_NONE, _MAP_ANON|_MAP_PRIVATE | |
return p, True | |
def sysMap(v, n, reserved): | |
p = mmap(v, n, True) # _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE | |
if p == 0: | |
print "sysMap: out of memory" | |
sys.exit(0) | |
if p != v: | |
print "sysMap: cannot map pages in arena address space" | |
sys.exit(0) | |
def sysAlloc(n): | |
return mmap(0, n, False) # _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE | |
def sysFree(v, n): | |
munmap(v, n) | |
# | |
# Linux implementation | |
# | |
vmas = [] | |
def find_vma_ends_after(addr): | |
for i, (start, end) in enumerate(vmas): | |
if end > addr: | |
return i | |
return len(vmas) | |
def remove_vma(start, end): | |
i = 0 | |
while i < len(vmas): | |
vmaStart, vmaEnd = vmas[i] | |
if vmaStart < start and end < vmaEnd: | |
# Split this region | |
vmas.insert(i + 1, (end, vmaEnd)) | |
assert end != 0 | |
vmaEnd = start | |
elif start <= vmaStart and vmaEnd <= end: | |
# Remove region | |
vmas.pop(i) | |
continue | |
elif start <= vmaStart and end > vmaStart: | |
# Left truncate | |
vmaStart = end | |
elif start < vmaEnd and end >= vmaEnd: | |
# Right truncate | |
vmaEnd = start | |
vmas[i] = (vmaStart, vmaEnd) | |
assert vmaStart != 0 | |
i += 1 | |
def check_vmas(): | |
return | |
lastEnd = 0 | |
for (start, end) in vmas: | |
assert lastEnd <= start | |
assert start < end | |
assert end <= PAGE_OFFSET | |
lastEnd = end | |
def munmap(addr, n): | |
assert addr & (_PhysPageSize - 1) == 0 | |
n = round(n, _PhysPageSize) | |
remove_vma(addr, addr + n) | |
def mmap(addr, n, fixed): # do_mmap_pgoff | |
sym.print_("mmap %s %s %s", addr, n, fixed) | |
if n == 0: | |
return 0 # EINVAL | |
n = round(n, _PhysPageSize) | |
if n == 0: | |
return 0 # ENOMEM | |
addr = get_unmapped_area(addr, n, fixed) | |
if addr == 0: | |
return 0 # error | |
remove_vma(addr, addr + n) | |
assert addr != 0 | |
vmas.insert(find_vma_ends_after(addr), (addr, addr + n)) | |
check_vmas() | |
return addr | |
def get_unmapped_area(addr, n, fixed): | |
if n > TASK_SIZE: | |
return 0 # ENOMEM | |
addr = arch_get_unmapped_area(addr, n, fixed) | |
if addr == 0: | |
return 0 # error | |
sym.print_("addr=%s n=%s TASK_SIZE=%s TASK_SIZE-n=%s", addr, n, TASK_SIZE, TASK_SIZE - n) | |
if addr > TASK_SIZE - n: | |
return 0 # ENOMEM | |
if addr & (_PhysPageSize - 1) != 0: | |
return 0 # EINVAL | |
return addr | |
def arch_get_unmapped_area(addr, n, fixed): | |
if fixed: | |
return addr | |
if n > TASK_SIZE: | |
return 0 # ENOMEM | |
# Linux uses the following to satisfy the request for addr if it | |
# can. However, returning an arbitrary unmapped region strictly | |
# generalizes this, so in order to cut down on paths, we only do | |
# that. | |
# if addr != 0: | |
# addr = round(addr, _PhysPageSize) | |
# if TASK_SIZE - n >= addr: | |
# vmai = find_vma_ends_after(addr) | |
# if vmai == len(vmas) or addr + n <= vmas[vmai][0]: | |
# # Non-fixed mmap is allowed to return addr, but also | |
# # allowed to return something else entirely. Do both. | |
# if sym.fork(): | |
# return addr | |
return unmapped_area(n) | |
def unmapped_area(n): | |
lastEnd = _PhysPageSize # First page off limits | |
addr = sym.uintptr("unmapped_addr") | |
anyRoom = False | |
for (start, end) in vmas: | |
if start - lastEnd >= n: | |
anyRoom = True | |
if sym.fork(): | |
sym.assume(addr + n <= start) | |
break | |
lastEnd = end | |
else: | |
# After last VMA | |
if PAGE_OFFSET - lastEnd < n: | |
# There isn't enough room after the last VMA. | |
if anyRoom: | |
# There was room earlier and we just didn't take it. | |
# Other branches took those spaces, so just kill off | |
# this branch. | |
sys.exit(0) | |
return 0 | |
sym.assume(addr + n < PAGE_OFFSET) | |
sym.assume(addr >= lastEnd) | |
sym.assume(addr & (_PhysPageSize - 1) == 0) | |
sym.print_("picked base %s for %s byte alloc", addr, n) | |
return addr | |
h = H() | |
def run(): | |
h.print_() | |
try: | |
# Do enough symbolic allocations to cover all code paths in a | |
# single trace. | |
alloc1, alloc2, alloc3 \ | |
= sym.uintptr("alloc1"), sym.uintptr("alloc2"), sym.uintptr("alloc3") | |
if largeAloc(alloc1) == 0: | |
sym.print_("alloc1 failed") | |
h.print_() | |
# if largeAloc(alloc2) == 0: | |
# print "alloc2 failed" | |
# return | |
# h.print_() | |
# if largeAloc(alloc3) == 0: | |
# print "alloc3 failed" | |
# return | |
# h.print_() | |
if largeAloc(0x8 * _PageSize) == 0: | |
print "final alloc failed" | |
return | |
check_vmas() | |
print "success", [(sym.get(s), sym.get(e)) for s, e in vmas] | |
# print "success", [(sym.exprString(s), sym.exprString(e)) for s, e in vmas] | |
except AssertionError: | |
sym.print_("alloc=[%s,%s,%s]", alloc1, alloc2, alloc3) | |
h.print_() | |
sym.show_prints() | |
raise | |
run() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys | |
import stp | |
# The stp bindings are rather incomplete. | |
from stp.stp import _lib as _stplib | |
s = stp.Solver() | |
_id = 0 | |
prints = [] | |
def namegen(name): | |
global _id | |
i = _id | |
_id += 1 | |
return "%s_%d" % (name, i) | |
def uintptr(name): | |
if isinstance(name, stp.Expr): | |
return name | |
if isinstance(name, basestring): | |
return s.bitvec(namegen(name), 32) | |
if isinstance(name, (int, long)): | |
return s.bitvecval(32, name) | |
raise TypeError("don't know how to uintptr %r" % name) | |
uintptrType = _stplib.vc_bvType(s.vc, 32) | |
uintptrArrayType = _stplib.vc_arrayType(s.vc, uintptrType, uintptrType) | |
def uintptrArray(name): | |
if isinstance(name, UintptrArray): | |
return name | |
if isinstance(name, basestring): | |
se = _stplib.vc_varExpr(s.vc, namegen(name), uintptrArrayType) | |
return UintptrArray(s, se) | |
raise TypeError("don't know how to uintptrArray %r" % name) | |
class UintptrArray(object): | |
def __init__(self, s, expr): | |
self.s = s | |
self.expr = expr | |
def __toExpr(self, x, width): | |
if isinstance(x, (int, long)): | |
return self.s.bitvecval(width, x) | |
return x | |
def __getitem__(self, idx): | |
idx = self.__toExpr(idx, 32) | |
return stp.Expr(self.s, 32, _stplib.vc_readExpr(self.s.vc, self.expr, idx.expr)) | |
def write(self, idx, val): | |
idx = self.__toExpr(idx, 32) | |
val = self.__toExpr(val, 32) | |
return UintptrArray(self.s, _stplib.vc_writeExpr(self.s.vc, self.expr, idx.expr, val.expr)) | |
def assume(expr): | |
s.add(expr) | |
def print_(fmt, *exprs): | |
prints.append((fmt, exprs)) | |
def fork(): | |
pid = os.fork() | |
if pid == 0: | |
return True | |
else: | |
_, status = os.waitpid(pid, 0) | |
if status != 0: | |
if os.WIFEXITED(status): | |
sys.exit(os.WEXITSTATUS(status)) | |
print "exit status:", status | |
sys.exit(1) | |
return False | |
def _branch(expr): | |
canTrue, canFalse = s.check(expr), s.check(s.not_(expr)) | |
if canTrue and canFalse: | |
if fork(): | |
s.add(expr) | |
return True | |
else: | |
s.add(s.not_(expr)) | |
return False | |
elif canTrue: | |
return True | |
elif canFalse: | |
return False | |
else: | |
raise RuntimeError("%s is neither satisfiable nor unsatisfiable" % expr) | |
stp.Expr.__nonzero__ = _branch | |
def get(expr): | |
if isinstance(expr, stp.Expr): | |
# XXX vc_getCounterExample is flaky. Sometimes it just returns | |
# constant zeros when given a full expression. Simplifying the | |
# expression first helps. | |
expr2 = _stplib.vc_simplify(s.vc, expr.expr) | |
value = _stplib.vc_getCounterExample(s.vc, expr2) | |
return _stplib.getBVUnsignedLongLong(value) | |
return expr | |
def show_prints(): | |
# Force there to be a current counterexample. | |
s.push() | |
_stplib.vc_query(s.vc, s.false().expr) | |
for fmt, exprs in prints: | |
print fmt % tuple(map(get, exprs)) | |
s.pop() | |
def exprString(expr): | |
return _stplib.exprString(expr.expr) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment