|
# WARNING WARNING WARNING WARNING WARNING |
|
# This code is "tainted" -- it is derived from disassembled/decompiled macOS code |
|
# Please read the Asahi Linux reverse engineering policy before continuing |
|
# https://asahilinux.org/copyright/#reverse-engineering-policy |
|
# WARNING WARNING WARNING WARNING WARNING |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from proxyclient.m1n1.setup import * |
|
from proxyclient.m1n1.hw.dart import DART, DARTRegs |
|
from proxyclient.m1n1.utils import * |
|
import struct |
|
import sys |
|
import time |
|
from PIL import Image, ImageDraw |
|
|
|
def divroundup(val, div):
    """Divide ``val`` by ``div`` and round the quotient up.

    The numerator is biased by ``div - 1`` before a floor division, which
    yields the ceiling of ``val / div`` for integers with a positive ``div``.
    """
    biased = val + div - 1
    quotient, _remainder = divmod(biased, div)
    return quotient
|
|
|
##### THIS FOR ENCODE

if len(sys.argv) < 2:
    print(f"Usage: {sys.argv[0]} input_img")
    sys.exit(1)

input_fn = sys.argv[1]

# Load the picture and repack it as tightly packed 4-byte pixels
# (R, G, B, 255), row by row -- the layout encode() is configured for.
with Image.open(input_fn) as im:
    W, H = im.size
    # Normalising to RGB first lets palette, greyscale and alpha images
    # through (unpacking getpixel() into (r, g, b) only works for 3-band
    # images). RGB -> RGBA then fills the fourth byte with 255, and tobytes()
    # emits the rows in order without a quadratic per-pixel `bytes +=`.
    image_data = im.convert("RGB").convert("RGBA").tobytes()
    # Alternative per-pixel packings that were tried:
    # image_data += struct.pack("<H", (r >> 3) | ((b >> 2) << 5) | ((g >> 3) << 11))
    # image_data += struct.pack("<I", (r << 2) | (b << 12) | (g << 22))

# Input is 4 bytes per pixel; the output buffer is given the same amount of
# room. Both sizes are rounded up with align_up() before allocation/mapping.
input_sz = W*H*4
input_sz_aligned = align_up(input_sz)

output_sz = W*H*4
output_sz_aligned = align_up(output_sz)

print(f"Using size {input_sz_aligned:08X} for input image")
print(f"Using size {output_sz_aligned:08X} for output data")
|
|
|
# ##### THIS FOR DECODE |
|
|
|
# if len(sys.argv) < 2: |
|
# print(f"Usage: {sys.argv[0]} input.jpg") |
|
# sys.exit(1) |
|
|
|
# input_fn = sys.argv[1] |
|
|
|
# with open(input_fn, 'rb') as f: |
|
# jpeg_data = f.read() |
|
|
|
# found_sof0 = False |
|
|
|
# jpeg_work = jpeg_data |
|
# while jpeg_work: |
|
# seg_marker = struct.unpack(">H", jpeg_work[:2])[0] |
|
# print(f"Seg {seg_marker:04X}") |
|
# if seg_marker == 0xFFD8: |
|
# # SOI |
|
# jpeg_work = jpeg_work[2:] |
|
# elif seg_marker == 0xFFDA: |
|
# # SOS |
|
# break |
|
# else: |
|
# seg_len = struct.unpack(">H", jpeg_work[2:4])[0] |
|
# assert seg_len >= 2 |
|
# # print(seg_len) |
|
# seg_data = jpeg_work[4:4 + seg_len - 2] |
|
# # print(seg_data) |
|
# jpeg_work = jpeg_work[4 + seg_len - 2:] |
|
|
|
# if seg_marker == 0xFFC0: |
|
# # SOF0 |
|
# assert not found_sof0 |
|
# found_sof0 = True |
|
# sof0 = struct.unpack(">BHHB", seg_data[:6]) |
|
# # print(sof0) |
|
# (jpeg_bpp, jpeg_H, jpeg_W, jpeg_components_cnt) = sof0 |
|
# assert jpeg_bpp == 8 |
|
# assert jpeg_components_cnt == 3 |
|
# jpeg_components = [] |
|
# for i in range(jpeg_components_cnt): |
|
# comp_id, comp_sampling, comp_quant = seg_data[6+3*i:6+3*i+3] |
|
# # print(comp_id, comp_sampling, comp_quant) |
|
# jpeg_components.append((comp_id, comp_sampling >> 4, comp_sampling & 0xF, comp_quant)) |
|
# # print(jpeg_components) |
|
|
|
# # assert jpeg_components == [(1, 2, 2, 0), (2, 1, 1, 1), (3, 1, 1, 1)] |
|
|
|
# assert found_sof0 |
|
# print(f"JPEG is {jpeg_W}x{jpeg_H}") |
|
|
|
# jpeg_sz_aligned = align_up(len(jpeg_data)) |
|
# print(f"Using size {jpeg_sz_aligned:08X} for JPEG data") |
|
|
|
# # FIXME how much larger do we need? needed +16 at least once, set large for debugging |
|
# output_W = 2 * jpeg_W |
|
# output_H = 2 * jpeg_H |
|
# output_img_sz = 4 * output_W * output_H |
|
# output_img_sz_aligned = align_up(output_img_sz) |
|
# print(f"Using size {output_img_sz_aligned:08X} for output image") |
|
|
|
class R_STATUS(Register32):
    """Bit names for the STATUS register (mapped at 0x24 in JPEGRegs).

    Each field is a single bit position (m1n1 Register convention: a bare
    int names one bit). The names record what each bit appears to report.
    """
    DONE = 0
    TIMEOUT = 1
    RD_BUF_OVERFLOW = 2
    WR_BUF_OVERFLOW = 3
    CODEC_BUF_OVERFLOW = 4
    SOME_KIND_OF_MACROBLOCK_SIZE_ERROR = 5
    AXI_ERROR = 6
|
|
|
|
|
class R_JPEG_IO_FLAGS(Register32):
    """Field layout of the JPEG_IO_FLAGS register (mapped at 0x1000 in JPEGRegs).

    A two-element value is a (high bit, low bit) field and a bare int is a
    single bit (m1n1 Register convention). Field names are descriptive
    guesses based on observed behaviour.
    """
    # 0x0 = 4:4:4
    # 0x1 = 4:2:2
    # 0x2 = 4:2:0
    # 0x3 = monochrome
    # 0x4 = 4 components ??? seems to work with 422 with 444 tiling params ?????
    # 0x6 = indicate 4:1:1 in file, but setting CODEC = 2 doesn't actually work (broken)
    SUBSAMPLING_MODE = 2, 0
    # not sure what this is supposed to do
    MAKE_DECODE_WORK_BREAK_ENCODE = 3
    OUTPUT_MACROBLOCKS_UNFLIPPED_H = 4
    OUTPUT_8BYTE_CHUNKS_CORRECTLY = 5
|
|
|
|
|
class R_JPEG_OUTPUT_FLAGS(Register32):
    """Bit names for the JPEG_OUTPUT_FLAGS register (mapped at 0x1024 in JPEGRegs).

    Only the bits with an observed effect are named; bits 0 and 3 are noted
    below as having no visible effect.
    """
    # bit0 doesn't seem to do anything
    SKIP_HEADERS = 1 # output only SOS/EOI, no SOI/DQT/SOF0/DHT
    OUTPUT_SOF0_AFTER_DHT = 2 # output SOF0 after DHT instead of before it
    # bit3 doesn't seem to do anything
    COMPRESS_WORSE = 4 # not sure exactly what this does
|
|
|
|
|
class R_QTBL_SEL(Register32):
    """Field layout of the QTBL_SEL register (mapped at 0x100c in JPEGRegs).

    One two-bit (high bit, low bit) field per component; presumably each
    selects which quantisation table that component uses -- TODO confirm.
    """
    COMPONENT0 = 1, 0
    COMPONENT1 = 3, 2
    COMPONENT2 = 5, 4
    COMPONENT3 = 7, 6 # guessed
|
|
|
class JPEGRegs(RegMap):
    """Register map of the JPEG block.

    Every entry has the form ``NAME = offset, register class``; array
    registers use ``irange(start, count, stride)`` for the offset. Entries
    named ``REG_0x...`` are registers whose purpose has not been worked out,
    and commented-out entries are offsets deliberately left unmapped here.
    """
    REG_0x0 = 0x0, Register32
    REG_0x4 = 0x4, Register32
    MODE = 0x8, Register32
    REG_0xc = 0xc, Register32

    REG_0x20 = 0x20, Register32
    STATUS = 0x24, R_STATUS

    # 0 = YUV 444
    # 1 = YUV 422
    # 2 = YUV 411
    # 3 = YUV 420
    # 4 = YUV 400
    CODEC = 0x28, Register32

    REG_0x2c = 0x2c, Register32
    REG_0x30 = 0x30, Register32
    REG_0x34 = 0x34, Register32
    REG_0x38 = 0x38, Register32 # this changes the output drastically if set to 1 for decode; breaks encode if not set to 1

    # not sure what the difference is. siting? type2 seems to win over type1
    CHROMA_HALVE_H_TYPE1 = 0x3c, Register32
    CHROMA_HALVE_H_TYPE2 = 0x40, Register32
    CHROMA_HALVE_V_TYPE1 = 0x44, Register32
    CHROMA_HALVE_V_TYPE2 = 0x48, Register32

    # if double and quadruple both set --> double
    CHROMA_DOUBLE_H = 0x4c, Register32
    CHROMA_QUADRUPLE_H = 0x50, Register32
    CHROMA_DOUBLE_V = 0x54, Register32

    # details not fully understood yet
    PX_USE_PLANE1 = 0x58, Register32
    PX_TILES_W = 0x5c, Register32
    PX_TILES_H = 0x60, Register32
    PX_PLANE0_WIDTH = 0x64, Register32
    PX_PLANE0_HEIGHT = 0x68, Register32
    PX_PLANE0_TILING_H = 0x6c, Register32
    PX_PLANE0_TILING_V = 0x70, Register32
    PX_PLANE0_STRIDE = 0x74, Register32
    PX_PLANE1_WIDTH = 0x78, Register32
    PX_PLANE1_HEIGHT = 0x7c, Register32
    PX_PLANE1_TILING_H = 0x80, Register32
    PX_PLANE1_TILING_V = 0x84, Register32
    PX_PLANE1_STRIDE = 0x88, Register32

    # Input/output buffer window (start and end addresses)
    INPUT_START1 = 0x8c, Register32
    INPUT_START2 = 0x90, Register32
    REG_0x94 = 0x94, Register32
    REG_0x98 = 0x98, Register32
    INPUT_END = 0x9c, Register32

    OUTPUT_START1 = 0xa0, Register32
    OUTPUT_START2 = 0xa4, Register32
    OUTPUT_END = 0xa8, Register32

    # 11 matrix entries at 0xAC..0xD4 followed by 10 dither entries at 0xD8..0xFC
    MATRIX_MULT = irange(0xAC, 11, 4), Register32
    DITHER = irange(0xD8, 10, 4), Register32

    # 0 = RGB101010
    # 1 = YUV10 linear (partially tested, details not understood)
    # 2 = RGB888
    # 3 = RGB565
    # 4 = YUV planar (partially tested, details not understood)
    # 5 = YUV linear (partially tested, details not understood)
    ENCODE_PIXEL_FORMAT = 0x100, Register32
    # RGB888: R, G, B = byte pos
    # RGB101010: R, G, B = 0/1/2 = low/mid/high bits
    # RGB565: R, G, B = 0/1/2 = low/mid/high bits
    ENCODE_COMPONENT0_POS = 0x104, Register32
    ENCODE_COMPONENT1_POS = 0x108, Register32
    ENCODE_COMPONENT2_POS = 0x10c, Register32
    ENCODE_COMPONENT3_POS = 0x110, Register32

    CONVERT_COLOR_SPACE = 0x114, Register32

    REG_0x118 = 0x118, Register32
    REG_0x11c = 0x11c, Register32

    REG_0x120 = 0x120, Register32
    UNTESTED_SURFACE_TILING = 0x124, Register32 # this doesn't seem to work???
    REG_0x128 = 0x128, Register32
    REG_0x12c = 0x12c, Register32

    DECODE_MACROBLOCKS_W = 0x130, Register32
    DECODE_MACROBLOCKS_H = 0x134, Register32
    RIGHT_EDGE_PIXELS = 0x138, Register32
    BOTTOM_EDGE_PIXELS = 0x13c, Register32
    RIGHT_EDGE_SAMPLES = 0x140, Register32
    BOTTOM_EDGE_SAMPLES = 0x144, Register32

    SCALE_FACTOR = 0x148, Register32 # 0-3 --> /1 /2 /4 /8

    # 0 = YUV 444 (2P)
    # 1 = YUV 422 (2P)
    # 2 = YUV 420 (2P)
    # 3 = YUV 422 (1P)
    # 4 = driver mentions YUV10 444 (1P) but it does not appear to work (driver also says it doesn't)
    # 5 = RGB888
    # 6 = RGB565
    # 7 = driver mentions RGB101010 but it does not appear to work (driver also says it doesn't)
    DECODE_PIXEL_FORMAT = 0x14c, Register32
    YUV422_ORDER = 0x150, Register32 # 0 = Cb Y'0 Cr Y'1 1 = Y'0 Cb Y'1 Cr
    RGBA_ORDER = 0x154, Register32 # 0 = BGRA 1 = RGBA
    RGBA_ALPHA = 0x158, Register32

    REG_0x15c = 0x15c, Register32

    REG_0x160 = 0x160, Register32
    REG_0x164 = 0x164, Register32
    # REG_0x168 = 0x168, Register32
    REG_0x16c = 0x16c, Register32

    REG_0x170 = 0x170, Register32
    # REG_0x174 = 0x174, Register32
    PERFCOUNTER = 0x178, Register32 # guessed
    # REG_0x17c = 0x17c, Register32

    # REG_0x180 = 0x180, Register32
    TIMEOUT = 0x184, Register32
    HWREV = 0x188, Register32
    # REG_0x18c = 0x18c, Register32

    # REG_0x190 = 0x190, Register32
    # REG_0x194 = 0x194, Register32
    # REG_0x198 = 0x198, Register32
    REG_0x19c = 0x19c, Register32

    ENABLE_RST_LOGGING = 0x1a0, Register32
    RST_LOG_ENTRIES = 0x1a4, Register32

    REG_0x1a8 = 0x1a8, Register32
    REG_0x1ac = 0x1ac, Register32

    REG_0x1b0 = 0x1b0, Register32
    REG_0x1b4 = 0x1b4, Register32
    # REG_0x1b8 = 0x1b8, Register32
    REG_0x1bc = 0x1bc, Register32

    REG_0x1c0 = 0x1c0, Register32
    REG_0x1c4 = 0x1c4, Register32
    REG_0x1c8 = 0x1c8, Register32
    REG_0x1cc = 0x1cc, Register32

    REG_0x1d0 = 0x1d0, Register32
    REG_0x1d4 = 0x1d4, Register32
    # REG_0x1d8 = 0x1d8, Register32
    # REG_0x1dc = 0x1dc, Register32

    # REG_0x1e0 = 0x1e0, Register32
    # REG_0x1e4 = 0x1e4, Register32
    # REG_0x1e8 = 0x1e8, Register32
    # REG_0x1ec = 0x1ec, Register32

    # REG_0x1f0 = 0x1f0, Register32
    # REG_0x1f4 = 0x1f4, Register32
    # REG_0x1f8 = 0x1f8, Register32
    REG_0x1fc = 0x1fc, Register32

    REG_0x200 = 0x200, Register32
    REG_0x204 = 0x204, Register32
    REG_0x208 = 0x208, Register32
    REG_0x20c = 0x20c, Register32

    REG_0x210 = 0x210, Register32
    REG_0x214 = 0x214, Register32
    REG_0x218 = 0x218, Register32
    REG_0x21c = 0x21c, Register32

    REG_0x220 = 0x220, Register32
    REG_0x224 = 0x224, Register32
    REG_0x228 = 0x228, Register32
    REG_0x22c = 0x22c, Register32

    REG_0x230 = 0x230, Register32
    REG_0x234 = 0x234, Register32
    # REG_0x238 = 0x238, Register32
    REG_0x23c = 0x23c, Register32

    REG_0x240 = 0x240, Register32
    REG_0x244 = 0x244, Register32
    REG_0x248 = 0x248, Register32
    REG_0x24c = 0x24c, Register32

    REG_0x250 = 0x250, Register32
    REG_0x254 = 0x254, Register32
    REG_0x258 = 0x258, Register32
    REG_0x25c = 0x25c, Register32

    # Second register group, starting at offset 0x1000
    JPEG_IO_FLAGS = 0x1000, R_JPEG_IO_FLAGS
    REG_0x1004 = 0x1004, Register32
    # REG_0x1008 = 0x1008, Register32
    QTBL_SEL = 0x100c, R_QTBL_SEL

    HUFFMAN_TABLE = 0x1010, Register32 # fixme what _exactly_ does this control
    RST_INTERVAL = 0x1014, Register32 # 16 bits effective
    JPEG_HEIGHT = 0x1018, Register32
    JPEG_WIDTH = 0x101c, Register32

    COMPRESSED_BYTES = 0x1020, Register32
    JPEG_OUTPUT_FLAGS = 0x1024, R_JPEG_OUTPUT_FLAGS
    REG_0x1028 = 0x1028, Register32
    REG_0x102c = 0x102c, Register32

    BITSTREAM_CORRUPTION = 0x1030, Register32
    # REG_0x1034 = 0x1034, Register32
    # REG_0x1038 = 0x1038, Register32
    # REG_0x103c = 0x103c, Register32

    # 64 words of quantisation table storage at 0x1100
    QTBL = irange(0x1100, 64, 4), Register32

    # todo what's the format?
    RSTLOG = irange(0x2000, 1024, 4), Register32
|
|
|
|
|
# Enable the PMGR clocks for the JPEG DART first, then for the JPEG block.
for _adt_node in ('/arm-io/dart-jpeg0', '/arm-io/jpeg0'):
    p.pmgr_adt_clocks_enable(_adt_node)

# Bring up the DART that the buffers are mapped through further down.
dart = DART.from_adt(u, '/arm-io/dart-jpeg0')
dart.initialize()

# The first ADT "reg" entry of the JPEG node supplies the MMIO base address;
# the second element of the returned pair is not needed.
jpeg_base, _jpeg_reg_extra = u.adt['/arm-io/jpeg0'].get_reg(0)
jpeg = JPEGRegs(u, jpeg_base)
|
|
|
|
|
def reset_block():
    """Reset the JPEG block and leave it with default register contents.

    Steps, in the order the registers are written:
      1. MODE is set to 0x100, then 0x13e, and set_default_regs() is called.
      2. MODE is set to 0x17f and REG_0x1004 is polled (up to 10000 reads)
         until it reads 0.
      3. RST_INTERVAL is written with 1 and polled (up to 2500 reads) until
         it reads back 1, then cleared again.
      4. ENABLE_RST_LOGGING and the REG_0x1a8..REG_0x1d4 group are zeroed.
      5. MODE is set to 0x143.

    Raises:
        AssertionError: if either poll never observes the expected value.
    """
    jpeg.MODE.val = 0x100
    jpeg.MODE.val = 0x13e

    set_default_regs()

    jpeg.MODE.val = 0x17f
    for _ in range(10000):
        v = jpeg.REG_0x1004.val
        if v == 0:
            break
        print(f"reset 1 -- {v}")
    # A final fresh read decides the outcome, whether or not the loop broke.
    if (v := jpeg.REG_0x1004.val) != 0:
        print(f"reset 1 failed! -- {v}")
        # Raised explicitly: a bare `assert False` is stripped by `python -O`,
        # which would let a failed reset go unnoticed.
        raise AssertionError(f"reset 1 failed! -- {v}")

    jpeg.RST_INTERVAL.val = 1
    for _ in range(2500):
        v = jpeg.RST_INTERVAL.val
        if v == 1:
            break
        print(f"reset 2 -- {v}")
    if (v := jpeg.RST_INTERVAL.val) != 1:
        print(f"reset 2 failed! -- {v}")
        raise AssertionError(f"reset 2 failed! -- {v}")
    jpeg.RST_INTERVAL.val = 0

    # Clear restart logging and the neighbouring unknown registers.
    jpeg.ENABLE_RST_LOGGING.val = 0
    jpeg.REG_0x1a8.val = 0
    jpeg.REG_0x1ac.val = 0
    jpeg.REG_0x1b0.val = 0
    jpeg.REG_0x1b4.val = 0
    jpeg.REG_0x1bc.val = 0
    jpeg.REG_0x1c0.val = 0
    jpeg.REG_0x1c4.val = 0
    jpeg.REG_0x1c8.val = 0
    jpeg.REG_0x1cc.val = 0
    jpeg.REG_0x1d0.val = 0
    jpeg.REG_0x1d4.val = 0

    jpeg.MODE.val = 0x143
|
|
|
def set_default_regs(param1=0):
    """Write the default value to every configuration register of the block.

    Registers are written strictly in the order listed below (REG_0x0 is
    written twice on purpose, matching the original sequence). ``param1`` is
    the value stored in REG_0x160 near the end.
    """

    def write_in_order(assignments):
        # Apply (register name, value) pairs one after another.
        for reg_name, reg_value in assignments:
            getattr(jpeg, reg_name).val = reg_value

    write_in_order((
        ("REG_0x0", 0),
        ("REG_0x0", 0),
        ("REG_0x4", 0),
        ("CODEC", 0),
        ("REG_0x2c", 0),
        ("REG_0x30", 0),
        ("REG_0x34", 1),
        ("REG_0x38", 1),
        ("CHROMA_HALVE_H_TYPE1", 0),
        ("CHROMA_HALVE_H_TYPE2", 0),
        ("CHROMA_HALVE_V_TYPE1", 0),
        ("CHROMA_HALVE_V_TYPE2", 0),
        ("CHROMA_DOUBLE_H", 0),
        ("CHROMA_QUADRUPLE_H", 0),
        ("CHROMA_DOUBLE_V", 0),
        ("REG_0x15c", 0),
        ("PX_USE_PLANE1", 0),
        ("PX_TILES_W", 1),
        ("PX_TILES_H", 1),
        ("PX_PLANE0_WIDTH", 1),
        ("PX_PLANE0_HEIGHT", 1),
        ("PX_PLANE0_TILING_H", 1),
        ("PX_PLANE0_TILING_V", 1),
        ("PX_PLANE0_STRIDE", 1),
        ("PX_PLANE1_WIDTH", 1),
        ("PX_PLANE1_HEIGHT", 1),
        ("PX_PLANE1_TILING_H", 1),
        ("PX_PLANE1_TILING_V", 1),
        ("PX_PLANE1_STRIDE", 1),
        ("INPUT_START1", 0),
        ("INPUT_START2", 0),
        ("REG_0x94", 1),
        ("REG_0x98", 1),
        ("INPUT_END", 0xffffffff),
        ("OUTPUT_START1", 0),
        ("OUTPUT_START2", 0),
        ("OUTPUT_END", 0xffffffff),
    ))

    # Array registers: all 11 matrix entries to 0, all 10 dither entries to 0xff.
    for matrix_idx in range(11):
        jpeg.MATRIX_MULT[matrix_idx].val = 0
    for dither_idx in range(10):
        jpeg.DITHER[dither_idx].val = 0xff

    write_in_order((
        ("ENCODE_PIXEL_FORMAT", 0),
        ("ENCODE_COMPONENT0_POS", 0),
        ("ENCODE_COMPONENT1_POS", 0),
        ("ENCODE_COMPONENT2_POS", 0),
        ("ENCODE_COMPONENT3_POS", 0),
        ("CONVERT_COLOR_SPACE", 0),
        ("REG_0x118", 0),
        ("REG_0x11c", 0),
        ("REG_0x120", 0),
        ("UNTESTED_SURFACE_TILING", 0),
        ("REG_0x128", 0),
        ("REG_0x12c", 0),
        ("DECODE_MACROBLOCKS_W", 0),
        ("DECODE_MACROBLOCKS_H", 0),
        ("SCALE_FACTOR", 0),
        ("DECODE_PIXEL_FORMAT", 0),
        ("YUV422_ORDER", 0),
        ("RGBA_ORDER", 0),
        ("RGBA_ALPHA", 0),
        ("RIGHT_EDGE_PIXELS", 0),
        ("BOTTOM_EDGE_PIXELS", 0),
        ("RIGHT_EDGE_SAMPLES", 0),
        ("BOTTOM_EDGE_SAMPLES", 0),
    ))

    # this is always done on the m1 max hwrev
    write_in_order((
        ("REG_0x1fc", 0),
        ("REG_0x200", 0),
        ("REG_0x204", 0),
        ("REG_0x208", 0),
        ("REG_0x214", 0),
        ("REG_0x218", 0),
        ("REG_0x21c", 0),
        ("REG_0x220", 0),
        ("REG_0x224", 0),
        ("REG_0x228", 0),
        ("REG_0x22c", 0),
        ("REG_0x230", 0),
        ("REG_0x234", 0x1f40),
        ("REG_0x244", 0),
        ("REG_0x248", 0),
        ("REG_0x258", 0),
        ("REG_0x25c", 0),
        ("REG_0x23c", 0),
        ("REG_0x240", 0),
        ("REG_0x250", 0),
        ("REG_0x254", 0),
    ))

    write_in_order((
        ("REG_0x160", param1),
        ("TIMEOUT", 0),
        ("REG_0x20", 0xff),
    ))
|
|
|
def decode(input_iova, input_sz, output_iova, output_sz):
    """Program the block for a JPEG decode to 4-byte RGBA pixels and kick it off.

    Args:
        input_iova: device-side (DART) address of the JPEG bitstream.
        input_sz: size of the input mapping; INPUT_END is set to
            ``input_iova + input_sz``.
        output_iova: device-side address of the output buffer.
        output_sz: size of the output mapping; OUTPUT_END is set to
            ``output_iova + output_sz``.

    Besides ``jpeg`` this reads the module globals ``jpeg_W``, ``jpeg_H`` and
    ``output_W``. Those are only assigned by the decode setup section that is
    commented out elsewhere in this file, so that section has to be enabled
    before this function can run.

    The function returns right after the final register writes (REG_0x0 and
    REG_0x1004, which presumably start the job); it does not wait for
    completion.

    Every register is written through ``.val`` so that the write does not
    depend on how RegMap treats plain attribute assignment.
    """
    jpeg.REG_0x34.val = 1
    jpeg.REG_0x2c.val = 0
    jpeg.REG_0x38.val = 0
    jpeg.CODEC.val = 0
    jpeg.DECODE_PIXEL_FORMAT.val = 5   # 5 = RGB888 per the JPEGRegs notes

    # image boundary
    jpeg.PX_USE_PLANE1.val = 1
    jpeg.PX_PLANE0_WIDTH.val = jpeg_W*4 - 1
    jpeg.PX_PLANE0_HEIGHT.val = jpeg_H - 1
    # jpeg.PX_PLANE1_WIDTH.val = jpeg_W * 2 - 1 # HACK HACK
    # jpeg.PX_PLANE1_HEIGHT.val = jpeg_H - 1
    # jpeg.TIMEOUT.val = 100200
    jpeg.TIMEOUT.val = 266000000

    jpeg.REG_0x94.val = 0x1f
    jpeg.REG_0x98.val = 1

    # jpeg.CHROMA_HALVE_H_TYPE1.val = 1
    # jpeg.YUV422_ORDER.val = 1
    # jpeg.UNTESTED_SURFACE_TILING = 1

    # wrcnvset_w_h
    # Size in 8-pixel units, plus how many pixels the last unit really holds.
    mb_w = divroundup(jpeg_W, 8)
    mb_h = divroundup(jpeg_H, 8)
    jpeg.DECODE_MACROBLOCKS_W.val = mb_w
    jpeg.DECODE_MACROBLOCKS_H.val = mb_h
    right_edge_px = jpeg_W - mb_w*8 + 8
    bot_edge_px = jpeg_H - mb_h*8 + 8
    # XXX changing this does not seem to do anything
    jpeg.RIGHT_EDGE_PIXELS.val = right_edge_px
    jpeg.BOTTOM_EDGE_PIXELS.val = bot_edge_px
    jpeg.RIGHT_EDGE_SAMPLES.val = right_edge_px // 2
    jpeg.BOTTOM_EDGE_SAMPLES.val = bot_edge_px // 2

    jpeg.PX_TILES_H.val = mb_h
    jpeg.PX_TILES_W.val = mb_w
    jpeg.PX_PLANE0_TILING_H.val = 4
    jpeg.PX_PLANE0_TILING_V.val = 8
    jpeg.PX_PLANE1_TILING_H.val = 0
    jpeg.PX_PLANE1_TILING_V.val = 0

    # Colour matrix, written to entries 0..10 in order. The values look like
    # YCbCr -> RGB coefficients in 1/256 units (negatives as 32-bit two's
    # complement) -- NOTE(review): interpretation not confirmed.
    decode_matrix = (
        0x100, 0x0, 0x167,
        0x100, 0xffffffa8, 0xffffff49,
        0x100, 0x1c6, 0x0,
        0x0, 0xffffff80,
    )
    for idx, coeff in enumerate(decode_matrix):
        jpeg.MATRIX_MULT[idx].val = coeff

    # submode
    jpeg.RGBA_ALPHA.val = 0xff
    jpeg.RGBA_ORDER.val = 1   # 1 = RGBA per the JPEGRegs notes

    jpeg.SCALE_FACTOR.val = 0

    # pointers
    jpeg.INPUT_START1.val = input_iova
    jpeg.INPUT_START2.val = 0xdeadbeef
    jpeg.INPUT_END.val = input_iova + input_sz
    jpeg.OUTPUT_START1.val = output_iova
    jpeg.OUTPUT_START2.val = output_iova + jpeg_W * 4 # HACK
    jpeg.OUTPUT_END.val = output_iova + output_sz
    jpeg.PX_PLANE0_STRIDE.val = output_W * 4
    jpeg.PX_PLANE1_STRIDE.val = output_W * 4 # HACK

    jpeg.REG_0x1ac.val = 0x0
    jpeg.REG_0x1b0.val = 0x0
    jpeg.REG_0x1b4.val = 0x0
    jpeg.REG_0x1bc.val = 0x0
    jpeg.REG_0x1c0.val = 0x0
    jpeg.REG_0x1c4.val = 0x0

    jpeg.REG_0x118.val = 0x0
    jpeg.REG_0x11c.val = 0x1

    jpeg.MODE.val = 0x177
    jpeg.REG_0x1028.val = 0x400

    jpeg.JPEG_IO_FLAGS.val = 0x1f
    jpeg.REG_0x0.val = 0x1
    jpeg.REG_0x1004.val = 0x1
|
|
|
def encode(input_iova, input_sz, output_iova, output_sz):
    """Program the block to JPEG-encode a 4-byte-per-pixel image and kick it off.

    Args:
        input_iova: device-side (DART) address of the pixel data.
        input_sz: size of the input mapping; INPUT_END is set to
            ``input_iova + input_sz + 7``.
        output_iova: device-side address of the output buffer.
        output_sz: size of the output mapping; OUTPUT_END is set to
            ``output_iova + output_sz``.

    Besides ``jpeg`` this reads the module globals ``W`` and ``H`` (image
    width and height in pixels) set while loading the input image.

    The function returns right after the final register writes (REG_0x0 and
    REG_0x1004, which presumably start the job); it does not wait for
    completion.

    Every register is written through ``.val`` so that the write does not
    depend on how RegMap treats plain attribute assignment.
    """
    jpeg.MODE.val = 0x17f
    jpeg.REG_0x38.val = 0x1     # if not set nothing happens
    jpeg.REG_0x2c.val = 0x1     # if not set only header is output
    jpeg.REG_0x34.val = 0x0     # if set output is a JPEG but weird with no footer
    jpeg.CODEC.val = 0

    jpeg.PX_USE_PLANE1.val = 0x0
    jpeg.PX_PLANE0_WIDTH.val = W*4 - 1
    jpeg.PX_PLANE0_HEIGHT.val = H - 1
    jpeg.PX_PLANE1_WIDTH.val = 0xffffffff
    jpeg.PX_PLANE1_HEIGHT.val = 0xffffffff
    jpeg.TIMEOUT.val = 0xfdad680

    jpeg.PX_TILES_W.val = divroundup(W, 8)
    jpeg.PX_TILES_H.val = divroundup(H, 8)
    jpeg.PX_PLANE0_TILING_H.val = 0x4
    jpeg.PX_PLANE0_TILING_V.val = 0x8
    jpeg.PX_PLANE0_STRIDE.val = W*4
    jpeg.PX_PLANE1_STRIDE.val = 0

    # none of this seems to affect anything????
    jpeg.REG_0x94.val = 0xc     # c/2 for 444; 8/2 for 422; 3/1 for 411; b/2 for 400
    jpeg.REG_0x98.val = 0x2
    jpeg.REG_0x20c.val = W
    jpeg.REG_0x210.val = H

    jpeg.CONVERT_COLOR_SPACE.val = 1
    # Colour matrix, written to entries 0..10 in order. The values look like
    # RGB -> YCbCr coefficients in 1/256 units (negatives as 32-bit two's
    # complement) -- NOTE(review): interpretation not confirmed.
    # Alternative that was tried:
    #   (0x80, 0, 0, 0, 0x80, 0, 0, 0, 0x80, 0, 0x80)
    encode_matrix = (
        0x4d, 0x96, 0x1d,
        0xffffffd5, 0xffffffab, 0x80,
        0x80, 0xffffff95, 0xffffffeb,
        0x0, 0x80,
    )
    for idx, coeff in enumerate(encode_matrix):
        jpeg.MATRIX_MULT[idx].val = coeff

    jpeg.ENCODE_PIXEL_FORMAT.val = 2   # 2 = RGB888 per the JPEGRegs notes
    jpeg.ENCODE_COMPONENT0_POS.val = 0
    jpeg.ENCODE_COMPONENT1_POS.val = 1
    jpeg.ENCODE_COMPONENT2_POS.val = 2
    jpeg.ENCODE_COMPONENT3_POS.val = 3

    jpeg.INPUT_START1.val = input_iova
    jpeg.INPUT_START2.val = input_iova
    jpeg.INPUT_END.val = input_iova + input_sz + 7     # NOTE +7
    jpeg.OUTPUT_START1.val = output_iova
    jpeg.OUTPUT_START2.val = 0xdeadbeef
    jpeg.OUTPUT_END.val = output_iova + output_sz

    jpeg.REG_0x118.val = 0x1
    jpeg.REG_0x11c.val = 0x0

    jpeg.ENABLE_RST_LOGGING.val = 1

    jpeg.MODE.val = 0x16f
    jpeg.JPEG_IO_FLAGS.val = 0x30
    jpeg.JPEG_WIDTH.val = W
    jpeg.JPEG_HEIGHT.val = H
    jpeg.RST_INTERVAL.val = 0
    jpeg.JPEG_OUTPUT_FLAGS.val = 0

    # Contents of the 64 QTBL words, written to QTBL[0]..QTBL[63] in order.
    # The grouping into four runs of 16 words follows the original listing.
    qtbl_words = (
        # words 0-15
        0xa06e64a0, 0xf0ffffff, 0x78788cbe, 0xffffffff,
        0x8c82a0f0, 0xffffffff, 0x8caadcff, 0xffffffff,
        0xb4dcffff, 0xffffffff, 0xf0ffffff, 0xffffffff,
        0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
        # words 16-31
        0xaab4f0ff, 0xffffffff, 0xb4d2ffff, 0xffffffff,
        0xf0ffffff, 0xffffffff, 0xffffffff, 0xffffffff,
        0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
        0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
        # words 32-47
        0x01010201, 0x01020202, 0x02030202, 0x03030604,
        0x03030303, 0x07050804, 0x0608080a, 0x0908070b,
        0x080a0e0d, 0x0b0a0a0c, 0x0a08080b, 0x100c0c0d,
        0x0f0f0f0f, 0x090b1011, 0x0f0e110d, 0x0e0e0e01,
        # words 48-63
        0x04040405, 0x04050905, 0x05090f0a, 0x080a0f1a,
        0x13090913, 0x1a1a1a1a, 0x0d1a1a1a, 0x1a1a1a1a,
        0x1a1a1a1a, 0x1a1a1a1a, 0x1a1a1a1a, 0x1a1a1a1a,
        0x1a1a1a1a, 0x1a1a1a1a, 0x1a1a1a1a, 0x1a1a1a1a,
    )
    for idx, word in enumerate(qtbl_words):
        jpeg.QTBL[idx].val = word

    jpeg.HUFFMAN_TABLE.val = 0x3c
    jpeg.QTBL_SEL.val = 0xff
    jpeg.REG_0x0.val = 0x1
    jpeg.REG_0x1004.val = 0x1
|
|
|
|
|
print(f"HW revision is {jpeg.HWREV}")
reset_block()

##### THIS FOR ENCODE
# Two 0x4000-aligned heap buffers: source pixels in, encoder output out.
input_buf_phys = u.heap.memalign(0x4000, input_sz_aligned)
output_buf_phys = u.heap.memalign(0x4000, output_sz_aligned)
print(f"buffers (phys) {input_buf_phys:016X} {output_buf_phys:016X}")

# Map both buffers through the DART; encode() is handed the resulting
# device-side addresses rather than the physical ones.
input_buf_iova = dart.iomap(0, input_buf_phys, input_sz_aligned)
output_buf_iova = dart.iomap(0, output_buf_phys, output_sz_aligned)
print(f"buffers (iova) {input_buf_iova:08X} {output_buf_iova:08X}")
dart.dump_all()

# Upload the pixels padded out to the aligned size with 0xAA, and pre-fill the
# output buffer with 0xAA as well so untouched bytes are easy to recognise.
_fill_byte = b'\xAA'
_padded_input = image_data + _fill_byte * (input_sz_aligned - len(image_data))
iface.writemem(input_buf_phys, _padded_input)
iface.writemem(output_buf_phys, _fill_byte * output_sz_aligned)

encode(
    input_buf_iova,
    input_sz_aligned,
    output_buf_iova,
    output_sz_aligned,
)

# Fixed one-second wait before the result registers are read back.
time.sleep(1)

print(jpeg.STATUS.reg)
print(jpeg.PERFCOUNTER.reg)
print(jpeg.COMPRESSED_BYTES.reg)

# print(jpeg.RST_LOG_ENTRIES.val)
# print(jpeg.RSTLOG[0].val)
# print(jpeg.RSTLOG[1].val)
# print(jpeg.RSTLOG[2].val)
# print(jpeg.RSTLOG[3].val)

# Dump the whole (aligned) output buffer, not just the compressed bytes.
output_data = iface.readmem(output_buf_phys, output_sz_aligned)

with open('jpegblockout.bin', 'wb') as f:
    f.write(output_data)
|
|
|
# #### THIS FOR DECODE |
|
# input_buf_phys = u.heap.memalign(0x4000, jpeg_sz_aligned) |
|
# output_buf_phys = u.heap.memalign(0x4000, output_img_sz_aligned) |
|
# print(f"buffers (phys) {input_buf_phys:016X} {output_buf_phys:016X}") |
|
|
|
# input_buf_iova = dart.iomap(0, input_buf_phys, jpeg_sz_aligned) |
|
# output_buf_iova = dart.iomap(0, output_buf_phys, output_img_sz_aligned) |
|
# print(f"buffers (iova) {input_buf_iova:08X} {output_buf_iova:08X}") |
|
# dart.dump_all() |
|
|
|
# # jpeg_data += b'\xaa' * (8 - (len(jpeg_data) % 8)) |
|
# # assert len(jpeg_data) % 8 == 0 |
|
# # jpeg_data_2 = b'' |
|
# # for i in range(len(jpeg_data) // 8): |
|
# # jpeg_data_2 += jpeg_data[i*8:(i+1)*8][::-1] |
|
# # jpeg_data = jpeg_data_2 |
|
|
|
# iface.writemem(input_buf_phys, jpeg_data) |
|
# iface.writemem(output_buf_phys, b'\xAA' * output_img_sz_aligned) |
|
# print("JPEG uploaded") |
|
|
|
# decode(input_buf_iova, jpeg_sz_aligned, output_buf_iova, output_img_sz_aligned) |
|
|
|
# time.sleep(1) |
|
|
|
# print(jpeg.STATUS.reg) |
|
# print(jpeg.PERFCOUNTER.reg) |
|
|
|
# output_data = iface.readmem(output_buf_phys, output_img_sz_aligned) |
|
# with open('testtest.bin', 'wb') as f: |
|
# f.write(output_data) |
|
|
|
# img = Image.new(mode='RGBA', size=(output_W * 4, output_H * 4)) |
|
# draw = ImageDraw.Draw(img) |
|
# output_elemsz = 4 |
|
# output_stride = output_W * 4 |
|
# for y in range(output_H): |
|
# for x in range(output_W): |
|
# block = output_data[y*output_stride + x*output_elemsz:y*output_stride + (x+1)*output_elemsz] |
|
|
|
# r, g, b, a = block |
|
# # val = struct.unpack("<H", block)[0] |
|
# # r = (val >> 11 & 0x1F) * 8 |
|
# # g = (val >> 5 & 0x3F) * 7 |
|
# # b = (val & 0x1F) * 8 |
|
# # a = 255 |
|
|
|
# # img.putpixel((x, y), (r, g, b, a)) |
|
# draw.rectangle((x*4,y*4,(x+1)*4,(y+1)*4), fill=(r, g, b, a)) |
|
|
|
|
|
|
|
# # cb, y0, cr, y1 = block |
|
|
|
# # y0 -= 16 |
|
# # y1 -= 16 |
|
# # cb -= 128 |
|
# # cr -= 128 |
|
|
|
# # cb /= 255 |
|
# # y0 /= 255 |
|
# # cr /= 255 |
|
# # y1 /= 255 |
|
|
|
# # r0 = y0 + 1.13983 * cr |
|
# # g0 = y0 - 0.39465 * cb - 0.58060 * cr |
|
# # b0 = y0 + 2.03211 * cb |
|
# # r1 = y1 + 1.13983 * cr |
|
# # g1 = y1 - 0.39465 * cb - 0.58060 * cr |
|
# # b1 = y1 + 2.03211 * cb |
|
|
|
# # r0 = min(255, max(0, int(r0 * 255))) |
|
# # g0 = min(255, max(0, int(g0 * 255))) |
|
# # b0 = min(255, max(0, int(b0 * 255))) |
|
# # r1 = min(255, max(0, int(r1 * 255))) |
|
# # g1 = min(255, max(0, int(g1 * 255))) |
|
# # b1 = min(255, max(0, int(b1 * 255))) |
|
|
|
# # draw.rectangle((x*2*4,y*4,(x*2+1)*4,(y+1)*4), fill=(r0, g0, b0, 255)) |
|
# # draw.rectangle(((x*2+1)*4,y*4,(x*2+1+1)*4,(y+1)*4), fill=(r1, g1, b1, 255)) |
|
|
|
|
|
|
|
# for y in range(output_H // 4): |
|
# for x in range(output_W // 4): |
|
# draw.rectangle((x*4*8,y*4*8,(x+1)*4*8,(y+1)*4*8), outline=(0, 0, 0, 255)) |
|
# img.save('testtest.png') |
|
# # img.show() |