tenderworks · March 2, 2025 19:25
diff --git a/fjit.rb b/fjit.rb
 # frozen_string_literal: true

 # For now, run like this: `ruby --rjit --rjit-disable fjit.rb`
 #
 # Once RJIT is removed, the extra flags will not be necessary

 require "fiddle"
 require "ffi"
 require "jit_buffer"
 require "hacks"
 require "aarch64"
 require "strlen"
 require "benchmark/ips"

 # Experimental FFI replacement: assembles AArch64 machine code that calls a
 # C function directly and installs it as the JIT entry point of an empty
 # Ruby stub method. Intended to be mixed in with `extend FJIT`.
 module FJIT
  # RJIT's C bindings; used below for struct wrappers, offsets and sizes.
  C = RubyVM::RJIT.const_get(:C)
  include AArch64::Registers

  # Reads one pointer-sized word from memory.
  #
  # ptr    - Integer address to read from.
  # offset - Integer byte offset from ptr.
  #
  # Returns the word as an unsigned Integer.
  def read_ptr ptr, offset
    # "J" decodes a native uintptr_t, so the value is always read at pointer
    # width and never comes back negative (which "l!" could do).
    Fiddle::Pointer.new(ptr)[offset, Fiddle::SIZEOF_VOIDP].unpack1("J")
  end

  # Emits instructions that load an immediate into a register 16 bits at a
  # time: MOVZ for the low half-word, then one MOVK per remaining half-word.
  #
  # asm - assembler that receives the instructions.
  # out - destination register.
  # num - non-negative Integer that fits in 64 bits.
  #
  # Raises ArgumentError if num is outside 0..2**64-1.
  def loadi asm, out, num
    unless num.between?(0, 0xFFFF_FFFF_FFFF_FFFF)
      raise ArgumentError, "immediate out of range: #{num}"
    end

    # Always emit the MOVZ so that loading 0 still initializes the register.
    asm.movz out, num & 0xFFFF, lsl: 0
    num >>= 16

    shift = 16
    while num > 0
      asm.movk out, num & 0xFFFF, lsl: shift
      shift += 16
      num >>= 16
    end
  end

  # Defines a singleton method `name` on the receiver and points its iseq's
  # JIT entry at freshly assembled machine code that calls the C function of
  # the same name (looked up in Fiddle::Handle::DEFAULT).
  #
  # name   - Symbol naming both the Ruby method and the C symbol.
  # params - Array of parameter types. Only its length is used (for the stub's
  #          arity); the generated code always passes its argument through
  #          rb_string_value_cstr before calling the C function.
  # ret    - Symbol return type. Only :int is supported.
  #
  # Raises ArgumentError for any other `ret`. The stub method has already been
  # defined by the time that error is raised.
  def attach_function name, params, ret
    # Define an empty stub with one `_` parameter per declared param
    params = params.map { "_" }.join(", ")
    class_eval "def self.#{name}(#{params}); end"

    m = method(name)
    rb_iseq = RubyVM::InstructionSequence.of(m)

    # Get the pointer to the iseq obj
    addr = Fiddle.dlwrap(rb_iseq)

    # Follow RTypedData's data field, then dereference its first word
    # NOTE(review): presumably this yields the rb_iseq_t address -- confirm
    offset = Hacks::STRUCTS["RTypedData"]["data"][0]
    addr = read_ptr(read_ptr(addr, offset), 0)

    iseq_t = C.rb_iseq_t.new addr

    asm = AArch64::Assembler.new
    # X0 has the ec, x1 has the CFP

    # save x0 and X1 on the stack
    asm.stp X0, X1, [SP, -16], :!

    # save X30 (the branch link reg)
    asm.stp X29, X30, [SP, -16], :!

    # Load the control frame's sp field into X0
    asm.ldr X0, [X1, C.rb_control_frame_t.offsetof(:sp)]

    # Put top of stack in X0
    # NOTE(review): this steps back 4 * 8 bytes from sp -- confirm that is the
    # slot holding the argument
    asm.sub(X0, X0, (4 * 8))

    # Get the underlying string pointer
    loadi(asm, X2, Fiddle::Handle::DEFAULT["rb_string_value_cstr"])
    asm.blr X2

    # Call the function
    loadi(asm, X2, Fiddle::Handle::DEFAULT[name.to_s])
    asm.blr X2
    asm.ldp X29, X30, [SP], 16

    case ret
    when :int
      # convert to int: (n << 1) + 1
      asm.lsl(X0, X0, 1)
      asm.add(X0, X0, 1)
    else
      raise ArgumentError, "unknown type #{ret}"
    end

    # restore X0 and X1, but in to X1 and X2 to avoid mov
    asm.ldp X1, X2, [SP], 16

    # pop frame: advance the saved CFP by one frame and store it in ec->cfp
    asm.add(X2, X2, C.rb_control_frame_t.size)
    asm.stur(X2, [X1, C.rb_execution_context_t.offsetof(:cfp)])

    asm.ret

    # Copy the code into a JIT buffer, flip it to executable, and install it
    jit = JITBuffer.new 1024
    jit.writeable!
    asm.write_to jit
    jit.executable!
    iseq_t.body.jit_entry = jit.to_i
  end
 end

 # Baseline for the benchmark: libc's strlen bound through the ffi gem.
 module A
  extend FFI::Library

  ffi_lib("c")
  attach_function(:strlen, %i[string], :int)
 end

 # Baseline for the benchmark: a plain Ruby method call with no native code.
 module B
  class << self
    # Returns the length of x in bytes.
    def strlen(x)
      x.bytesize
    end
  end
 end

 # Benchmark subject: strlen bound through FJIT's generated machine code.
 module C
  extend FJIT

  attach_function(:strlen, %i[string], :int)
 end

 # Compare every way of getting a string's length on the same input
 subject = "foo"

 Benchmark.ips do |bench|
  bench.report("strlen-ffi") do
    A.strlen(subject)
  end
  bench.report("strlen-ruby") do
    B.strlen(subject)
  end
  bench.report("strlen-cext") do
    Strlen.strlen(subject)
  end
  bench.report("ruby-direct") do
    subject.bytesize
  end
  bench.report("strlen-fjit") do
    C.strlen(subject)
  end
  bench.compare!
 end
	# frozen_string_literal: true

	# For now, run like this: `ruby --rjit --rjit-disable fjit.rb`
	#
	# Once RJIT is removed, the extra flags will not be necessary

	require "fiddle"
	require "ffi"
	require "jit_buffer"
	require "hacks"
	require "aarch64"
	require "strlen"
	require "benchmark/ips"

	# Experimental FFI replacement: assembles AArch64 machine code that calls a
	# C function directly and installs it as the JIT entry point of an empty
	# Ruby stub method. Intended to be mixed in with `extend FJIT`.
	module FJIT
	  # RJIT's C bindings; used below for struct wrappers, offsets and sizes.
	  C = RubyVM::RJIT.const_get(:C)
	  include AArch64::Registers

	  # Reads one pointer-sized word from memory.
	  #
	  # ptr    - Integer address to read from.
	  # offset - Integer byte offset from ptr.
	  #
	  # Returns the word as an unsigned Integer.
	  def read_ptr ptr, offset
	    # "J" decodes a native uintptr_t, so the value is always read at pointer
	    # width and never comes back negative (which "l!" could do).
	    Fiddle::Pointer.new(ptr)[offset, Fiddle::SIZEOF_VOIDP].unpack1("J")
	  end

	  # Emits instructions that load an immediate into a register 16 bits at a
	  # time: MOVZ for the low half-word, then one MOVK per remaining half-word.
	  #
	  # asm - assembler that receives the instructions.
	  # out - destination register.
	  # num - non-negative Integer that fits in 64 bits.
	  #
	  # Raises ArgumentError if num is outside 0..2**64-1.
	  def loadi asm, out, num
	    unless num.between?(0, 0xFFFF_FFFF_FFFF_FFFF)
	      raise ArgumentError, "immediate out of range: #{num}"
	    end

	    # Always emit the MOVZ so that loading 0 still initializes the register.
	    asm.movz out, num & 0xFFFF, lsl: 0
	    num >>= 16

	    shift = 16
	    while num > 0
	      asm.movk out, num & 0xFFFF, lsl: shift
	      shift += 16
	      num >>= 16
	    end
	  end

	  # Defines a singleton method `name` on the receiver and points its iseq's
	  # JIT entry at freshly assembled machine code that calls the C function of
	  # the same name (looked up in Fiddle::Handle::DEFAULT).
	  #
	  # name   - Symbol naming both the Ruby method and the C symbol.
	  # params - Array of parameter types. Only its length is used (for the
	  #          stub's arity); the generated code always passes its argument
	  #          through rb_string_value_cstr before calling the C function.
	  # ret    - Symbol return type. Only :int is supported.
	  #
	  # Raises ArgumentError for any other `ret`. The stub method has already
	  # been defined by the time that error is raised.
	  def attach_function name, params, ret
	    # Define an empty stub with one `_` parameter per declared param
	    params = params.map { "_" }.join(", ")
	    class_eval "def self.#{name}(#{params}); end"

	    m = method(name)
	    rb_iseq = RubyVM::InstructionSequence.of(m)

	    # Get the pointer to the iseq obj
	    addr = Fiddle.dlwrap(rb_iseq)

	    # Follow RTypedData's data field, then dereference its first word
	    # NOTE(review): presumably this yields the rb_iseq_t address -- confirm
	    offset = Hacks::STRUCTS["RTypedData"]["data"][0]
	    addr = read_ptr(read_ptr(addr, offset), 0)

	    iseq_t = C.rb_iseq_t.new addr

	    asm = AArch64::Assembler.new
	    # X0 has the ec, x1 has the CFP

	    # save x0 and X1 on the stack
	    asm.stp X0, X1, [SP, -16], :!

	    # save X30 (the branch link reg)
	    asm.stp X29, X30, [SP, -16], :!

	    # Load the control frame's sp field into X0
	    asm.ldr X0, [X1, C.rb_control_frame_t.offsetof(:sp)]

	    # Put top of stack in X0
	    # NOTE(review): this steps back 4 * 8 bytes from sp -- confirm that is
	    # the slot holding the argument
	    asm.sub(X0, X0, (4 * 8))

	    # Get the underlying string pointer
	    loadi(asm, X2, Fiddle::Handle::DEFAULT["rb_string_value_cstr"])
	    asm.blr X2

	    # Call the function
	    loadi(asm, X2, Fiddle::Handle::DEFAULT[name.to_s])
	    asm.blr X2
	    asm.ldp X29, X30, [SP], 16

	    case ret
	    when :int
	      # convert to int: (n << 1) + 1
	      asm.lsl(X0, X0, 1)
	      asm.add(X0, X0, 1)
	    else
	      raise ArgumentError, "unknown type #{ret}"
	    end

	    # restore X0 and X1, but in to X1 and X2 to avoid mov
	    asm.ldp X1, X2, [SP], 16

	    # pop frame: advance the saved CFP by one frame and store it in ec->cfp
	    asm.add(X2, X2, C.rb_control_frame_t.size)
	    asm.stur(X2, [X1, C.rb_execution_context_t.offsetof(:cfp)])

	    asm.ret

	    # Copy the code into a JIT buffer, flip it to executable, and install it
	    jit = JITBuffer.new 1024
	    jit.writeable!
	    asm.write_to jit
	    jit.executable!
	    iseq_t.body.jit_entry = jit.to_i
	  end
	end

	# Baseline for the benchmark: libc's strlen bound through the ffi gem.
	module A
	  extend FFI::Library

	  ffi_lib("c")
	  attach_function(:strlen, %i[string], :int)
	end

	# Baseline for the benchmark: a plain Ruby method call with no native code.
	module B
	  class << self
	    # Returns the length of x in bytes.
	    def strlen(x)
	      x.bytesize
	    end
	  end
	end

	# Benchmark subject: strlen bound through FJIT's generated machine code.
	module C
	  extend FJIT

	  attach_function(:strlen, %i[string], :int)
	end

	# Compare every way of getting a string's length on the same input
	str = "foo"

	Benchmark.ips do |x|
	  x.report("strlen-ffi") { A.strlen(str) }
	  x.report("strlen-ruby") { B.strlen(str) }
	  x.report("strlen-cext") { Strlen.strlen(str) }
	  x.report("ruby-direct") { str.bytesize }
	  x.report("strlen-fjit") { C.strlen(str) }
	  x.compare!
	end