tkf · January 20, 2019 09:01
diff --git a/index.ipynb b/index.ipynb
 {
 "cells": [
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "inner_ismissing (generic function with 2 methods)"
     },
     "metadata": {},
     "execution_count": 1
    }
   ],
   "cell_type": "code",
   "source": [
    "using BenchmarkTools\n",
    "\n",
    "macro simd_if(simd, loop)\n",
    "    # Aggressively using `$` since `esc(loop)` did not work with\n",
    "    # `@simd` macro.\n",
    "    ex = quote\n",
    "        if $simd === Val(:ivdep)\n",
    "            $Base.@simd ivdep $loop\n",
    "        elseif $simd === Val(true)\n",
    "            $Base.@simd $loop\n",
    "        else\n",
    "            $loop\n",
    "        end\n",
    "    end\n",
    "    return esc(ex)\n",
    "end\n",
    "\n",
    "function inner_isa_missing(x, y, simd = Val(false))\n",
    "    s = zero(eltype(x))\n",
    "    @inbounds @simd_if simd for i = eachindex(x)\n",
    "        xi = x[i]  # this helps type inference\n",
    "        yi = y[i]\n",
    "        # Nested if helps type inference:\n",
    "        if !(xi isa Missing)\n",
    "            if !(yi isa Missing)\n",
    "                s += xi * yi\n",
    "            end\n",
    "        end\n",
    "    end\n",
    "    return s\n",
    "end\n",
    "\n",
    "function inner_isa_missing_notmp(x, y, simd = Val(false))\n",
    "    s = zero(eltype(x))\n",
    "    @inbounds @simd_if simd for i = eachindex(x)\n",
    "        if !(x[i] isa Missing)\n",
    "            if !(y[i] isa Missing)\n",
    "                s += x[i] * y[i]\n",
    "            end\n",
    "        end\n",
    "    end\n",
    "    return s\n",
    "end\n",
    "\n",
    "function inner_isa_missing_nonesting(x, y, simd = Val(false))\n",
    "    s = zero(eltype(x))\n",
    "    @inbounds @simd_if simd for i = eachindex(x)\n",
    "        xi = x[i]\n",
    "        yi = y[i]\n",
    "        if !(xi isa Missing) &&!(yi isa Missing)\n",
    "            s += xi * yi\n",
    "        end\n",
    "    end\n",
    "    return s\n",
    "end\n",
    "\n",
    "function inner_ismissing(x, y, simd = Val(false))\n",
    "    s = zero(eltype(x))\n",
    "    @inbounds @simd_if simd for i = eachindex(x)\n",
    "        xi = x[i]  # this helps type inference\n",
    "        yi = y[i]\n",
    "        if !ismissing(xi) & !ismissing(yi)\n",
    "            s += xi * yi\n",
    "        end\n",
    "    end\n",
    "    return s\n",
    "end"
   ],
   "metadata": {},
   "execution_count": 1
  },
  {
   "outputs": [],
   "cell_type": "markdown",
   "source": [
    "Define benchmarks"
   ],
   "metadata": {}
  },
  {
   "outputs": [],
   "cell_type": "code",
   "source": [
    "n = 2^10\n",
    "xs = [abs(x) > 0.5 ? missing : x for x in randn(n)]\n",
    "ys = [abs(x) > 0.5 ? missing : x for x in randn(n)]\n",
    "\n",
    "suite = BenchmarkGroup()\n",
    "\n",
    "functions = [\n",
    "    inner_isa_missing\n",
    "    inner_isa_missing_notmp\n",
    "    inner_isa_missing_nonesting\n",
    "    inner_ismissing\n",
    "]\n",
    "for f in functions\n",
    "    suite[nameof(f)] = BenchmarkGroup()\n",
    "    suite[nameof(f)][\"nosimd\"] = @benchmarkable $f($xs, $ys)\n",
    "    suite[nameof(f)][\"simd\"] = @benchmarkable $f($xs, $ys, Val(true))\n",
    "end"
   ],
   "metadata": {},
   "execution_count": 2
  },
  {
   "outputs": [],
   "cell_type": "markdown",
   "source": [
    "Run benchmarks:"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4-element BenchmarkTools.BenchmarkGroup:\n",
      "  tags: []\n",
      "  :inner_isa_missing_nonesting => 2-element BenchmarkTools.BenchmarkGroup:\n",
      "\t  tags: []\n",
      "\t  \"nosimd\" => BenchmarkTools.Trial: \n",
      "\t\t  memory estimate:  0 bytes\n",
      "\t\t  allocs estimate:  0\n",
      "\t\t  --------------\n",
      "\t\t  minimum time:     3.175 μs (0.00% GC)\n",
      "\t\t  median time:      3.586 μs (0.00% GC)\n",
      "\t\t  mean time:        3.796 μs (0.00% GC)\n",
      "\t\t  maximum time:     17.913 μs (0.00% GC)\n",
      "\t\t  --------------\n",
      "\t\t  samples:          10000\n",
      "\t\t  evals/sample:     1\n",
      "\t  \"simd\" => BenchmarkTools.Trial: \n",
      "\t\t  memory estimate:  0 bytes\n",
      "\t\t  allocs estimate:  0\n",
      "\t\t  --------------\n",
      "\t\t  minimum time:     1.489 μs (0.00% GC)\n",
      "\t\t  median time:      1.563 μs (0.00% GC)\n",
      "\t\t  mean time:        1.589 μs (0.00% GC)\n",
      "\t\t  maximum time:     17.407 μs (0.00% GC)\n",
      "\t\t  --------------\n",
      "\t\t  samples:          10000\n",
      "\t\t  evals/sample:     1\n",
      "  :inner_isa_missing => 2-element BenchmarkTools.BenchmarkGroup:\n",
      "\t  tags: []\n",
      "\t  \"nosimd\" => BenchmarkTools.Trial: \n",
      "\t\t  memory estimate:  0 bytes\n",
      "\t\t  allocs estimate:  0\n",
      "\t\t  --------------\n",
      "\t\t  minimum time:     890.000 ns (0.00% GC)\n",
      "\t\t  median time:      950.000 ns (0.00% GC)\n",
      "\t\t  mean time:        986.385 ns (0.00% GC)\n",
      "\t\t  maximum time:     14.609 μs (0.00% GC)\n",
      "\t\t  --------------\n",
      "\t\t  samples:          10000\n",
      "\t\t  evals/sample:     1\n",
      "\t  \"simd\" => BenchmarkTools.Trial: \n",
      "\t\t  memory estimate:  0 bytes\n",
      "\t\t  allocs estimate:  0\n",
      "\t\t  --------------\n",
      "\t\t  minimum time:     1.288 μs (0.00% GC)\n",
      "\t\t  median time:      1.493 μs (0.00% GC)\n",
      "\t\t  mean time:        1.508 μs (0.00% GC)\n",
      "\t\t  maximum time:     14.874 μs (0.00% GC)\n",
      "\t\t  --------------\n",
      "\t\t  samples:          10000\n",
      "\t\t  evals/sample:     1\n",
      "  :inner_ismissing => 2-element BenchmarkTools.BenchmarkGroup:\n",
      "\t  tags: []\n",
      "\t  \"nosimd\" => BenchmarkTools.Trial: \n",
      "\t\t  memory estimate:  0 bytes\n",
      "\t\t  allocs estimate:  0\n",
      "\t\t  --------------\n",
      "\t\t  minimum time:     2.402 μs (0.00% GC)\n",
      "\t\t  median time:      2.906 μs (0.00% GC)\n",
      "\t\t  mean time:        2.845 μs (0.00% GC)\n",
      "\t\t  maximum time:     16.090 μs (0.00% GC)\n",
      "\t\t  --------------\n",
      "\t\t  samples:          10000\n",
      "\t\t  evals/sample:     1\n",
      "\t  \"simd\" => BenchmarkTools.Trial: \n",
      "\t\t  memory estimate:  0 bytes\n",
      "\t\t  allocs estimate:  0\n",
      "\t\t  --------------\n",
      "\t\t  minimum time:     1.380 μs (0.00% GC)\n",
      "\t\t  median time:      1.558 μs (0.00% GC)\n",
      "\t\t  mean time:        1.993 μs (0.00% GC)\n",
      "\t\t  maximum time:     14.775 μs (0.00% GC)\n",
      "\t\t  --------------\n",
      "\t\t  samples:          10000\n",
      "\t\t  evals/sample:     1\n",
      "  :inner_isa_missing_notmp => 2-element BenchmarkTools.BenchmarkGroup:\n",
      "\t  tags: []\n",
      "\t  \"nosimd\" => BenchmarkTools.Trial: \n",
      "\t\t  memory estimate:  0 bytes\n",
      "\t\t  allocs estimate:  0\n",
      "\t\t  --------------\n",
      "\t\t  minimum time:     2.633 μs (0.00% GC)\n",
      "\t\t  median time:      3.166 μs (0.00% GC)\n",
      "\t\t  mean time:        3.259 μs (0.00% GC)\n",
      "\t\t  maximum time:     17.693 μs (0.00% GC)\n",
      "\t\t  --------------\n",
      "\t\t  samples:          10000\n",
      "\t\t  evals/sample:     1\n",
      "\t  \"simd\" => BenchmarkTools.Trial: \n",
      "\t\t  memory estimate:  0 bytes\n",
      "\t\t  allocs estimate:  0\n",
      "\t\t  --------------\n",
      "\t\t  minimum time:     1.522 μs (0.00% GC)\n",
      "\t\t  median time:      1.707 μs (0.00% GC)\n",
      "\t\t  mean time:        1.753 μs (0.00% GC)\n",
      "\t\t  maximum time:     15.844 μs (0.00% GC)\n",
      "\t\t  --------------\n",
      "\t\t  samples:          10000\n",
      "\t\t  evals/sample:     1"
     ]
    }
   ],
   "cell_type": "code",
   "source": [
    "results = run(suite; verbose = (@__MODULE__) === Main)\n",
    "show(IOContext(stdout, :compact => false), results)\n",
    "flush(stdout)"
   ],
   "metadata": {},
   "execution_count": 3
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "4×3 DataFrame\n│ Row │ name                        │ nosimd     │ simd      │\n│     │ \u001b[90mSymbol\u001b[39m                      │ \u001b[90mBenchmar…\u001b[39m  │ \u001b[90mBenchmar…\u001b[39m │\n├─────┼─────────────────────────────┼────────────┼───────────┤\n│ 1   │ inner_isa_missing           │ 890.000 ns │ 1.288 μs  │\n│ 2   │ inner_isa_missing_notmp     │ 2.633 μs   │ 1.522 μs  │\n│ 3   │ inner_isa_missing_nonesting │ 3.175 μs   │ 1.489 μs  │\n│ 4   │ inner_ismissing             │ 2.402 μs   │ 1.380 μs  │",
      "text/html": [
       "<table class=\"data-frame\"><thead><tr><th></th><th>name</th><th>nosimd</th><th>simd</th></tr><tr><th></th><th>Symbol</th><th>Benchmar…</th><th>Benchmar…</th></tr></thead><tbody><p>4 rows × 3 columns</p><tr><th>1</th><td>inner_isa_missing</td><td>890.000 ns</td><td>1.288 μs</td></tr><tr><th>2</th><td>inner_isa_missing_notmp</td><td>2.633 μs</td><td>1.522 μs</td></tr><tr><th>3</th><td>inner_isa_missing_nonesting</td><td>3.175 μs</td><td>1.489 μs</td></tr><tr><th>4</th><td>inner_ismissing</td><td>2.402 μs</td><td>1.380 μs</td></tr></tbody></table>"
      ]
     },
     "metadata": {},
     "execution_count": 4
    }
   ],
   "cell_type": "code",
   "source": [
    "using DataFrames\n",
    "df = DataFrame(\n",
    "    name = nameof.(functions),\n",
    "    nosimd = [results[nameof(f)][\"nosimd\"] for f in functions],\n",
    "    simd = [results[nameof(f)][\"simd\"] for f in functions],\n",
    ")"
   ],
   "metadata": {},
   "execution_count": 4
  },
  {
   "outputs": [],
   "cell_type": "markdown",
   "source": [
    "As, IIUC, `@simd` macro relies on LLVM to do SIMD, I have to look at\n",
    "the native code to check if the compiler generated SIMD\n",
    "instructions.  But the difference is not obvious (at least just by\n",
    "looking at SIMD instructions in the native code)."
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\tvxorpd\t%xmm0, %xmm0, %xmm0\n\tvmovsd\t(%r8,%rcx,8), %xmm1     # xmm1 = mem[0],zero\n\tvmulsd\t(%rdx,%rcx,8), %xmm1, %xmm1\n\tvaddsd\t%xmm1, %xmm0, %xmm0\n\tvxorps\t%xmm0, %xmm0, %xmm0"
     },
     "metadata": {},
     "execution_count": 5
    }
   ],
   "cell_type": "code",
   "source": [
    "using InteractiveUtils\n",
    "native_code(f, args) = sprint(code_native, f, Base.typesof(args...))\n",
    "grep(pattern, c) = Text(join([m.match for m in eachmatch(pattern, c)], \"\\n\"))\n",
    "\n",
    "grep(r\".*[xyz]mm.*\", native_code(inner_isa_missing, (xs, ys)))"
   ],
   "metadata": {},
   "execution_count": 5
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\tvxorpd\t%xmm0, %xmm0, %xmm0\n\tvmovsd\t(%r8,%rcx,8), %xmm1     # xmm1 = mem[0],zero\n\tvmulsd\t(%rdx,%rcx,8), %xmm1, %xmm1\n\tvaddsd\t%xmm1, %xmm0, %xmm0\n\tvxorps\t%xmm0, %xmm0, %xmm0"
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "cell_type": "code",
   "source": [
    "grep(r\".*[xyz]mm.*\", native_code(inner_isa_missing, (xs, ys, Val(true))))"
   ],
   "metadata": {},
   "execution_count": 6
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\tvmovsd\t(%r9,%rbp,8), %xmm0     # xmm0 = mem[0],zero\n\tvmulsd\t(%rax,%rbp,8), %xmm0, %xmm0\n\tvaddsd\t(%rcx), %xmm0, %xmm0\n\tvmovq\t%xmm0, %r15"
     },
     "metadata": {},
     "execution_count": 7
    }
   ],
   "cell_type": "code",
   "source": [
    "grep(r\".*[xyz]mm.*\", native_code(inner_isa_missing_notmp, (xs, ys)))"
   ],
   "metadata": {},
   "execution_count": 7
  },
  {
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": "\tvmovsd\t(%r9,%rbx,8), %xmm0     # xmm0 = mem[0],zero\n\tvmulsd\t(%rdi,%rbx,8), %xmm0, %xmm0\n\tvaddsd\t(%rsi), %xmm0, %xmm0\n\tvmovq\t%xmm0, %r14"
     },
     "metadata": {},
     "execution_count": 8
    }
   ],
   "cell_type": "code",
   "source": [
    "grep(r\".*[xyz]mm.*\", native_code(inner_isa_missing_notmp, (xs, ys, Val(true))))"
   ],
   "metadata": {},
   "execution_count": 8
  },
  {
   "outputs": [],
   "cell_type": "markdown",
   "source": [
    "Environment"
   ],
   "metadata": {}
  },
  {
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Julia Version 1.2.0-DEV.185\n",
      "Commit 51849dfe51 (2019-01-20 00:31 UTC)\n",
      "Platform Info:\n",
      "  OS: Linux (x86_64-pc-linux-gnu)\n",
      "  CPU: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz\n",
      "  WORD_SIZE: 64\n",
      "  LIBM: libopenlibm\n",
      "  LLVM: libLLVM-6.0.1 (ORCJIT, skylake)\n"
     ]
    }
   ],
   "cell_type": "code",
   "source": [
    "using InteractiveUtils\n",
    "versioninfo()"
   ],
   "metadata": {},
   "execution_count": 9
  },
  {
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    Status `~/.julia/environments/v1.2/Project.toml`\n",
      "  [34da2185] Compat v1.3.0\n",
      "  [a93c6f00] DataFrames v0.17.0+ [`~/.julia/dev/DataFrames`]\n",
      "  [82899510] IteratorInterfaceExtensions v0.1.1\n",
      "  [189a3867] Reexport v0.2.0\n",
      "  [3783bdb8] TableTraits v0.4.0\n",
      "  [bd369af6] Tables v0.1.12\n"
     ]
    }
   ],
   "cell_type": "code",
   "source": [
    "using Pkg\n",
    "pkg\"status DataFrames\""
   ],
   "metadata": {},
   "execution_count": 10
  },
  {
   "outputs": [],
   "cell_type": "markdown",
   "source": [
    "*This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl).*"
   ],
   "metadata": {}
  }
 ],
 "nbformat_minor": 3,
 "metadata": {
  "language_info": {
   "file_extension": ".jl",
   "mimetype": "application/julia",
   "name": "julia",
   "version": "1.2.0-DEV.185"
  },
  "kernelspec": {
   "name": "julia-1.2",
   "display_name": "Julia 1.2.0-DEV.185",
   "language": "julia"
  }
 },
 "nbformat": 4
 }
	{
	"cells": [
	{
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "inner_ismissing (generic function with 2 methods)"
	},
	"metadata": {},
	"execution_count": 1
	}
	],
	"cell_type": "code",
	"source": [
	"using BenchmarkTools\n",
	"\n",
	"macro simd_if(simd, loop)\n",
	" # Aggressively using `$` since `esc(loop)` did not work with\n",
	" # `@simd` macro.\n",
	" ex = quote\n",
	" if $simd === Val(:ivdep)\n",
	" $Base.@simd ivdep $loop\n",
	" elseif $simd === Val(true)\n",
	" $Base.@simd $loop\n",
	" else\n",
	" $loop\n",
	" end\n",
	" end\n",
	" return esc(ex)\n",
	"end\n",
	"\n",
	"function inner_isa_missing(x, y, simd = Val(false))\n",
	" s = zero(eltype(x))\n",
	" @inbounds @simd_if simd for i = eachindex(x)\n",
	" xi = x[i] # this helps type inference\n",
	" yi = y[i]\n",
	" # Nested if helps type inference:\n",
	" if !(xi isa Missing)\n",
	" if !(yi isa Missing)\n",
	" s += xi * yi\n",
	" end\n",
	" end\n",
	" end\n",
	" return s\n",
	"end\n",
	"\n",
	"function inner_isa_missing_notmp(x, y, simd = Val(false))\n",
	" s = zero(eltype(x))\n",
	" @inbounds @simd_if simd for i = eachindex(x)\n",
	" if !(x[i] isa Missing)\n",
	" if !(y[i] isa Missing)\n",
	" s += x[i] * y[i]\n",
	" end\n",
	" end\n",
	" end\n",
	" return s\n",
	"end\n",
	"\n",
	"function inner_isa_missing_nonesting(x, y, simd = Val(false))\n",
	" s = zero(eltype(x))\n",
	" @inbounds @simd_if simd for i = eachindex(x)\n",
	" xi = x[i]\n",
	" yi = y[i]\n",
	" if !(xi isa Missing) &&!(yi isa Missing)\n",
	" s += xi * yi\n",
	" end\n",
	" end\n",
	" return s\n",
	"end\n",
	"\n",
	"function inner_ismissing(x, y, simd = Val(false))\n",
	" s = zero(eltype(x))\n",
	" @inbounds @simd_if simd for i = eachindex(x)\n",
	" xi = x[i] # this helps type inference\n",
	" yi = y[i]\n",
	" if !ismissing(xi) & !ismissing(yi)\n",
	" s += xi * yi\n",
	" end\n",
	" end\n",
	" return s\n",
	"end"
	],
	"metadata": {},
	"execution_count": 1
	},
	{
	"outputs": [],
	"cell_type": "markdown",
	"source": [
	"Define benchmarks"
	],
	"metadata": {}
	},
	{
	"outputs": [],
	"cell_type": "code",
	"source": [
	"n = 2^10\n",
	"xs = [abs(x) > 0.5 ? missing : x for x in randn(n)]\n",
	"ys = [abs(x) > 0.5 ? missing : x for x in randn(n)]\n",
	"\n",
	"suite = BenchmarkGroup()\n",
	"\n",
	"functions = [\n",
	" inner_isa_missing\n",
	" inner_isa_missing_notmp\n",
	" inner_isa_missing_nonesting\n",
	" inner_ismissing\n",
	"]\n",
	"for f in functions\n",
	" suite[nameof(f)] = BenchmarkGroup()\n",
	" suite[nameof(f)][\"nosimd\"] = @benchmarkable $f($xs, $ys)\n",
	" suite[nameof(f)][\"simd\"] = @benchmarkable $f($xs, $ys, Val(true))\n",
	"end"
	],
	"metadata": {},
	"execution_count": 2
	},
	{
	"outputs": [],
	"cell_type": "markdown",
	"source": [
	"Run benchmarks:"
	],
	"metadata": {}
	},
	{
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"4-element BenchmarkTools.BenchmarkGroup:\n",
	" tags: []\n",
	" :inner_isa_missing_nonesting => 2-element BenchmarkTools.BenchmarkGroup:\n",
	"\t tags: []\n",
	"\t \"nosimd\" => BenchmarkTools.Trial: \n",
	"\t\t memory estimate: 0 bytes\n",
	"\t\t allocs estimate: 0\n",
	"\t\t --------------\n",
	"\t\t minimum time: 3.175 μs (0.00% GC)\n",
	"\t\t median time: 3.586 μs (0.00% GC)\n",
	"\t\t mean time: 3.796 μs (0.00% GC)\n",
	"\t\t maximum time: 17.913 μs (0.00% GC)\n",
	"\t\t --------------\n",
	"\t\t samples: 10000\n",
	"\t\t evals/sample: 1\n",
	"\t \"simd\" => BenchmarkTools.Trial: \n",
	"\t\t memory estimate: 0 bytes\n",
	"\t\t allocs estimate: 0\n",
	"\t\t --------------\n",
	"\t\t minimum time: 1.489 μs (0.00% GC)\n",
	"\t\t median time: 1.563 μs (0.00% GC)\n",
	"\t\t mean time: 1.589 μs (0.00% GC)\n",
	"\t\t maximum time: 17.407 μs (0.00% GC)\n",
	"\t\t --------------\n",
	"\t\t samples: 10000\n",
	"\t\t evals/sample: 1\n",
	" :inner_isa_missing => 2-element BenchmarkTools.BenchmarkGroup:\n",
	"\t tags: []\n",
	"\t \"nosimd\" => BenchmarkTools.Trial: \n",
	"\t\t memory estimate: 0 bytes\n",
	"\t\t allocs estimate: 0\n",
	"\t\t --------------\n",
	"\t\t minimum time: 890.000 ns (0.00% GC)\n",
	"\t\t median time: 950.000 ns (0.00% GC)\n",
	"\t\t mean time: 986.385 ns (0.00% GC)\n",
	"\t\t maximum time: 14.609 μs (0.00% GC)\n",
	"\t\t --------------\n",
	"\t\t samples: 10000\n",
	"\t\t evals/sample: 1\n",
	"\t \"simd\" => BenchmarkTools.Trial: \n",
	"\t\t memory estimate: 0 bytes\n",
	"\t\t allocs estimate: 0\n",
	"\t\t --------------\n",
	"\t\t minimum time: 1.288 μs (0.00% GC)\n",
	"\t\t median time: 1.493 μs (0.00% GC)\n",
	"\t\t mean time: 1.508 μs (0.00% GC)\n",
	"\t\t maximum time: 14.874 μs (0.00% GC)\n",
	"\t\t --------------\n",
	"\t\t samples: 10000\n",
	"\t\t evals/sample: 1\n",
	" :inner_ismissing => 2-element BenchmarkTools.BenchmarkGroup:\n",
	"\t tags: []\n",
	"\t \"nosimd\" => BenchmarkTools.Trial: \n",
	"\t\t memory estimate: 0 bytes\n",
	"\t\t allocs estimate: 0\n",
	"\t\t --------------\n",
	"\t\t minimum time: 2.402 μs (0.00% GC)\n",
	"\t\t median time: 2.906 μs (0.00% GC)\n",
	"\t\t mean time: 2.845 μs (0.00% GC)\n",
	"\t\t maximum time: 16.090 μs (0.00% GC)\n",
	"\t\t --------------\n",
	"\t\t samples: 10000\n",
	"\t\t evals/sample: 1\n",
	"\t \"simd\" => BenchmarkTools.Trial: \n",
	"\t\t memory estimate: 0 bytes\n",
	"\t\t allocs estimate: 0\n",
	"\t\t --------------\n",
	"\t\t minimum time: 1.380 μs (0.00% GC)\n",
	"\t\t median time: 1.558 μs (0.00% GC)\n",
	"\t\t mean time: 1.993 μs (0.00% GC)\n",
	"\t\t maximum time: 14.775 μs (0.00% GC)\n",
	"\t\t --------------\n",
	"\t\t samples: 10000\n",
	"\t\t evals/sample: 1\n",
	" :inner_isa_missing_notmp => 2-element BenchmarkTools.BenchmarkGroup:\n",
	"\t tags: []\n",
	"\t \"nosimd\" => BenchmarkTools.Trial: \n",
	"\t\t memory estimate: 0 bytes\n",
	"\t\t allocs estimate: 0\n",
	"\t\t --------------\n",
	"\t\t minimum time: 2.633 μs (0.00% GC)\n",
	"\t\t median time: 3.166 μs (0.00% GC)\n",
	"\t\t mean time: 3.259 μs (0.00% GC)\n",
	"\t\t maximum time: 17.693 μs (0.00% GC)\n",
	"\t\t --------------\n",
	"\t\t samples: 10000\n",
	"\t\t evals/sample: 1\n",
	"\t \"simd\" => BenchmarkTools.Trial: \n",
	"\t\t memory estimate: 0 bytes\n",
	"\t\t allocs estimate: 0\n",
	"\t\t --------------\n",
	"\t\t minimum time: 1.522 μs (0.00% GC)\n",
	"\t\t median time: 1.707 μs (0.00% GC)\n",
	"\t\t mean time: 1.753 μs (0.00% GC)\n",
	"\t\t maximum time: 15.844 μs (0.00% GC)\n",
	"\t\t --------------\n",
	"\t\t samples: 10000\n",
	"\t\t evals/sample: 1"
	]
	}
	],
	"cell_type": "code",
	"source": [
	"results = run(suite; verbose = (@__MODULE__) === Main)\n",
	"show(IOContext(stdout, :compact => false), results)\n",
	"flush(stdout)"
	],
	"metadata": {},
	"execution_count": 3
	},
	{
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "4×3 DataFrame\n│ Row │ name │ nosimd │ simd │\n│ │ \u001b[90mSymbol\u001b[39m │ \u001b[90mBenchmar…\u001b[39m │ \u001b[90mBenchmar…\u001b[39m │\n├─────┼─────────────────────────────┼────────────┼───────────┤\n│ 1 │ inner_isa_missing │ 890.000 ns │ 1.288 μs │\n│ 2 │ inner_isa_missing_notmp │ 2.633 μs │ 1.522 μs │\n│ 3 │ inner_isa_missing_nonesting │ 3.175 μs │ 1.489 μs │\n│ 4 │ inner_ismissing │ 2.402 μs │ 1.380 μs │",
	"text/html": [
	"<table class=\"data-frame\"><thead><tr><th></th><th>name</th><th>nosimd</th><th>simd</th></tr><tr><th></th><th>Symbol</th><th>Benchmar…</th><th>Benchmar…</th></tr></thead><tbody><p>4 rows × 3 columns</p><tr><th>1</th><td>inner_isa_missing</td><td>890.000 ns</td><td>1.288 μs</td></tr><tr><th>2</th><td>inner_isa_missing_notmp</td><td>2.633 μs</td><td>1.522 μs</td></tr><tr><th>3</th><td>inner_isa_missing_nonesting</td><td>3.175 μs</td><td>1.489 μs</td></tr><tr><th>4</th><td>inner_ismissing</td><td>2.402 μs</td><td>1.380 μs</td></tr></tbody></table>"
	]
	},
	"metadata": {},
	"execution_count": 4
	}
	],
	"cell_type": "code",
	"source": [
	"using DataFrames\n",
	"df = DataFrame(\n",
	" name = nameof.(functions),\n",
	" nosimd = [results[nameof(f)][\"nosimd\"] for f in functions],\n",
	" simd = [results[nameof(f)][\"simd\"] for f in functions],\n",
	")"
	],
	"metadata": {},
	"execution_count": 4
	},
	{
	"outputs": [],
	"cell_type": "markdown",
	"source": [
	"As, IIUC, `@simd` macro relies on LLVM to do SIMD, I have to look at\n",
	"the native code to check if the compiler generated SIMD\n",
	"instructions. But the difference is not obvious (at least just by\n",
	"looking at SIMD instructions in the native code)."
	],
	"metadata": {}
	},
	{
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "\tvxorpd\t%xmm0, %xmm0, %xmm0\n\tvmovsd\t(%r8,%rcx,8), %xmm1 # xmm1 = mem[0],zero\n\tvmulsd\t(%rdx,%rcx,8), %xmm1, %xmm1\n\tvaddsd\t%xmm1, %xmm0, %xmm0\n\tvxorps\t%xmm0, %xmm0, %xmm0"
	},
	"metadata": {},
	"execution_count": 5
	}
	],
	"cell_type": "code",
	"source": [
	"using InteractiveUtils\n",
	"native_code(f, args) = sprint(code_native, f, Base.typesof(args...))\n",
	"grep(pattern, c) = Text(join([m.match for m in eachmatch(pattern, c)], \"\\n\"))\n",
	"\n",
	"grep(r\".[xyz]mm.\", native_code(inner_isa_missing, (xs, ys)))"
	],
	"metadata": {},
	"execution_count": 5
	},
	{
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "\tvxorpd\t%xmm0, %xmm0, %xmm0\n\tvmovsd\t(%r8,%rcx,8), %xmm1 # xmm1 = mem[0],zero\n\tvmulsd\t(%rdx,%rcx,8), %xmm1, %xmm1\n\tvaddsd\t%xmm1, %xmm0, %xmm0\n\tvxorps\t%xmm0, %xmm0, %xmm0"
	},
	"metadata": {},
	"execution_count": 6
	}
	],
	"cell_type": "code",
	"source": [
	"grep(r\".[xyz]mm.\", native_code(inner_isa_missing, (xs, ys, Val(true))))"
	],
	"metadata": {},
	"execution_count": 6
	},
	{
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "\tvmovsd\t(%r9,%rbp,8), %xmm0 # xmm0 = mem[0],zero\n\tvmulsd\t(%rax,%rbp,8), %xmm0, %xmm0\n\tvaddsd\t(%rcx), %xmm0, %xmm0\n\tvmovq\t%xmm0, %r15"
	},
	"metadata": {},
	"execution_count": 7
	}
	],
	"cell_type": "code",
	"source": [
	"grep(r\".[xyz]mm.\", native_code(inner_isa_missing_notmp, (xs, ys)))"
	],
	"metadata": {},
	"execution_count": 7
	},
	{
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": "\tvmovsd\t(%r9,%rbx,8), %xmm0 # xmm0 = mem[0],zero\n\tvmulsd\t(%rdi,%rbx,8), %xmm0, %xmm0\n\tvaddsd\t(%rsi), %xmm0, %xmm0\n\tvmovq\t%xmm0, %r14"
	},
	"metadata": {},
	"execution_count": 8
	}
	],
	"cell_type": "code",
	"source": [
	"grep(r\".[xyz]mm.\", native_code(inner_isa_missing_notmp, (xs, ys, Val(true))))"
	],
	"metadata": {},
	"execution_count": 8
	},
	{
	"outputs": [],
	"cell_type": "markdown",
	"source": [
	"Environment"
	],
	"metadata": {}
	},
	{
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Julia Version 1.2.0-DEV.185\n",
	"Commit 51849dfe51 (2019-01-20 00:31 UTC)\n",
	"Platform Info:\n",
	" OS: Linux (x86_64-pc-linux-gnu)\n",
	" CPU: Intel(R) Core(TM) i7-8550U CPU @ 1.80GHz\n",
	" WORD_SIZE: 64\n",
	" LIBM: libopenlibm\n",
	" LLVM: libLLVM-6.0.1 (ORCJIT, skylake)\n"
	]
	}
	],
	"cell_type": "code",
	"source": [
	"using InteractiveUtils\n",
	"versioninfo()"
	],
	"metadata": {},
	"execution_count": 9
	},
	{
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" Status `~/.julia/environments/v1.2/Project.toml`\n",
	" [34da2185] Compat v1.3.0\n",
	" [a93c6f00] DataFrames v0.17.0+ [`~/.julia/dev/DataFrames`]\n",
	" [82899510] IteratorInterfaceExtensions v0.1.1\n",
	" [189a3867] Reexport v0.2.0\n",
	" [3783bdb8] TableTraits v0.4.0\n",
	" [bd369af6] Tables v0.1.12\n"
	]
	}
	],
	"cell_type": "code",
	"source": [
	"using Pkg\n",
	"pkg\"status DataFrames\""
	],
	"metadata": {},
	"execution_count": 10
	},
	{
	"outputs": [],
	"cell_type": "markdown",
	"source": [
	"This notebook was generated using [Literate.jl](https://github.com/fredrikekre/Literate.jl)."
	],
	"metadata": {}
	}
	],
	"nbformat_minor": 3,
	"metadata": {
	"language_info": {
	"file_extension": ".jl",
	"mimetype": "application/julia",
	"name": "julia",
	"version": "1.2.0-DEV.185"
	},
	"kernelspec": {
	"name": "julia-1.2",
	"display_name": "Julia 1.2.0-DEV.185",
	"language": "julia"
	}
	},
	"nbformat": 4
	}