Created
July 8, 2014 04:00
-
-
Save JayKickliter/12a2661016d446c86c3b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"language": "Julia", | |
"name": "", | |
"signature": "sha256:3b2d60d98b839338f5308066590074242994759648c87951cef90b7c08ac1955" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Import [IPPDSP.jl](https://github.com/JayKickliter/IPPDSP.jl) Package" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import IPPDSP" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Import [DSP.jl](https://github.com/JuliaDSP/DSP.jl) Package" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import DSP" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 2 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Prepare Filter Taps and Signal" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"SignalType = Float64\n", | |
"TapsType = Float64\n", | |
"taps = ones( TapsType, 32 )\n", | |
"signal = ones( SignalType, 100_000_000 )\n", | |
"buffer = similar( signal );" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 3 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Setup IPP FIR Filter" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"flt = IPPDSP.FIRFilter( Float64, taps );" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 4 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Native Julia Time-Domain `fir_filter!` & `fir_filter`\n", | |
"\n", | |
"### Two Versions\n", | |
"* **`fir_filter!`**: Uses a pre-allocated buffer\n", | |
"* **`fir_filter`**: Creates an output buffer and calls `fir_filter!`\n", | |
"\n", | |
"### Special Macros\n", | |
"**`@inbounds`** turns off bounds checking. Since the vector lengths are checked before the loops, it is safe to do so. It also speeds up the code.\n", | |
"\n", | |
"**`@simd`** hints to the compiler that the code lends itself to SIMD vectorization." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Time-domain FIR filter: overwrites the pre-allocated `buffer` with the\n", | |
"# convolution of `taps` and `signal`, then returns `buffer`.\n", | |
"#\n", | |
"#   buffer : output vector; must have the same length as `signal`. Its previous\n", | |
"#            contents are ignored, so an uninitialized `similar( signal )` is fine.\n", | |
"#   taps   : filter coefficients.\n", | |
"#   signal : input samples; must be at least as long as `taps`.\n", | |
"#\n", | |
"# Throws an ArgumentError when either length requirement is violated.\n", | |
"function fir_filter!{T}( buffer::Vector{T}, taps::Vector{T}, signal::Vector{T} )\n", | |
"    signal_len = length( signal )\n", | |
"    taps_len   = length( taps )\n", | |
"    buf_len    = length( buffer )\n", | |
"    signal_len >= taps_len || throw( ArgumentError( \"signal must be at least as long as taps\" ) )\n", | |
"    buf_len == signal_len  || throw( ArgumentError( \"buffer and signal must have the same length\" ) )\n", | |
"\n", | |
"    # Start-up region: output n only has n input samples behind it, so only the\n", | |
"    # first n taps contribute.\n", | |
"    for n = 1:taps_len-1\n", | |
"        acc = zero( T )\n", | |
"        for m = 1:n\n", | |
"            @inbounds acc += taps[m] * signal[n-m+1]\n", | |
"        end\n", | |
"        @inbounds buffer[n] = acc\n", | |
"    end\n", | |
"\n", | |
"    # Steady state: every tap contributes. The signal index uses the same\n", | |
"    # orientation as the start-up loop so both regions compute one convolution.\n", | |
"    # max( taps_len, 1 ) keeps n >= 1 when `taps` is empty (the output is then all zeros).\n", | |
"    for n = max( taps_len, 1 ):signal_len\n", | |
"        acc = zero( T )\n", | |
"        # Accumulating into a local gives @simd a plain reduction variable and\n", | |
"        # means the result never depends on what `buffer` held before the call.\n", | |
"        @simd for m = 1:taps_len\n", | |
"            @inbounds acc += taps[m] * signal[n-m+1]\n", | |
"        end\n", | |
"        @inbounds buffer[n] = acc\n", | |
"    end\n", | |
"    return buffer\n", | |
"end" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 5, | |
"text": [ | |
"fir_filter! (generic function with 1 method)" | |
] | |
} | |
], | |
"prompt_number": 5 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"# Allocating convenience wrapper: creates an output vector with `similar( signal )`\n", | |
"# and hands it to `fir_filter!` together with `taps` and `signal`.\n", | |
"function fir_filter( taps, signal )\n", | |
"    output = similar( signal )\n", | |
"    return fir_filter!( output, taps, signal )\n", | |
"end" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"metadata": {}, | |
"output_type": "pyout", | |
"prompt_number": 6, | |
"text": [ | |
"fir_filter (generic function with 1 method)" | |
] | |
} | |
], | |
"prompt_number": 6 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"code_native( fir_filter!, ( Vector{SignalType}, Vector{TapsType}, Vector{SignalType} ))" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"\t.section\t__TEXT,__text,regular,pure_instructions\n", | |
"Filename: In[5]\n", | |
"Source line: 2\n", | |
"\tpush\tRBP\n", | |
"\tmov\tRBP, RSP\n", | |
"Source line: 2\n", | |
"\tpush\tR15\n", | |
"\tpush\tR14\n", | |
"\tpush\tR13\n", | |
"\tpush\tR12\n", | |
"\tpush\tRBX\n", | |
"\tpush\tRAX\n", | |
"\tmov\tR15, QWORD PTR [RSI]\n", | |
"\tmov\tR14, QWORD PTR [RSI + 8]\n", | |
"Source line: 3\n", | |
"\tmov\tR13, QWORD PTR [R14 + 16]\n", | |
"Source line: 2\n", | |
"\tmov\tR12, QWORD PTR [RSI + 16]\n", | |
"Source line: 2\n", | |
"\tmov\tRAX, QWORD PTR [R12 + 16]\n", | |
"Source line: 3\n", | |
"\tmov\tQWORD PTR [RBP - 48], RAX\n", | |
"\tcmp\tR13, RAX\n", | |
"\tjg\t14\n", | |
"Source line: 4\n", | |
"\tmov\tRAX, QWORD PTR [RBP - 48]\n", | |
"\tcmp\tQWORD PTR [R15 + 16], RAX\n", | |
"\tje\t20\n", | |
"\tmovabs\tRAX, 140553122728944\n", | |
"Source line: 5\n", | |
"\tmov\tRDI, QWORD PTR [RAX]\n", | |
"\txor\tESI, ESI\n", | |
"\txor\tEDX, EDX\n", | |
"\tcall\tQWORD PTR [RDI + 8]\n", | |
"Source line: 6\n", | |
"\tlea\tR8, QWORD PTR [R13 - 1]\n", | |
"\ttest\tR8, R8\n", | |
"\tjle\t111\n", | |
"\tmov\tECX, 1\n", | |
"\txor\tR9D, R9D\n", | |
"\ttest\tRCX, RCX\n", | |
"\tjle\t79\n", | |
"Source line: 8\n", | |
"\tmov\tR10, QWORD PTR [R15 + 8]\n", | |
"\tlea\tRBX, QWORD PTR [8*R9]\n", | |
"\tadd\tRBX, QWORD PTR [R12 + 8]\n", | |
"\tvmovsd\tXMM0, QWORD PTR [R10 + 8*RCX - 8]\n", | |
"\tmov\tRAX, QWORD PTR [R14 + 8]\n", | |
"\txor\tEDX, EDX\n", | |
"\tlea\tRSI, QWORD PTR [8*RDX]\n", | |
"\tmov\tRDI, RBX\n", | |
"\tsub\tRDI, RSI\n", | |
"\tvmovsd\tXMM1, QWORD PTR [RAX]\n", | |
"\tvmulsd\tXMM1, XMM1, QWORD PTR [RDI]\n", | |
"\tvaddsd\tXMM0, XMM0, XMM1\n", | |
"\tvmovsd\tQWORD PTR [R10 + 8*RCX - 8], XMM0\n", | |
"\tadd\tRAX, 8\n", | |
"\tinc\tRDX\n", | |
"\tcmp\tRCX, RDX\n", | |
"\tjne\t-49\n", | |
"Source line: 6\n", | |
"\tinc\tRCX\n", | |
"Source line: 8\n", | |
"\tinc\tR9\n", | |
"\tcmp\tRCX, R13\n", | |
"\tjne\t-103\n", | |
"\tmov\tRAX, QWORD PTR [RBP - 48]\n", | |
"\tcmp\tR13, RAX\n", | |
"Source line: 11\n", | |
"\tcmovle\tR8, RAX\n", | |
"\tlea\tRAX, QWORD PTR [R8 + 1]\n", | |
"\tcmp\tR13, RAX\n", | |
"\tje\t129\n", | |
"\txor\tECX, ECX\n", | |
"\ttest\tR13, R13\n", | |
"Source line: 33\n", | |
"\tmov\tR9, R13\n", | |
"\tcmovle\tR9, RCX\n", | |
"\tlea\tRAX, QWORD PTR [R9 - 1]\n", | |
"\tcmp\tR9, 1\n", | |
"\tjo\t121\n", | |
"\tlea\tRSI, QWORD PTR [RAX + 1]\n", | |
"\tadd\tRAX, 1\n", | |
"\tjo\t107\n", | |
"\ttest\tRSI, RSI\n", | |
"\tjle\t64\n", | |
"Source line: 48\n", | |
"\tlea\tRDI, QWORD PTR [8*RCX]\n", | |
"\tadd\tRDI, QWORD PTR [R12 + 8]\n", | |
"Source line: 14\n", | |
"\tmov\tRAX, QWORD PTR [R15 + 8]\n", | |
"\tvmovsd\tXMM0, QWORD PTR [RAX + 8*R13 - 8]\n", | |
"\tmov\tRDX, QWORD PTR [R14 + 8]\n", | |
"\tvmovsd\tXMM1, QWORD PTR [RDX]\n", | |
"\tvmulsd\tXMM1, XMM1, QWORD PTR [RDI]\n", | |
"\tvaddsd\tXMM0, XMM0, XMM1\n", | |
"\tvmovsd\tQWORD PTR [RAX + 8*R13 - 8], XMM0\n", | |
"Source line: 41\n", | |
"\tadd\tRDX, 8\n", | |
"\tadd\tRDI, 8\n", | |
"\tdec\tRSI\n", | |
"\tjne\t-36\n", | |
"Source line: 48\n", | |
"\tinc\tRCX\n", | |
"Source line: 11\n", | |
"\tcmp\tR13, R8\n", | |
"\tlea\tR13, QWORD PTR [R13 + 1]\n", | |
"\tjne\t-117\n", | |
"Source line: 17\n", | |
"\tmov\tRAX, R15\n", | |
"\tadd\tRSP, 8\n", | |
"\tpop\tRBX\n", | |
"\tpop\tR12\n", | |
"\tpop\tR13\n", | |
"\tpop\tR14\n", | |
"\tpop\tR15\n", | |
"\tpop\tRBP\n", | |
"\tret\n", | |
"Source line: 33\n", | |
"\tmovabs\tRAX, 4481457312\n", | |
"\tmov\tRDI, QWORD PTR [RAX]\n", | |
"\tmovabs\tRAX, 4468907616\n", | |
"\tmov\tESI, 33\n", | |
"\tcall\tRAX\n" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Precompile Functions\n", | |
"\n", | |
"Not necessary, but it helps to get accurate first-run `@time` results. Using `taps` as a small signal-vector." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"IPPDSP.filt( flt, signal )\n", | |
"IPPDSP.filt!( flt, buffer, signal )\n", | |
"filt( taps, one(TapsType), signal )\n", | |
"DSP.firfilt( taps, signal )\n", | |
"fir_filter( taps, signal );" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 8 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 2, | |
"metadata": {}, | |
"source": [ | |
"Times" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Julia's Built-in `filt()`\n", | |
"\n", | |
"Internally implemented with FFT convolution (I think)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"@time filt( taps, one(taps[1]), signal );" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"5.367116163 seconds (800017656 bytes allocated, 0.31% gc time)\n" | |
] | |
} | |
], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"DSP.jl `firfilt()`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"@time DSP.firfilt( taps, signal );" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"1.672305065 seconds (804912080 bytes allocated, 3.34% gc time)\n" | |
] | |
} | |
], | |
"prompt_number": 10 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### IPPDSP `filt()`\n", | |
"\n", | |
"Uses multiple dispatch to call the correct IPP function name." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"@time IPPDSP.filt( flt, signal );" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"0.74280871 seconds (800073668 bytes allocated, 9.43% gc time)\n" | |
] | |
} | |
], | |
"prompt_number": 11 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### IPPDSP `filt!()`\n", | |
"\n", | |
"IPP has deprecated its in-place functions. `filt!` calls the same IPP function as above, but with a pre-allocated buffer. It may be possible to pass the IPP function the same pointer for both source and destination; more testing needed." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"@time IPPDSP.filt!( flt, buffer, signal );" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"0.301538253 seconds (112 bytes allocated)\n" | |
] | |
} | |
], | |
"prompt_number": 12 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Native `fir_filter()`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"@time fir_filter( taps, signal );" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"3.451685513 seconds (800000128 bytes allocated)\n" | |
] | |
} | |
], | |
"prompt_number": 13 | |
}, | |
{ | |
"cell_type": "heading", | |
"level": 3, | |
"metadata": {}, | |
"source": [ | |
"Native `fir_filter!()`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"@time fir_filter!( buffer, taps, signal);" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"elapsed time: " | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"2.851588211 seconds (80 bytes allocated)\n" | |
] | |
} | |
], | |
"prompt_number": 14 | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment