August 3, 2015 21:53 · September 9, 2015 21:40 · September 11, 2015 22:49 · December 26, 2015 23:32 · December 27, 2015 23:41 · February 11, 2016 20:52
 diff --git a/DESCRIPTION b/DESCRIPTION
 index cc23502..4d6e10c 100644
 --- a/DESCRIPTION
 +++ b/DESCRIPTION
 @@ -1,6 +1,6 @@
 Package: data.table
 -Version: 1.9.5
 -Title: Extension of Data.frame
 +Version: 1.9.5.1
 +Title: Extension of Data.frame (+KeepSource -ByteCompile)
 ###  compare times for sample.int() vs internal function sample2()
 compareSampleTimes = function(popSizeList=c(1e5, 1e6, 1e7, 1e8, 1e9),
    sampleSizeList=c(10, 100, 1000, 10000),
    numReplications=1000) {
    for (sampleSize in sampleSizeList) {
        for (popSize in popSizeList)  {
            elapsed1 = system.time(replicate(numReplications, sample.int(popSize, sampleSize)))[["elapsed"]]
            elapsed2 = system.time(replicate(numReplications, .Internal(sample2(popSize, sampleSize))))[["elapsed"]]
            cat(sprintf("Sample %d from %.0e: %f vs %f seconds\n", sampleSize, popSize, elapsed1, elapsed2))
        }
 bestAllocation = function(treatedList=c(0,1,8,39,152),  # treated in each category
                          totalsList=rep(200, 5),       # treated + untreated in each
                          numToAdd=100) {               # number new treated available
    addedList = rep(0, length(treatedList))  # start with nothing added
    while (numToAdd > 0) {
        ratio = (treatedList + addedList) / (totalsList + addedList)
        lowest = which.min(ratio)
        addedList[[lowest]] = addedList[[lowest]] + 1
        numToAdd = numToAdd - 1
    }
 // gcc -fno-inline -std=gnu99 -Wall -O3 -g -march=native l1d.c -o l1d

 #include <sys/types.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <x86intrin.h>
 #include <math.h>
 // gcc -fno-inline -std=gnu99 -Wall -O3 -g -march=native avx.c -o avx

 #include <sys/types.h>
 #include <stdint.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <x86intrin.h>
 #include <math.h>
 #include <malloc.h>
 /* function trace of one iteration from http://nicst.de/bench-user-irq-detect.html */

 /* lxdetectirq_thread_capture_start(struct lxdetectirq_capture const * const c) */
 /*	ioctl(c->fds[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) */
           timer-5372  [003] .... 382853.609575: syscall_trace_enter_phase1 <-tracesys
           timer-5372  [003] .... 382853.609575: context_tracking_user_exit <-syscall_trace_enter_phase1
           timer-5372  [003] .... 382853.609575: context_tracking_exit <-context_tracking_user_exit
           timer-5372  [003] d... 382853.609576: context_tracking_recursion_enter <-context_tracking_exit
           timer-5372  [003] d... 382853.609576: rcu_user_exit <-context_tracking_exit
           timer-5372  [003] d... 382853.609576: vtime_account_user <-context_tracking_exit
 /* function_graph trace of one iteration from http://nicst.de/bench-user-irq-detect.html */

 /* lxdetectirq_thread_capture_start(struct lxdetectirq_capture const * const c) */
 /*	ioctl(c->fds[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) */
 381887.380150 |   3)               |  syscall_trace_enter_phase1() {
 381887.380150 |   3)               |    context_tracking_user_exit() {
 381887.380151 |   3)               |      context_tracking_exit() {
 381887.380151 |   3)   0.025 us    |        context_tracking_recursion_enter();
 381887.380151 |   3)   0.026 us    |        rcu_user_exit();
 381887.380151 |   3)               |        vtime_account_user() {
 nate@haswell:~/src$ likwid-perfctr -m -g UOPS_ISSUED_ANY:PMC0,UOPS_EXECUTED_CORE:PMC1,UOPS_RETIRED_ALL:PMC2,BR_INST_RETIRED_NEAR_TAKEN:PMC3 -C 1 fusion
 -------------------------------------------------------------
 -------------------------------------------------------------
 CPU type:	Intel Core Haswell processor
 CPU clock:	3.39 GHz
 -------------------------------------------------------------
 fusion
 two_micro_two_macro: sum1=10000000, sum2=9999999
 one_micro_two_macro: sum1=10000000, sum2=9999999
 one_micro_one_macro: sum1=10000000, sum2=9999999
 gcc-4.8 -O0 fft-test-portable
 Self-test passed
     Size    Time per FFT (ns)
        4    min=84  mean=84  sd=0.01%
       16    min=540  mean=541  sd=0.03%
       64    min=3035  mean=3037  sd=0.13%
      256    min=15759  mean=15763  sd=0.02%
     1024    min=77969  mean=77984  sd=0.01%
     4096    min=375086  mean=375292  sd=0.04%
    16384    min=1765013  mean=1765401  sd=0.02%
 gcc-4.8 -O0 fft-test-model
 Self-test passed
     Size    Time per FFT (ns)
        4    min=47  mean=47  sd=0.01%
       16    min=411  mean=412  sd=0.07%
       64    min=2524  mean=2525  sd=0.03%
      256    min=13656  mean=13661  sd=0.02%
     1024    min=69013  mean=69024  sd=0.01%
     4096    min=337457  mean=337591  sd=0.02%
    16384    min=1584876  mean=1585286  sd=0.02%
	diff --git a/DESCRIPTION b/DESCRIPTION
	index cc23502..4d6e10c 100644
	--- a/DESCRIPTION
	+++ b/DESCRIPTION
	@@ -1,6 +1,6 @@
	Package: data.table
	-Version: 1.9.5
	-Title: Extension of Data.frame
	+Version: 1.9.5.1
	+Title: Extension of Data.frame (+KeepSource -ByteCompile)
	### compare times for sample.int() vs internal function sample2()
	compareSampleTimes = function(popSizeList=c(1e5, 1e6, 1e7, 1e8, 1e9),
	sampleSizeList=c(10, 100, 1000, 10000),
	numReplications=1000) {
	for (sampleSize in sampleSizeList) {
	for (popSize in popSizeList) {
	elapsed1 = system.time(replicate(numReplications, sample.int(popSize, sampleSize)))[["elapsed"]]
	elapsed2 = system.time(replicate(numReplications, .Internal(sample2(popSize, sampleSize))))[["elapsed"]]
	cat(sprintf("Sample %d from %.0e: %f vs %f seconds\n", sampleSize, popSize, elapsed1, elapsed2))
	}
	bestAllocation = function(treatedList=c(0,1,8,39,152), # treated in each category
	totalsList=rep(200, 5), # treated + untreated in each
	numToAdd=100) { # number new treated available
	addedList = rep(0, length(treatedList)) # start with nothing added
	while (numToAdd > 0) {
	ratio = (treatedList + addedList) / (totalsList + addedList)
	lowest = which.min(ratio)
	addedList[[lowest]] = addedList[[lowest]] + 1
	numToAdd = numToAdd - 1
	}
	// gcc -fno-inline -std=gnu99 -Wall -O3 -g -march=native l1d.c -o l1d

	#include <sys/types.h>
	#include <stdint.h>
	#include <string.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <x86intrin.h>
	#include <math.h>
	/* function trace of one iteration from http://nicst.de/bench-user-irq-detect.html */

	/* lxdetectirq_thread_capture_start(struct lxdetectirq_capture const * const c) */
	/* ioctl(c->fds[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) */
	timer-5372 [003] .... 382853.609575: syscall_trace_enter_phase1 <-tracesys
	timer-5372 [003] .... 382853.609575: context_tracking_user_exit <-syscall_trace_enter_phase1
	timer-5372 [003] .... 382853.609575: context_tracking_exit <-context_tracking_user_exit
	timer-5372 [003] d... 382853.609576: context_tracking_recursion_enter <-context_tracking_exit
	timer-5372 [003] d... 382853.609576: rcu_user_exit <-context_tracking_exit
	timer-5372 [003] d... 382853.609576: vtime_account_user <-context_tracking_exit
	/* function_graph trace of one iteration from http://nicst.de/bench-user-irq-detect.html */

	/* lxdetectirq_thread_capture_start(struct lxdetectirq_capture const * const c) */
	/* ioctl(c->fds[0], PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) */
	381887.380150 \| 3) \| syscall_trace_enter_phase1() {
	381887.380150 \| 3) \| context_tracking_user_exit() {
	381887.380151 \| 3) \| context_tracking_exit() {
	381887.380151 \| 3) 0.025 us \| context_tracking_recursion_enter();
	381887.380151 \| 3) 0.026 us \| rcu_user_exit();
	381887.380151 \| 3) \| vtime_account_user() {
	nate@haswell:~/src$ likwid-perfctr -m -g UOPS_ISSUED_ANY:PMC0,UOPS_EXECUTED_CORE:PMC1,UOPS_RETIRED_ALL:PMC2,BR_INST_RETIRED_NEAR_TAKEN:PMC3 -C 1 fusion
	-------------------------------------------------------------
	-------------------------------------------------------------
	CPU type: Intel Core Haswell processor
	CPU clock: 3.39 GHz
	-------------------------------------------------------------
	fusion
	two_micro_two_macro: sum1=10000000, sum2=9999999
	one_micro_two_macro: sum1=10000000, sum2=9999999
	one_micro_one_macro: sum1=10000000, sum2=9999999
	gcc-4.8 -O0 fft-test-portable
	Self-test passed
	Size Time per FFT (ns)
	4 min=84 mean=84 sd=0.01%
	16 min=540 mean=541 sd=0.03%
	64 min=3035 mean=3037 sd=0.13%
	256 min=15759 mean=15763 sd=0.02%
	1024 min=77969 mean=77984 sd=0.01%
	4096 min=375086 mean=375292 sd=0.04%
	16384 min=1765013 mean=1765401 sd=0.02%
	gcc-4.8 -O0 fft-test-model
	Self-test passed
	Size Time per FFT (ns)
	4 min=47 mean=47 sd=0.01%
	16 min=411 mean=412 sd=0.07%
	64 min=2524 mean=2525 sd=0.03%
	256 min=13656 mean=13661 sd=0.02%
	1024 min=69013 mean=69024 sd=0.01%
	4096 min=337457 mean=337591 sd=0.02%
	16384 min=1584876 mean=1585286 sd=0.02%