Skip to content

Instantly share code, notes, and snippets.

### compare times for sample.int() vs internal function sample2()
compareSampleTimes = function(popSizeList=c(1e5, 1e6, 1e7, 1e8, 1e9),
sampleSizeList=c(10, 100, 1000, 10000),
numReplications=1000) {
for (sampleSize in sampleSizeList) {
for (popSize in popSizeList) {
elapsed1 = system.time(replicate(numReplications, sample.int(popSize, sampleSize)))[["elapsed"]]
elapsed2 = system.time(replicate(numReplications, .Internal(sample2(popSize, sampleSize))))[["elapsed"]]
cat(sprintf("Sample %d from %.0e: %f vs %f seconds\n", sampleSize, popSize, elapsed1, elapsed2))
}
diff --git a/DESCRIPTION b/DESCRIPTION
index cc23502..4d6e10c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
Package: data.table
-Version: 1.9.5
-Title: Extension of Data.frame
+Version: 1.9.5.1
+Title: Extension of Data.frame (+KeepSource -ByteCompile)
--- R-3.2.1/src/main/memory.c.orig 2015-07-31 23:15:07.017151621 -0700
+++ R-3.2.1/src/main/memory.c 2015-07-31 23:17:10.185150073 -0700
@@ -3724,11 +3724,21 @@
static FILE *R_MemReportingOutfile;
static R_size_t R_MemReportingThreshold;
+static void printLineNum(FILE *file, SEXP srcref) {
+ if (srcref && !isNull(srcref)) {
+ int line = asInteger(srcref);
+ fprintf(file, "#%d ", line);
// gcc -march=native -g -std=gnu99 -Wall -Wextra -O3 symmetric.c -o symmetric -DUSE_ALG
// (where USE_ALG is one of USE_NATE, USE_KARIM, USE_BASIC, or USE_CONDITIONAL)
// Or if using https://code.google.com/p/likwid/ with -m markers:
// gcc -march=native -g -std=gnu99 -Wall -Wextra -O3 symmetric.c -o symmetric -DLIKWID -llikwid -lpthread -lm -DUSE_ALG
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@nkurz
nkurz / c.c
Created December 22, 2014 23:30
// C implementation for Pathfinding Benchmark by [email protected]
// See https://github.com/logicchains/LPATHBench for details
// Summary of benchmarks (see bottom for full numbers)
// 8981 LANGUAGE C 623
// 8981 LANGUAGE C++/clang 734
// 8981 LANGUAGE C++/gcc 755
// Best results compiling with GCC 4.7 or 4.8 -O2
// clang, icc and GCC 4.9 slightly worse with -O1, -O2, -O3, -Ofast
// -O3 and -Ofast much worse for all GCC. -O1 mixed but worse.
// Calculate cycles spent on overhead of function calls
// See http://cs.coloradocollege.edu/~bylvisaker/CallReturn/
// gcc -g -std=gnu99 -O3 -Wall -Wextra call-return.c -o call-return
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#define DEFAULT_LOOP_COUNT (1000 * 1000)
@nkurz
nkurz / broadcast.c
Created October 25, 2014 01:13
Illustrate the performance difference between vector broadcast from memory vs load and shuffle
// cc -fno-inline -g -march=native -std=gnu99 -O3 -Wall -Wextra broadcast.c -o broadcast
// works with 'gcc 4.8.2' and 'icc 14.03', but crashes with 'clang 3.4' because of alignment
// usage: broadcast [-r repeat] [-s size]
#ifdef LIKWID
#include <likwid.h>
#else
#define likwid_markerInit()
#define likwid_markerThreadInit()
#define likwid_markerStartRegion(name)
@nkurz
nkurz / sub.asm
Last active August 29, 2015 14:07
Switching from SUB to SBB changes runtime by 15%. Can you explain why?
; Minimal example, see also http://stackoverflow.com/q/26266953/3766665
; To build (Linux):
; nasm -felf64 func.asm
; ld func.o
; Then run:
; perf stat -r10 ./a.out
; On Haswell and Sandy Bridge, observed runtime varies
; ~15% depending on whether sub or sbb is used in the loop
section .text
global _start
@nkurz
nkurz / same-function.c
Created October 8, 2014 21:27
Identical loops that execute in different but consistent times
// gcc -std=gnu99 -O3 -Wall -Wextra same-function.c -o same-function
// Identical loops that execute in different but consistent times
#if COPY_AND_RUN_TO_TEST
for n in 0 1 2 3 4 5 6 7 8 9;
do echo same-function ${n}:;
/usr/bin/time -f "%e seconds" same-function ${n};
/usr/bin/time -f "%e seconds" same-function ${n};
/usr/bin/time -f "%e seconds" same-function ${n};
done
@nkurz
nkurz / constant_division.objdump
Created September 25, 2014 21:15
Objdump of different compilers for constant_division.c
GCC 4.8.1
4019f0: 66 0f 6f 00 movdqa (%rax),%xmm0
4019f4: 48 83 c0 10 add $0x10,%rax
4019f8: 48 39 c5 cmp %rax,%rbp
4019fb: 66 0f 6f c8 movdqa %xmm0,%xmm1
4019ff: 66 0f 6f e0 movdqa %xmm0,%xmm4
401a03: 66 0f 62 c8 punpckldq %xmm0,%xmm1
401a07: 66 0f 6a e0 punpckhdq %xmm0,%xmm4
401a0b: 66 0f f4 cb pmuludq %xmm3,%xmm1
401a0f: 66 0f f4 e3 pmuludq %xmm3,%xmm4