Skip to content

Instantly share code, notes, and snippets.

@methodmissing
Created January 13, 2010 18:58
Show Gist options
  • Select an option

  • Save methodmissing/276470 to your computer and use it in GitHub Desktop.

Select an option

Save methodmissing/276470 to your computer and use it in GitHub Desktop.
methodmissing:rubyenterpriseedition187-248 lourens$ ./ruby -Ilib obj_hash_bench.rb
Rehearsal ---------------------------------------------------
Hash#[:array] 0.030000 0.000000 0.030000 ( 0.032720)
Hash#[:bignum] 0.030000 0.000000 0.030000 ( 0.028503)
Hash#[:hash] 0.030000 0.000000 0.030000 ( 0.031888)
Hash#[:numeric] 0.030000 0.000000 0.030000 ( 0.032113)
Hash#[:range] 0.030000 0.000000 0.030000 ( 0.031008)
Hash#[:time] 0.030000 0.000000 0.030000 ( 0.030119)
Hash#[:rexp] 0.030000 0.000000 0.030000 ( 0.030865)
Hash#[:string] 0.040000 0.000000 0.040000 ( 0.033808)
Hash#[:struct] 0.030000 0.000000 0.030000 ( 0.035937)
------------------------------------------ total: 0.280000sec
user system total real
Hash#[:array] 0.030000 0.000000 0.030000 ( 0.032219)
Hash#[:bignum] 0.030000 0.000000 0.030000 ( 0.029214)
Hash#[:hash] 0.030000 0.010000 0.040000 ( 0.032463)
Hash#[:numeric] 0.040000 0.000000 0.040000 ( 0.032096)
Hash#[:range] 0.030000 0.000000 0.030000 ( 0.032393)
Hash#[:time] 0.030000 0.000000 0.030000 ( 0.032640)
Hash#[:rexp] 0.030000 0.000000 0.030000 ( 0.032326)
Hash#[:string] 0.040000 0.000000 0.040000 ( 0.035568)
Hash#[:struct] 0.030000 0.000000 0.030000 ( 0.036105)
GC allocations 988, GC collections 9, GC time 4206
methodmissing:rubyenterpriseedition187-248 lourens$ ./ruby -Ilib obj_hash_bench.rb
Rehearsal ---------------------------------------------------
Hash#[:array] 0.120000 0.000000 0.120000 ( 0.124507)
Hash#[:bignum] 0.030000 0.000000 0.030000 ( 0.029930)
Hash#[:hash] 0.150000 0.010000 0.160000 ( 0.154381)
Hash#[:numeric] 0.030000 0.000000 0.030000 ( 0.037716)
Hash#[:range] 0.060000 0.000000 0.060000 ( 0.065550)
Hash#[:time] 0.030000 0.000000 0.030000 ( 0.037035)
Hash#[:rexp] 0.040000 0.000000 0.040000 ( 0.039917)
Hash#[:string] 0.050000 0.000000 0.050000 ( 0.049951)
Hash#[:struct] 0.040000 0.000000 0.040000 ( 0.035129)
------------------------------------------ total: 0.560000sec
user system total real
Hash#[:array] 0.120000 0.010000 0.130000 ( 0.124881)
Hash#[:bignum] 0.030000 0.000000 0.030000 ( 0.029093)
Hash#[:hash] 0.150000 0.000000 0.150000 ( 0.149078)
Hash#[:numeric] 0.040000 0.000000 0.040000 ( 0.037757)
Hash#[:range] 0.060000 0.000000 0.060000 ( 0.063429)
Hash#[:time] 0.030000 0.000000 0.030000 ( 0.035603)
Hash#[:rexp] 0.030000 0.000000 0.030000 ( 0.040986)
Hash#[:string] 0.050000 0.000000 0.050000 ( 0.052101)
Hash#[:struct] 0.030000 0.000000 0.030000 ( 0.036544)
GC allocations 400987, GC collections 9, GC time 4686
diff --git a/array.c b/array.c
index 89f74d0..e4304cc 100644
--- a/array.c
+++ b/array.c
@@ -15,6 +15,7 @@
#include "ruby.h"
#include "util.h"
#include "st.h"
+#include "cached_obj_hash.h"
VALUE rb_cArray;
static ID id_cmp;
@@ -61,8 +62,8 @@ rb_ary_modify(ary)
VALUE ary;
{
VALUE *ptr;
-
rb_ary_modify_check(ary);
+ COND_EXPIRE_CACHED_OBJ_HASH(ary);
if (FL_TEST(ary, ELTS_SHARED)) {
ptr = ALLOC_N(VALUE, RARRAY(ary)->len);
FL_UNSET(ary, ELTS_SHARED);
@@ -2703,17 +2704,17 @@ recursive_hash(ary, dummy, recur)
{
long i, h;
VALUE n;
-
if (recur) {
return LONG2FIX(0);
}
-
+ GET_CACHED_OBJ_HASH(ary);
h = RARRAY(ary)->len;
for (i=0; i<RARRAY(ary)->len; i++) {
h = (h << 1) | (h<0 ? 1 : 0);
n = rb_hash(RARRAY(ary)->ptr[i]);
h ^= NUM2LONG(n);
}
+ CACHE_OBJ_HASH(ary,h);
return LONG2FIX(h);
}
diff --git a/bignum.c b/bignum.c
index 8e27369..d9b35a4 100644
--- a/bignum.c
+++ b/bignum.c
@@ -12,6 +12,7 @@
#include "ruby.h"
#include "rubysig.h"
+#include "cached_obj_hash.h"
#include <math.h>
#include <float.h>
@@ -2230,11 +2231,13 @@ rb_big_hash(x)
{
long i, len, key;
BDIGIT *digits;
+ GET_CACHED_OBJ_HASH(x);
key = 0; digits = BDIGITS(x); len = RBIGNUM(x)->len;
for (i=0; i<len; i++) {
key ^= *digits++;
}
+ CACHE_OBJ_HASH_DIRECT(x,key);
return LONG2FIX(key);
}
diff --git a/cached_obj_hash.h b/cached_obj_hash.h
new file mode 100644
index 0000000..6b74565
--- /dev/null
+++ b/cached_obj_hash.h
@@ -0,0 +1,67 @@
+#ifndef HASH_CACHE_H
+#define HASH_CACHE_H
+#include "st.h"
+
+RUBY_EXTERN st_table *rb_obj_hash_cache;
+
+#define LIKELY_CACHE_OBJ_HASH FL_USER6
+#define CACHED_OBJ_HASH FL_USER7
+#define FREE_CACHED_OBJ_HASH(obj) do {\
+ if FL_TEST(obj,CACHED_OBJ_HASH){ \
+ st_delete(rb_obj_hash_cache, (st_data_t*)obj, 0); \
+ } \
+} while (0)
+#define EXPIRE_CACHED_OBJ_HASH(obj) FREE_CACHED_OBJ_HASH(obj)
+#define COND_EXPIRE_CACHED_OBJ_HASH(obj) do {\
+ if FL_TEST(obj,CACHED_OBJ_HASH){ \
+ EXPIRE_CACHED_OBJ_HASH(obj); \
+ FL_UNSET(obj,CACHED_OBJ_HASH); \
+ } \
+} while (0)
+#define CACHE_OBJ_HASH_DIRECT(obj,val) do {\
+ st_add_direct(rb_obj_hash_cache, (st_data_t)obj, (st_data_t)val); \
+ FL_SET(obj,CACHED_OBJ_HASH); \
+} while (0)
+#define CACHE_OBJ_HASH(obj,val) do {\
+ if(FL_TEST(obj,LIKELY_CACHE_OBJ_HASH)){ \
+ CACHE_OBJ_HASH_DIRECT(obj,val); \
+ }else{ \
+ FL_SET(obj,LIKELY_CACHE_OBJ_HASH); \
+ } \
+} while (0)
+#define EXT_TYPE(obj,type) (RBASIC(obj)->klass == type)
+#define CACHE_EXT_OBJ_HASH_P(o) (EXT_TYPE(o,rb_cTime) || EXT_TYPE(o,rb_cRange))
+#define COND_CACHE_OBJ_HASH_P(obj) do {\
+ switch (TYPE(obj)) { \
+ case T_ARRAY: \
+ case T_BIGNUM: \
+ case T_HASH: \
+ case T_FLOAT: \
+ case T_REGEXP: \
+ case T_STRING: \
+ case T_STRUCT: \
+ cache_obj_hash = 1; \
+ break; \
+ default: \
+ if(!SPECIAL_CONST_P(obj) && CACHE_EXT_OBJ_HASH_P(obj)) cache_obj_hash = 1; \
+ break; \
+ } \
+} while (0)
+#define COND_CACHE_OBJ_HASH(obj,val) do {\
+ if (cache_obj_hash) { \
+ CACHE_OBJ_HASH(obj,val); \
+ } \
+} while (0)
+#define GET_CACHED_OBJ_HASH(obj) do {\
+ st_data_t cached_hash; \
+ if FL_TEST(obj,CACHED_OBJ_HASH){ \
+ st_lookup(rb_obj_hash_cache, (st_data_t)obj, &cached_hash); \
+ return LONG2FIX(cached_hash); \
+ } \
+} while (0)
+#define COND_GET_CACHED_OBJ_HASH(obj) do {\
+ if (cache_obj_hash) { \
+ GET_CACHED_OBJ_HASH(obj); \
+ } \
+} while (0)
+#endif
\ No newline at end of file
diff --git a/gc.c b/gc.c
index 8c1c6d6..40964cc 100644
--- a/gc.c
+++ b/gc.c
@@ -18,6 +18,7 @@
#include "node.h"
#include "env.h"
#include "re.h"
+#include "cached_obj_hash.h"
#include <stdio.h>
#include <setjmp.h>
#include <math.h>
@@ -1919,6 +1920,7 @@ static int
obj_free(obj)
VALUE obj;
{
+ st_data_t hsh;
switch (BUILTIN_TYPE(obj)) {
case T_NIL:
case T_FIXNUM:
@@ -1927,7 +1929,7 @@ obj_free(obj)
rb_bug("obj_free() called for broken object");
break;
}
-
+ FREE_CACHED_OBJ_HASH(obj);
if (FL_TEST(obj, FL_EXIVAR)) {
rb_free_generic_ivar((VALUE)obj);
}
diff --git a/hash.c b/hash.c
index 8086e22..eb51e2e 100644
--- a/hash.c
+++ b/hash.c
@@ -16,6 +16,7 @@
#include "st.h"
#include "util.h"
#include "rubysig.h"
+#include "cached_obj_hash.h"
#ifdef __APPLE__
#include <crt_externs.h>
@@ -32,6 +33,7 @@ rb_hash_modify(hash)
if (OBJ_FROZEN(hash)) rb_error_frozen("hash");
if (!OBJ_TAINTED(hash) && rb_safe_level() >= 4)
rb_raise(rb_eSecurityError, "Insecure: can't modify hash");
+ COND_EXPIRE_CACHED_OBJ_HASH(hash);
}
VALUE
@@ -81,14 +83,21 @@ VALUE
rb_hash(obj)
VALUE obj;
{
+ long hash;
+ int cache_obj_hash = 0;
+ COND_CACHE_OBJ_HASH_P(obj);
+ COND_GET_CACHED_OBJ_HASH(obj);
VALUE hval = rb_funcall(obj, id_hash, 0);
retry:
switch (TYPE(hval)) {
case T_FIXNUM:
+ COND_CACHE_OBJ_HASH(obj,FIX2LONG(hval));
return hval;
case T_BIGNUM:
- return LONG2FIX(((long*)(RBIGNUM_DIGITS(hval)))[0]);
+ hash = ((long*)(RBIGNUM_DIGITS(hval)))[0];
+ COND_CACHE_OBJ_HASH(obj,hash);
+ return LONG2FIX(hash);
default:
hval = rb_to_int(hval);
@@ -1650,7 +1659,11 @@ static VALUE
rb_hash_hash(hash)
VALUE hash;
{
- return rb_exec_recursive(recursive_hash, hash, 0);
+ VALUE hval;
+ GET_CACHED_OBJ_HASH(hash);
+ hval = rb_exec_recursive(recursive_hash, hash, 0);
+ CACHE_OBJ_HASH_DIRECT(hash,FIX2LONG(hval));
+ return hval;
}
diff --git a/numeric.c b/numeric.c
index 634c2eb..275fd80 100644
--- a/numeric.c
+++ b/numeric.c
@@ -12,6 +12,7 @@
#include "ruby.h"
#include "env.h"
+#include "cached_obj_hash.h"
#include <ctype.h>
#include <math.h>
#include <stdio.h>
@@ -892,7 +893,7 @@ flo_hash(num)
double d;
char *c;
int i, hash;
-
+ GET_CACHED_OBJ_HASH(num);
d = RFLOAT(num)->value;
if (d == 0) d = fabs(d);
c = (char*)&d;
@@ -900,6 +901,7 @@ flo_hash(num)
hash = (hash * 971) ^ (unsigned char)c[i];
}
if (hash < 0) hash = -hash;
+ CACHE_OBJ_HASH_DIRECT(num,(long)hash);
return INT2FIX(hash);
}
diff --git a/range.c b/range.c
index 864ca9f..6b06a6e 100644
--- a/range.c
+++ b/range.c
@@ -11,6 +11,7 @@
**********************************************************************/
#include "ruby.h"
+#include "cached_obj_hash.h"
VALUE rb_cRange;
static ID id_cmp, id_succ, id_beg, id_end, id_excl;
@@ -212,13 +213,13 @@ range_hash(range)
{
long hash = EXCL(range);
VALUE v;
-
+ GET_CACHED_OBJ_HASH(range);
v = rb_hash(rb_ivar_get(range, id_beg));
hash ^= v << 1;
v = rb_hash(rb_ivar_get(range, id_end));
hash ^= v << 9;
hash ^= EXCL(range) << 24;
-
+ CACHE_OBJ_HASH_DIRECT(range,hash);
return LONG2FIX(hash);
}
diff --git a/re.c b/re.c
index f2ac99f..c9ea6f4 100644
--- a/re.c
+++ b/re.c
@@ -12,6 +12,7 @@
#include "ruby.h"
#include "re.h"
#include <ctype.h>
+#include "cached_obj_hash.h"
VALUE rb_eRegexpError;
@@ -1533,7 +1534,7 @@ rb_reg_hash(re)
{
int hashval, len;
char *p;
-
+ GET_CACHED_OBJ_HASH(re);
rb_reg_check(re);
hashval = RREGEXP(re)->ptr->options;
len = RREGEXP(re)->len;
@@ -1542,7 +1543,7 @@ rb_reg_hash(re)
hashval = hashval * 33 + *p++;
}
hashval = hashval + (hashval>>5);
-
+ CACHE_OBJ_HASH_DIRECT(re,(long)hashval);
return INT2FIX(hashval);
}
diff --git a/ruby.h b/ruby.h
index 4d003a8..84652cb 100644
--- a/ruby.h
+++ b/ruby.h
@@ -785,4 +785,4 @@ void ruby_native_thread_kill _((int));
} /* extern "C" { */
#endif
-#endif /* ifndef RUBY_H */
+#endif /* ifndef RUBY_H */
\ No newline at end of file
diff --git a/string.c b/string.c
index 69430c6..56bfa45 100644
--- a/string.c
+++ b/string.c
@@ -14,6 +14,7 @@
#include "ruby.h"
#include "re.h"
+#include "cached_obj_hash.h"
#define BEG(no) regs->beg[no]
#define END(no) regs->end[no]
@@ -27,9 +28,9 @@
VALUE rb_cString;
-#define STR_TMPLOCK FL_USER1
-#define STR_ASSOC FL_USER3
-#define STR_NOCAPA (ELTS_SHARED|STR_ASSOC)
+#define STR_TMPLOCK FL_USER1
+#define STR_ASSOC FL_USER3
+#define STR_NOCAPA (ELTS_SHARED|STR_ASSOC)
#define RESIZE_CAPA(str,capacity) do {\
REALLOC_N(RSTRING(str)->ptr, char, (capacity)+1);\
@@ -510,6 +511,7 @@ rb_str_modify(str)
{
if (!str_independent(str))
str_make_independent(str);
+ COND_EXPIRE_CACHED_OBJ_HASH(str);
}
void
@@ -882,7 +884,11 @@ rb_str_hash(str)
register long len = RSTRING(str)->len;
register char *p = RSTRING(str)->ptr;
register int key = 0;
-
+ st_data_t cached_key;
+ if FL_TEST(str,CACHED_OBJ_HASH){
+ st_lookup(rb_obj_hash_cache, str, &cached_key);
+ return (int)cached_key;
+ }
#if defined(HASH_ELFHASH)
register unsigned int g;
@@ -908,6 +914,7 @@ rb_str_hash(str)
}
key = key + (key>>5);
#endif
+ CACHE_OBJ_HASH(str,key);
return key;
}
diff --git a/struct.c b/struct.c
index 9417c1c..e484544 100644
--- a/struct.c
+++ b/struct.c
@@ -12,6 +12,7 @@
#include "ruby.h"
#include "env.h"
+#include "cached_obj_hash.h"
VALUE rb_cStruct;
@@ -156,6 +157,7 @@ rb_struct_modify(s)
if (OBJ_FROZEN(s)) rb_error_frozen("Struct");
if (!OBJ_TAINTED(s) && rb_safe_level() >= 4)
rb_raise(rb_eSecurityError, "Insecure: can't modify Struct");
+ COND_EXPIRE_CACHED_OBJ_HASH(s);
}
static VALUE
@@ -812,13 +814,14 @@ rb_struct_hash(s)
{
long i, h;
VALUE n;
-
+ GET_CACHED_OBJ_HASH(s);
h = rb_hash(rb_obj_class(s));
for (i = 0; i < RSTRUCT(s)->len; i++) {
h = (h << 1) | (h<0 ? 1 : 0);
n = rb_hash(RSTRUCT(s)->ptr[i]);
h ^= NUM2LONG(n);
}
+ CACHE_OBJ_HASH(s,h);
return LONG2FIX(h);
}
diff --git a/time.c b/time.c
index 5cd1c9e..269325c 100644
--- a/time.c
+++ b/time.c
@@ -14,6 +14,7 @@
#include <sys/types.h>
#include <time.h>
#include <errno.h>
+#include "cached_obj_hash.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -66,6 +67,7 @@ time_modify(time)
rb_check_frozen(time);
if (!OBJ_TAINTED(time) && rb_safe_level() >= 4)
rb_raise(rb_eSecurityError, "Insecure: can't modify Time");
+ COND_EXPIRE_CACHED_OBJ_HASH(time);
}
/*
@@ -1047,9 +1049,10 @@ time_hash(time)
{
struct time_object *tobj;
long hash;
-
+ GET_CACHED_OBJ_HASH(time);
GetTimeval(time, tobj);
hash = tobj->tv.tv_sec ^ tobj->tv.tv_usec;
+ CACHE_OBJ_HASH(time,hash);
return LONG2FIX(hash);
}
diff --git a/variable.c b/variable.c
index 53143ca..666b7f5 100644
--- a/variable.c
+++ b/variable.c
@@ -20,6 +20,7 @@
st_table *rb_global_tbl;
st_table *rb_class_tbl;
+st_table *rb_obj_hash_cache;
static ID autoload, classpath, tmp_classpath;
void
@@ -27,6 +28,7 @@ Init_var_tables()
{
rb_global_tbl = st_init_numtable();
rb_class_tbl = st_init_numtable();
+ rb_obj_hash_cache = st_init_numtable();
autoload = rb_intern("__autoload__");
classpath = rb_intern("__classpath__");
tmp_classpath = rb_intern("__tmp_classpath__");
require "benchmark"
ObjArray = %w(a b c d e)
ObjBignum = 3233232322233323
ObjHash = { :a => 'a', :b => 'b', :c => 'c' }
ObjNumeric = 12.337869
ObjRange = 'a'..'z'
ObjTime = Time.now
ObjRexp = /[\d]{1,3}:[\d]{1,3}/
ObjString = 'a' * 100
ObjStruct = Struct.new(:a,:b,:c,:d,:e,:f,:g,:h)
HASH = { ObjArray => :array,
ObjBignum => :bignum,
ObjHash => :hash,
ObjNumeric => :numeric,
ObjRange => :range,
ObjTime => :time,
ObjRexp => :rexp,
ObjString => :string,
ObjStruct => :struct }
TESTS = 100_000
GC.enable_stats
Benchmark.bmbm do |results|
results.report("Hash#[:array]") { TESTS.times { HASH[ObjArray] } }
results.report("Hash#[:bignum]") { TESTS.times { HASH[ObjBignum] } }
results.report("Hash#[:hash]") { TESTS.times { HASH[ObjHash] } }
results.report("Hash#[:numeric]") { TESTS.times { HASH[ObjNumeric] } }
results.report("Hash#[:range]") { TESTS.times { HASH[ObjRange] } }
results.report("Hash#[:time]") { TESTS.times { HASH[ObjTime] } }
results.report("Hash#[:rexp]") { TESTS.times { HASH[ObjRexp] } }
results.report("Hash#[:string]") { TESTS.times { HASH[ObjString] } }
results.report("Hash#[:struct]") { TESTS.times { HASH[ObjStruct] } }
end
puts "\n\nGC allocations #{GC.num_allocations}, GC collections #{GC.collections}, GC time #{GC.time}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment