Skip to content

Instantly share code, notes, and snippets.

@brianmario
Created April 20, 2011 18:46
Show Gist options
  • Save brianmario/932290 to your computer and use it in GitHub Desktop.
Save brianmario/932290 to your computer and use it in GitHub Desktop.
#include <ruby.h>
#include <string.h>
#define LINECOUNT_ACCURATE
static VALUE rb_String_line_count(int argc, VALUE * argv, VALUE self) {
unsigned char *str = (unsigned char *)RSTRING_PTR(self);
size_t len = RSTRING_LEN(self), n=0, i=0;
VALUE count_trailing;
if (len > 256) {
for (; i < len; i += 4) {
/* Count lines, but do it fast.
* Access the string by word chunks and XOR with "\n\n\n\n"
* which should set any bytes equal to '\n' (0xA) to zero.
* Then we perform some int underflowing to check if any
* bytes are zero. The bytes that are zero get marked with
* 0x80, so we can shift by 7 to move the MSB of each byte
* into the LSB, and then add all the LSBs.
*/
unsigned int word = *(unsigned int *)(str + i) ^ 0x0A0A0A0AUL;
word = (word - 0x01010101UL) & ~word & 0x80808080UL;
if (word) {
unsigned char *bytes = (unsigned char *)&word;
word >>= 7;
n += bytes[0] + bytes[1] + bytes[2] + bytes[3];
}
}
}
for(; i<len; i++) {
if (str[i] == '\n') n++;
}
if (rb_scan_args(argc, argv, "01", &count_trailing) == 1 && count_trailing) {
n++;
}
return ULONG2NUM(n);
}
/*
 * Extension initializer: registers rb_String_line_count as the instance
 * method String#line_count. The arity of -1 tells Ruby to call the C
 * function with (argc, argv, self), so the method accepts a variable
 * number of arguments and validates them itself.
 */
void Init_string_ext(void) {
    rb_define_method(rb_cString, "line_count", rb_String_line_count, -1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment