Created
April 20, 2011 18:46
-
-
Save brianmario/932290 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <ruby.h>
#include <stdint.h>
#include <string.h>
#define LINECOUNT_ACCURATE | |
/* Strings no longer than this many bytes are scanned one byte at a time. */
enum { LINE_COUNT_WORD_SCAN_MIN = 256 };

/*
 * Count the '\n' bytes among the first `len` bytes of `str`.
 *
 * Inputs longer than LINE_COUNT_WORD_SCAN_MIN are processed four bytes at a
 * time; whatever does not fill a whole 4-byte word (and every short input)
 * is handled by the plain byte loop at the end. No byte at or beyond
 * `str + len` is ever read.
 */
static size_t count_newline_bytes(const unsigned char *str, size_t len) {
    size_t n = 0, i = 0;

    if (len > (size_t)LINE_COUNT_WORD_SCAN_MIN) {
        /* `len - i >= 4` guarantees the whole word lies inside the buffer. */
        for (; len - i >= 4; i += 4) {
            uint32_t word, hits;

            /* memcpy keeps the load valid for any alignment of `str + i`
             * and avoids reading the bytes through an incompatible type. */
            memcpy(&word, str + i, sizeof word);

            /* XOR with "\n\n\n\n": bytes equal to '\n' (0x0A) become zero. */
            word ^= (uint32_t)0x0A0A0A0AU;

            /* Exact zero-byte test. Adding 0x7F to the low seven bits of a
             * byte sets its top bit iff any of those bits was set, and can
             * never carry into the neighbouring byte. OR-ing in the byte's
             * own top bit and inverting leaves 0x80 in exactly the bytes
             * that were zero, with no false marks on adjacent bytes. */
            hits = ~(((word & (uint32_t)0x7F7F7F7FU) + (uint32_t)0x7F7F7F7FU)
                     | word | (uint32_t)0x7F7F7F7FU);

            if (hits) {
                /* Move each 0x80 marker down to bit 0 of its byte and add
                 * the four one-bit fields; independent of byte order. */
                hits >>= 7;
                n += (hits & 1U) + ((hits >> 8) & 1U)
                   + ((hits >> 16) & 1U) + ((hits >> 24) & 1U);
            }
        }
    }

    /* Short inputs and the 0-3 bytes left over after the word loop. */
    for (; i < len; i++) {
        if (str[i] == '\n') {
            n++;
        }
    }
    return n;
}

/*
 * String#line_count([count_trailing]) -> Integer
 *
 * Returns the number of "\n" bytes in the receiver. When the optional
 * argument is given and is truthy (anything other than nil or false), the
 * result is incremented by one.
 *
 * argc/argv: the Ruby-level arguments (zero or one accepted; rb_scan_args
 *            raises for any other count).
 * self:      the receiving String.
 */
static VALUE rb_String_line_count(int argc, VALUE *argv, VALUE self) {
    VALUE count_trailing;
    size_t n;

    /* Check the argument count before doing any work on the string. An
     * omitted optional argument is reported as nil by rb_scan_args. */
    rb_scan_args(argc, argv, "01", &count_trailing);

    n = count_newline_bytes((const unsigned char *)RSTRING_PTR(self),
                            (size_t)RSTRING_LEN(self));

    /* RTEST, not a raw non-zero check: nil is a non-zero VALUE but falsy. */
    if (RTEST(count_trailing)) {
        n++;
    }
    return ULONG2NUM(n);
}
void Init_string_ext() { | |
rb_define_method(rb_cString, "line_count", rb_String_line_count, -1); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment