Created
April 8, 2013 17:30
-
-
Save bradley219/5338662 to your computer and use it in GitHub Desktop.
MySQL extension for user-defined function HAMMING_DISTANCE, which quickly calculates hamming distance between two 64-bit integers passed as arguments.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifdef STANDARD | |
/* STANDARD is defined, don't use any mysql functions */ | |
#include <stdlib.h> | |
#include <stdio.h> | |
#include <string.h> | |
#ifdef __WIN__ | |
typedef unsigned __int64 ulonglong; /* Microsofts 64 bit types */ | |
typedef __int64 longlong; | |
#else | |
typedef unsigned long long ulonglong; | |
typedef long long longlong; | |
#endif /*__WIN__*/ | |
#else | |
#include <my_global.h> | |
#include <my_sys.h> | |
#if defined(MYSQL_SERVER) | |
#include <m_string.h> /* To get strmov() */ | |
#else | |
/* when compiled as standalone */ | |
#include <string.h> | |
#define strmov(a,b) stpcpy(a,b) | |
#define bzero(a,b) memset(a,0,b) | |
#endif | |
#endif | |
#include <mysql.h> | |
#include <ctype.h> | |
#ifdef _WIN32 | |
/* inet_aton needs winsock library */ | |
#pragma comment(lib, "ws2_32") | |
#endif | |
#ifdef HAVE_DLOPEN | |
#if !defined(HAVE_GETHOSTBYADDR_R) || !defined(HAVE_SOLARIS_STYLE_GETHOST) | |
static pthread_mutex_t LOCK_hostname; | |
#endif | |
/* These must be right or mysqld will not find the symbol! */ | |
my_bool hamming_distance_init( UDF_INIT *initid, UDF_ARGS *args, char *message ); | |
longlong hamming_distance( UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error ); | |
void hamming_distance_deinit( UDF_INIT *initid ); | |
my_bool hamming_distance_init( UDF_INIT *initid, UDF_ARGS *args, char *message ) | |
{ | |
if( args->arg_count != 2 ) | |
{ | |
strcpy( message, "Wrong number of arguments to HAMMING_DISTANCE; accepts exactly two arguments." ); | |
return 1; | |
} | |
if( ((args->arg_type[0] != INT_RESULT) && (args->args[0] != NULL)) || ((args->arg_type[1] != INT_RESULT) && (args->args[0] != NULL)) ) | |
{ | |
strcpy( message, "One or both arguments to HAMMING_DISTANCE is not an integer." ); | |
return 1; | |
} | |
initid->maybe_null = 1; | |
return 0; | |
} | |
void hamming_distance_deinit( UDF_INIT *initid __attribute__((unused)) ) | |
{ | |
return; | |
} | |
longlong hamming_distance( UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error ) | |
{ | |
if( (args->args[0] == NULL) || (args->args[1] == NULL) ) | |
{ | |
*is_null = 1; | |
return 0; | |
} | |
longlong hash1 = *((longlong*) args->args[0]); | |
longlong hash2 = *((longlong*) args->args[1]); | |
longlong x = hash1 ^ hash2; | |
const longlong m1 = 0x5555555555555555ULL; | |
const longlong m2 = 0x3333333333333333ULL; | |
const longlong h01 = 0x0101010101010101ULL; | |
const longlong m4 = 0x0f0f0f0f0f0f0f0fULL; | |
x -= (x >> 1) & m1; | |
x = (x & m2) + ((x >> 2) & m2); | |
x = (x + (x >> 4)) & m4; | |
return (x * h01)>>56; | |
} | |
#endif /* HAVE_DLOPEN */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Is it faster than BIT_COUNT( hash1 ^ hash2 ) ?