Skip to content

Instantly share code, notes, and snippets.

@nmmmnu
Created July 19, 2011 22:32
Show Gist options
  • Save nmmmnu/1093910 to your computer and use it in GitHub Desktop.
Save nmmmnu/1093910 to your computer and use it in GitHub Desktop.
Indexing library for PHP/MySQL
CREATE TABLE `data_index` (
`id` int(11) NOT NULL,
`num` smallint(6) NOT NULL,
`word` varchar(33) DEFAULT NULL,
PRIMARY KEY (`id`,`num`),
KEY `word` (`word`,`id`),
KEY `id` (`id`,`word`)
);
<?
require_once "lib_ix.php";
mysql_connect('localhost', 'root', 'pass');
mysql_select_db('database');
mysqlix_index("data", "id", "`desc`", "data_index");
<?
function mysqlix_create($ixtable){
return "
create table `$ixtable`(
id int not null ,
num smallint not null ,
word varchar(33) ,
primary key(id, num) ,
index(word, id) ,
index(id, word)
)
";
}
function mysqlix_value_format($v){
$v = preg_replace('/[^0-9A-Za-z]/', ' ', $v);
$v = strtolower($v);
$vals = explode(" ", $v);
$vv = array();
foreach($vals as $v){
$v = trim($v);
if ($v == "")
continue;
$vv[] = mysql_real_escape_string(md5($v));
}
return $vv;
}
function mysqlix_hot_index($k, $v, $ixtable){
$vals = mysqlix_value_format($v);
//print_r($vals);
mysql_query( "DELETE FROM `$ixtable` WHERE id = $k" );
$no = 0;
foreach($vals as $v){
mysql_query( "REPLACE INTO `$ixtable` VALUES( $k, $no, '$v' )" );
$no++;
}
}
function mysqlix_index($table, $key, $value, $ixtable=""){
if ($ixtable == "")
$ixtable = $table . "_index";
if ($value == "")
return;
$sql = "SELECT `$key` as k, $value as v FROM `$table` ";
mysqlix_index_sql($sql, $ixtable);
}
function mysqlix_index_sql($sql, $ixtable){
$result = mysql_query( $sql );
while ($row = mysql_fetch_array($result)) {
echo sprintf("%8s | %-100s \n", $row[0], $row[1]);
mysqlix_hot_index($row[0], $row[1], $ixtable);
}
}
function mysqlix_search($value, $ixtable, $limit=100){
$v = mysqlix_value_format($value);
if (count($v) == 0)
return array();
$DISTINCT = count($v) > 1 ? "DISTINCT" : "";
$sql = "
SELECT $DISTINCT
t0.id as id
FROM
";
for( $i = 0; $i < count($v); $i++){
$sql = $sql . "
`$ixtable` t$i ";
if ( $i < count($v) -1 )
$sql = $sql . ",";
$sql = $sql . "\n";
}
$sql = $sql . "
WHERE
";
for( $i = 0; $i < count($v); $i++){
$val = $v[$i];
$sql = $sql . "
t$i.word = '$val' AND
";
}
if (count($v) > 1){
for( $i = 0 + 1; $i < count($v); $i++){
$sql = $sql . "
t0.id = t$i.id AND
";
}
}
$sql = $sql . "
1 = 1
LIMIT
$limit
";
echo $sql;
$result = mysql_query($sql);
$x = array();
while ($row = mysql_fetch_array($result))
$x[] = $row['id'];
return array_unique($x);
}
<?
require_once "lib_ix.php";
mysql_connect('localhost', 'root', 'pass');
mysql_select_db('database');
$x = mysqlix_search($argv[1], "data_index");
print_r($x);
@nmmmnu
Copy link
Author

nmmmnu commented Jul 19, 2011

However, performance is poor if there are lots of data.
I tried with ~7M (index is ~28M) and if you search by 2-3 words, result comes in 4-5 sec.

@nmmmnu
Copy link
Author

nmmmnu commented Jul 19, 2011

Possible fix could be dropping "num" column and making only one primary on "word" + "id", but performance is still poor.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment