Created
April 18, 2012 20:34
-
-
Save lalinsky/2416390 to your computer and use it in GitHub Desktop.
Simple audio fingerprint matching in PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
include "utils.php"; | |
$fp = parse_fp($_GET['fp']); | |
$dbh = new PDO('mysql:host=localhost;dbname=fingerprint', 'fingerprint'); | |
$dbh->beginTransaction(); | |
$sth = $dbh->prepare("INSERT INTO fp (length) VALUES (?)"); | |
$sth->execute(array(count($fp))); | |
$fp_id = $dbh->lastInsertId(); | |
$sth = $dbh->prepare(" | |
INSERT INTO fp_item (fp_id, position, value, query_value) | |
VALUES (?, ?, ?, ?) | |
"); | |
$i = 0; | |
foreach ($fp as $value) { | |
$sth->execute(array($fp_id, $i, $value, strip_fp_index_bits($value))); | |
$i += 1; | |
} | |
$dbh->commit(); | |
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- MySQL dump 10.13 Distrib 5.1.58, for debian-linux-gnu (x86_64) | |
-- | |
-- Host: localhost Database: fingerprint | |
-- ------------------------------------------------------ | |
-- Server version 5.1.58-1ubuntu1 | |
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; | |
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; | |
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; | |
/*!40101 SET NAMES utf8 */; | |
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; | |
/*!40103 SET TIME_ZONE='+00:00' */; | |
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; | |
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; | |
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; | |
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; | |
-- | |
-- Table structure for table `fp` | |
-- | |
DROP TABLE IF EXISTS `fp`; | |
/*!40101 SET @saved_cs_client = @@character_set_client */; | |
/*!40101 SET character_set_client = utf8 */; | |
CREATE TABLE `fp` ( | |
`id` int(11) NOT NULL AUTO_INCREMENT, | |
`length` int(11) NOT NULL, | |
PRIMARY KEY (`id`) | |
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1; | |
/*!40101 SET character_set_client = @saved_cs_client */; | |
-- | |
-- Table structure for table `fp_item` | |
-- | |
DROP TABLE IF EXISTS `fp_item`; | |
/*!40101 SET @saved_cs_client = @@character_set_client */; | |
/*!40101 SET character_set_client = utf8 */; | |
CREATE TABLE `fp_item` ( | |
`fp_id` int(11) NOT NULL, | |
`position` int(11) NOT NULL, | |
`value` int(11) NOT NULL, | |
`query_value` int(11) NOT NULL, | |
PRIMARY KEY (`fp_id`,`position`), | |
KEY `fp_item_idx_query` (`query_value`), | |
CONSTRAINT `fp_item_fk_fp_id` FOREIGN KEY (`fp_id`) REFERENCES `fp` (`id`) | |
) ENGINE=InnoDB DEFAULT CHARSET=latin1; | |
/*!40101 SET character_set_client = @saved_cs_client */; | |
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; | |
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; | |
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; | |
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; | |
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; | |
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; | |
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; | |
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; | |
-- Dump completed on 2012-04-18 20:28:21 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
include "utils.php"; | |
$fp = parse_fp($_GET['fp']); | |
$dbh = new PDO('mysql:host=localhost;dbname=fingerprint', 'fingerprint'); | |
$query = prepare_fp_query($fp); | |
$results = $dbh->query(" | |
SELECT fp_id, count(*) AS score | |
FROM fp_item | |
WHERE query_value IN (" . implode(",", $query) . ") | |
GROUP BY fp_id | |
ORDER BY count(*) DESC | |
"); | |
echo "Possible candidates:<br /><ol>"; | |
$candidate_ids = array(); | |
foreach ($results as $result) { | |
$candidate_ids[] = $result["fp_id"]; | |
echo "<li> ID " . $result["fp_id"] . " (" . $result["score"] . ")</li>"; | |
} | |
echo "</ol>"; | |
$items = $dbh->query(" | |
SELECT fp_id, value | |
FROM fp_item | |
WHERE fp_id IN (" . implode(",", $candidate_ids) . ") | |
ORDER BY fp_id, position | |
"); | |
$candidates = array(); | |
foreach ($items as $item) { | |
$candidates[$item['fp_id']][] = $item['value']; | |
} | |
echo "Matches:<br /><ol>"; | |
foreach ($candidate_ids as $id) { | |
list($score, $offset) = compare_fp($candidates[$id], $fp); | |
echo "<li> ID $id with score $score, starting at " . format_time($offset) . "</li>"; | |
if ($score < 0.5) { | |
break; | |
} | |
} | |
echo "</ol>"; | |
?> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
$popcnt_table_8bit = array( | |
0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, | |
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, | |
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, | |
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, | |
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, | |
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, | |
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, | |
3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8, | |
); | |
// count the number of set bits (1s) in the given number | |
function popcount($x) { | |
global $popcnt_table_8bit; | |
return | |
$popcnt_table_8bit[($x >> 0) & 255] + | |
$popcnt_table_8bit[($x >> 8) & 255] + | |
$popcnt_table_8bit[($x >> 16) & 255] + | |
$popcnt_table_8bit[($x >> 24) & 255]; | |
} | |
// convert a fingerprint string into an array of numbers | |
function parse_fp($string) { | |
return explode(',', $_GET['fp']); | |
} | |
// take the lowest 20 bits of the given number | |
function strip_fp_index_bits($x) { | |
return $x & ((1 << 20) - 1); | |
} | |
function prepare_fp_query($fp) { | |
$query = array(); | |
foreach ($fp as $x) { | |
$query[] = strip_fp_index_bits($x); | |
} | |
return $query; | |
} | |
function bit_error_rate($fp1, $fp2, $offset) { | |
if ($offset > 0) { | |
$fp1 = array_slice($fp1, $offset); | |
} | |
elseif ($offset < 0) { | |
$fp2 = array_slice($fp2, -$offset); | |
} | |
$max_i = min(count($fp1), count($fp2)); | |
$errors = 0; | |
$count = 0; | |
for ($i = 0; $i < $max_i; $i++) { | |
$errors += popcount($fp1[$i] ^ $fp2[$i]); // count the set bits in the XOR of the two fingerprints | |
$count += 1; | |
} | |
return max(0.0, 1.0 - 2.0 * $errors / (32.0 * $count)); | |
} | |
function invert_fp($fp) { | |
$map = array(); | |
foreach ($fp as $i => $x) { | |
$map[$x][] = $i; | |
} | |
return $map; | |
} | |
function compare_fp($fp1, $fp2) { | |
// reduce the fingerprint items to 20 bits | |
$fp1_s = prepare_fp_query($fp1); | |
$fp2_s = prepare_fp_query($fp2); | |
// create small inverted indexes of the fingerprints | |
$map1 = invert_fp($fp1_s); | |
$map2 = invert_fp($fp2_s); | |
// find items that are present in both fingerprints | |
$common = array_intersect_key($map1, $map2); | |
$common = array_keys($common); | |
// find all matching offsets | |
$offsets = array(); | |
foreach ($common as $a) { | |
foreach ($map1[$a] as $i) { | |
foreach ($map2[$a] as $j) { | |
$offset = $i - $j; | |
if (!isset($offsets[$offset])) { | |
$offsets[$offset] = 0; | |
} | |
$offsets[$offset] += 1; | |
} | |
} | |
} | |
arsort($offsets); | |
// calculate the actual similarity for the top 20% matching offsets | |
$matches = array(); | |
$max_count = null; | |
foreach ($offsets as $offset => $count) { | |
if (is_null($max_count)) { | |
$max_count = $count; | |
} | |
if ($count < 0.8 * $max_count) { | |
break; | |
} | |
$matches[$offset] = bit_error_rate($fp1, $fp2, $offset); | |
} | |
arsort($matches); | |
foreach ($matches as $offset => $score) { | |
$time_offset = $offset * 0.1238; // each fingerprint item represents 0.1238 seconds | |
return array($score, $time_offset); | |
} | |
return array(0.0, 0); | |
} | |
function format_time($time) { | |
$sign = $time < 0 ? "-" : ""; | |
$time = abs($time); | |
$secs = floor($time); | |
$msecs = ($time - $secs) * 1000; | |
return $sign . sprintf("%d:%02d.%d", $secs / 60, $secs % 60, $msecs); | |
} | |
?> | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment